@ai-dev-methodologies/rlp-desk 0.17.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +49 -0
- package/package.json +1 -1
- package/src/governance.md +19 -3
- package/src/scripts/.run_src_verify.zsh +3725 -0
- package/src/scripts/init_ralph_desk.zsh +3 -2
- package/src/scripts/lib_ralph_desk.zsh +114 -3
- package/src/scripts/run_ralph_desk.zsh +714 -131
|
@@ -0,0 +1,3725 @@
|
|
|
1
|
+
#!/bin/zsh
|
|
2
|
+
set -uo pipefail
|
|
3
|
+
# NOTE: We use set -u (undefined var check) and pipefail, but NOT set -e
|
|
4
|
+
# because the main loop uses explicit error checks throughout.
|
|
5
|
+
|
|
6
|
+
# =============================================================================
|
|
7
|
+
# Ralph Desk Tmux Runner
|
|
8
|
+
#
|
|
9
|
+
# Implements the Leader loop from governance.md section 7 as a shell script.
|
|
10
|
+
# Uses tmux proven patterns: write-then-notify, pane IDs (%N),
|
|
11
|
+
# copy-mode guards, verification-based retry, heartbeat monitoring,
|
|
12
|
+
# idle pane nudging, exponential backoff restarts, atomic file writes.
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# LOOP_NAME=<slug> ./run_ralph_desk.zsh
|
|
16
|
+
#
|
|
17
|
+
# Required env:
|
|
18
|
+
# LOOP_NAME - slug identifier for the campaign
|
|
19
|
+
#
|
|
20
|
+
# Optional env:
|
|
21
|
+
# ROOT - project root (default: $PWD)
|
|
22
|
+
# MAX_ITER - max iterations (default: 20)
|
|
23
|
+
# WORKER_MODEL - claude model for Worker (default: haiku)
|
|
24
|
+
# VERIFIER_MODEL - claude model for Verifier (default: sonnet)
# FINAL_VERIFIER_MODEL - claude model for the final Verifier (default: opus)
|
|
25
|
+
# POLL_INTERVAL - seconds between signal checks (default: 5)
|
|
26
|
+
# ITER_TIMEOUT - per-iteration timeout in seconds (default: 600)
|
|
27
|
+
# HEARTBEAT_STALE_THRESHOLD - seconds before heartbeat is stale (default: 120)
|
|
28
|
+
# MAX_RESTARTS - max restart attempts per worker (default: 3)
|
|
29
|
+
# IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
|
|
30
|
+
# MAX_NUDGES - max nudges per pane per iteration (default: 3)
|
|
31
|
+
#
|
|
32
|
+
# Per-role codex config:
|
|
33
|
+
# WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.5)
|
|
34
|
+
# WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
|
|
35
|
+
# VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.5)
|
|
36
|
+
# VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
|
|
37
|
+
#
|
|
38
|
+
# Consensus scope:
|
|
39
|
+
# CONSENSUS_SCOPE - when consensus applies (default: follows CONSENSUS_MODE, which defaults to off; "all" when legacy VERIFY_CONSENSUS=1 is set)
|
|
40
|
+
# all=every verify, final-only=final ALL only
|
|
41
|
+
#
|
|
42
|
+
# Dependencies: tmux, claude CLI, jq
|
|
43
|
+
# Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
|
|
44
|
+
# =============================================================================
|
|
45
|
+
|
|
46
|
+
# --- Environment Variables ---
|
|
47
|
+
SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
|
|
48
|
+
ROOT="${ROOT:-$PWD}"
|
|
49
|
+
MAX_ITER="${MAX_ITER:-20}"
|
|
50
|
+
WORKER_MODEL="${WORKER_MODEL:-haiku}"
|
|
51
|
+
VERIFIER_MODEL="${VERIFIER_MODEL:-sonnet}"
|
|
52
|
+
FINAL_VERIFIER_MODEL="${FINAL_VERIFIER_MODEL:-opus}"
|
|
53
|
+
POLL_INTERVAL="${POLL_INTERVAL:-5}"
|
|
54
|
+
ITER_TIMEOUT="${ITER_TIMEOUT:-600}"
|
|
55
|
+
HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
|
|
56
|
+
MAX_RESTARTS="${MAX_RESTARTS:-3}"
|
|
57
|
+
IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
|
|
58
|
+
MAX_NUDGES="${MAX_NUDGES:-3}"
|
|
59
|
+
WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
|
|
60
|
+
WITH_SELF_VERIFICATION_REQUESTED="$WITH_SELF_VERIFICATION" # preserves original user intent for traceability (governance §1f)
|
|
61
|
+
SV_SKIPPED_REASON="" # set when SV is disabled despite user request
|
|
62
|
+
|
|
63
|
+
# v0.14.0 — zsh runner restored as primary tmux mode path.
|
|
64
|
+
# v5.7 §4.2's deprecation gate (rejected --flywheel/--flywheel-guard/
|
|
65
|
+
# --with-self-verification) is removed: the Node port shipped without
|
|
66
|
+
# zsh-equivalent safety nets (heartbeat, copy-mode guard, prompt-stall,
|
|
67
|
+
# no-progress, stale-context, claude model upgrade chain, etc.), so the
|
|
68
|
+
# Node leader is now reserved for `--mode agent` (LLM-driven) only.
|
|
69
|
+
# `--mode tmux` invocations from src/node/run.mjs delegate here as a
|
|
70
|
+
# subprocess via env vars. ARCH Wave C / ADR-001: FLYWHEEL and FLYWHEEL_GUARD
|
|
71
|
+
# are NOT implemented in the zsh leader (no dispatch site) and are deprecated —
|
|
72
|
+
# do NOT claim otherwise. WITH_SELF_VERIFICATION is forwarded for traceability,
|
|
73
|
+
# but the SV report is produced by the Node post-pass in run.mjs runTmuxViaZsh
|
|
74
|
+
# after this script exits (this script keeps its $TMUX early-return to avoid the
|
|
75
|
+
# `claude --print` no-TTY hang).
|
|
76
|
+
AUTONOMOUS_MODE="${AUTONOMOUS_MODE:-0}" # 1=don't stop on ambiguity, PRD is authoritative
|
|
77
|
+
# P1-E Lane enforcement: WARN-only by default; --lane-strict opts into BLOCKED
|
|
78
|
+
# escalation. governance §7¾. The opt-in defaults to "warn"; "strict" trips
|
|
79
|
+
# BLOCKED with reason_category=infra_failure + recoverable=true (downgrade
|
|
80
|
+
# from terminal_alert) so an inaccurate mtime audit cannot terminally kill a
|
|
81
|
+
# campaign.
|
|
82
|
+
LANE_MODE="${LANE_MODE:-warn}"
|
|
83
|
+
# US-018 R6 P1-F Test density: WARN by default; --test-density-strict turns
|
|
84
|
+
# init exit non-zero when any AC has < 3 tests (governance §7f).
|
|
85
|
+
TEST_DENSITY_MODE="${TEST_DENSITY_MODE:-warn}"
|
|
86
|
+
# US-021 R9 P2-I consecutive_blocks circuit breaker (governance §8). When the
|
|
87
|
+
# same canonical block reason fires N times in a row the runner writes
|
|
88
|
+
# .sisyphus/mission-abort.json and exits non-zero so contract defects don't
|
|
89
|
+
# silently loop. infra_failure category and the very first iteration are exempt.
|
|
90
|
+
BLOCK_CB_THRESHOLD="${BLOCK_CB_THRESHOLD:-3}"
|
|
91
|
+
CONSECUTIVE_BLOCKS=0
|
|
92
|
+
LAST_BLOCK_REASON=""
|
|
93
|
+
|
|
94
|
+
# US-021 R9 P2-I (documents _check_consecutive_blocks, defined further below): track repeated same-reason blocks. infra_failure category and
|
|
95
|
+
# the very first iteration are exempt (mission setup blocks shouldn't trip
|
|
96
|
+
# the abort). Returns 0 if loop should continue, 1 (after writing
|
|
97
|
+
# mission-abort.json) if the threshold is reached.
|
|
98
|
+
# US-023 R11 P2-K: every campaign must leave at least one cost-log entry, so
# that an empty cost-log cannot be mistaken for "logging is broken".
#
# _emit_final_cost_log() — write the closing cost-log entry exactly once.
# Globals:
#   COST_LOG_FINAL_WRITTEN (read/written) — once-only latch; set to 1 on the
#                                           first call, later calls are no-ops.
#   ITERATION, LOGS_DIR    (read)         — both must be non-empty for the
#                                           entry to be written.
# Calls:   write_cost_log "<iteration>" (defined elsewhere); its stderr is
#          discarded and its failure is ignored (best-effort on exit).
# Returns: always 0.
COST_LOG_FINAL_WRITTEN=0
_emit_final_cost_log() {
  # Latch already tripped — nothing more to do.
  [[ "${COST_LOG_FINAL_WRITTEN:-0}" -ne 0 ]] && return 0
  COST_LOG_FINAL_WRITTEN=1

  # Without an iteration counter and a log directory there is nowhere to write.
  [[ -z "${ITERATION:-}" || -z "${LOGS_DIR:-}" ]] && return 0

  write_cost_log "${ITERATION:-0}" 2>/dev/null || true
  return 0
}
|
|
112
|
+
|
|
113
|
+
# US-024 R12 P0: tmux pane/session lifecycle monitor.
#
# _r12_check_lifecycle() — confirm the tmux session and all three panes exist.
# Polls until the session check and the leader/worker/verifier pane checks all
# succeed. Checks are evaluated in that order and stop at the first failure.
# After each failed round it sleeps 1s and retries; on the 5th consecutive
# failed round it gives up (so at most 4 one-second sleeps are spent).
# Per the original design notes this is invoked from create_session, from the
# top of each main-loop iteration, and after send-keys/paste actions.
#
# Args:    $1 = label of the call site, used in log/sentinel text
#               (default: "unknown")
# Globals: SESSION_NAME, LEADER_PANE, WORKER_PANE, VERIFIER_PANE (read);
#          DEBUG_LOG, CURRENT_US (read, optional)
# Returns: 0 when everything is alive.
# Exits:   1 (whole script) after logging, appending a pane listing to
#          DEBUG_LOG and writing an infra_failure BLOCKED sentinel.
_r12_check_lifecycle() {
  local site="${1:-unknown}"
  local _attempts=0

  until _verify_session_alive "$SESSION_NAME" \
    && _verify_pane_alive "$LEADER_PANE" \
    && _verify_pane_alive "$WORKER_PANE" \
    && _verify_pane_alive "$VERIFIER_PANE"; do
    _attempts=$(( _attempts + 1 ))

    # Budget not yet exhausted: wait a second and probe again.
    if (( _attempts < 5 )); then
      sleep 1
      continue
    fi

    log_error "[r12:$site] tmux session/pane dead after 5x1s polling (5s authoritative budget). session=$SESSION_NAME panes leader=$LEADER_PANE worker=$WORKER_PANE verifier=$VERIFIER_PANE"
    # Snapshot of every pane tmux still knows about, for post-mortem debugging.
    tmux list-panes -a -F '#{session_name}:#{pane_id} dead=#{pane_dead}' 2>&1 | head -20 >> "${DEBUG_LOG:-/dev/null}"
    write_blocked_sentinel "tmux session/pane dead during $site" "${CURRENT_US:-ALL}" "infra_failure"
    exit 1
  done

  return 0
}
|
|
136
|
+
|
|
137
|
+
# _check_consecutive_blocks() — circuit breaker for repeated identical blocks.
# Counts how many times in a row the same canonical block reason has been
# reported. When the count reaches BLOCK_CB_THRESHOLD it writes
# $DESK/.sisyphus/mission-abort.json and signals the caller to abort.
#
# Args:
#   $1 = raw block reason (canonicalised via _canonical_block_reason)
#   $2 = reason category (default: metric_failure)
#   $3 = iteration number (default: $ITERATION, else 0)
# Globals:
#   CONSECUTIVE_BLOCKS, LAST_BLOCK_REASON (read/written)
#   BLOCK_CB_THRESHOLD, DESK              (read)
# Returns:
#   0 — keep looping (also for the exempt cases below)
#   1 — threshold reached; mission-abort.json has been written
#
# Exemptions: category "infra_failure" and iterations <= 1 reset the counter
# and never trip the breaker.
_check_consecutive_blocks() {
  local reason="$1"
  local category="${2:-metric_failure}"
  local iter="${3:-${ITERATION:-0}}"

  if [[ "$category" == "infra_failure" ]] || (( iter <= 1 )); then
    LAST_BLOCK_REASON=""
    CONSECUTIVE_BLOCKS=0
    return 0
  fi

  local canonical
  canonical=$(_canonical_block_reason "$reason" 2>/dev/null)
  if [[ "$canonical" == "$LAST_BLOCK_REASON" && -n "$canonical" ]]; then
    CONSECUTIVE_BLOCKS=$((CONSECUTIVE_BLOCKS + 1))
  else
    CONSECUTIVE_BLOCKS=1
    LAST_BLOCK_REASON="$canonical"
  fi

  if (( CONSECUTIVE_BLOCKS < BLOCK_CB_THRESHOLD )); then
    return 0
  fi

  # Escape the reason before embedding it in a JSON string: a raw quote,
  # backslash, newline or tab would otherwise produce an unparseable file.
  # Backslashes are doubled first so the escapes added afterwards survive.
  local bs='\' dq='"' nl=$'\n' tab=$'\t'
  local json_reason="$canonical"
  json_reason=${json_reason//"$bs"/"$bs$bs"}
  json_reason=${json_reason//"$dq"/"$bs$dq"}
  json_reason=${json_reason//"$nl"/"${bs}n"}
  json_reason=${json_reason//"$tab"/"${bs}t"}

  local abort_dir="$DESK/.sisyphus"
  mkdir -p "$abort_dir" 2>/dev/null
  local abort_file="$abort_dir/mission-abort.json"
  printf '{"reason":"consecutive_blocks","count":%s,"last_reason":"%s","threshold":%s,"timestamp":"%s"}\n' \
    "$CONSECUTIVE_BLOCKS" "$json_reason" "$BLOCK_CB_THRESHOLD" \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$abort_file"
  log_error "Mission abort: same canonical block reason '$canonical' repeated $CONSECUTIVE_BLOCKS times (>= $BLOCK_CB_THRESHOLD)"
  return 1
}
|
|
166
|
+
|
|
167
|
+
# --- Engine Selection (auto-detect from model format) ---
# claude models (haiku/sonnet/opus) with :effort → claude engine + effort
# codex models (gpt-*/spark) with :reasoning → codex engine + reasoning
# plain name → claude engine (no effort/reasoning)
#
# _auto_detect_engine() — split a "model:level" spec and update the named globals.
# All arguments are NAMES of global variables, not values:
#   $1 = variable holding the model spec; rewritten to the bare model name
#   $2 = variable that receives the engine ("claude" or "codex")
#   $3 = variable that receives the codex model       (may be "" to skip)
#   $4 = variable that receives the codex reasoning   (may be "" to skip)
#   $5 = variable that receives the claude effort     (optional, "" to skip)
# A spec without ":" leaves every variable untouched.
# The alias "spark" expands to "gpt-5.3-codex-spark".
# Returns: always 0.
#
# Values are assigned with `typeset -g "name=value"` rather than eval: the spec
# comes from environment variables, and eval would execute any command
# substitution or word-split any whitespace embedded in it.
_auto_detect_engine() {
  local model_var="$1" engine_var="$2" codex_model_var="$3" codex_reasoning_var="$4" effort_var="${5:-}"
  local model_val="${(P)model_var}"

  # Plain model name: nothing to split, nothing to change.
  [[ "$model_val" == *:* ]] || return 0

  local model_part="${model_val%%:*}"   # text before the first ':'
  local level_part="${model_val##*:}"   # text after the last ':'

  case "$model_part" in
    haiku|sonnet|opus)
      # Claude model with effort — keep engine as claude, store effort
      typeset -g "$engine_var=claude"
      typeset -g "$model_var=$model_part"
      if [[ -n "$effort_var" ]]; then
        typeset -g "$effort_var=$level_part"
      fi
      ;;
    *)
      # Codex model with reasoning
      [[ "$model_part" == "spark" ]] && model_part="gpt-5.3-codex-spark"
      typeset -g "$engine_var=codex"
      typeset -g "$model_var=$model_part"
      if [[ -n "$codex_model_var" ]]; then
        typeset -g "$codex_model_var=$model_part"
      fi
      if [[ -n "$codex_reasoning_var" ]]; then
        typeset -g "$codex_reasoning_var=$level_part"
      fi
      ;;
  esac
  return 0
}
|
|
195
|
+
|
|
196
|
+
WORKER_ENGINE="${WORKER_ENGINE:-claude}"
|
|
197
|
+
VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}"
|
|
198
|
+
FINAL_VERIFIER_ENGINE="${FINAL_VERIFIER_ENGINE:-claude}"
|
|
199
|
+
|
|
200
|
+
# Effort levels for Claude models (set by _auto_detect_engine or CLI --worker-model opus:max)
|
|
201
|
+
WORKER_EFFORT="${WORKER_EFFORT:-}"
|
|
202
|
+
VERIFIER_EFFORT="${VERIFIER_EFFORT:-}"
|
|
203
|
+
FINAL_VERIFIER_EFFORT="${FINAL_VERIFIER_EFFORT:-}"
|
|
204
|
+
|
|
205
|
+
# Auto-detect engine from model format for env var path (CLI path uses parse_model_flag)
|
|
206
|
+
_auto_detect_engine WORKER_MODEL WORKER_ENGINE WORKER_CODEX_MODEL WORKER_CODEX_REASONING WORKER_EFFORT
|
|
207
|
+
_auto_detect_engine VERIFIER_MODEL VERIFIER_ENGINE VERIFIER_CODEX_MODEL VERIFIER_CODEX_REASONING VERIFIER_EFFORT
|
|
208
|
+
_auto_detect_engine FINAL_VERIFIER_MODEL FINAL_VERIFIER_ENGINE "" "" FINAL_VERIFIER_EFFORT
|
|
209
|
+
WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.5}"
|
|
210
|
+
WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
|
|
211
|
+
VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.5}"
|
|
212
|
+
VERIFIER_CODEX_REASONING="${VERIFIER_CODEX_REASONING:-high}" # low|medium|high
|
|
213
|
+
CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
214
|
+
|
|
215
|
+
# --- Verify Mode ---
|
|
216
|
+
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
217
|
+
# Consensus: off|all|final-only (replaces VERIFY_CONSENSUS + FINAL_CONSENSUS + CONSENSUS_SCOPE)
|
|
218
|
+
CONSENSUS_MODE="${CONSENSUS_MODE:-off}" # off|all|final-only
|
|
219
|
+
CONSENSUS_MODEL="${CONSENSUS_MODEL:-gpt-5.5:medium}" # per-US cross-verifier (lighter)
|
|
220
|
+
FINAL_CONSENSUS_MODEL="${FINAL_CONSENSUS_MODEL:-gpt-5.5:high}" # final cross-verifier (stricter)
|
|
221
|
+
# Legacy compat: map old flags to CONSENSUS_MODE
|
|
222
|
+
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
|
|
223
|
+
CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}"
|
|
224
|
+
elif [[ "${FINAL_CONSENSUS:-0}" = "1" ]]; then
|
|
225
|
+
CONSENSUS_MODE="final-only"
|
|
226
|
+
fi
|
|
227
|
+
CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-${CONSENSUS_MODE}}"
|
|
228
|
+
CB_THRESHOLD="${CB_THRESHOLD:-6}" # consecutive failures before BLOCKED (default: 6)
|
|
229
|
+
# Effective CB threshold: doubled when consensus mode active
|
|
230
|
+
if [[ "$CONSENSUS_MODE" != "off" ]]; then
|
|
231
|
+
EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
|
|
232
|
+
else
|
|
233
|
+
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
|
|
234
|
+
fi
|
|
235
|
+
_API_MAX_RETRIES="${_API_MAX_RETRIES:-5}"
|
|
236
|
+
_API_RETRY_INTERVAL_S="${_API_RETRY_INTERVAL_S:-30}"
|
|
237
|
+
|
|
238
|
+
# --- Derived Paths ---
|
|
239
|
+
DESK="$ROOT/${RLP_DESK_RUNTIME_DIR:-.rlp-desk}"
|
|
240
|
+
# v0.13.0: legacy detection — refuse to run when .claude/ralph-desk/ is still
|
|
241
|
+
# present. init mode auto-migrates; run mode protects in-flight campaigns.
|
|
242
|
+
if [[ -d "$ROOT/.claude/ralph-desk" ]]; then
|
|
243
|
+
print -u2 "ERROR: Legacy .claude/ralph-desk/ detected at $ROOT/.claude/ralph-desk."
|
|
244
|
+
print -u2 "Run mode does not auto-migrate to protect in-flight campaigns."
|
|
245
|
+
print -u2 "Run: mv .claude/ralph-desk ${RLP_DESK_RUNTIME_DIR:-.rlp-desk} then re-run."
|
|
246
|
+
exit 1
|
|
247
|
+
fi
|
|
248
|
+
# US-026 R14 P0: project-root-hashed runner lockfile prevents duplicate runner spawns
|
|
249
|
+
# on the same project root while allowing parallel runs across different projects.
|
|
250
|
+
# shasum is mac-default; sha1sum on Linux; cksum is POSIX-final fallback.
|
|
251
|
+
ROOT_HASH=$(printf '%s' "$ROOT" | { shasum 2>/dev/null || sha1sum 2>/dev/null || cksum; } | awk '{print substr($1,1,8)}')
|
|
252
|
+
RUNNER_LOCKFILE_PATH="$DESK/logs/.rlp-desk-runner-$ROOT_HASH.lock"
|
|
253
|
+
RUNNER_LOCKDIR="${RUNNER_LOCKFILE_PATH}.d"
|
|
254
|
+
PROMPTS_DIR="$DESK/prompts"
|
|
255
|
+
CONTEXT_DIR="$DESK/context"
|
|
256
|
+
MEMOS_DIR="$DESK/memos"
|
|
257
|
+
LOGS_DIR="$DESK/logs/$SLUG"
|
|
258
|
+
RUNTIME_DIR="$LOGS_DIR/runtime"
|
|
259
|
+
PRD_FILE="$DESK/plans/prd-$SLUG.md"
|
|
260
|
+
TEST_SPEC_FILE="$DESK/plans/test-spec-$SLUG.md"
|
|
261
|
+
# --- Analytics Directory (v5.7 §4.11.b: project-local) ---
|
|
262
|
+
# Was previously $HOME/.claude/ralph-desk/analytics/<slug>--<hash> (cross-project
|
|
263
|
+
# rollup). With v0.12.0 the canonical location is project-local; cross-project
|
|
264
|
+
# rollup is the Leader's responsibility via ~/.claude/ralph-desk/registry.jsonl
|
|
265
|
+
# (Worker/Verifier prompts never reference the registry path — see §4.11.c).
|
|
266
|
+
ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
|
|
267
|
+
ANALYTICS_DIR="$DESK/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
|
|
268
|
+
CAMPAIGN_JSONL="$ANALYTICS_DIR/campaign.jsonl"
|
|
269
|
+
METADATA_FILE="$ANALYTICS_DIR/metadata.json"
|
|
270
|
+
WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
|
|
271
|
+
VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
|
|
272
|
+
CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
|
|
273
|
+
MEMORY_FILE="$MEMOS_DIR/${SLUG}-memory.md"
|
|
274
|
+
SIGNAL_FILE="$MEMOS_DIR/${SLUG}-iter-signal.json"
|
|
275
|
+
DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
|
|
276
|
+
VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
|
|
277
|
+
# v0.14.2 Bug Report #4: codex sometimes writes the verdict file to the
|
|
278
|
+
# pre-v0.13.0 legacy path despite the prompt instructing otherwise (CWD
|
|
279
|
+
# heuristics inside the codex CLI). Track the legacy path so the no-progress
|
|
280
|
+
# watcher and the harvest step can both fall back to it before BLOCKing the
|
|
281
|
+
# campaign. Auto-migration logic lives in _migrate_legacy_verdict().
|
|
282
|
+
LEGACY_VERDICT_FILE="$ROOT/.claude/ralph-desk/memos/${SLUG}-verify-verdict.json"
|
|
283
|
+
COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
|
|
284
|
+
BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"
|
|
285
|
+
LOCKFILE_PATH="$DESK/logs/.rlp-desk-${SLUG}.lock"
|
|
286
|
+
STATUS_FILE="$RUNTIME_DIR/status.json"
|
|
287
|
+
SESSION_CONFIG="$RUNTIME_DIR/session-config.json"
|
|
288
|
+
WORKER_HEARTBEAT="$RUNTIME_DIR/worker-heartbeat.json"
|
|
289
|
+
VERIFIER_HEARTBEAT="$RUNTIME_DIR/verifier-heartbeat.json"
|
|
290
|
+
COST_LOG="$LOGS_DIR/cost-log.jsonl"
|
|
291
|
+
|
|
292
|
+
# --- Session Naming ---
|
|
293
|
+
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
|
|
294
|
+
SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"
|
|
295
|
+
|
|
296
|
+
# --- State Tracking ---
|
|
297
|
+
typeset -A LAST_PANE_CONTENT
|
|
298
|
+
typeset -A PANE_IDLE_SINCE
|
|
299
|
+
typeset -A WORKER_RESTARTS
|
|
300
|
+
typeset -A US_FAIL_HISTORY
|
|
301
|
+
STALE_CONTEXT_COUNT=0
|
|
302
|
+
HEARTBEAT_STALE_COUNT=0
|
|
303
|
+
MONITOR_FAILURE_COUNT=0
|
|
304
|
+
CONSECUTIVE_FAILURES=0
|
|
305
|
+
PREV_CONTEXT_HASH=""
|
|
306
|
+
PREV_PRD_HASH=""
|
|
307
|
+
PREV_PRD_US_LIST=""
|
|
308
|
+
_PRD_CHANGED=0
|
|
309
|
+
ITERATION=0
|
|
310
|
+
START_TIME=$(date +%s)
|
|
311
|
+
BASELINE_COMMIT="" # git HEAD at campaign start (captured before loop)
|
|
312
|
+
CAMPAIGN_REPORT_GENERATED=0 # guard against double-generation in cleanup trap
|
|
313
|
+
SV_REPORT_GENERATED=0 # guard against double-generation in generate_sv_report
|
|
314
|
+
VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
|
|
315
|
+
CONSENSUS_ROUND=0 # current consensus round for current US
|
|
316
|
+
US_LIST="" # comma-separated US IDs from PRD (per-us mode)
|
|
317
|
+
LOCKFILE_ACQUIRED=0
|
|
318
|
+
LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}" # 0|1 — set by --lock-worker-model; disables progressive upgrade
|
|
319
|
+
_SAME_US_FAIL_COUNT=0 # consecutive same-US fail counter (upgrade trigger at >= 2)
|
|
320
|
+
_LAST_FAILED_US="" # last failed US ID (same-US tracking for upgrade logic)
|
|
321
|
+
_MODEL_UPGRADED=0 # 1 if Worker model was auto-upgraded during campaign
|
|
322
|
+
_ORIGINAL_WORKER_MODEL="" # WORKER_MODEL saved before first upgrade (for restore on pass)
|
|
323
|
+
_ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade
|
|
324
|
+
|
|
325
|
+
# =============================================================================
|
|
326
|
+
# Utility Functions
|
|
327
|
+
# =============================================================================
|
|
328
|
+
|
|
329
|
+
DEBUG="${DEBUG:-0}"
|
|
330
|
+
DEBUG_LOG="$ANALYTICS_DIR/debug.log"
|
|
331
|
+
|
|
332
|
+
# Source shared business logic.
# The library is expected next to this script; ${0:A:h} is the absolute,
# symlink-resolved directory of the running script. LIB_DIR may be exported by
# the caller to point at a different checkout.
LIB_DIR="${LIB_DIR:-${0:A:h}}"
if [[ ! -r "$LIB_DIR/lib_ralph_desk.zsh" ]]; then
  # Fail fast: continuing without the library would only surface later as
  # "command not found" errors from the helpers it defines.
  print -u2 "ERROR: shared library not found or unreadable: $LIB_DIR/lib_ralph_desk.zsh"
  exit 1
fi
source "$LIB_DIR/lib_ralph_desk.zsh"
|
|
335
|
+
|
|
336
|
+
# A16: Warn if running in foreground (may conflict with Claude Code pane)
|
|
337
|
+
if [[ -z "${RLP_BACKGROUND:-}" ]]; then
|
|
338
|
+
echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
|
|
339
|
+
echo " Recommended: launch via Bash tool with run_in_background: true" >&2
|
|
340
|
+
echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
|
|
341
|
+
fi
|
|
342
|
+
|
|
343
|
+
# check_dead_pane() — classify a pane's foreground command as dead or alive.
# A pane counts as dead when its current command is empty, is a bare "zsh",
# or is "bash" while the engine is anything other than codex (the original
# notes say the codex trigger legitimately runs in bash).
# Args:
#   $1 = pane_current_command as reported by tmux
#   $2 = engine: claude|codex (default: claude)
#   $3 = role: worker|verifier (default: worker; accepted but not consulted)
# Returns: 0 if dead, 1 if alive
check_dead_pane() {
  local poll_cmd="$1"
  local engine="${2:-claude}"
  local role="${3:-worker}"

  case "$poll_cmd" in
    ""|zsh)
      return 0  # nothing running, or only the login shell is left
      ;;
    bash)
      # bash is the normal foreground command for codex; dead otherwise
      [[ "$engine" == "codex" ]] || return 0
      ;;
  esac
  return 1  # alive
}
|
|
362
|
+
|
|
363
|
+
# launch_worker_codex() — launch codex Worker TUI, send instruction, verify submission
# Sequence: clear any lingering foreground process, paste the launch command,
# wait up to 30 polls (1s apart) for the codex prompt glyph (›), paste the
# instruction, then poll up to 15 times (2s apart) for an activity keyword,
# re-typing the instruction once at attempt 8 and pressing Enter each round.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 once the instruction has been sent (a WARNING is logged when no
#          activity was ever observed), 1 if the TUI prompt never appeared.
launch_worker_codex() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  # Declared once, outside the loops: zsh prints "name=value" to stdout when
  # `local name` is re-executed for a variable that is already set.
  local _pre_cmd _pane_text pane_check visible_text
  local _codex_ready=0
  local _codex_wait=0
  local submit_attempts=0
  local instruction_prefix="Read and execute the instructions in"
  local worker_instruction="$instruction_prefix $prompt_file"
  local activity_pattern="working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing"

  log " Launching Worker codex TUI in pane $pane_id..."
  # Clean pane before launch: kill any lingering process, ensure fresh shell
  _pre_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null || echo "")
  if [[ -n "$_pre_cmd" && "$_pre_cmd" != "zsh" && "$_pre_cmd" != "bash" ]]; then
    log_debug "Worker pane has lingering process ($_pre_cmd), cleaning..."
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 1
  fi
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for codex TUI prompt (›) instead of shell prompt
  while (( _codex_wait < 30 )); do
    sleep 1
    _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    if [[ "$_pane_text" == *'›'* ]]; then
      _codex_ready=1
      log_debug "Worker codex TUI ready after ${_codex_wait}s"
      break
    fi
    _codex_wait=$(( _codex_wait + 1 ))
  done
  if (( ! _codex_ready )); then
    log_error "Worker codex TUI not ready after 30s"
    return 1
  fi

  # Send instruction to codex TUI
  sleep 1
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker codex instruction sent (${#worker_instruction} chars)"

  # Submit loop — verify codex started working
  while (( submit_attempts < 15 )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Ignore the echoed instruction line: the prompt path on it may itself
    # contain an activity keyword (e.g. a ".../working/..." directory).
    # A path wrapped onto a following line is not covered by this filter.
    visible_text=$(grep -vF -- "$instruction_prefix" <<< "$pane_check" || true)
    if grep -qi -- "$activity_pattern" <<< "$visible_text" 2>/dev/null; then
      log_debug "Worker codex started working after $((submit_attempts + 1)) checks"
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # Still returns 0 (unchanged for callers), but no longer silently.
  if (( submit_attempts >= 15 )); then
    log " WARNING: Worker codex instruction not confirmed after 15 submit checks"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 engine=codex"
  fi
  return 0
}
|
|
435
|
+
|
|
436
|
+
# launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
# Handles: TUI startup, wait_for_pane_ready, instruction send, 15-iteration submit loop,
# restart recovery on submit failure.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 on success, 1 on fatal failure (caller writes BLOCKED).
#          Fatal means the pane was not ready after the initial launch or
#          after the restart attempt.
launch_worker_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  # Declared once, outside the loop: zsh prints "name=value" to stdout when
  # `local name` is re-executed for a variable that is already set.
  local pane_check visible_text
  local submit_attempts=0
  local instruction_prefix="Read and execute the instructions in"
  local worker_instruction="$instruction_prefix $prompt_file"
  local activity_pattern="esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob"

  log " Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working
  while (( submit_attempts < 15 )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Drop the echoed instruction line before looking for activity: it
    # contains "execute", which the case-insensitive "exec" marker would
    # otherwise match even when nothing was ever submitted.
    visible_text=$(grep -vF -- "$instruction_prefix" <<< "$pane_check" || true)
    if grep -qi -- "$activity_pattern" <<< "$visible_text" 2>/dev/null; then
      log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" C-m
      sleep 1
    fi
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # If 15 attempts failed, restart claude and retry
  if (( submit_attempts >= 15 )); then
    log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
    tmux send-keys -t "$pane_id" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
    sleep 2
    wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
    paste_to_pane "$pane_id" "$worker_launch"
    tmux send-keys -t "$pane_id" C-m
    if wait_for_pane_ready "$pane_id" 30; then
      sleep 3
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
      log " Worker restarted and instruction re-sent"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
    else
      log_error "Worker restart failed — pane not ready"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
      # Same condition that is fatal on the initial launch: report it so the
      # caller can write BLOCKED instead of waiting on a worker that is not
      # running.
      return 1
    fi
  fi

  return 0
}
|
|
516
|
+
|
|
517
|
+
# launch_verifier_codex() — launch codex Verifier TUI, send instruction, verify submission
# Sequence: clear any lingering foreground process, paste the launch command,
# wait up to 30 polls (1s apart) for the codex prompt glyph (›), paste the
# instruction, then poll up to 15 times (2s apart) for an activity keyword,
# re-typing the instruction once at attempt 8 and pressing Enter each round.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction has been sent (a WARNING is logged when no
#          activity was ever observed), 1 if the TUI prompt never appeared.
launch_verifier_codex() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  # Declared once, outside the loops: zsh prints "name=value" to stdout when
  # `local name` is re-executed for a variable that is already set.
  local _pre_cmd _pane_text vs_check visible_text
  local _codex_ready=0
  local _codex_wait=0
  local submit_attempts=0
  local instruction_prefix="Read and execute the instructions in"
  local verifier_instruction="$instruction_prefix $prompt_file"
  local activity_pattern="working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing"

  log " Launching Verifier codex TUI in pane $pane_id..."
  # Clean pane before launch: kill any lingering process, ensure fresh shell
  _pre_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null || echo "")
  if [[ -n "$_pre_cmd" && "$_pre_cmd" != "zsh" && "$_pre_cmd" != "bash" ]]; then
    log_debug "Verifier pane has lingering process ($_pre_cmd), cleaning..."
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 1
  fi
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for codex TUI prompt (›) instead of shell prompt
  while (( _codex_wait < 30 )); do
    sleep 1
    _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    if [[ "$_pane_text" == *'›'* ]]; then
      _codex_ready=1
      log_debug "Verifier codex TUI ready after ${_codex_wait}s"
      break
    fi
    _codex_wait=$(( _codex_wait + 1 ))
  done
  if (( ! _codex_ready )); then
    log_error "Verifier codex TUI not ready after 30s"
    return 1
  fi

  sleep 1
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier codex instruction sent"

  # Submit loop — verify codex started working
  while (( submit_attempts < 15 )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Ignore the echoed instruction line: the prompt path on it may itself
    # contain an activity keyword (e.g. a ".../working/..." directory).
    # A path wrapped onto a following line is not covered by this filter.
    visible_text=$(grep -vF -- "$instruction_prefix" <<< "$vs_check" || true)
    if grep -qi -- "$activity_pattern" <<< "$visible_text" 2>/dev/null; then
      log_debug "Verifier codex started working after $((submit_attempts + 1)) checks"
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # Still returns 0 (unchanged for callers), but no longer silently.
  if (( submit_attempts >= 15 )); then
    log " WARNING: Verifier codex instruction not confirmed after 15 submit checks"
    log_debug "[GOV] iter=$iter verifier_instruction_failed=true attempts=15 engine=codex"
  fi
  return 0
}
|
|
588
|
+
|
|
589
|
+
# launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
# Args: $1=pane_id $2=prompt_file $3=iteration (part of the call signature; not read here)
#       $4=launch_cmd
# Returns: 0 once the instruction has been sent (fail-open: also when no "working"
#          marker was ever observed), 1 if wait_for_pane_ready reports failure
launch_verifier_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local verifier_launch="$4"

  log " Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  # Pause before typing the instruction into the freshly started TUI.
  sleep 3
  local verifier_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working.
  # Locals are declared once, outside the loop: in zsh, re-running `local name`
  # for an already-set parameter prints `name=value` (unless TYPESET_SILENT is
  # set), which would dump the captured pane text on every iteration.
  local submit_attempts=0
  local started=0
  local vs_check=""
  while (( submit_attempts < 15 )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" <<< "$vs_check" 2>/dev/null; then
      log_debug "Verifier started working after $((submit_attempts + 1)) checks"
      started=1
      break
    fi
    if (( submit_attempts == 8 )); then
      # Halfway through: clear the input line and type the instruction again.
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Double Enter nudge in case the first submit was swallowed.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    (( submit_attempts++ ))
  done

  if (( ! started )); then
    # Still fail-open (return 0), but leave a trace for debugging.
    log_debug "Verifier showed no working marker after ${submit_attempts} checks (proceeding anyway)"
  fi
  return 0
}
|
|
637
|
+
|
|
638
|
+
# handle_worker_exit_codex() (defined further below, after the shared
# _bug8_check_synth_allowed gate) — handle codex worker process exit (1-shot exec).
# On exit: check done-claim, auto-generate iter-signal.
# Args: $1=iteration $2=signal_file
# Returns: 0 (signal generated), 1 (error)
|
|
642
|
+
# Bug #8 PR-B (codex critic P1.2 fix): shared synthesis gate used by both
# handle_worker_exit_codex and the inline-polling A4 path. Returns:
#   0 = synthesize allowed (caller writes signal_file + emits audit)
#   1 = BLOCKED (this function already wrote sentinel + emitted audit)
# Args: $1=iter $2=us_id (defaults to $CURRENT_US, then "ALL")
#       $3=audit_clean_code (e.g. codex_exit_with_done_claim or
#          inline_polling_a4_clean) — accepted for the callers' benefit but not
#          read here; the caller emits the "clean" audit itself.
_bug8_check_synth_allowed() {
  local iter="$1"
  local us_id="${2:-${CURRENT_US:-ALL}}"
  # ${3:-} so a two-argument call does not abort under `set -u`.
  local audit_clean="${3:-}"

  # Gate 1: done-claim must exist.
  if [[ ! -f "$DONE_CLAIM_FILE" ]]; then
    log_error " Bug #8: no done-claim. Refusing to synthesize verify signal."
    log_debug "[GOV] iter=$iter bug8=block_codex_exit_no_done_claim"
    write_blocked_sentinel \
      "Codex worker exited without writing done-claim (refusing to synthesize verify signal)" \
      "$us_id" \
      "infra_failure"
    _emit_a4_fallback_audit "$us_id" "$iter" "blocked_codex_exit_no_done_claim"
    return 1
  fi

  # Gate 2: git toplevel must equal $ROOT (canonicalized — macOS resolves
  # /var → /private/var, NTFS may have 8.3 short paths; compare realpaths).
  local _bug8_top _bug8_top_canon="" _bug8_root_canon=""
  _bug8_top=$(git -C "$ROOT" rev-parse --show-toplevel 2>/dev/null)
  if [[ -n "$_bug8_top" ]]; then
    _bug8_top_canon=$(cd "$_bug8_top" 2>/dev/null && pwd -P 2>/dev/null)
    _bug8_root_canon=$(cd "$ROOT" 2>/dev/null && pwd -P 2>/dev/null)
  fi
  # Fail closed: if either path could not be canonicalized, two empty strings
  # must not compare as "equal" and let the gate pass.
  if [[ -z "$_bug8_top" || -z "$_bug8_top_canon" || -z "$_bug8_root_canon" \
        || "$_bug8_top_canon" != "$_bug8_root_canon" ]]; then
    log_error " Bug #8: git unverifiable at \$ROOT=$ROOT (toplevel='$_bug8_top'). Refusing synthesis."
    log_debug "[GOV] iter=$iter bug8=block_git_unverifiable root=$ROOT toplevel=$_bug8_top"
    write_blocked_sentinel \
      "git status unverifiable at $ROOT (toplevel='$_bug8_top'); refusing to synthesize verify signal" \
      "$us_id" \
      "infra_failure"
    _emit_a4_fallback_audit "$us_id" "$iter" "blocked_git_unverifiable"
    return 1
  fi

  # Gate 3: tree must be clean.
  local _bug8_dirty
  _bug8_dirty=$(git -C "$ROOT" status --porcelain 2>/dev/null)
  if [[ -n "$_bug8_dirty" ]]; then
    # Report at most the first five porcelain lines, joined with '|'.
    local _bug8_first5
    _bug8_first5=$(printf '%s\n' "$_bug8_dirty" | head -n 5 | tr '\n' '|' | sed 's/|$//')
    log_error " Bug #8: done-claim present but tree dirty. Refusing synthesis. dirty: $_bug8_first5"
    log_debug "[GOV] iter=$iter bug8=block_dirty_tree us_id=$us_id dirty='$_bug8_first5'"
    write_blocked_sentinel \
      "worker_incomplete_uncommitted: done-claim present but tree dirty ($_bug8_first5)" \
      "$us_id" \
      "metric_failure"
    _emit_a4_fallback_audit "$us_id" "$iter" "blocked_dirty_tree"
    return 1
  fi

  # All gates passed — synthesize allowed.
  return 0
}
|
|
701
|
+
|
|
702
|
+
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec).
# Runs the shared _bug8_check_synth_allowed gate; when it passes, writes a
# "verify" iter-signal to $2 on the worker's behalf.
# Args: $1=iteration $2=signal_file
# Returns: 0 (signal generated), 1 (gate blocked; the gate already wrote the
#          BLOCKED sentinel and audit record)
handle_worker_exit_codex() {
  local iter="$1"
  local signal_file="$2"

  log " Codex worker process exited. Checking for done-claim + clean tree..."

  if ! _bug8_check_synth_allowed "$iter" "${CURRENT_US:-ALL}" "codex_exit_with_done_claim"; then
    return 1
  fi

  # All 3 gates passed: done-claim present, git OK, tree clean → synthesize.
  local dc_us_id
  dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
  log " Codex worker completed with done-claim (us_id=$dc_us_id) and clean tree. Auto-generating signal."

  # Build the signal with jq so a us_id containing quotes or backslashes is
  # escaped instead of producing invalid JSON. The iteration is emitted as a
  # number when it parses as one, otherwise as a string.
  local signal_ts signal_json
  signal_ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  signal_json=$(jq -cn \
    --arg iter "$iter" \
    --arg us_id "$dc_us_id" \
    --arg ts "$signal_ts" \
    '{iteration: ($iter | (tonumber? // $iter)), status: "verify", us_id: $us_id, summary: "auto-generated after codex exit (clean tree)", timestamp: $ts}' \
    2>/dev/null)
  if [[ -z "$signal_json" ]]; then
    # jq unavailable or failed: fall back to the legacy hand-built form so the
    # signal is still produced (best effort).
    signal_json='{"iteration":'"$iter"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated after codex exit (clean tree)","timestamp":"'"$signal_ts"'"}'
  fi
  printf '%s\n' "$signal_json" > "$signal_file"

  # v0.15.4 PR-B2-FIX: codex worker pane already exited — reaper would no-op,
  # but lock done-claim as defense-in-depth so any orphaned subprocess cannot
  # rewrite the file before lib_ralph_desk.zsh:602 archives it.
  _lock_sentinel "$DONE_CLAIM_FILE"
  _emit_a4_fallback_audit "$dc_us_id" "$iter" "codex_exit_with_done_claim_clean"
  return 0
}
|
|
724
|
+
|
|
725
|
+
# handle_worker_exit_claude() — react to a claude worker that exited without
# leaving a signal file by delegating to restart_worker.
# Args: $1=pane_id $2=iteration $3=trigger_file
# Returns: 0 when restart_worker succeeds, 1 otherwise (its status is
#          normalized to 1)
handle_worker_exit_claude() {
  local pane_id="$1" iter="$2" trigger_file="$3"

  log_error "Worker exited without writing signal file"
  restart_worker "$pane_id" "$iter" "$trigger_file" && return 0
  return 1
}
|
|
740
|
+
|
|
741
|
+
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
# replace_worker_pane() — kill a pane and create a fresh one in the same slot.
# Args: $1=old pane id  $2=role ("worker" or "verifier")
# Globals read: WORKER_PANE, VERIFIER_PANE, LEADER_PANE, ROOT, ITERATION,
#               SESSION_CONFIG
# Outputs: the new pane id on stdout.
#   NOTE(review): callers presumably capture stdout, which only works if
#   log/log_debug do not write to stdout — confirm.
# Returns: 0 on success, 1 if tmux could not create the replacement pane (in
#          which case nothing is printed and the session config is untouched).
replace_worker_pane() {
  local old_pane="$1"
  local role="$2" # "worker" or "verifier"

  log " Replacing dead $role pane $old_pane..."
  tmux kill-pane -t "$old_pane" 2>/dev/null

  # Create fresh pane maintaining original layout: worker(top-right) / verifier(bottom-right)
  local new_pane=""
  if [[ "$role" == "verifier" ]]; then
    # Verifier goes below worker: split vertically from worker pane
    if tmux display-message -t "$WORKER_PANE" -p '#{pane_id}' &>/dev/null; then
      new_pane=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    else
      # Fallback: worker pane also dead, split horizontally from leader
      new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    fi
  else
    # Worker goes above verifier: split vertically before verifier pane
    if tmux display-message -t "$VERIFIER_PANE" -p '#{pane_id}' &>/dev/null; then
      new_pane=$(tmux split-window -v -b -d -t "$VERIFIER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    else
      # Fallback: verifier pane also dead, split horizontally from leader
      new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    fi
  fi

  # A failed split leaves new_pane empty; do not record or return an empty id.
  if [[ -z "$new_pane" ]]; then
    log_error "Failed to create replacement $role pane (old pane $old_pane)"
    return 1
  fi

  log " New $role pane: $new_pane (replaced $old_pane)"
  log_debug "[FLOW] iter=${ITERATION:-} pane_replaced=${role} old=$old_pane new=$new_pane"

  # Update session-config.json with new pane ID
  if [[ -f "$SESSION_CONFIG" ]]; then
    # Only overwrite the config when jq actually produced output, so a parse
    # failure cannot replace the file with empty content.
    local updated_config
    if updated_config=$(jq --arg role "$role" --arg pane "$new_pane" \
         '.panes[$role] = $pane' "$SESSION_CONFIG" 2>/dev/null) \
       && [[ -n "$updated_config" ]]; then
      printf '%s\n' "$updated_config" | atomic_write "$SESSION_CONFIG"
      log_debug "Updated session-config.json: $role pane → $new_pane"
    else
      log_error "Could not update $SESSION_CONFIG with new $role pane $new_pane"
    fi
  fi

  echo "$new_pane"
}
|
|
781
|
+
|
|
782
|
+
# =============================================================================
|
|
783
|
+
# Dependency Checks
|
|
784
|
+
# =============================================================================
|
|
785
|
+
|
|
786
|
+
# --- governance.md s7 step 1: Validate prerequisites before starting ---
# check_dependencies() — verify the external CLIs this run needs are on PATH.
# Globals read: WORKER_ENGINE, VERIFIER_ENGINE, CONSENSUS_MODE
# Globals written: CLAUDE_BIN (when claude is needed), CODEX_BIN (when codex is needed)
# Exits the script with status 1 if any required tool is missing (after
# logging every missing tool).
check_dependencies() {
  local missing=0
  local needs_claude=1
  local needs_codex=0

  # claude required only when claude engine is used for Worker or Verifier execution;
  # codex-only campaigns can run without claude — generate_sv_report degrades gracefully
  if [[ "$WORKER_ENGINE" == "codex" && "$VERIFIER_ENGINE" == "codex" ]]; then
    needs_claude=0
  fi
  # Codex binary required only when engine=codex or consensus verification is enabled
  if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
    needs_codex=1
  fi

  command -v tmux >/dev/null 2>&1 || {
    log_error "tmux is required but not found. Install with: brew install tmux"
    missing=1
  }

  if (( needs_claude )) && ! command -v claude >/dev/null 2>&1; then
    log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
    missing=1
  fi

  command -v jq >/dev/null 2>&1 || {
    log_error "jq is required but not found. Install with: brew install jq"
    missing=1
  }

  if (( needs_codex )) && ! command -v codex >/dev/null 2>&1; then
    log_error "codex CLI not found. Install: npm install -g @openai/codex"
    missing=1
  fi

  (( missing )) && exit 1

  # Resolve full path to claude binary when claude engine is in use
  if (( needs_claude )); then
    CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
    log " Claude binary: $CLAUDE_BIN"
  fi

  # Resolve codex binary if needed
  if (( needs_codex )); then
    CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
    log " Codex binary: $CODEX_BIN"
  fi
}
|
|
833
|
+
|
|
834
|
+
# =============================================================================
|
|
835
|
+
# Session Management (tmux pattern: pane IDs)
|
|
836
|
+
# =============================================================================
|
|
837
|
+
|
|
838
|
+
# --- governance.md s7 step 1: Check for existing sessions ---
# check_existing_sessions() — refuse to start when another tmux session for
# the same slug already exists.
# Globals read: SLUG
# Exits the script with status 1 (after listing the sessions on stdout) when a
# session named "rlp-desk-<SLUG>-*" other than the current one is found.
check_existing_sessions() {
  local current_session
  current_session=$(tmux display-message -p '#{session_name}' 2>/dev/null || echo "")

  # Match by literal prefix / literal equality instead of interpolating the
  # slug and session name into grep regexes, where '.', '[' etc. would act as
  # metacharacters.
  local prefix="rlp-desk-${SLUG}-"
  local -a existing
  existing=()
  local name
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    [[ "$name" == "$prefix"* ]] || continue
    [[ "$name" == "$current_session" ]] && continue
    existing+=("$name")
  done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null)

  if (( ${#existing[@]} > 0 )); then
    log_error "Existing tmux session(s) found for slug '$SLUG':"
    for name in "${existing[@]}"; do
      echo " - $name"
    done
    echo ""
    echo "Kill existing session first:"
    echo " tmux kill-session -t <session-name>"
    exit 1
  fi
}
|
|
855
|
+
|
|
856
|
+
# --- governance.md s7 step 1: Create tmux session with pane IDs (%N) ---
# create_session() — build the leader/worker/verifier pane layout and persist
# its description to $SESSION_CONFIG.
# Globals read: TMUX, ROOT, SLUG, RLP_BACKGROUND, COST_LOG, SESSION_CONFIG and
#   the model/engine/verification/config settings serialized below.
# Globals written: SESSION_NAME, LEADER_PANE, WORKER_PANE, VERIFIER_PANE,
#   BASELINE_COMMIT
# Exits 1 when a new tmux session cannot be created. Returns 1 when the
# session config cannot be serialized.
create_session() {
  log "Creating tmux session: $SESSION_NAME"

  # tmux split-pane pattern
  if [[ -n "${TMUX:-}" ]]; then
    # Inside tmux: split CURRENT pane in place
    # Current pane stays as-is (leader/user stays here)
    # Worker/Verifier appear on the RIGHT, user sees them immediately
    LEADER_PANE=$(tmux display-message -p '#{pane_id}')
    SESSION_NAME=$(tmux display-message -p '#{session_name}')
    log " Splitting current pane in session: $SESSION_NAME"

    # -h off current pane → right column (worker)
    WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    # -v off worker → stacked below on right (verifier)
    VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  else
    # Outside tmux: create a detached (-d) session for the panes.
    # NOTE(review): an earlier comment here said the terminal is attached to
    # the new session; no attach call is made in this function — confirm.
    # US-025 R13 P0: verify tmux new-session exit code; if collision + RLP_BACKGROUND,
    # disambiguate with -bg-<epoch>-<pid> suffix and a residual has-session loop.
    if ! tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT" 2>/dev/null; then
      if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
        if [[ "${RLP_BACKGROUND:-0}" == "1" ]]; then
          SESSION_NAME="${SESSION_NAME}-bg-$(date +%s)-$$"
          while tmux has-session -t "$SESSION_NAME" 2>/dev/null; do
            SESSION_NAME="${SESSION_NAME}-$(awk 'BEGIN{srand();print int(1000+rand()*9000)}')"
          done
          tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT" || {
            log_error "tmux new-session retry failed for $SESSION_NAME"
            exit 1
          }
        else
          log_error "tmux new-session failed: session $SESSION_NAME already exists (set RLP_BACKGROUND=1 to auto-rename)"
          exit 1
        fi
      else
        log_error "tmux new-session failed and session does not exist: $SESSION_NAME"
        exit 1
      fi
    fi
    # destroy-unattached off keeps the session alive when no tmux client is attached.
    # Best-effort only: it does NOT survive manual `tmux kill-session` or tmux server restart.
    # If either happens, R12 (lifecycle monitor) detects it and writes infra_failure BLOCKED.
    if [[ "${RLP_BACKGROUND:-0}" == "1" ]]; then
      tmux set-option -t "$SESSION_NAME" destroy-unattached off 2>/dev/null
    fi
    LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
    WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  fi

  # Set pane titles and enable border labels for visual distinction
  local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
  local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
  if [[ "$CONSENSUS_MODE" != "off" ]]; then
    verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
  fi
  tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
  tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
  tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
  # Color-coded pane borders: green=leader, blue=worker, yellow=verifier
  tmux set-option -p -t "$LEADER_PANE" pane-border-style "fg=green" 2>/dev/null
  tmux set-option -p -t "$WORKER_PANE" pane-border-style "fg=blue" 2>/dev/null
  tmux set-option -p -t "$VERIFIER_PANE" pane-border-style "fg=yellow" 2>/dev/null
  # Show pane titles in border
  tmux set-option pane-border-status top 2>/dev/null
  tmux set-option pane-border-format "#{?pane_active,#[fg=white bold],#[fg=grey]} #{pane_title} " 2>/dev/null

  log " Leader pane: $LEADER_PANE"
  log " Worker pane: $WORKER_PANE"
  log " Verifier pane: $VERIFIER_PANE"

  # US-024 R12 P0: lifecycle check site #1 — verify all panes/session alive after creation.
  _r12_check_lifecycle "create_session"

  # AC12: Capture baseline commit before writing session config
  BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")

  # Truncate cost-log for fresh run (previous data in versioned campaign reports)
  # NOTE: ': >' not bare '>' — in zsh a bare redirect with no command runs $NULLCMD
  # (=cat), which blocks reading stdin when the leader has an open TTY (D-1 dogfood hang).
  : > "$COST_LOG"

  # v5.7 §4.2: WITH_SELF_VERIFICATION=1 is hard-rejected at script entry now,
  # so by the time we reach create_session() the flag is guaranteed to be 0.
  # The legacy "NOTE: Agent-mode only; disabling" log line was removed because
  # the deprecation banner at startup is more honest (we exit 2, we don't
  # silently disable).

  # Write session config (atomic write).
  # Serialized with jq so that paths, session names and other free-form strings
  # containing '"' or '\' are escaped rather than corrupting the JSON. Fields
  # that were previously emitted as bare JSON tokens (numbers/booleans) go
  # through `lit`, which parses the text as JSON and falls back to a plain
  # string when it is not a valid literal.
  local session_config_json
  session_config_json=$(jq -n \
    --arg session_name "$SESSION_NAME" \
    --arg slug "$SLUG" \
    --arg created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg baseline_commit "$BASELINE_COMMIT" \
    --arg leader_pane "$LEADER_PANE" \
    --arg worker_pane "$WORKER_PANE" \
    --arg verifier_pane "$VERIFIER_PANE" \
    --arg pid "$$" \
    --arg root "$ROOT" \
    --arg worker_model "$WORKER_MODEL" \
    --arg verifier_model "$VERIFIER_MODEL" \
    --arg worker_engine "$WORKER_ENGINE" \
    --arg verifier_engine "$VERIFIER_ENGINE" \
    --arg worker_codex_model "$WORKER_CODEX_MODEL" \
    --arg worker_codex_reasoning "$WORKER_CODEX_REASONING" \
    --arg verifier_codex_model "$VERIFIER_CODEX_MODEL" \
    --arg verifier_codex_reasoning "$VERIFIER_CODEX_REASONING" \
    --arg verify_mode "$VERIFY_MODE" \
    --arg consensus_mode "$CONSENSUS_MODE" \
    --arg max_iter "$MAX_ITER" \
    --arg poll_interval "$POLL_INTERVAL" \
    --arg iter_timeout "$ITER_TIMEOUT" \
    --arg heartbeat_stale_threshold "$HEARTBEAT_STALE_THRESHOLD" \
    --arg max_restarts "$MAX_RESTARTS" \
    --arg idle_nudge_threshold "$IDLE_NUDGE_THRESHOLD" \
    --arg max_nudges "$MAX_NUDGES" \
    --arg cb_threshold "$CB_THRESHOLD" \
    --arg effective_cb_threshold "$EFFECTIVE_CB_THRESHOLD" \
    --arg with_self_verification "$WITH_SELF_VERIFICATION" \
    --arg with_self_verification_requested "$WITH_SELF_VERIFICATION_REQUESTED" \
    --arg sv_skipped_reason "$SV_SKIPPED_REASON" \
    --arg lane_mode "$LANE_MODE" \
    --arg autonomous_mode "$AUTONOMOUS_MODE" \
    '
    def lit: . as $s | try ($s | fromjson) catch $s;
    {
      session_name: $session_name,
      slug: $slug,
      created_at: $created_at,
      baseline_commit: $baseline_commit,
      panes: {
        leader: $leader_pane,
        worker: $worker_pane,
        verifier: $verifier_pane
      },
      pid: ($pid | lit),
      root: $root,
      models: {
        worker: $worker_model,
        verifier: $verifier_model
      },
      engines: {
        worker: $worker_engine,
        verifier: $verifier_engine,
        worker_codex_model: $worker_codex_model,
        worker_codex_reasoning: $worker_codex_reasoning,
        verifier_codex_model: $verifier_codex_model,
        verifier_codex_reasoning: $verifier_codex_reasoning
      },
      verification: {
        verify_mode: $verify_mode,
        consensus_mode: $consensus_mode
      },
      config: {
        max_iter: ($max_iter | lit),
        poll_interval: ($poll_interval | lit),
        iter_timeout: ($iter_timeout | lit),
        heartbeat_stale_threshold: ($heartbeat_stale_threshold | lit),
        max_restarts: ($max_restarts | lit),
        idle_nudge_threshold: ($idle_nudge_threshold | lit),
        max_nudges: ($max_nudges | lit),
        cb_threshold: ($cb_threshold | lit),
        effective_cb_threshold: ($effective_cb_threshold | lit),
        with_self_verification: ($with_self_verification | lit),
        with_self_verification_requested: ($with_self_verification_requested | lit),
        sv_skipped_reason: $sv_skipped_reason,
        lane_mode: $lane_mode,
        autonomous_mode: ($autonomous_mode | lit)
      }
    }')
  if [[ -z "$session_config_json" ]]; then
    log_error "Failed to serialize session config for $SESSION_CONFIG"
    return 1
  fi
  printf '%s\n' "$session_config_json" | atomic_write "$SESSION_CONFIG"

  log " Session config: $SESSION_CONFIG"
}
|
|
995
|
+
|
|
996
|
+
# =============================================================================
|
|
997
|
+
# Copy-Mode Guard (tmux pattern)
|
|
998
|
+
# =============================================================================
|
|
999
|
+
|
|
1000
|
+
# --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
# check_copy_mode() — report whether keys can be sent to a pane.
# Args: $1=pane_id
# Returns: 0 when the pane is not in a tmux mode; 1 when #{pane_in_mode} is 1
#          or the tmux query itself fails.
check_copy_mode() {
  local pane_id="$1"
  local mode_flag

  if ! mode_flag=$(tmux display-message -p -t "$pane_id" '#{pane_in_mode}' 2>/dev/null); then
    return 1
  fi

  # Status 1 when the pane is in copy mode (cannot send keys), 0 otherwise.
  ! [[ "$mode_flag" -eq 1 ]]
}
|
|
1010
|
+
|
|
1011
|
+
# =============================================================================
|
|
1012
|
+
# Verification-Based Send Retry (tmux pattern)
|
|
1013
|
+
# =============================================================================
|
|
1014
|
+
|
|
1015
|
+
# --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
# paste_to_pane() — load text into the tmux buffer "rlp-paste" and paste it
# into a pane (the buffer is deleted after pasting via -d).
# Args: $1=pane_id $2=text
# Returns: 0 on success; non-zero if the temp file could not be created or a
#          tmux step failed.
paste_to_pane() {
  local pane_id="$1"
  local text="$2"

  # mktemp instead of a fixed /tmp/...-$$ name: the path is unpredictable and
  # created exclusively, so it cannot be pre-planted by another user.
  local tmpbuf
  tmpbuf=$(mktemp "${TMPDIR:-/tmp}/rlp-desk-paste.XXXXXX") || return 1

  # printf '%s' writes the text verbatim; zsh's echo would interpret backslash
  # escapes in it, and echo can mistake text such as "-n" for an option.
  printf '%s' "$text" > "$tmpbuf"

  local rc=0
  if tmux load-buffer -b rlp-paste "$tmpbuf" 2>/dev/null; then
    tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null || rc=$?
  else
    rc=1
  fi

  rm -f -- "$tmpbuf"
  return "$rc"
}
|
|
1025
|
+
|
|
1026
|
+
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
# safe_send_keys() — paste text into a pane and press Enter until the text is
# no longer visible in the last lines of the pane.
# Args: $1=pane_id $2=text
# Returns: 0 when the text was consumed, and also after the final fail-open
#          nudge (submission unconfirmed); 1 when the pane is in copy mode.
safe_send_keys() {
  local pane_id="$1"
  local text="$2"
  # Declared once up front: in zsh, repeating `local name` inside a loop prints
  # the parameter's current value on later iterations (unless TYPESET_SILENT).
  local initial_capture check_capture retry_capture
  local pane_busy=0
  local round=0
  local retry_round=0

  # --- Exact tmux sendToWorker pattern (tmux-session.js:527-626) ---

  # Guard: copy-mode captures keys; skip entirely
  if ! check_copy_mode "$pane_id"; then
    log_debug " Pane $pane_id in copy mode, skipping send"
    return 1
  fi

  # Check for trust prompt and auto-dismiss
  initial_capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)
  if grep -q "esc to interrupt" <<< "$initial_capture" 2>/dev/null; then
    pane_busy=1
  fi
  if grep -q "Do you trust" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Trust prompt detected, dismissing"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.12
  fi
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
  if grep -q "Do you want to" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Permission prompt detected, auto-approving"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.3
  fi
  # Auto-dismiss codex update prompt (select Skip)
  if grep -qi "new version\|update.*codex\|codex.*update" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Codex update prompt detected, selecting Skip"
    tmux send-keys -t "$pane_id" "2" C-m
    sleep 0.2
  fi

  # Send text via buffer paste (reliable for long strings)
  log_debug " Pasting text to pane $pane_id (${#text} chars)"
  paste_to_pane "$pane_id" "$text"

  # Allow input buffer to settle (tmux: 150ms)
  sleep 0.15

  # Submit: up to 6 rounds of C-m double-press
  while (( round < 6 )); do
    sleep 0.1
    if (( round == 0 && pane_busy )); then
      # Busy pane: just C-m (DO NOT send Tab — it toggles Claude Code permission mode)
      tmux send-keys -t "$pane_id" C-m
    else
      tmux send-keys -t "$pane_id" C-m
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    sleep 0.14

    # Check if text was consumed.
    # `--` keeps text that starts with '-' from being parsed as grep options
    # (which made grep fail and the text look "consumed").
    check_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$check_capture" 2>/dev/null; then
      log_debug " Text consumed after round $((round + 1))"
      return 0
    fi
    sleep 0.14
    (( round++ ))
  done

  # Safety gate: copy-mode check
  if ! check_copy_mode "$pane_id"; then
    log_debug " Copy mode activated during send, aborting"
    return 1
  fi

  # Adaptive fallback: C-u clear line, resend (tmux pattern)
  log_debug " Adaptive retry — clearing line and resending"
  tmux send-keys -t "$pane_id" C-u
  sleep 0.08
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  paste_to_pane "$pane_id" "$text"
  sleep 0.12
  while (( retry_round < 4 )); do
    tmux send-keys -t "$pane_id" C-m
    sleep 0.18
    tmux send-keys -t "$pane_id" C-m
    sleep 0.14
    retry_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$retry_capture" 2>/dev/null; then
      log_debug " Text consumed after adaptive retry round $((retry_round + 1))"
      return 0
    fi
    (( retry_round++ ))
  done

  # Fail-open: one last nudge
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" C-m
  sleep 0.12
  tmux send-keys -t "$pane_id" C-m
  log_debug " Fail-open — text may or may not have been submitted"
  return 0
}
|
|
1135
|
+
|
|
1136
|
+
# =============================================================================
|
|
1137
|
+
# Wait for Pane Ready (tmux pattern: paneLooksReady)
|
|
1138
|
+
# =============================================================================
|
|
1139
|
+
|
|
1140
|
+
# wait_for_pane_ready() — poll a pane until it shows an idle prompt.
# While waiting, auto-dismisses trust prompts, permission prompts and the
# codex update prompt by sending keys to the pane.
# Args: $1=pane_id $2=timeout in seconds (default 10)
# Returns: always 0 — on timeout it proceeds anyway (fail-open) and leaves it
#          to safe_send_keys to cope.
wait_for_pane_ready() {
  local pane_id="$1"
  local timeout="${2:-10}" # tmux default: 10s
  local start
  start=$(date +%s)
  # Declared once, outside the loop: in zsh, re-running `local captured` on
  # each iteration prints the previous capture (unless TYPESET_SILENT is set).
  local captured line trimmed
  local ready=0

  log " Waiting for pane $pane_id ready..."
  while (( $(date +%s) - start < timeout )); do
    captured=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

    # Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
    if grep -q "Do you trust" <<< "$captured" 2>/dev/null; then
      log " Trust prompt detected, auto-dismissing..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.12
      tmux send-keys -t "$pane_id" C-m
      sleep 2
      continue
    fi

    # Auto-approve permission prompts ("Do you want to create/overwrite X?")
    if grep -q "Do you want to" <<< "$captured" 2>/dev/null; then
      log " Permission prompt detected, auto-approving..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.5
      continue
    fi

    # Auto-dismiss codex update prompt (select Skip = option 2)
    if grep -qi "new version\|update.*codex\|codex.*update" <<< "$captured" 2>/dev/null; then
      log " Codex update prompt detected, selecting Skip..."
      tmux send-keys -t "$pane_id" "2" C-m
      sleep 0.5
      continue
    fi

    # tmux paneLooksReady: check each line for prompt char at line start.
    # Fed through a here-string rather than a pipeline so that `ready` is set
    # in the current shell regardless of how the shell runs pipeline stages.
    ready=0
    while IFS= read -r line; do
      trimmed="${line## }" # drops a single leading space, as before
      case "$trimmed" in
        ❯*|\>*|›*|»*)
          ready=1
          break
          ;;
      esac
    done <<< "$captured"

    # Also check via grep as fallback
    if printf '%s\n' "$captured" | tail -5 | grep -qE '^\s*[❯›]' 2>/dev/null; then
      ready=1
    fi

    if (( ready )) || printf '%s\n' "$captured" | tail -3 | grep -qE '^\s*[❯›>]' 2>/dev/null; then
      # Check no active task running
      if ! grep -q "esc to interrupt" <<< "$captured" 2>/dev/null; then
        log " Pane $pane_id is ready."
        return 0
      fi
    fi
    sleep 0.25
  done
  # Timeout — return success anyway (fail-open, let safe_send_keys handle it)
  log " Pane $pane_id ready timeout after ${timeout}s (proceeding anyway)"
  return 0
}
|
|
1203
|
+
|
|
1204
|
+
# =============================================================================
|
|
1205
|
+
# Heartbeat Monitoring (tmux pattern)
|
|
1206
|
+
# =============================================================================
|
|
1207
|
+
|
|
1208
|
+
# --- governance.md s7 step 5+6: Check heartbeat freshness ---
# check_heartbeat() — test whether a heartbeat file is fresh.
# Args: $1=heartbeat file (JSON with an `.epoch` field in seconds)
# Globals read: HEARTBEAT_STALE_THRESHOLD
# Returns: 0 when now - epoch < threshold; 1 when the file is missing, has no
#          usable numeric epoch, or is stale.
check_heartbeat() {
  local hb_file="$1"
  local threshold="$HEARTBEAT_STALE_THRESHOLD"

  if [[ ! -f "$hb_file" ]]; then
    return 1
  fi

  local hb_epoch now_epoch
  # Read epoch seconds directly (avoids timezone parsing bugs)
  hb_epoch=$(jq -r '.epoch // empty' "$hb_file" 2>/dev/null) || return 1

  # The value is about to be used in arithmetic evaluation, where arbitrary
  # text is evaluated as an expression (e.g. `x[$(cmd)]` runs cmd). Accept
  # only a plain non-negative number; anything else counts as "no heartbeat".
  local epoch_re='^[0-9]+([.][0-9]+)?$'
  if [[ ! "$hb_epoch" =~ $epoch_re ]]; then
    return 1
  fi
  # Drop any fractional part so the comparison is integer arithmetic.
  hb_epoch="${hb_epoch%%.*}"

  now_epoch=$(date +%s)
  (( now_epoch - hb_epoch < threshold ))
}
|
|
1228
|
+
|
|
1229
|
+
# Check if heartbeat indicates process has exited
# check_heartbeat_exited() — Args: $1=heartbeat file (JSON).
# Returns: 0 when the file exists and its `.status` is "exited"; 1 otherwise.
check_heartbeat_exited() {
  local hb_file="$1"
  [[ -f "$hb_file" ]] || return 1

  local reported_status
  reported_status=$(jq -r '.status // empty' "$hb_file" 2>/dev/null)
  if [[ "$reported_status" == "exited" ]]; then
    return 0
  fi
  return 1
}
|
|
1239
|
+
|
|
1240
|
+
# =============================================================================
|
|
1241
|
+
# Idle Pane Nudging (tmux pattern)
|
|
1242
|
+
# =============================================================================
|
|
1243
|
+
|
|
1244
|
+
# --- v5.7 §4.13.a: Mid-execution permission-prompt auto-dismiss (Bug 4 fix) ---
|
|
1245
|
+
# claude CLI v2.1.114+ surfaces TUI-layer prompts ("Do you want to create...")
|
|
1246
|
+
# even with --dangerously-skip-permissions on certain Write paths. Without this
|
|
1247
|
+
# helper, Workers/Verifiers hang until IDLE_NUDGE_THRESHOLD timeout.
|
|
1248
|
+
#
|
|
1249
|
+
# Window-bounded match (codex Critic v5.7): require both a prompt phrase AND a
|
|
1250
|
+
# TUI affordance marker on the SAME, PREVIOUS, or NEXT line. Whole-capture dual
|
|
1251
|
+
# grep would let unrelated text trigger Enter (R-V5-9 false-positive).
|
|
1252
|
+
# Per-pane 3-second debounce prevents rapid double-Enter.
|
|
1253
|
+
# Best effort: zsh/datetime provides $EPOCHSECONDS. A load failure is ignored
# because _now_s falls back to date(1).
zmodload zsh/datetime 2>/dev/null || true

# Print the current time as epoch seconds on stdout.
_now_s() {
  if [[ -n "${EPOCHSECONDS:-}" ]]; then
    printf '%s\n' "$EPOCHSECONDS"
  else
    printf '%s\n' "$(date +%s)"
  fi
}
|
|
1255
|
+
|
|
1256
|
+
# Per-pane epoch of the last auto-sent Enter (used for the 3-second debounce).
typeset -gA LAST_AUTO_APPROVE_TS

# v5.7 §4.16: track when each pane FIRST entered a prompt-stuck state.
# Cleared on the first capture without a visible prompt. Drives the bounded
# prompt-stall escalation (BLOCKED `prompt_stall`) so alive-but-stuck Workers
# cannot wait forever (codex Critic HIGH finding).
typeset -gA PANE_PROMPT_STUCK_SINCE
typeset -gA PANE_DISMISS_FAILED_COUNT
: "${PROMPT_STALL_TIMEOUT:=300}"        # 5 min default
: "${PROMPT_DISMISS_FAIL_LIMIT:=20}"    # ~100s of fruitless dismiss attempts

# v5.7 §4.17: generic no-progress timeout (codex Critic HIGH — closes the gap
# where an undetected prompt or alive-but-frozen Worker bypasses Layer 4).
# Independent of prompt detection: if pane content stops changing for this
# many seconds AND the signal file is still missing, write BLOCKED
# `infra_failure` reason `worker_no_progress` so silent infinite-wait is
# impossible.
: "${PROGRESS_NO_CHANGE_TIMEOUT:=600}"  # 10 min default
typeset -gA PANE_LAST_CHANGE_TS             # epoch when content last changed
typeset -gA PANE_LAST_CONTENT_FOR_PROGRESS  # captured content for diff

# v0.14.1: codex post-work idle UI grace. When a verifier pane shows codex's
# "Worked for Xm Ys" idle line at byte-stasis time, grant one extra
# CODEX_IDLE_GRACE_S (default 120s) before BLOCK. Per-pane bookkeeping avoids
# granting it repeatedly. Bug Report #3 (BOS 2026-05-04).
: "${CODEX_IDLE_GRACE_S:=120}"
typeset -gA PANE_CODEX_IDLE_GRACED

# v0.14.2: per-verifier-pane trace flag — log the verdict-lookup outcome
# exactly once per byte-stasis transition. Bug Report #4 (BOS 2026-05-05).
typeset -gA PANE_VERIFIER_TRACE_LOGGED
|
|
1284
|
+
|
|
1285
|
+
# v5.7 §4.17: default-No prompt detection. Pressing Enter on these means
|
|
1286
|
+
# CANCEL/REJECT, not approve — so we BLOCK with traceability instead of
|
|
1287
|
+
# silently auto-dismissing the wrong way.
|
|
1288
|
+
# Default-No markers. The "default: no" alternative requires a non-word
# character (or end of input) after "no", so ordinary transcript text such as
# "default: none" or "default node" does not count as a default-No prompt.
typeset -g _DEFAULT_NO_RE='\[y/N\]|\(yes/no, default no\)|default[: ]+no([^[:alnum:]_]|$)|^[[:space:]]*N\)'

# v5.7 §4.16: broadened prompt detection (codex Critic MEDIUM).
# v5.7 §4.20 (E2E real-claude-CLI finding): claude v2.1.114+ uses a new trust
# prompt format ("Quick safety check: Is this a project you ... trust?")
# and a numbered picker with the `❯` cursor adjacent to the digit ("❯1.Yes").
# The old patterns ("Do you trust") missed it entirely, so the Worker hung
# 5min until iter-timeout.
# PROMPT_RE therefore includes: Quick safety check | trust this
# (folder|directory) | Is this a project you.
# AFFORDANCE_RE therefore includes: `❯` + optional whitespace + `<digits>.`,
# `Enter to confirm`, and the `1) Yes` / `1) No` form.
typeset -g _PROMPT_RE='Do you (want to|trust)|Confirm execution|Are you sure|Continue\?|Proceed\?|Allow this|Approve this|Press y to|Choose an option|Select \[|Quick safety check|trust this (folder|directory)|Is this a project you'
typeset -g _AFFORDANCE_RE='\(y/n\)|\[Y/n\]|\[y/N\]|\(yes/no|❯[[:space:]]*[0-9]+\.|(^|[[:space:]])1\) (Yes|No)|(^|[[:space:]])[YyNn]\)|press (y|enter) to|Enter to confirm'

# v5.7 §4.18 (E2E real-tmux + omc benchmarking): "active task" markers used
# to distinguish a Worker that is busy producing output (and may legitimately
# print "(y/n)" inside its body text) from a Worker that is *idle at an
# unrecognized prompt*. Mirrors omc-team's `paneHasActiveTask` heuristic
# (src/team/tmux-session.ts:659). When ANY of these markers is in the recent
# pane tail, the Worker is alive — auto_dismiss must NOT fast-fail on a
# suspected-unknown prompt because the affordance text is just transcript.
typeset -g _ACTIVE_TASK_RE='esc to interrupt|background terminal running|^[[:space:]]*[·✻][[:space:]]+[A-Za-z]+(\.{3}|…)'
|
|
1309
|
+
|
|
1310
|
+
auto_dismiss_prompts() {
  # Inspect the recent output of tmux pane $1 and react to TUI prompts:
  #   * recognized default-Yes prompts -> send Enter (at most once per 3s)
  #   * default-No prompts             -> write a BLOCKED sentinel
  #   * y/n affordance with no recognized prompt phrase, on an idle pane
  #                                    -> write a BLOCKED sentinel
  #   * nothing prompt-like            -> clear this pane's stall tracking
  #
  # Globals read:    _PROMPT_RE _AFFORDANCE_RE _DEFAULT_NO_RE _ACTIVE_TASK_RE
  #                  CURRENT_US
  # Globals written: LAST_AUTO_APPROVE_TS PANE_PROMPT_STUCK_SINCE
  #                  PANE_DISMISS_FAILED_COUNT
  # Returns: 0 in every case (also when the pane cannot be captured).
  local pane_id="$1"
  local now
  now=$(_now_s)
  local last=${LAST_AUTO_APPROVE_TS[$pane_id]:-0}
  # Loop variable for the `for line in ...` loops below. Declared here so it
  # stays function-local instead of leaking into the global scope.
  local line

  local capture
  # v5.7 §4.21 (E2E real-claude-CLI finding): the claude v2.x trust prompt
  # wraps to ~30 lines on narrow panes. -S -10 missed the question header;
  # -50 covers the full prompt.
  capture=$(tmux capture-pane -t "$pane_id" -p -S -50 2>/dev/null) || return 0

  # §4.21.b: narrow-pane wrapping splits the question phrase across lines
  # (`Quick safety\n check`). Turn newlines/CRs/tabs into spaces and collapse
  # runs of spaces so substring matching works regardless of pane width.
  local _norm_capture="${capture//[$'\n\r\t']/ }"
  while [[ "$_norm_capture" == *"  "* ]]; do _norm_capture="${_norm_capture//  / }"; done

  # v5.7 §4.21: special-case the claude v2.x trust prompt signature
  # ("Quick safety check ... Enter to confirm" with the `❯N.Yes` cursor).
  # This prompt is default-Yes — Enter approves trust.
  if { [[ "$_norm_capture" == *"Quick safety check"* ]] || [[ "$_norm_capture" == *"trust this folder"* ]] || [[ "$_norm_capture" == *"trust this directory"* ]]; } \
     && [[ "$_norm_capture" == *"Enter to confirm"* ]] \
     && [[ "$_norm_capture" =~ '❯ ?[0-9]+\. ?Yes' ]]; then
    if (( now - last >= 3 )); then
      log " Claude v2.x trust prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] claude_trust_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
    fi
    return 0
  fi

  # Older claude trust prompt format (omc-team parity).
  if [[ "$_norm_capture" == *"Do you trust the contents of this directory"* ]] \
     && { [[ "$_norm_capture" =~ 'Yes,[[:space:]]*continue' ]] || [[ "$_norm_capture" == *"Press enter to continue"* ]]; }; then
    if (( now - last >= 3 )); then
      log " Claude (legacy) trust prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] claude_trust_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
    fi
    return 0
  fi

  local -a lines
  lines=("${(@f)capture}")
  local i
  local n=${#lines[@]}
  local prompt_visible=0

  # v5.7 §4.23 (E2E real-claude-CLI finding): narrow-pane wrapping breaks
  # multi-line prompts, so per-line PROMPT+AFFORDANCE adjacency misses them.
  # Match against the LAST 15 lines joined into one whitespace-collapsed
  # string — where the active prompt sits.
  local _tail_start=$((n > 15 ? n - 14 : 1))
  local _tail_normalized=""
  for ((i=_tail_start; i <= n; i++)); do
    _tail_normalized+="${lines[i]} "
  done
  while [[ "$_tail_normalized" == *"  "* ]]; do _tail_normalized="${_tail_normalized//  / }"; done

  local default_no_seen=0
  local sample_pattern="${_tail_normalized:0:120}"
  if [[ "$_tail_normalized" =~ $_PROMPT_RE ]] && [[ "$_tail_normalized" =~ $_AFFORDANCE_RE ]]; then
    prompt_visible=1
  fi
  # Default-No scan: full capture, not just tail (scrollback contamination guard).
  if [[ "$capture" =~ $_DEFAULT_NO_RE ]]; then
    default_no_seen=1
  fi

  if (( default_no_seen )); then
    # v5.7 §4.17 + §4.17.b: default-No prompts ([y/N], "default: no") cannot
    # be auto-Enter'd safely — pressing Enter would CANCEL the operation.
    # If the pane has ANY default-No prompt visible (even alongside older
    # default-Yes prompts in scrollback), BLOCK with traceability.
    #
    # Report the line that actually carries the default-No marker when one
    # can be isolated. The marker may sit in scrollback above the 15-line
    # tail, in which case the tail sample would show unrelated text.
    for line in "${lines[@]}"; do
      if [[ "$line" =~ $_DEFAULT_NO_RE ]]; then
        sample_pattern="${line:0:120}"
        break
      fi
    done
    log_error "Default-No prompt detected in pane $pane_id — cannot safely auto-dismiss"
    log_debug "[GOV] default_no_prompt_detected=true pane=$pane_id action=block"
    write_blocked_sentinel \
      "Pane shows a default-No / explicit-No-default permission prompt. Auto-Enter would CANCEL the operation rather than approve it. Operator must manually respond with 'y' or extend prompt-handling logic. Pattern: $sample_pattern" \
      "${CURRENT_US:-ALL}" \
      "infra_failure"
    return 0
  fi

  if (( prompt_visible )); then
    # All visible prompts are default-Yes-equivalent — safe to auto-Enter.
    # Remember when the pane first got stuck so check_prompt_stall can bound it.
    if [[ -z "${PANE_PROMPT_STUCK_SINCE[$pane_id]:-}" ]]; then
      PANE_PROMPT_STUCK_SINCE[$pane_id]=$now
    fi
    if (( now - last >= 3 )); then
      log " Permission prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] permission_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
      PANE_DISMISS_FAILED_COUNT[$pane_id]=$((${PANE_DISMISS_FAILED_COUNT[$pane_id]:-0} + 1))
    fi
    return 0
  fi

  # v5.7 §4.18: unknown-prompt fast-fail (E2E + omc benchmarking finding).
  # An affordance marker (y/n bracket etc.) with NO recognized PROMPT_RE
  # phrasing likely means an unknown yes/no prompt variant. omc-team's
  # principle (tmux-session.ts:639): never auto-Enter on unknown prompts —
  # Enter could approve OR cancel depending on the default. BLOCK immediately
  # so the operator can extend the PROMPT_RE catalog, instead of waiting
  # 10 min for the freeze timeout.
  #
  # False-positive guard: skip if any "active task" marker is present
  # (esc to interrupt / background terminal / spinner) — the Worker is then
  # producing output and the affordance text is just transcript.
  local active=0
  local affordance_seen=0
  local sample=""
  local -a tail_lines
  tail_lines=()
  for ((i=1; i <= n; i++)); do
    if [[ "${lines[i]}" =~ $_ACTIVE_TASK_RE ]]; then
      active=1
      break
    fi
  done
  if (( ! active )); then
    # Only check the last 5 non-empty lines (where an idle prompt would sit).
    local k
    for ((k=n; k >= 1 && ${#tail_lines[@]} < 5; k--)); do
      [[ -z "${lines[k]}" ]] && continue
      tail_lines=("${lines[k]}" "${tail_lines[@]}")
    done
    for line in "${tail_lines[@]}"; do
      if [[ "$line" =~ $_AFFORDANCE_RE ]]; then
        affordance_seen=1
        sample="${line:0:120}"
        break
      fi
    done
  fi
  if (( affordance_seen )); then
    # Re-check default-No (could be the active prompt's bracket — must BLOCK).
    local default_no_in_tail=0
    for line in "${tail_lines[@]}"; do
      if [[ "$line" =~ $_DEFAULT_NO_RE ]]; then
        default_no_in_tail=1
        break
      fi
    done
    local reason
    if (( default_no_in_tail )); then
      reason="Pane shows a default-No affordance ([y/N], 'default: no') but the surrounding prompt phrasing is not in PROMPT_RE. Auto-Enter would CANCEL. Operator must respond manually or extend PROMPT_RE. Sample: $sample"
    else
      reason="Pane shows a y/n affordance marker without a recognized prompt phrasing — likely an unknown CLI prompt variant. Refusing to guess auto-Enter (which could be the wrong default). Operator must respond manually or extend PROMPT_RE. Sample: $sample"
    fi
    log_error "Unknown-prompt affordance detected in pane $pane_id — fast-fail BLOCK"
    log_debug "[GOV] unknown_prompt_detected=true pane=$pane_id action=block default_no=$default_no_in_tail"
    write_blocked_sentinel "$reason" "${CURRENT_US:-ALL}" "infra_failure"
    return 0
  fi

  # No prompt visible — clear stall tracking so re-entry is fresh.
  if [[ -n "${PANE_PROMPT_STUCK_SINCE[$pane_id]:-}" ]]; then
    log_debug "[FLOW] prompt_cleared=true pane=$pane_id"
    # zsh: unset assoc-array member via reset to empty + delete key.
    PANE_PROMPT_STUCK_SINCE[$pane_id]=""
    PANE_DISMISS_FAILED_COUNT[$pane_id]=""
    unset "PANE_PROMPT_STUCK_SINCE[$pane_id]"
    unset "PANE_DISMISS_FAILED_COUNT[$pane_id]"
  fi
  return 0
}
|
|
1478
|
+
|
|
1479
|
+
# v5.7 §4.16: bounded prompt-stall escalation (codex Critic HIGH finding).
|
|
1480
|
+
# Closes the "alive process → extend indefinitely" gap: if a pane stays in
|
|
1481
|
+
# prompt-visible state for PROMPT_STALL_TIMEOUT (default 5min) OR
|
|
1482
|
+
# auto_dismiss has tried PROMPT_DISMISS_FAIL_LIMIT times without progress,
|
|
1483
|
+
# write BLOCKED `prompt_stall` so the campaign exits with traceability
|
|
1484
|
+
# instead of infinite-waiting.
|
|
1485
|
+
#
|
|
1486
|
+
# Returns 0 if pane is fine; returns 1 (and writes BLOCKED sentinel) if
|
|
1487
|
+
# stall threshold exceeded — caller should propagate the failure.
|
|
1488
|
+
check_prompt_stall() {
  # Escalate a pane that has been sitting on a prompt for too long.
  #
  # Args:    $1 - tmux pane id
  #          $2 - user-story id for the sentinel (default: $CURRENT_US, else ALL)
  # Globals: PANE_PROMPT_STUCK_SINCE, PANE_DISMISS_FAILED_COUNT (read),
  #          PROMPT_STALL_TIMEOUT, PROMPT_DISMISS_FAIL_LIMIT (read)
  # Returns: 0 when the pane is not tracked as stuck or is still within both
  #          limits; 1 after writing a BLOCKED sentinel.
  local pane_id="$1"
  local us_id="${2:-${CURRENT_US:-ALL}}"

  local since=${PANE_PROMPT_STUCK_SINCE[$pane_id]:-0}
  if (( since == 0 )); then
    return 0
  fi

  local now
  now=$(_now_s)
  local elapsed=$(( now - since ))
  local attempts=${PANE_DISMISS_FAILED_COUNT[$pane_id]:-0}

  # Still inside both the time budget and the dismiss-attempt budget.
  if (( elapsed < PROMPT_STALL_TIMEOUT && attempts < PROMPT_DISMISS_FAIL_LIMIT )); then
    return 0
  fi

  log_error "Pane $pane_id stuck on prompt for ${elapsed}s ($attempts dismiss attempts) — escalating to BLOCKED"
  log_debug "[GOV] iter=${ITERATION:-0} prompt_stall_escalated=true pane=$pane_id stuck_for=${elapsed}s dismiss_attempts=$attempts threshold=${PROMPT_STALL_TIMEOUT}s"
  write_blocked_sentinel \
    "Pane stuck on TUI prompt for ${elapsed}s after ${attempts} dismiss attempts. Auto-dismiss patterns may need to be widened (see ~/.claude/ralph-desk/known-prompts.txt convention) or the underlying claude CLI prompt is genuinely unsupported. No documentation produced for this iteration." \
    "$us_id" \
    "infra_failure"
  return 1
}
|
|
1509
|
+
|
|
1510
|
+
# v0.14.1 / v0.14.2: codex post-work idle UI detector. The codex CLI shows
|
|
1511
|
+
# a status line like "─ Worked for 5m 36s ──" + a "› " prompt + "Context
|
|
1512
|
+
# X% left" / model + suggestion ("Improve documentation in @filename")
|
|
1513
|
+
# after it finishes the verifier task and is waiting for the next user
|
|
1514
|
+
# input. This is NOT a permission prompt — it is a successful idle state.
|
|
1515
|
+
# The byte-stasis check below mistook this for "frozen" and BLOCKED a
|
|
1516
|
+
# verifier whose verdict file was already on disk. v0.14.2 Bug Report #4
|
|
1517
|
+
# observed the v0.14.1 patterns being too narrow (BOS 12th launch had
|
|
1518
|
+
# extra horizontal-rule wrapping that broke the strict dash-bracket regex)
|
|
1519
|
+
# — relaxed below to multiple independent markers; ANY one fires idle.
|
|
1520
|
+
is_codex_idle_ui() {
  # Return 0 when the pane text $1 contains any codex idle-UI marker, else 1.
  #
  # The text is fed to grep through a here-string rather than `print -- ... |`.
  # `print` without -r interprets backslash escapes in the captured text
  # (e.g. `\c` stops output), which could hide a marker. A single grep also
  # avoids relying on pipeline exit status under `set -o pipefail`.
  #
  # Markers (ANY one is enough):
  #   1. "Worked for Xm Ys" — most reliable codex idle marker.
  #   2. "Context X% left" status bar — covers captures where the horizontal
  #      rules above were cut off.
  #   3. codex model + branch line (e.g. "gpt-5.5 high · feature/...").
  #   4. codex default-suggestion phrases. v0.14.1 keyed on the leading "›",
  #      but BOS Bug #4 showed that character can be wrapped away on narrow
  #      panes, so the phrases themselves are matched.
  local pane_text="$1"

  if grep -qE \
      -e 'Worked for [0-9]+m [0-9]+s' \
      -e 'Context [0-9]+%[[:space:]]*left' \
      -e 'gpt-[0-9]+(\.[0-9]+)? (low|medium|high|xhigh) ·' \
      -e 'Improve documentation in @|Summarize recent commits|Explain (this )?code' \
      <<< "$pane_text"; then
    return 0
  fi
  return 1
}
|
|
1537
|
+
|
|
1538
|
+
# v0.14.2 Bug Report #4 H1: codex sometimes lands the verdict at the
|
|
1539
|
+
# pre-v0.13.0 legacy path (`<root>/.claude/ralph-desk/memos/...`) instead
|
|
1540
|
+
# of `.rlp-desk/memos/`, even when the prompt instructs otherwise. When
|
|
1541
|
+
# we observe the legacy file with valid JSON, atomically rename it into
|
|
1542
|
+
# place so the rest of the pipeline (harvest + analytics + sentinels)
|
|
1543
|
+
# sees a single canonical path. Best-effort: any failure leaves the file
|
|
1544
|
+
# untouched and the campaign keeps polling.
|
|
1545
|
+
_migrate_legacy_verdict() {
  # Move a verdict found at $LEGACY_VERDICT_FILE to $VERDICT_FILE.
  #
  # Globals: LEGACY_VERDICT_FILE, VERDICT_FILE, ITERATION (read)
  # Returns: 0 when the file was moved; 1 when there is nothing to migrate
  #          (either path unset/empty, legacy file missing, or legacy file
  #          rejected by `jq -e .`) or when the move itself fails.
  [[ -n "${LEGACY_VERDICT_FILE:-}" && -f "$LEGACY_VERDICT_FILE" ]] || return 1
  # Without a destination there is nowhere to migrate to. Checking with a
  # default keeps this helper best-effort under `set -u` instead of aborting
  # on an unset VERDICT_FILE.
  [[ -n "${VERDICT_FILE:-}" ]] || return 1
  jq -e . "$LEGACY_VERDICT_FILE" >/dev/null 2>&1 || return 1

  log "Verdict file found at legacy path ${LEGACY_VERDICT_FILE} — moving to ${VERDICT_FILE}"
  log_debug "[GOV] iter=${ITERATION:-0} legacy_verdict_migrated=true from=${LEGACY_VERDICT_FILE} to=${VERDICT_FILE}"

  # Errors are silenced on purpose: a failed mkdir surfaces as a failed mv,
  # which is reported through the return status.
  mkdir -p -- "$(dirname -- "$VERDICT_FILE")" 2>/dev/null
  if mv -f -- "$LEGACY_VERDICT_FILE" "$VERDICT_FILE" 2>/dev/null; then
    return 0
  fi
  return 1
}
|
|
1554
|
+
|
|
1555
|
+
# v0.14.1 / v0.14.2: verdict-aware short-circuit. When the pane being
|
|
1556
|
+
# polled is the verifier pane AND a valid verdict file already exists on
|
|
1557
|
+
# disk (canonical path OR legacy path that we then auto-migrate), the
|
|
1558
|
+
# verifier has finished its work — the harvest step (run_single_verifier
|
|
1559
|
+
# / consensus loop) is the one that should observe the verdict, not the
|
|
1560
|
+
# generic no-progress watcher. Returning 0 here lets the outer loop keep
|
|
1561
|
+
# polling instead of escalating BLOCKED. Bug Reports #3 (BOS 2026-05-04)
|
|
1562
|
+
# + #4 (BOS 2026-05-05).
|
|
1563
|
+
_verifier_pane_has_verdict() {
  # Return 0 when pane $1 is a verifier pane AND a verdict that passes
  # `jq -e .` is available, either already at $VERDICT_FILE or after
  # _migrate_legacy_verdict succeeds. Return 1 otherwise.
  #
  # Globals: VERIFIER_PANE, FINAL_VERIFIER_PANE, VERDICT_FILE (read)
  local pane_id="$1"

  # An empty pane id must never count as a verifier pane. Without this guard
  # "" would compare equal to an unset VERIFIER_PANE / FINAL_VERIFIER_PANE
  # (same non-empty requirement that _worker_pane_has_signal applies).
  [[ -n "$pane_id" ]] || return 1
  [[ "$pane_id" == "${VERIFIER_PANE:-}" || "$pane_id" == "${FINAL_VERIFIER_PANE:-}" ]] || return 1

  # Canonical path first.
  if [[ -n "${VERDICT_FILE:-}" && -f "$VERDICT_FILE" ]]; then
    jq -e . "$VERDICT_FILE" >/dev/null 2>&1 && return 0
  fi

  # v0.14.2 Fix-D: codex may have written to the legacy path. Try to
  # migrate; success means the canonical file is now in place.
  _migrate_legacy_verdict && return 0
  return 1
}
|
|
1575
|
+
|
|
1576
|
+
# v0.14.5 Bug Report #6 Fix-M (worker mirror of Fix-A/Fix-D):
|
|
1577
|
+
# Worker (claude sonnet 1m) writes commit + iter-signal.json verify signal
|
|
1578
|
+
# then claude CLI parks at its idle prompt. check_no_progress observes
|
|
1579
|
+
# byte-stasis on the worker pane and would BLOCK after 600s even though
|
|
1580
|
+
# the signal is on disk. When the pane is the worker pane AND a valid
|
|
1581
|
+
# iter-signal is on disk, defer to the harvest step (poll_for_signal in
|
|
1582
|
+
# run_single_worker) instead of escalating BLOCKED.
|
|
1583
|
+
_worker_pane_has_signal() {
  # Return 0 when pane $1 is the worker pane AND $SIGNAL_FILE holds a usable
  # iteration signal: non-empty, accepted by `jq -e .`, `.iteration` all
  # digits, `.us_id` non-empty, `.status` either "verify" or "verify_partial".
  # Return 1 in every other case.
  #
  # Globals: WORKER_PANE, SIGNAL_FILE (read)
  local pane_id="$1"

  if [[ -z "${WORKER_PANE:-}" || "$pane_id" != "${WORKER_PANE:-}" ]]; then
    return 1
  fi
  if [[ -z "${SIGNAL_FILE:-}" || ! -s "${SIGNAL_FILE:-}" ]]; then
    return 1
  fi
  jq -e . "$SIGNAL_FILE" >/dev/null 2>&1 || return 1

  local sig_iter sig_us sig_status
  sig_iter=$(jq -r '.iteration // empty' "$SIGNAL_FILE" 2>/dev/null)
  sig_us=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
  sig_status=$(jq -r '.status // empty' "$SIGNAL_FILE" 2>/dev/null)

  [[ "$sig_iter" =~ ^[0-9]+$ ]] || return 1
  [[ -n "$sig_us" ]] || return 1

  case "$sig_status" in
    verify|verify_partial) return 0 ;;
  esac
  return 1
}
|
|
1597
|
+
|
|
1598
|
+
# v5.7 §4.17 (codex Critic HIGH): generic no-progress timeout — independent
|
|
1599
|
+
# of prompt detection. Closes the gap where an undetected prompt or alive-
|
|
1600
|
+
# but-frozen Worker can bypass Layer 4 and infinite-wait.
|
|
1601
|
+
#
|
|
1602
|
+
# Strategy: capture pane content each call, hash/compare to last; if
|
|
1603
|
+
# unchanged for PROGRESS_NO_CHANGE_TIMEOUT (default 10min), write BLOCKED.
|
|
1604
|
+
# Returns 0 if pane is making progress (or first call); 1 (and writes
|
|
1605
|
+
# BLOCKED) if no-progress threshold exceeded.
|
|
1606
|
+
check_no_progress() {
  # Generic "pane content has stopped changing" watchdog.
  #
  # Args:    $1 - tmux pane id
  #          $2 - user-story id for the sentinel (default: $CURRENT_US, else ALL)
  # Globals read:    PROGRESS_NO_CHANGE_TIMEOUT CODEX_IDLE_GRACE_S VERIFIER_PANE
  #                  FINAL_VERIFIER_PANE VERDICT_FILE LEGACY_VERDICT_FILE
  #                  SIGNAL_FILE ITERATION
  # Globals written: PANE_LAST_CONTENT_FOR_PROGRESS PANE_LAST_CHANGE_TS
  #                  PANE_CODEX_IDLE_GRACED PANE_VERIFIER_TRACE_LOGGED
  # Returns: 0 while the pane is making progress, cannot be captured, has a
  #          harvestable result on disk, or is inside the one-time codex idle
  #          grace window; 1 after writing a BLOCKED sentinel.
  local pane_id="$1"
  local us_id="${2:-${CURRENT_US:-ALL}}"
  local now
  now=$(_now_s)
  local capture
  capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null) || return 0

  # v0.14.1 Fix-A / v0.14.2 Fix-D: a codex verifier writes its verdict, then
  # sits at the "Worked for Xm Ys" idle UI. Byte-stasis would BLOCK even
  # though the verdict is on disk, so defer to the harvest step.
  if _verifier_pane_has_verdict "$pane_id"; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  # v0.14.5 Bug Report #6 Fix-M: worker mirror of the verifier branch above —
  # defer to the poll_for_signal harvest when SIGNAL_FILE is valid.
  if _worker_pane_has_signal "$pane_id"; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    log_debug "[GOV] iter=${ITERATION:-0} worker_progress_check=signal_present pane=$pane_id signal=${SIGNAL_FILE:-}"
    return 0
  fi

  # v0.14.2: root-cause tracing for Bug Report #4. For a verifier pane with no
  # verdict yet, log the lookup outcome once (flag kept in
  # PANE_VERIFIER_TRACE_LOGGED). VERDICT_FILE is read with a default so this
  # diagnostic line cannot abort the run under `set -u`.
  if [[ "$pane_id" == "${VERIFIER_PANE:-}" || "$pane_id" == "${FINAL_VERIFIER_PANE:-}" ]]; then
    if [[ -z "${PANE_VERIFIER_TRACE_LOGGED[$pane_id]:-}" ]]; then
      PANE_VERIFIER_TRACE_LOGGED[$pane_id]=1
      log_debug "[GOV] iter=${ITERATION:-0} verifier_progress_check=miss pane=$pane_id verdict_canonical=${VERDICT_FILE:-unset} verdict_canonical_exists=$([[ -f "${VERDICT_FILE:-/nonexistent}" ]] && echo true || echo false) verdict_legacy=${LEGACY_VERDICT_FILE:-unset} verdict_legacy_exists=$([[ -f "${LEGACY_VERDICT_FILE:-/nonexistent}" ]] && echo true || echo false)"
    fi
  fi

  # Content changed since the previous call: record it and restart the clock.
  local last_content="${PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]:-}"
  if [[ "$capture" != "$last_content" ]]; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  # Unchanged content but no recorded timestamp yet: start the clock now.
  local last_change=${PANE_LAST_CHANGE_TS[$pane_id]:-0}
  if (( last_change == 0 )); then
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  local frozen_for=$(( now - last_change ))
  if (( frozen_for < PROGRESS_NO_CHANGE_TIMEOUT )); then
    return 0
  fi

  # v0.14.1 Fix-B: even without a verdict file, codex sometimes parks at its
  # idle UI mid-run (e.g. partial-write window before atomic mv). Grant a
  # one-time grace of CODEX_IDLE_GRACE_S seconds before escalating.
  #
  # PANE_CODEX_IDLE_GRACED stores the epoch at which that grace expires
  # (unset/0 = not granted yet). PANE_LAST_CHANGE_TS is left untouched:
  # resetting it to "now" would silently extend the wait by a whole
  # PROGRESS_NO_CHANGE_TIMEOUT instead of CODEX_IDLE_GRACE_S.
  if is_codex_idle_ui "$capture"; then
    local grace_deadline=${PANE_CODEX_IDLE_GRACED[$pane_id]:-0}
    if (( grace_deadline == 0 )); then
      PANE_CODEX_IDLE_GRACED[$pane_id]=$(( now + CODEX_IDLE_GRACE_S ))
      log "Pane $pane_id at codex idle UI for ${frozen_for}s — granting +${CODEX_IDLE_GRACE_S}s grace before BLOCK escalation"
      log_debug "[GOV] iter=${ITERATION:-0} codex_idle_grace=true pane=$pane_id grace_s=${CODEX_IDLE_GRACE_S}"
      return 0
    fi
    if (( now < grace_deadline )); then
      return 0
    fi
  fi

  log_error "Pane $pane_id has not changed for ${frozen_for}s — alive but frozen. Escalating to BLOCKED."
  log_debug "[GOV] iter=${ITERATION:-0} no_progress_escalated=true pane=$pane_id frozen_for=${frozen_for}s threshold=${PROGRESS_NO_CHANGE_TIMEOUT}s"
  write_blocked_sentinel \
    "Pane content has been unchanged for ${frozen_for}s (>= ${PROGRESS_NO_CHANGE_TIMEOUT}s threshold). Worker process may be alive but stuck on an undetected prompt, hung network call, or genuine deadlock. No documentation produced; manual inspection required." \
    "$us_id" \
    "infra_failure"
  return 1
}
|
|
1685
|
+
|
|
1686
|
+
# --- governance.md s7 step 5+6: Nudge idle panes ---
|
|
1687
|
+
check_and_nudge_idle_pane() {
  # Nudge a pane whose visible tail has not changed for longer than
  # IDLE_NUDGE_THRESHOLD seconds, unless it looks busy.
  #
  # Args:    $1 - tmux pane id
  #          $2 - NAME of the caller's nudge counter variable; it is read
  #               indirectly and written back after each nudge
  # Globals read:    IDLE_NUDGE_THRESHOLD MAX_NUDGES
  # Globals written: LAST_PANE_CONTENT PANE_IDLE_SINCE
  local pane_id="$1"
  local nudge_count_var="$2"

  # v5.7 §4.13.a: auto-dismiss permission prompts before the idle check.
  # Otherwise the Worker hangs at "Do you want to create..." until nudge timeout.
  auto_dismiss_prompts "$pane_id"

  local current_content
  current_content=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -3)

  local now
  now=$(date +%s)

  # Tail changed: remember it and restart the idle clock.
  if [[ "$current_content" != "${LAST_PANE_CONTENT[$pane_id]:-}" ]]; then
    LAST_PANE_CONTENT[$pane_id]="$current_content"
    PANE_IDLE_SINCE[$pane_id]=$now
    return 0
  fi

  # Tail unchanged but no idle start recorded yet. This happens when the very
  # first snapshot equals the initial empty value (e.g. blank bottom rows).
  # Start the clock now; recomputing "now" as the start on every call would
  # mean the threshold can never be reached.
  if [[ -z "${PANE_IDLE_SINCE[$pane_id]:-}" ]]; then
    PANE_IDLE_SINCE[$pane_id]=$now
    return 0
  fi

  local idle_since="${PANE_IDLE_SINCE[$pane_id]}"
  if (( now - idle_since <= IDLE_NUDGE_THRESHOLD )); then
    return 0
  fi

  # A12 fix: NEVER nudge if pane is busy (thinking/working) — nudge interrupts claude
  local _nudge_capture
  _nudge_capture=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
  if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|razzle\|bunning\|zesting\|fermenting\|actualizing\|composing\|evaporating\|churning" <<< "$_nudge_capture" 2>/dev/null; then
    log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
    return 0
  fi

  # (P) reads the variable whose name is stored in nudge_count_var.
  local count=${(P)nudge_count_var}
  if (( count < MAX_NUDGES )); then
    log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
    safe_send_keys "$pane_id" ""
    (( count++ ))
    # Write the new count back into the caller-named variable.
    eval "$nudge_count_var=$count"
  fi
  return 0
}
|
|
1723
|
+
|
|
1724
|
+
# =============================================================================
|
|
1725
|
+
# Exponential Backoff Restart (tmux pattern)
|
|
1726
|
+
# =============================================================================
|
|
1727
|
+
|
|
1728
|
+
# --- governance.md s7 step 5: Restart dead workers with backoff ---
|
|
1729
|
+
restart_worker() {
  # Restart the Worker CLI in a pane, with a per-iteration backoff.
  #
  # Args:    $1 - tmux pane id
  #          $2 - iteration number (key into WORKER_RESTARTS)
  #          $3 - trigger file path; accepted for call-site compatibility but
  #               not used by this function
  # Globals read:    WORKER_ENGINE MAX_RESTARTS WORKER_MODEL WORKER_EFFORT
  # Globals written: WORKER_RESTARTS
  # Returns: 0 after the relaunch command has been sent; 1 when the engine is
  #          codex (no restart) or the restart budget is exhausted.
  local pane_id="$1"
  local iter="$2"

  # Codex workers are 1-shot exec; restart is not applicable
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
    return 1
  fi

  local restart_count="${WORKER_RESTARTS[$iter]:-0}"

  if (( restart_count >= MAX_RESTARTS )); then
    log_error "Worker exceeded max restarts ($MAX_RESTARTS) for iteration $iter"
    return 1  # caller writes BLOCKED
  fi

  # Exponential backoff: 5s, 10s, 20s, 60s (cap)
  local -a delays=(5 10 20 60)
  local delay=${delays[$((restart_count + 1))]:-60}
  log " Restarting worker (attempt $((restart_count + 1))/$MAX_RESTARTS) after ${delay}s backoff..."
  sleep "$delay"

  # Kill existing claude, wait for shell prompt
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
  sleep 2

  # Re-launch worker (tmux interactive pattern). Only the claude engine can
  # reach this point: codex returned early above, so no codex relaunch branch
  # is needed here.
  safe_send_keys "$pane_id" "$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
  return 0
}
|
|
1767
|
+
|
|
1768
|
+
# =============================================================================
|
|
1769
|
+
# Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
|
|
1770
|
+
# =============================================================================
|
|
1771
|
+
|
|
1772
|
+
# Per-US PRD injection helper
|
|
1773
|
+
# Substitutes the full PRD path with a per-US split path in the Worker prompt base.
|
|
1774
|
+
# Falls back to the full PRD with a stderr warning if the split file is missing.
|
|
1775
|
+
# Args: $1=prompt_base_file $2=full_prd_path $3=per_us_prd_path (empty = no substitution)
|
|
1776
|
+
inject_per_us_prd() {
  # Emit the Worker prompt base on stdout, with every literal occurrence of the
  # full PRD path replaced by the per-US split path when that split file exists.
  # Otherwise the prompt base is emitted unchanged (with a stderr warning if a
  # split path was requested but is missing).
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
    # An empty search string would make sed reuse "the previous regex" and
    # fail; there is nothing to substitute, so pass the prompt through.
    if [[ -z "$full_prd" ]]; then
      cat "$prompt_base"
      return
    fi
    # Paths are data, not patterns: escape BRE metacharacters (and the '|'
    # delimiter) in the search text, and '\', '&', '|' in the replacement, so
    # e.g. '.' in "prd-x.md" only matches a dot and '&' is inserted literally.
    local search_re replacement
    search_re=$(printf '%s' "$full_prd" | sed 's/[][\.*^$|]/\\&/g')
    replacement=$(printf '%s' "$per_us_prd" | sed 's/[\&|]/\\&/g')
    sed "s|$search_re|$replacement|g" "$prompt_base"
  else
    if [[ -n "$per_us_prd" ]]; then
      echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
|
|
1790
|
+
|
|
1791
|
+
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
|
|
1792
|
+
# NEVER send prompt content through tmux send-keys.
|
|
1793
|
+
# Write payloads to files, send only short trigger commands (<200 chars).
|
|
1794
|
+
write_worker_trigger() {
  # Write the Worker prompt and the executable trigger script for one iteration.
  #
  # Args:
  #   $1 - iteration number
  # Files written (via atomic_write) under $LOGS_DIR:
  #   iter-NNN.worker-prompt.md   base prompt + iteration context sections
  #   iter-NNN.worker-trigger.sh  heartbeat wrapper around the engine command
  # Globals read: LOGS_DIR, MEMORY_FILE, VERIFY_MODE, US_LIST, VERIFIED_US, DESK,
  #   SLUG, WORKER_PROMPT_BASE, _PRD_CHANGED, SIGNAL_FILE, AUTONOMOUS_MODE,
  #   WORKER_ENGINE, WORKER_MODEL, WORKER_EFFORT, WORKER_HEARTBEAT, and for codex
  #   CODEX_BIN, WORKER_CODEX_MODEL, WORKER_CODEX_REASONING.
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.worker-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.worker-output.log"

  # Build the worker prompt: base prompt + iteration context
  # (at most the first 5 lines of the memory file's contract section)
  local contract
  contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -5)

  # Check for fix contract from previous verifier failure
  local prev_iter=$((iter - 1))
  local fix_contract_file
  fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' "$prev_iter").fix-contract.md"

  # Compute next unverified US before prompt assembly (required for per-US PRD injection).
  # `us` is declared local so the loop variable cannot clobber a caller's variable.
  local next_us=""
  local us
  if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
    for us in $(echo "$US_LIST" | tr ',' ' '); do
      # Literal membership test against the comma-delimited verified list.
      if [[ ",$VERIFIED_US," != *",$us,"* ]]; then
        next_us="$us"
        break
      fi
    done
  fi

  {
    # Per-US PRD injection: substitute full PRD path with per-US split path when available
    local per_us_prd=""
    [[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
    inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
    echo ""
    echo "---"
    echo "## Iteration Context"
    echo "- **Iteration**: $iter"
    echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
    echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
    if (( _PRD_CHANGED )); then
      echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
    fi

    # Include fix contract if previous verifier failed
    if [[ -f "$fix_contract_file" ]]; then
      echo ""
      echo "---"
      echo "## IMPORTANT: Fix Contract from Verifier (iteration $prev_iter)"
      echo "The Verifier REJECTED your previous work. You MUST fix the issues below."
      echo "Do NOT just resubmit — actually change the code to address each issue."
      echo ""
      cat "$fix_contract_file"
    fi

    # Per-US mode: tell Worker exactly which US to work on
    if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
      if [[ -n "$next_us" ]]; then
        echo ""
        echo "---"
        echo "## PER-US SCOPE LOCK (this iteration) — OVERRIDES memory contract"
        echo "**IGNORE the 'Next Iteration Contract' from memory if it references a different story.**"
        echo "The Leader has determined that **${next_us}** is the next unverified story."
        echo "You MUST implement ONLY **${next_us}** in this iteration."
        echo "Do NOT implement any other user stories."
        # Per-US test-spec injection: point Worker to scoped test-spec if available
        local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
        if [[ -f "$per_us_test_spec" ]]; then
          echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
        else
          echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
        fi
        echo "When done, you MUST WRITE (not just print) the verify signal to the iter-signal FILE at: ${SIGNAL_FILE}"
        echo "Write this exact JSON to that file (us_id=\"${next_us}\", not \"ALL\"): {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", \"summary\": \"what was done\", \"timestamp\": \"ISO\"}"
        echo ""
        echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
      elif [[ -n "$VERIFIED_US" ]]; then
        # All individual US verified — this is the final full verify iteration
        echo ""
        echo "---"
        echo "## FINAL VERIFICATION ITERATION"
        echo "All individual US have been verified: $VERIFIED_US"
        echo "Run all tests and verification commands to confirm everything works together."
        echo "Signal verify with us_id=\"ALL\" for the final full verification."
      fi
    elif [[ "$VERIFY_MODE" = "batch" ]]; then
      echo ""
      echo "---"
      if [[ -n "$VERIFIED_US" ]]; then
        echo "## BATCH MODE — CONTINUE FROM PARTIAL PROGRESS"
        echo "The following US have already been verified: **$VERIFIED_US**"
        echo "- Do NOT re-implement these — they are done."
        echo "- Focus ONLY on the remaining unverified user stories."
        echo '- Signal verify with us_id="ALL" when the remaining stories are complete.'
      else
        echo "## BATCH MODE OVERRIDE"
        echo "Ignore any per-US signal instructions above. In batch mode:"
        echo "- Implement ALL user stories in this iteration"
        echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
        echo "- Do NOT signal verify after individual stories"
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative
    if (( AUTONOMOUS_MODE )); then
      echo ""
      echo "---"
      echo "## AUTONOMOUS MODE"
      echo "Do NOT stop or ask questions when encountering ambiguity or document conflicts."
      echo "**Resolution priority**: PRD > test-spec > context > memory"
      echo "If documents disagree, follow PRD and proceed. Log any conflict you find by"
      echo "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:"
      echo '  {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}'
      echo "Do NOT wait for human input. Keep working."
    fi
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
  # Engine-specific launch command (expanded at write time)
  # NOTE(review): $prompt_file is embedded unquoted inside the generated
  # `$(cat …)`; a LOGS_DIR containing whitespace would break the codex trigger.
  local engine_cmd engine_comment
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    engine_cmd="${CODEX_BIN:-codex} \\
  -m $WORKER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $prompt_file)\""
    engine_comment="# Run codex with fresh context (fallback trigger — TUI primary launch via launch_worker_codex)"
  else
    engine_cmd=$(build_claude_cmd print "$WORKER_MODEL" "$prompt_file" "$output_log" "$WORKER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 5)"
  fi

  # Unquoted heredoc: $iter, $WORKER_HEARTBEAT, $engine_comment and $engine_cmd
  # expand now; every \$ is deferred until the trigger script itself runs.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter worker - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$WORKER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log "  Worker prompt: $prompt_file"
  log "  Worker trigger: $trigger_file"
}
|
|
1955
|
+
|
|
1956
|
+
write_verifier_trigger() {
  # Produce the Verifier prompt and its executable trigger script for one
  # iteration.
  #
  # Args:
  #   $1 - iteration number
  #   $2 - verifier engine override (defaults to $VERIFIER_ENGINE; used by consensus)
  #   $3 - verifier model override (defaults to $VERIFIER_MODEL; only passed to
  #        the claude command, the codex command uses $VERIFIER_CODEX_MODEL)
  #   $4 - optional file-name suffix for consensus runs (e.g. "-claude", "-codex")
  local iter="$1"
  local verifier_engine="${2:-$VERIFIER_ENGINE}"
  local verifier_model="${3:-$VERIFIER_MODEL}"
  local suffix="${4:-}"

  # All three artefacts share the same "iter-NNN.verifier<suffix>" stem.
  local stem
  stem="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}"
  local prompt_file="${stem}-prompt.md"
  local trigger_file="${stem}-trigger.sh"
  local output_log="${stem}-output.log"

  # The signal file (when present) carries the us_id that scopes verification.
  local us_id=""
  [[ -f "$SIGNAL_FILE" ]] && us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)

  # Prompt = base prompt + verification context (+ autonomous-mode section).
  {
    cat "$VERIFIER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Verification Context"
    echo "- **Iteration**: $iter"
    echo "- **Done Claim**: $DONE_CLAIM_FILE"
    echo "- **Verify Mode**: $VERIFY_MODE"
    if [[ -n "$us_id" ]]; then
      case "$us_id" in
        ALL)
          echo "- **Scope**: FULL VERIFY — check ALL acceptance criteria from the PRD"
          ;;
        *)
          echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
          ;;
      esac
      if [[ -n "$VERIFIED_US" ]]; then
        echo "- **Previously verified US**: $VERIFIED_US"
        echo "- **Note**: Skip re-verifying the above US. Focus on unverified stories."
      fi
    fi

    # Autonomous mode: the PRD is authoritative, never wait on a human.
    if (( AUTONOMOUS_MODE )); then
      cat <<AUTONOMOUS_EOF

---
## AUTONOMOUS MODE
Do NOT stop or ask questions when encountering ambiguity or document conflicts.
**Resolution priority**: PRD > test-spec > context > memory
If documents disagree, follow PRD and proceed. Log any conflict by
appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:
  {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}
Do NOT wait for human input. Keep verifying.
AUTONOMOUS_EOF
    fi
  } | atomic_write "$prompt_file"

  # Engine-specific launch line, expanded now and pasted into the trigger.
  # The trigger must not `exec` the engine: the heartbeat cleanup after it
  # would never run.
  local engine_cmd engine_comment
  case "$verifier_engine" in
    codex)
      engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $prompt_file)\" \\
  > >(tee $output_log) 2>&1"
      engine_comment="# Run codex with fresh context (governance.md s7 step 7) — process substitution preserves tty"
      ;;
    *)
      engine_cmd=$(build_claude_cmd print "$verifier_model" "$prompt_file" "$output_log" "$VERIFIER_EFFORT")
      engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 7)"
      ;;
  esac

  # Unquoted heredoc: plain $vars expand at write time, \$-escaped ones are
  # left for the generated script to evaluate when it runs.
  cat <<TRIGGER_EOF | atomic_write "$trigger_file"
#!/bin/zsh
# Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  chmod +x "$trigger_file"

  log "  Verifier prompt: $prompt_file"
  log "  Verifier trigger: $trigger_file"
}
|
|
2054
|
+
|
|
2055
|
+
# =============================================================================
|
|
2056
|
+
# Cleanup (trap handler)
|
|
2057
|
+
# =============================================================================
|
|
2058
|
+
|
|
2059
|
+
cleanup() {
  # Trap handler run on every terminal state: releases lockfiles, stops and
  # removes the worker/verifier panes, deletes leftover *.tmp.* files, generates
  # the campaign and SV reports, records the final status in metadata.json,
  # emits the [FLOW] validation lines when DEBUG is on, and prints a summary.
  #
  # Final status is derived from sentinel files: COMPLETE if $COMPLETE_SENTINEL
  # exists, else BLOCKED if $BLOCKED_SENTINEL exists, else TIMEOUT.
  log "Cleaning up..."

  # Remove lockfile
  if (( LOCKFILE_ACQUIRED )); then
    rm -f "$LOCKFILE_PATH" 2>/dev/null
  else
    log_debug "cleanup: lockfile not owned by this process, skipping removal"
  fi

  # US-026 R14 P0: remove project-scoped runner lockfile if owned by this slug
  if [[ -f "$RUNNER_LOCKFILE_PATH" ]]; then
    local own_slug
    own_slug=$(jq -r '.slug' "$RUNNER_LOCKFILE_PATH" 2>/dev/null)
    if [[ "$own_slug" == "$SLUG" ]]; then
      rm -rf "$RUNNER_LOCKDIR" "$RUNNER_LOCKFILE_PATH" 2>/dev/null
    fi
  fi

  # Kill claude processes then kill panes
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
  fi
  sleep 2
  # Kill panes on completion
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
  fi
  log "  Panes cleaned up."

  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
  setopt local_options nonomatch 2>/dev/null
  rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null

  # AC4: Generate campaign report on all terminal states (always-on)
  generate_campaign_report

  # US-001: Generate SV report after campaign report (tmux mode)
  generate_sv_report

  # Print summary
  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))
  local minutes=$(( elapsed / 60 ))
  local seconds=$(( elapsed % 60 ))

  local final_status="UNKNOWN"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
  else
    final_status="TIMEOUT"
  fi

  # --- Update metadata.json with final status ---
  if [[ -f "$METADATA_FILE" ]]; then
    if jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '.campaign_status = $status | .end_time = $end_time' \
      "$METADATA_FILE" > "${METADATA_FILE}.tmp"; then
      mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
    else
      # Do not leave a truncated temp file behind when jq fails.
      rm -f "${METADATA_FILE}.tmp" 2>/dev/null
    fi
  fi

  if (( DEBUG )); then
    log_debug "[FLOW] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"

    # --- Validation ---
    log_debug "[FLOW] === Execution Validation ==="

    # 1. Did the correct verify mode run?
    log_debug "[FLOW] verify_mode=$VERIFY_MODE configured=true"

    # Counting note: `grep -c` already prints "0" (and exits 1) when nothing
    # matches, so a `|| echo 0` fallback would yield "0<newline>0". Capture the
    # output and default only when grep printed nothing (e.g. unreadable file).

    # 2. Per-US: were all US individually verified?
    if [[ "$VERIFY_MODE" = "per-us" ]]; then
      local prd_file="$DESK/plans/prd-$SLUG.md"
      local expected_us=""
      if [[ -f "$prd_file" ]]; then
        expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
      fi
      local verified_count expected_count
      verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null)
      verified_count=${verified_count:-0}
      expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null)
      expected_count=${expected_count:-0}

      if [[ "$final_status" = "COMPLETE" ]]; then
        if (( verified_count >= expected_count )); then
          log_debug "[FLOW] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
        else
          log_debug "[FLOW] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
        fi
      else
        log_debug "[FLOW] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
      fi
    fi

    # 3. Consensus: were both engines used?
    if [[ "$CONSENSUS_MODE" != "off" ]]; then
      if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
        log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
      else
        log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
      fi
    fi

    # 4. Engine match: did the configured engines actually run?
    local worker_dispatches verifier_dispatches
    worker_dispatches=$(grep -c '\[FLOW\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null)
    worker_dispatches=${worker_dispatches:-0}
    verifier_dispatches=$(grep -c '\[FLOW\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null)
    verifier_dispatches=${verifier_dispatches:-0}
    log_debug "[FLOW] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"

    # 5. Fix loops: how many fix contracts were generated?
    local fix_count
    fix_count=$(grep -c '\[DECIDE\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null)
    fix_count=${fix_count:-0}
    log_debug "[FLOW] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"

    # 6. Circuit breakers: any triggered?
    local cb_count
    cb_count=$(grep -c '\[GOV\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null)
    cb_count=${cb_count:-0}
    log_debug "[FLOW] circuit_breakers_triggered=$cb_count"

    # 7. Overall result
    log_debug "[FLOW] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
  fi

  echo ""
  echo "============================================================"
  echo "  Ralph Desk Tmux Runner - Session Complete"
  echo "============================================================"
  echo "  Session:    $SESSION_NAME"
  echo "  Slug:       $SLUG"
  echo "  Iterations: $ITERATION / $MAX_ITER"
  echo "  Elapsed:    ${minutes}m ${seconds}s"
  echo ""

  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    echo "  Final State: COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    echo "  Final State: BLOCKED"
  else
    echo "  Final State: STOPPED (interrupted or timeout)"
  fi

  echo ""
  echo "  Tmux session left alive for inspection:"
  echo "    tmux attach -t $SESSION_NAME"
  echo "    tmux kill-session -t $SESSION_NAME"
  echo "============================================================"
}
|
|
2210
|
+
|
|
2211
|
+
# =============================================================================
|
|
2212
|
+
# Poll Loop (used for both Worker and Verifier)
|
|
2213
|
+
# =============================================================================
|
|
2214
|
+
|
|
2215
|
+
# --- governance.md s7 step 5+6: Poll for signal file with heartbeat monitoring ---
|
|
2216
|
+
poll_for_signal() {
|
|
2217
|
+
local signal_file="$1"
|
|
2218
|
+
local heartbeat_file="$2"
|
|
2219
|
+
local pane_id="$3"
|
|
2220
|
+
local trigger_file="$4"
|
|
2221
|
+
local role="$5" # "worker" or "verifier"
|
|
2222
|
+
local nudge_count=0
|
|
2223
|
+
local api_retry_count=0
|
|
2224
|
+
local poll_start
|
|
2225
|
+
poll_start=$(date +%s)
|
|
2226
|
+
|
|
2227
|
+
# Initialize idle tracking for this pane
|
|
2228
|
+
LAST_PANE_CONTENT[$pane_id]=""
|
|
2229
|
+
PANE_IDLE_SINCE[$pane_id]=$(date +%s)
|
|
2230
|
+
|
|
2231
|
+
while true; do
|
|
2232
|
+
local now
|
|
2233
|
+
now=$(date +%s)
|
|
2234
|
+
local elapsed=$(( now - poll_start ))
|
|
2235
|
+
|
|
2236
|
+
# Per-iteration timeout check
|
|
2237
|
+
if (( elapsed >= ITER_TIMEOUT )); then
|
|
2238
|
+
log_error "$role timed out after ${ITER_TIMEOUT}s for iteration $ITERATION"
|
|
2239
|
+
return 1 # timeout
|
|
2240
|
+
fi
|
|
2241
|
+
|
|
2242
|
+
# Check if signal file appeared
|
|
2243
|
+
if [[ -f "$signal_file" ]]; then
|
|
2244
|
+
# Bug #7-extra (BOS 2026-05-06): file existence is NOT enough. Worker
|
|
2245
|
+
# (claude opus) writes via Claude Code's Write tool, which is not
|
|
2246
|
+
# guaranteed atomic — the file can appear with empty / partial JSON
|
|
2247
|
+
# before the write completes. Verifier was being dispatched against a
|
|
2248
|
+
# half-written iter-signal.json. Validate that the file holds a single
|
|
2249
|
+
# parseable, non-null JSON value (`jq -e .`) before accepting; any
|
|
2250
|
+
# failure simply continues polling (next tick re-reads). Note: `jq
|
|
2251
|
+
# empty` was rejected because it accepts an EMPTY file as "zero
|
|
2252
|
+
# documents" — the exact race window we need to reject.
|
|
2253
|
+
if jq -e . "$signal_file" >/dev/null 2>&1; then
|
|
2254
|
+
log " Signal file detected: $signal_file"
|
|
2255
|
+
return 0 # success
|
|
2256
|
+
fi
|
|
2257
|
+
# Empty / truncated / mid-write JSON. Stay in the polling loop and let
|
|
2258
|
+
# the next tick re-read once the writer has finished.
|
|
2259
|
+
log_debug "[bug7-extra] $role signal file present but JSON not yet valid — continue polling"
|
|
2260
|
+
fi
|
|
2261
|
+
|
|
2262
|
+
# A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
|
|
2263
|
+
# ONLY for Worker polling — Verifier waits for verdict file, not done-claim
|
|
2264
|
+
#
|
|
2265
|
+
# v5.7 §4.14 (Bug 5 fix, CRITICAL): if Worker pane shows a pending TUI
|
|
2266
|
+
# permission prompt (`Do you want to ...` with `(y/n)` / `❯ 1.` affordance),
|
|
2267
|
+
# Worker is NOT done — it's stuck mid-write after the first done-claim pass.
|
|
2268
|
+
# Suspending A4 fallback in this case prevents premature Verifier dispatch
|
|
2269
|
+
# against partial Worker output. auto_dismiss_prompts() will already have
|
|
2270
|
+
# tried to clear the prompt; if it's still visible the worker is in a
|
|
2271
|
+
# multi-prompt sequence and needs more time, not an A4 short-circuit.
|
|
2272
|
+
if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
|
|
2273
|
+
local _a4_capture
|
|
2274
|
+
_a4_capture=$(tmux capture-pane -t "$pane_id" -p -S -50 2>/dev/null || true)
|
|
2275
|
+
local -a _a4_lines
|
|
2276
|
+
_a4_lines=("${(@f)_a4_capture}")
|
|
2277
|
+
local _a4_i _a4_n=${#_a4_lines[@]} _a4_blocked=0
|
|
2278
|
+
for ((_a4_i=1; _a4_i <= _a4_n; _a4_i++)); do
|
|
2279
|
+
if [[ "${_a4_lines[_a4_i]}" =~ $_PROMPT_RE ]]; then
|
|
2280
|
+
local _a4_prev="${_a4_lines[_a4_i-1]:-}"
|
|
2281
|
+
local _a4_cur="${_a4_lines[_a4_i]}"
|
|
2282
|
+
local _a4_next="${_a4_lines[_a4_i+1]:-}"
|
|
2283
|
+
if [[ "$_a4_prev" =~ $_AFFORDANCE_RE || "$_a4_cur" =~ $_AFFORDANCE_RE || "$_a4_next" =~ $_AFFORDANCE_RE ]]; then
|
|
2284
|
+
_a4_blocked=1
|
|
2285
|
+
break
|
|
2286
|
+
fi
|
|
2287
|
+
fi
|
|
2288
|
+
done
|
|
2289
|
+
if (( _a4_blocked )); then
|
|
2290
|
+
log " Worker pane has pending permission prompt — A4 fallback suspended (Bug 5 guard)"
|
|
2291
|
+
log_debug "[GOV] iter=$ITERATION a4_fallback_suspended=true reason=worker_prompt_pending pane=$pane_id"
|
|
2292
|
+
# Continue polling; do NOT auto-generate signal. auto_dismiss_prompts will
|
|
2293
|
+
# try to dismiss on the next loop iteration.
|
|
2294
|
+
else
|
|
2295
|
+
local dc_us_id
|
|
2296
|
+
dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
|
|
2297
|
+
if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
|
|
2298
|
+
# Bug #8 PR-B: defer to shared 4-way gate (codex critic P1.2).
|
|
2299
|
+
# _bug8_check_synth_allowed handles done-claim/git/dirty-tree gates
|
|
2300
|
+
# uniformly across handle_worker_exit_codex AND this inline path so
|
|
2301
|
+
# both codex-exit and inline-polling A4 enforce the same contract.
|
|
2302
|
+
if _bug8_check_synth_allowed "$ITERATION" "$dc_us_id" "inline_polling_a4_clean"; then
|
|
2303
|
+
log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Tree clean — auto-generating signal (A4 fallback)."
|
|
2304
|
+
log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
|
|
2305
|
+
# v0.15.4 PR-B2-FIX: Worker pane is alive and idling post-done-claim
|
|
2306
|
+
# (the canonical Bug #5/7 race window). Reap before synthesizing the
|
|
2307
|
+
# signal so the worker cannot revise done-claim or emit a late
|
|
2308
|
+
# iter-signal that races the leader's synthesized one. Mirror of
|
|
2309
|
+
# Bug #7 Fix-Q parity at run_ralph_desk.zsh:3181 — kill before lock,
|
|
2310
|
+
# lock before synth-write so the next leader read sees a frozen
|
|
2311
|
+
# done-claim and a fresh signal_file in that order.
|
|
2312
|
+
_kill_pane_process "$pane_id" "worker-a4"
|
|
2313
|
+
_lock_sentinel "$DONE_CLAIM_FILE"
|
|
2314
|
+
echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim + clean tree)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' | atomic_write "$signal_file"
|
|
2315
|
+
_emit_a4_fallback_audit "$dc_us_id" "$ITERATION" "inline_polling_a4_clean"
|
|
2316
|
+
return 0
|
|
2317
|
+
else
|
|
2318
|
+
# Bug #8 PR-B (codex critic round-2 P2): hard-stop rc=2 so the
|
|
2319
|
+
# main worker loop (L3119) treats this BLOCKED as terminal,
|
|
2320
|
+
# matching the handle_worker_exit_codex blocked path. rc=1 is
|
|
2321
|
+
# ambiguous — caller may interpret it as a recoverable poll
|
|
2322
|
+
# failure and re-loop while the BLOCKED sentinel is on disk.
|
|
2323
|
+
return 2
|
|
2324
|
+
fi
|
|
2325
|
+
fi
|
|
2326
|
+
fi
|
|
2327
|
+
fi
|
|
2328
|
+
|
|
2329
|
+
# API transient-error recovery with bounded backoff
|
|
2330
|
+
local pane_output_for_retry
|
|
2331
|
+
pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
|
|
2332
|
+
local is_api_text_retry=0
|
|
2333
|
+
if [[ -n "$pane_output_for_retry" ]] &&
|
|
2334
|
+
( echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])500([^[:digit:]]|$)' \
|
|
2335
|
+
|| echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])529([^[:digit:]]|$)' \
|
|
2336
|
+
|| echo "$pane_output_for_retry" | grep -qi 'overloaded' \
|
|
2337
|
+
|| echo "$pane_output_for_retry" | grep -qi 'too many requests' \
|
|
2338
|
+
|| echo "$pane_output_for_retry" | grep -qi 'service unavailable' ); then
|
|
2339
|
+
is_api_text_retry=1
|
|
2340
|
+
fi
|
|
2341
|
+
|
|
2342
|
+
if (( is_api_text_retry )) || is_api_error "$pane_id"; then
|
|
2343
|
+
(( api_retry_count++ ))
|
|
2344
|
+
log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
|
|
2345
|
+
if (( api_retry_count >= _API_MAX_RETRIES )); then
|
|
2346
|
+
log_error "API unavailable after ${_API_MAX_RETRIES} retries"
|
|
2347
|
+
write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries" "" "infra_failure"
|
|
2348
|
+
return 2
|
|
2349
|
+
fi
|
|
2350
|
+
# A5: If pane shows "queued messages" or rate-limit corruption, restart pane
|
|
2351
|
+
if echo "$pane_output_for_retry" | grep -qi 'queued messages'; then
|
|
2352
|
+
log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
|
|
2353
|
+
log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
|
|
2354
|
+
tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
|
|
2355
|
+
tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
|
|
2356
|
+
wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
|
|
2357
|
+
fi
|
|
2358
|
+
sleep "$_API_RETRY_INTERVAL_S"
|
|
2359
|
+
continue
|
|
2360
|
+
else
|
|
2361
|
+
api_retry_count=0
|
|
2362
|
+
fi
|
|
2363
|
+
|
|
2364
|
+
# Check heartbeat freshness (tmux pattern)
|
|
2365
|
+
if [[ -f "$heartbeat_file" ]]; then
|
|
2366
|
+
if check_heartbeat_exited "$heartbeat_file"; then
|
|
2367
|
+
# Process exited but no signal file -- give a brief grace period
|
|
2368
|
+
sleep 3
|
|
2369
|
+
if [[ -f "$signal_file" ]]; then
|
|
2370
|
+
log " Signal file detected after process exit: $signal_file"
|
|
2371
|
+
return 0
|
|
2372
|
+
fi
|
|
2373
|
+
# Dispatch to engine-specific exit handler
|
|
2374
|
+
if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
|
|
2375
|
+
# Bug #8 PR-B: handle_worker_exit_codex now returns 1 when it has
|
|
2376
|
+
# written a BLOCKED sentinel (no done-claim, dirty tree, git
|
|
2377
|
+
# unverifiable). Propagate the return so main loop stops, instead
|
|
2378
|
+
# of swallowing it with `return 0` and continuing as if the poll
|
|
2379
|
+
# had succeeded.
|
|
2380
|
+
if handle_worker_exit_codex "$ITERATION" "$signal_file"; then
|
|
2381
|
+
return 0
|
|
2382
|
+
else
|
|
2383
|
+
return 2
|
|
2384
|
+
fi
|
|
2385
|
+
fi
|
|
2386
|
+
# Claude path (or verifier of any engine)
|
|
2387
|
+
if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
|
|
2388
|
+
# Reset poll timer for the restart
|
|
2389
|
+
poll_start=$(date +%s)
|
|
2390
|
+
nudge_count=0
|
|
2391
|
+
LAST_PANE_CONTENT[$pane_id]=""
|
|
2392
|
+
PANE_IDLE_SINCE[$pane_id]=$(date +%s)
|
|
2393
|
+
sleep "$POLL_INTERVAL"
|
|
2394
|
+
continue
|
|
2395
|
+
else
|
|
2396
|
+
return 1 # max restarts exceeded
|
|
2397
|
+
fi
|
|
2398
|
+
fi
|
|
2399
|
+
|
|
2400
|
+
if ! check_heartbeat "$heartbeat_file"; then
|
|
2401
|
+
log " WARNING: $role heartbeat stale (>${HEARTBEAT_STALE_THRESHOLD}s)"
|
|
2402
|
+
(( HEARTBEAT_STALE_COUNT++ ))
|
|
2403
|
+
# Circuit breaker: 3 consecutive heartbeat stale events
|
|
2404
|
+
if (( HEARTBEAT_STALE_COUNT >= 3 )); then
|
|
2405
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
|
|
2406
|
+
log_error "Circuit breaker: 3 consecutive heartbeat stale events"
|
|
2407
|
+
return 1
|
|
2408
|
+
fi
|
|
2409
|
+
# Attempt restart
|
|
2410
|
+
if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
|
|
2411
|
+
poll_start=$(date +%s)
|
|
2412
|
+
nudge_count=0
|
|
2413
|
+
continue
|
|
2414
|
+
else
|
|
2415
|
+
return 1
|
|
2416
|
+
fi
|
|
2417
|
+
else
|
|
2418
|
+
# Heartbeat is fresh, reset stale counter
|
|
2419
|
+
HEARTBEAT_STALE_COUNT=0
|
|
2420
|
+
fi
|
|
2421
|
+
fi
|
|
2422
|
+
|
|
2423
|
+
# Dead pane detection during poll: check if claude/codex process died
|
|
2424
|
+
local poll_cmd
|
|
2425
|
+
poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
|
|
2426
|
+
# Dead pane detection — delegates to check_dead_pane() for engine-aware logic
|
|
2427
|
+
if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
|
|
2428
|
+
log " WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
|
|
2429
|
+
log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
|
|
2430
|
+
# Return failure so caller can handle recovery
|
|
2431
|
+
return 1
|
|
2432
|
+
fi
|
|
2433
|
+
|
|
2434
|
+
# v5.7 §4.13.a: window-bounded prompt auto-dismiss (replaces broad inline grep).
|
|
2435
|
+
# check_and_nudge_idle_pane also calls auto_dismiss_prompts internally, but
|
|
2436
|
+
# we keep this explicit call so dismiss happens BEFORE the idle/nudge check
|
|
2437
|
+
# and is logged with iter context.
|
|
2438
|
+
auto_dismiss_prompts "$pane_id"
|
|
2439
|
+
|
|
2440
|
+
# v5.7 §4.16: bounded prompt-stall escalation. If pane has been prompt-stuck
|
|
2441
|
+
# for PROMPT_STALL_TIMEOUT (5min default) or dismiss attempts exceed
|
|
2442
|
+
# PROMPT_DISMISS_FAIL_LIMIT, write BLOCKED `infra_failure` and exit the poll.
|
|
2443
|
+
# Closes the "alive process = infinite extend" gap (codex Critic HIGH).
|
|
2444
|
+
if ! check_prompt_stall "$pane_id"; then
|
|
2445
|
+
return 2 # signal: hard-failed, do not retry
|
|
2446
|
+
fi
|
|
2447
|
+
|
|
2448
|
+
# v5.7 §4.17 (codex Critic HIGH): generic no-progress timeout. Catches
|
|
2449
|
+
# undetected prompts, hung network calls, or any other alive-but-frozen
|
|
2450
|
+
# state. PROGRESS_NO_CHANGE_TIMEOUT defaults to 10 minutes. Independent
|
|
2451
|
+
# of regex prompt detection — fires whenever pane content is byte-equal
|
|
2452
|
+
# for too long even when Worker process is "alive".
|
|
2453
|
+
if ! check_no_progress "$pane_id"; then
|
|
2454
|
+
return 2 # hard-failed, infra_failure recorded
|
|
2455
|
+
fi
|
|
2456
|
+
|
|
2457
|
+
# Idle pane nudging (tmux pattern)
|
|
2458
|
+
check_and_nudge_idle_pane "$pane_id" "nudge_count"
|
|
2459
|
+
|
|
2460
|
+
sleep "$POLL_INTERVAL"
|
|
2461
|
+
done
|
|
2462
|
+
}
|
|
2463
|
+
|
|
2464
|
+
# =============================================================================
|
|
2465
|
+
# Consensus Verification (run two verifiers sequentially in same pane)
|
|
2466
|
+
# =============================================================================
|
|
2467
|
+
|
|
2468
|
+
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
# Arguments:
#   $1 iter         - iteration number (used in trigger/prompt file names and logs)
#   $2 engine       - "codex" selects the codex launch + file-poll path; anything
#                     else takes the claude launch + poll_for_signal path
#   $3 model        - passed to write_verifier_trigger and to the claude launcher
#                     (the codex launcher uses $VERIFIER_CODEX_MODEL instead)
#   $4 suffix       - file-name suffix, "-claude" or "-codex"
#   $5 verdict_dest - path that receives a copy of the accepted verdict file
# Globals: reads LOGS_DIR, SESSION_CONFIG, VERDICT_FILE, VERIFIER_HEARTBEAT,
#          ITER_TIMEOUT, POLL_INTERVAL, VERIFIER_CODEX_MODEL,
#          VERIFIER_CODEX_REASONING, VERIFIER_EFFORT; may reassign VERIFIER_PANE.
# Returns: 0 once a verdict was accepted and copied to $verdict_dest;
#          1 on launch failure, poll failure, or codex timeout without a verdict.
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Write trigger for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  # NOTE(review): trigger_file is not referenced again in this function. It is
  # kept because zsh locals are visible to callees; confirm nothing downstream
  # reads it before removing.
  local trigger_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verifier${suffix}-trigger.sh"
  local prompt_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verifier${suffix}-prompt.md"

  # Clean previous Verifier session (with dead pane detection)
  local verifier_cmd
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ -z "$verifier_cmd" ]]; then
    # tmux reported no command for the pane: treat it as gone and replace it.
    log " Verifier pane $VERIFIER_PANE is gone — replacing..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
    replace_worker_pane "$VERIFIER_PANE" "verifier"
    VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
    log " New verifier pane: $VERIFIER_PANE"
  elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
    # Bare shell: cancel any partially typed input and clear the screen.
    log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
    tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
    sleep 0.2
    tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
    sleep 0.3
  elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    # A previous TUI is still running: interrupt it, then ask it to exit.
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier — dispatch to engine-specific function
  local verifier_launch
  if [[ "$engine" = "codex" ]]; then
    verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
    launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
    log_debug "Verifier$suffix codex TUI dispatched"
  else
    verifier_launch="$(build_claude_cmd tui "$model" "" "" "$VERIFIER_EFFORT")"
    if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi
    log_debug "Verifier$suffix claude dispatched"
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex exec: file poll + short grace period after verdict detected
    log " Polling for verify-verdict.json ($suffix, codex TUI)..."
    local codex_poll_start
    codex_poll_start=$(date +%s)
    local _verdict_detected_at=0
    # Declared once, outside the loop: with zsh's default TYPESET_SILENT-unset
    # behavior, re-running a bare `local name` for an existing local prints
    # `name=value` to stdout on every later iteration.
    local _grace_elapsed _pane_cmd codex_elapsed
    while true; do
      # Wait for verdict file with valid JSON
      if [[ -f "$VERDICT_FILE" ]] && jq . "$VERDICT_FILE" >/dev/null 2>&1; then
        if (( _verdict_detected_at == 0 )); then
          _verdict_detected_at=$(date +%s)
          log " Verdict file detected. Grace period (30s) for codex to finalize..."
        fi
        # Grace period: 30s after verdict detection, proceed regardless of pane state
        _grace_elapsed=$(( $(date +%s) - _verdict_detected_at ))
        if (( _grace_elapsed >= 30 )); then
          log " Grace period complete. Proceeding."
          break
        fi
        # Early exit: if pane returned to shell, no need to wait
        _pane_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null || echo "")
        if [[ "$_pane_cmd" = "zsh" || "$_pane_cmd" = "bash" || -z "$_pane_cmd" ]]; then
          log " Codex verifier$suffix process exited. Proceeding."
          break
        fi
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        if (( _verdict_detected_at > 0 )); then
          log " Codex verifier$suffix timed out waiting, but verdict exists. Proceeding."
          break
        fi
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log " Polling for verify-verdict.json ($suffix)..."
    # Capture the real exit status. Inside `if ! cmd; then`, `$?` is the status
    # of the negation (always 0), so the rc==2 branch could never be taken.
    local verifier_poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix" || verifier_poll_rc=$?
    if (( verifier_poll_rc != 0 )); then
      if (( verifier_poll_rc == 2 )); then
        # rc=2 is poll_for_signal's hard-failure code; skip the extra error log.
        return 1
      fi
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Bug #7 Fix-Q/R: reap verifier pane the moment we accept the verdict so
  # codex/claude cannot keep self-reviewing and rewrite verify-verdict.json.
  # Lock applied AFTER cp so the archived snapshot is also frozen at intent.
  _kill_pane_process "$VERIFIER_PANE" "verifier-${suffix}"

  # Copy verdict to destination
  cp "$VERDICT_FILE" "$verdict_dest"
  _lock_sentinel "$VERDICT_FILE"
  # PR-0b-narrow: stamp leader handshake ack on the verdict (audit-only).
  _stamp_ack_field "$VERDICT_FILE"
  log " Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
|
|
2592
|
+
|
|
2593
|
+
# --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
# Arguments:
#   $1 iter - iteration number (used in log and archive file names)
# Globals: reads US_LIST (comma-separated US ids), VERIFY_MODE, SIGNAL_FILE,
#          VERDICT_FILE, VERIFIER_PANE, VERIFIER_ENGINE, VERIFIER_MODEL,
#          VERIFIER_EFFORT, VERIFIER_HEARTBEAT, LOGS_DIR, VERIFICATION_CMD.
# Returns 0 if all US pass + integration check pass, 1 if any US fails, 2 if integration fails.
# Sets FAILED_US global on failure.
run_sequential_final_verify() {
  local iter="$1"
  FAILED_US=""

  log " Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
  log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"

  # Declared once, outside the loop: with zsh's default TYPESET_SILENT-unset
  # behavior, re-running a bare `local name` for an existing local prints
  # `name=value` to stdout on every US after the first.
  local orig_signal verifier_prompt verifier_cmd verifier_launch verdict poll_rc

  for us in $(echo "$US_LIST" | tr ',' ' '); do
    log " Final verify: checking $us..."

    # Temporarily override signal file to scope verifier to this US
    # NOTE(review): orig_signal is captured but never written back in this
    # function, so SIGNAL_FILE keeps the last per-US override. Confirm whether
    # the caller relies on that before adding a restore.
    orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
    echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"

    # Write scoped verifier trigger
    write_verifier_trigger "$iter"
    verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' "$iter").verifier-prompt.md"

    # Clean verifier pane
    verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
    if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
      tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
    fi
    wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

    # Drop the previous US's verdict BEFORE launching (same order as
    # run_single_verifier). Removing it after launch could delete a verdict the
    # new verifier had already written, leaving the poll to run into its timeout.
    rm -f "$VERDICT_FILE"

    # Launch verifier
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
    else
      verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
      launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || {
        log_error "Failed to launch verifier for $us"
        FAILED_US="$us"
        return 1
      }
    fi

    # Poll for verdict
    poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
    if (( poll_rc != 0 )); then
      log_error "Verifier poll failed for $us (rc=$poll_rc)"
      FAILED_US="$us"
      return 1
    fi

    # Bug #7 Fix-Q/R: reap verifier pane between per-US final verifications so
    # the previous codex/claude TUI cannot continue running while the next per-
    # US verifier dispatch reuses the same pane.
    _kill_pane_process "$VERIFIER_PANE" "verifier-final"
    _lock_sentinel "$VERDICT_FILE"
    # PR-0b-narrow: stamp leader handshake ack on the verdict (audit-only).
    _stamp_ack_field "$VERDICT_FILE"

    # Check verdict: anything other than the literal "pass" fails this US.
    verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
    if [[ "$verdict" != "pass" ]]; then
      FAILED_US="$us"
      log " Sequential final verify FAILED at $us"
      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
      return 1
    fi
    log " Sequential final verify: $us PASSED"

    # Archive per-US final verdict
    cp "$VERDICT_FILE" "$LOGS_DIR/iter-$(printf '%03d' "$iter").final-verdict-${us}.json" 2>/dev/null
  done

  # Integration check: run tests if VERIFICATION_CMD is set
  if [[ -n "${VERIFICATION_CMD:-}" ]]; then
    log " Running integration test suite after sequential verify..."
    log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
    # VERIFICATION_CMD is evaluated as shell code; its output is discarded and
    # only the exit status is used.
    if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
      log " Integration test suite FAILED"
      FAILED_US="integration"
      return 2
    fi
    log " Integration test suite PASSED"
  fi

  log " Sequential final verify: ALL PASSED"
  return 0
}
|
|
2686
|
+
|
|
2687
|
+
# --- US-005: Decide whether this signal gets consensus verification ---
# Arguments:
#   $1 (optional) - us_id carried by the signal; compared against "ALL"
# Globals: reads CONSENSUS_MODE (off|all|final-only).
# Returns: 0 when consensus should run, 1 for single-engine verification.
_should_use_consensus() {
  local us_scope="${1:-}"

  # "all": every verification goes through consensus.
  if [[ "$CONSENSUS_MODE" == "all" ]]; then
    return 0
  fi

  # "final-only": consensus only for the final ALL-scope verification.
  if [[ "$CONSENSUS_MODE" == "final-only" && "$us_scope" == "ALL" ]]; then
    return 0
  fi

  # "off", any unrecognized mode, or a non-ALL signal under final-only.
  return 1
}
|
|
2698
|
+
|
|
2699
|
+
# Private helper: merge the issues of both per-engine verdict files (tagging each
# with its source) and write a "fail" verdict to VERDICT_FILE.
#   $1 summary text   $2 recommended_state_transition   $3 round number
#   $4 claude verdict file   $5 codex verdict file
# Reads CLAUDE_VERDICT / CODEX_VERDICT. JSON is built with jq so that verdict
# strings and summaries containing quotes or backslashes stay valid JSON.
_consensus_write_fail_verdict() {
  local summary="$1" transition="$2" round="$3" claude_file="$4" codex_file="$5"
  local claude_issues codex_issues merged_issues
  claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_file" 2>/dev/null || echo '[]')
  codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_file" 2>/dev/null || echo '[]')
  merged_issues=$(echo "$claude_issues $codex_issues" | jq -s 'add // []')
  # Never hand jq an empty --argjson value if the merge produced nothing.
  [[ -n "$merged_issues" ]] || merged_issues='[]'
  jq -n \
    --arg verified_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg summary "$summary" \
    --argjson issues "$merged_issues" \
    --arg transition "$transition" \
    --arg claude "$CLAUDE_VERDICT" \
    --arg codex "$CODEX_VERDICT" \
    --argjson round "$round" \
    '{verdict: "fail", verified_at_utc: $verified_at, summary: $summary, issues: $issues, recommended_state_transition: $transition, consensus: {claude: $claude, codex: $codex, round: $round}}' \
    | atomic_write "$VERDICT_FILE"
}

# --- US-004: Run consensus verification (claude + codex sequentially) ---
# Arguments:
#   $1 iter - iteration number (used in per-engine verdict and fix-contract names)
# Globals: reads LOGS_DIR, VERIFIER_MODEL, VERIFIER_CODEX_MODEL,
#          VERIFIER_CODEX_REASONING, VERDICT_FILE; sets CONSENSUS_ROUND,
#          CLAUDE_VERDICT, CODEX_VERDICT, ITER_VERIFIER_CLAUDE_DURATION_S,
#          ITER_VERIFIER_CODEX_DURATION_S.
# Returns: 0 when both engines report "pass" (merged pass verdict written);
#          2 on disagreement (fix contract + merged fail verdict written);
#          1 when a verifier run fails, the claude verdict stays null after one
#            retry, or the round limit is exceeded.
run_consensus_verification() {
  local iter="$1"
  local claude_verdict_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verify-verdict-claude.json"
  local codex_verdict_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verify-verdict-codex.json"

  CONSENSUS_ROUND=0
  CLAUDE_VERDICT=""
  CODEX_VERDICT=""

  # Loop-scoped locals are declared once here rather than inside the loop body.
  local _claude_t0 _codex_t0 _combined_action fix_contract

  # NOTE(review): CONSENSUS_ROUND is reset to 0 above and every path through the
  # first iteration returns (pass -> 0, verifier failure -> 1, disagreement with
  # round 1 < 6 -> 2), so as written rounds 2-6 and the "after 6 rounds" tail
  # below are not reachable from a single call. Confirm whether the caller is
  # meant to own the round counter.
  while (( CONSENSUS_ROUND < 6 )); do
    (( CONSENSUS_ROUND++ ))
    log " Consensus round $CONSENSUS_ROUND/6..."

    # Run claude verifier first
    _claude_t0=$(date +%s)
    if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
      log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
    CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
    # A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
    if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
      log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
      log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
      rm -f "$claude_verdict_file" 2>/dev/null
      if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
        log_error "Claude verifier retry also failed"
        return 1
      fi
      CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
      if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
        log_error "Claude verdict still null after retry — consensus cannot proceed"
        return 1
      fi
    fi
    log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"

    # consensus-fail-fast removed (complexity vs value too low)

    # Run codex verifier second
    _codex_t0=$(date +%s)
    if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
      log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
    CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
    log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"

    log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
    _combined_action="retry"
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then _combined_action="pass"
    elif (( CONSENSUS_ROUND >= 6 )); then _combined_action="blocked"
    fi
    log_debug "[GOV] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"

    # Both pass → success
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
      # Create merged verdict with per-engine details. Built with jq so the
      # verdict file paths are JSON-escaped.
      jq -n \
        --arg verified_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        --arg claude_file "$claude_verdict_file" \
        --arg codex_file "$codex_verdict_file" \
        --argjson round "$CONSENSUS_ROUND" \
        '{verdict: "pass", verified_at_utc: $verified_at, summary: "Consensus PASS: both claude and codex verified independently", recommended_state_transition: "complete", consensus: {claude: {verdict: "pass", file: $claude_file}, codex: {verdict: "pass", file: $codex_file}, round: $round}}' \
        | atomic_write "$VERDICT_FILE"
      return 0
    fi

    # Consensus disagreement
    log_debug "[GOV] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"

    # NOTE: pre_existing_failure heuristic was removed (v0.3.5).
    # It used unreliable grep-in-description string matching to classify
    # consensus failures as "pre-existing", bypassing the consensus rule.
    # Consensus disagreement now ALWAYS flows to fix contract.
    # Codex CLI crash (no verdict file) is handled upstream via run_single_verifier return 1 → BLOCKED.

    # --- Consensus disagreement: build fix contract ---
    fix_contract="$LOGS_DIR/iter-$(printf '%03d' "$iter").fix-contract.md"
    {
      echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
      echo ""
      echo "## Claude Verdict: $CLAUDE_VERDICT"
      if [[ "$CLAUDE_VERDICT" = "fail" ]]; then
        echo "### Claude Issues"
        jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$claude_verdict_file" 2>/dev/null || echo "- (no structured issues)"
      fi
      echo ""
      echo "## Codex Verdict: $CODEX_VERDICT"
      if [[ "$CODEX_VERDICT" = "fail" ]]; then
        echo "### Codex Issues"
        jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$codex_verdict_file" 2>/dev/null || echo "- (no structured issues)"
      fi
      echo ""
      echo "## Traceability"
      echo "Only changes that resolve a listed issue are allowed."
    } | atomic_write "$fix_contract"

    log " Combined fix contract: $fix_contract"

    # If this is not the last round, the caller will dispatch the Worker with the fix contract
    # For now, write a fail verdict so the main loop can handle the fix loop
    if (( CONSENSUS_ROUND < 6 )); then
      # Merged fail verdict for the main loop — includes issues from BOTH verdicts
      _consensus_write_fail_verdict \
        "Consensus disagreement (round $CONSENSUS_ROUND/6): claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
        "continue" "$CONSENSUS_ROUND" "$claude_verdict_file" "$codex_verdict_file"
      return 2  # special return: consensus disagreement, needs retry
    fi
  done

  # Max consensus rounds exceeded — include issues from both verdicts
  log_error "Consensus failed after 6 rounds"
  _consensus_write_fail_verdict \
    "Consensus failed after 6 rounds: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
    "blocked" 6 "$claude_verdict_file" "$codex_verdict_file"
  return 1
}
|
|
2849
|
+
|
|
2850
|
+
# =============================================================================
|
|
2851
|
+
# Main Leader Loop
|
|
2852
|
+
# =============================================================================
|
|
2853
|
+
|
|
2854
|
+
main() {
|
|
2855
|
+
# --- US-026 R14 P0: project-scoped runner lockfile (mkdir atomic) ---
|
|
2856
|
+
# Prevents duplicate runners on the same project root regardless of slug.
|
|
2857
|
+
# Different ROOT_HASH allows independent parallel runners across projects.
|
|
2858
|
+
mkdir -p "$(dirname "$RUNNER_LOCKFILE_PATH")" 2>/dev/null
|
|
2859
|
+
if ! mkdir "$RUNNER_LOCKDIR" 2>/dev/null; then
|
|
2860
|
+
local existing existing_slug
|
|
2861
|
+
existing=$(jq -r '.pid' "$RUNNER_LOCKFILE_PATH" 2>/dev/null || echo 0)
|
|
2862
|
+
existing_slug=$(jq -r '.slug // "unknown"' "$RUNNER_LOCKFILE_PATH" 2>/dev/null || echo unknown)
|
|
2863
|
+
if [[ "$existing" -gt 0 ]] && kill -0 "$existing" 2>/dev/null; then
|
|
2864
|
+
echo "duplicate rlp-desk runner detected on this project root. existing pid=$existing slug=$existing_slug, this attempt slug=$SLUG. exiting." >&2
|
|
2865
|
+
echo " Recover with: rm -rf '$RUNNER_LOCKDIR' '$RUNNER_LOCKFILE_PATH' (only if pid $existing is confirmed dead)" >&2
|
|
2866
|
+
exit 1
|
|
2867
|
+
fi
|
|
2868
|
+
rm -rf "$RUNNER_LOCKDIR"
|
|
2869
|
+
mkdir "$RUNNER_LOCKDIR" 2>/dev/null || {
|
|
2870
|
+
echo "failed to acquire runner lock after stale cleanup; another wrapper raced ahead. exit 1" >&2
|
|
2871
|
+
exit 1
|
|
2872
|
+
}
|
|
2873
|
+
echo "stale runner lockfile cleaned (pid $existing dead) — acquired" >&2
|
|
2874
|
+
fi
|
|
2875
|
+
printf '{"pid":%s,"slug":"%s","root":"%s","started_at":"%s"}\n' \
|
|
2876
|
+
"$$" "$SLUG" "$ROOT" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$RUNNER_LOCKFILE_PATH"
|
|
2877
|
+
|
|
2878
|
+
# --- Lockfile: prevent duplicate execution (ZSH-4 race-safe, v0.17.1) ---
|
|
2879
|
+
# Delegates to acquire_slug_lock (lib_ralph_desk.zsh): atomic set -C fast path +
|
|
2880
|
+
# mkdir-mutex-serialized, PID-reaped stale recovery. Race-safe vs concurrent
|
|
2881
|
+
# recoverers, gap-starters, and a crashed-recoverer mutex leak.
|
|
2882
|
+
if acquire_slug_lock "$LOCKFILE_PATH"; then
|
|
2883
|
+
LOCKFILE_ACQUIRED=1
|
|
2884
|
+
else
|
|
2885
|
+
local lock_pid
|
|
2886
|
+
lock_pid=$(cat "$LOCKFILE_PATH" 2>/dev/null)
|
|
2887
|
+
log_error "Another instance is already running or won the lock race (PID ${lock_pid:-unknown}). Kill it or rm $LOCKFILE_PATH"
|
|
2888
|
+
exit 1
|
|
2889
|
+
fi
|
|
2890
|
+
# US-023 R11 P2-K: chain `_emit_final_cost_log` so cost-log.jsonl is never silently empty on exit.
|
|
2891
|
+
trap '_emit_final_cost_log; cleanup' EXIT INT TERM
|
|
2892
|
+
mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
|
|
2893
|
+
|
|
2894
|
+
# --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
|
|
2895
|
+
mkdir -p "$ANALYTICS_DIR" 2>/dev/null
|
|
2896
|
+
|
|
2897
|
+
# --- debug.log versioning (in analytics dir, --debug only) ---
|
|
2898
|
+
if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
|
|
2899
|
+
local dbg_n=1
|
|
2900
|
+
while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
|
|
2901
|
+
(( dbg_n++ ))
|
|
2902
|
+
done
|
|
2903
|
+
mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
|
|
2904
|
+
fi
|
|
2905
|
+
|
|
2906
|
+
# --- campaign.jsonl versioning (always-on) ---
|
|
2907
|
+
if [[ -f "$CAMPAIGN_JSONL" ]]; then
|
|
2908
|
+
local cj_n=1
|
|
2909
|
+
while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
|
|
2910
|
+
(( cj_n++ ))
|
|
2911
|
+
done
|
|
2912
|
+
mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
|
|
2913
|
+
fi
|
|
2914
|
+
|
|
2915
|
+
# --- metadata.json: always write at campaign start (cross-project identification) ---
|
|
2916
|
+
jq -n \
|
|
2917
|
+
--arg slug "$SLUG" \
|
|
2918
|
+
--arg project_root "$ROOT" \
|
|
2919
|
+
--arg project_name "$(basename "$ROOT")" \
|
|
2920
|
+
--arg campaign_status "running" \
|
|
2921
|
+
--arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
2922
|
+
--arg end_time "" \
|
|
2923
|
+
--arg worker_model "$WORKER_MODEL" \
|
|
2924
|
+
--arg verifier_model "$VERIFIER_MODEL" \
|
|
2925
|
+
--argjson debug "$DEBUG" \
|
|
2926
|
+
--argjson with_sv "$WITH_SELF_VERIFICATION" \
|
|
2927
|
+
--argjson with_sv_requested "$WITH_SELF_VERIFICATION_REQUESTED" \
|
|
2928
|
+
--arg sv_skipped_reason "$SV_SKIPPED_REASON" \
|
|
2929
|
+
--arg lane_mode "$LANE_MODE" \
|
|
2930
|
+
--argjson consensus "${VERIFY_CONSENSUS:-0}" \
|
|
2931
|
+
'{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, with_self_verification_requested: $with_sv_requested, sv_skipped_reason: $sv_skipped_reason, lane_mode: $lane_mode, consensus: $consensus}' \
|
|
2932
|
+
> "$METADATA_FILE"
|
|
2933
|
+
|
|
2934
|
+
# --- Startup ---
|
|
2935
|
+
log "Ralph Desk Tmux Runner starting..."
|
|
2936
|
+
log " Slug: $SLUG"
|
|
2937
|
+
log " Root: $ROOT"
|
|
2938
|
+
log " Max iterations: $MAX_ITER"
|
|
2939
|
+
log " Worker model: $WORKER_MODEL"
|
|
2940
|
+
log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
|
|
2941
|
+
log " Verify mode: $VERIFY_MODE"
|
|
2942
|
+
log " Consensus mode: $CONSENSUS_MODE"
|
|
2943
|
+
log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
|
|
2944
|
+
log " Poll interval: ${POLL_INTERVAL}s"
|
|
2945
|
+
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
2946
|
+
# --- Debug: Log execution plan ---
|
|
2947
|
+
if (( DEBUG )); then
|
|
2948
|
+
# Extract US IDs from PRD
|
|
2949
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
2950
|
+
local us_list=""
|
|
2951
|
+
if [[ -f "$prd_file" ]]; then
|
|
2952
|
+
us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
2953
|
+
fi
|
|
2954
|
+
local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
|
|
2955
|
+
|
|
2956
|
+
log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
2957
|
+
log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
2958
|
+
log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
2959
|
+
log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
|
|
2960
|
+
log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION (requested=$WITH_SELF_VERIFICATION_REQUESTED skipped=${SV_SKIPPED_REASON:-none}) debug=$DEBUG"
|
|
2961
|
+
|
|
2962
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
2963
|
+
# Build expected flow
|
|
2964
|
+
local expected_flow=""
|
|
2965
|
+
for us in $(echo "$us_list" | tr ',' ' '); do
|
|
2966
|
+
expected_flow="${expected_flow}worker->verify($us)->"
|
|
2967
|
+
done
|
|
2968
|
+
expected_flow="${expected_flow}verify(ALL)->COMPLETE"
|
|
2969
|
+
log_debug "[OPTION] expected_flow=$expected_flow"
|
|
2970
|
+
else
|
|
2971
|
+
log_debug "[OPTION] expected_flow=worker(all)->verify(ALL)->COMPLETE"
|
|
2972
|
+
fi
|
|
2973
|
+
|
|
2974
|
+
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
|
|
2975
|
+
log_debug "[OPTION] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
|
|
2976
|
+
fi
|
|
2977
|
+
fi
|
|
2978
|
+
|
|
2979
|
+
# Extract US list for per-US sequencing
|
|
2980
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
2981
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
2982
|
+
if [[ -f "$prd_file" ]]; then
|
|
2983
|
+
US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
2984
|
+
fi
|
|
2985
|
+
|
|
2986
|
+
# Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
|
|
2987
|
+
local memory_file="$DESK/memos/${SLUG}-memory.md"
|
|
2988
|
+
if [[ -f "$memory_file" ]]; then
|
|
2989
|
+
local completed_us
|
|
2990
|
+
completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
2991
|
+
if [[ -n "$completed_us" ]]; then
|
|
2992
|
+
VERIFIED_US="$completed_us"
|
|
2993
|
+
log " Loaded completed stories from memory: $VERIFIED_US"
|
|
2994
|
+
log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
|
|
2995
|
+
fi
|
|
2996
|
+
fi
|
|
2997
|
+
|
|
2998
|
+
# D1: Fallback — restore verified_us from status.json if memory had none
|
|
2999
|
+
if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
|
|
3000
|
+
local status_verified
|
|
3001
|
+
status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
|
|
3002
|
+
if [[ -n "$status_verified" ]]; then
|
|
3003
|
+
VERIFIED_US="$status_verified"
|
|
3004
|
+
log " Restored verified_us from status.json: $VERIFIED_US"
|
|
3005
|
+
log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
|
|
3006
|
+
fi
|
|
3007
|
+
fi
|
|
3008
|
+
fi
|
|
3009
|
+
|
|
3010
|
+
# Initialize PRD snapshot state for live update detection
|
|
3011
|
+
PREV_PRD_HASH=$(compute_prd_hash)
|
|
3012
|
+
PREV_PRD_US_LIST=$(count_prd_us)
|
|
3013
|
+
|
|
3014
|
+
# Dependency checks
|
|
3015
|
+
check_dependencies
|
|
3016
|
+
|
|
3017
|
+
# Print security warning (governance.md s7: --dangerously-skip-permissions)
|
|
3018
|
+
print_security_warning
|
|
3019
|
+
|
|
3020
|
+
# Validate scaffold
|
|
3021
|
+
validate_scaffold
|
|
3022
|
+
|
|
3023
|
+
# Check for existing sessions
|
|
3024
|
+
check_existing_sessions
|
|
3025
|
+
|
|
3026
|
+
# Create tmux session with pane IDs (governance.md s7 step 1)
|
|
3027
|
+
create_session
|
|
3028
|
+
|
|
3029
|
+
# Set trap for cleanup on exit/error
|
|
3030
|
+
# US-023 R11 P2-K: chain `_emit_final_cost_log` so cost-log.jsonl is never silently empty.
|
|
3031
|
+
trap '_emit_final_cost_log; cleanup' EXIT
|
|
3032
|
+
|
|
3033
|
+
# Initialize context hash for stale detection
|
|
3034
|
+
PREV_CONTEXT_HASH=$(compute_context_hash)
|
|
3035
|
+
|
|
3036
|
+
# --- governance.md s7: Leader Loop ---
|
|
3037
|
+
local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active
|
|
3038
|
+
|
|
3039
|
+
for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
|
|
3040
|
+
# US-024 R12 P0: lifecycle check site #2 — verify session/panes alive at iter entry.
|
|
3041
|
+
_r12_check_lifecycle "iter_start"
|
|
3042
|
+
log ""
|
|
3043
|
+
log "========== Iteration $ITERATION / $MAX_ITER =========="
|
|
3044
|
+
local ITER_START_TIME
|
|
3045
|
+
ITER_START_TIME=$(date +%s)
|
|
3046
|
+
local _iter_contract=""
|
|
3047
|
+
_iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
|
|
3048
|
+
log_debug "[FLOW] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
|
|
3049
|
+
|
|
3050
|
+
# --- governance.md s7 step 1: Check sentinels ---
|
|
3051
|
+
if [[ -f "$COMPLETE_SENTINEL" ]]; then
|
|
3052
|
+
log "COMPLETE sentinel found. Campaign succeeded."
|
|
3053
|
+
update_status "complete" "complete"
|
|
3054
|
+
return 0
|
|
3055
|
+
fi
|
|
3056
|
+
if [[ -f "$BLOCKED_SENTINEL" ]]; then
|
|
3057
|
+
log "BLOCKED sentinel found. Campaign blocked."
|
|
3058
|
+
update_status "blocked" "blocked"
|
|
3059
|
+
return 1
|
|
3060
|
+
fi
|
|
3061
|
+
|
|
3062
|
+
# PR-A (Bug #10): operator-recovery hygiene check.
|
|
3063
|
+
# When the operator hand-rolls a `phase=verify` recovery (jq-patches
|
|
3064
|
+
# status.json, writes manual iter-signal.json + done-claim.json, deletes
|
|
3065
|
+
# the blocked sentinel), the leader MUST honor that work instead of
|
|
3066
|
+
# deleting the artifacts and resetting to phase=worker. Mirrors the
|
|
3067
|
+
# Node-side guard in src/node/runner/campaign-main-loop.mjs.
|
|
3068
|
+
local SKIP_NEXT_WORKER=0
|
|
3069
|
+
local LAST_PHASE=""
|
|
3070
|
+
if [[ -f "$STATUS_FILE" ]] && command -v jq >/dev/null 2>&1; then
|
|
3071
|
+
LAST_PHASE=$(jq -r '.phase // ""' "$STATUS_FILE" 2>/dev/null)
|
|
3072
|
+
fi
|
|
3073
|
+
if [[ "$LAST_PHASE" == "verify" ]]; then
|
|
3074
|
+
local _iter_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
3075
|
+
if _validate_operator_recovery_artifacts \
|
|
3076
|
+
"$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$STATUS_FILE" "$_iter_prompt"; then
|
|
3077
|
+
log "[recovery] Resuming verify phase — operator manual recovery detected (iter=$ITERATION)"
|
|
3078
|
+
log_debug "[recovery] iter=$ITERATION skip_worker=true reason=manual_recovery_validated"
|
|
3079
|
+
SKIP_NEXT_WORKER=1
|
|
3080
|
+
else
|
|
3081
|
+
log "[recovery] phase=verify ignored: ${RECOVERY_FAIL_REASON}"
|
|
3082
|
+
log_debug "[recovery] iter=$ITERATION skip_worker=false reason=\"${RECOVERY_FAIL_REASON}\""
|
|
3083
|
+
fi
|
|
3084
|
+
fi
|
|
3085
|
+
|
|
3086
|
+
# PR-E (Phase C1, stabilization): operator-cleared BLOCKED recovery.
|
|
3087
|
+
# Pair to PR-A above. Runs AFTER PR-A (so phase=verify wins) and skipped
|
|
3088
|
+
# when SKIP_NEXT_WORKER=1 (PR-A already honored). Resets stale counters
|
|
3089
|
+
# in status.json when operator manually deleted the BLOCKED sentinel.
|
|
3090
|
+
# Mirrors Node `_validateBlockedRecovery` + branch in campaign-main-loop.mjs.
|
|
3091
|
+
if [[ "$LAST_PHASE" == "blocked" && "$SKIP_NEXT_WORKER" -eq 0 ]]; then
|
|
3092
|
+
local _blocked_sidecar="$MEMOS_DIR/${SLUG}-blocked.json"
|
|
3093
|
+
if _validate_blocked_recovery \
|
|
3094
|
+
"$BLOCKED_SENTINEL" "$_blocked_sidecar" "$STATUS_FILE"; then
|
|
3095
|
+
local _prev_reason
|
|
3096
|
+
_prev_reason=$(jq -r '.last_block_reason // ""' "$STATUS_FILE" 2>/dev/null)
|
|
3097
|
+
log "[recovery] Operator-cleared BLOCKED detected (was: ${_prev_reason:-unrecorded}). Resetting counters and resuming as worker. iter=$ITERATION"
|
|
3098
|
+
log_debug "[recovery] iter=$ITERATION blocked_recovery=applied reason=\"${BLOCKED_RECOVERY_FAIL_REASON:-sidecar absent or recoverable=true}\""
|
|
3099
|
+
# Reset counters in-process. update_status writes fresh status when
|
|
3100
|
+
# next phase transition fires. Operator's intent was a clean restart.
|
|
3101
|
+
CONSECUTIVE_FAILURES=0
|
|
3102
|
+
CONSECUTIVE_BLOCKS=0
|
|
3103
|
+
LAST_BLOCK_REASON=""
|
|
3104
|
+
# Archive sidecar (rename, not delete) for audit trail.
|
|
3105
|
+
_archive_recovered_sidecar "$_blocked_sidecar"
|
|
3106
|
+
else
|
|
3107
|
+
log "[recovery] phase=blocked ignored: ${BLOCKED_RECOVERY_FAIL_REASON}"
|
|
3108
|
+
log_debug "[recovery] iter=$ITERATION blocked_recovery=skipped reason=\"${BLOCKED_RECOVERY_FAIL_REASON}\""
|
|
3109
|
+
fi
|
|
3110
|
+
fi
|
|
3111
|
+
|
|
3112
|
+
if (( ! SKIP_NEXT_WORKER )); then
|
|
3113
|
+
# --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
|
|
3114
|
+
# Bug #7 Fix-R cleanup: unlock 0o444 sentinels written by the previous
|
|
3115
|
+
# iteration's reaper before rm so cleanup does not log permission noise.
|
|
3116
|
+
_unlock_sentinel "$SIGNAL_FILE"
|
|
3117
|
+
_unlock_sentinel "$VERDICT_FILE"
|
|
3118
|
+
rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
|
|
3119
|
+
rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null
|
|
3120
|
+
|
|
3121
|
+
# --- Clean previous claude session in panes (one-shot lifecycle) ---
|
|
3122
|
+
# Only needed from iteration 2 onwards (iteration 1 has fresh panes)
|
|
3123
|
+
if (( ITERATION > 1 )); then
|
|
3124
|
+
# Send C-c first (in case claude is mid-task), then /exit
|
|
3125
|
+
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
3126
|
+
sleep 1
|
|
3127
|
+
tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
|
|
3128
|
+
sleep 2
|
|
3129
|
+
# Wait for shell prompt before proceeding
|
|
3130
|
+
wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
|
|
3131
|
+
fi
|
|
3132
|
+
fi
|
|
3133
|
+
|
|
3134
|
+
# Reset per-iteration state
|
|
3135
|
+
local worker_nudge_count=0
|
|
3136
|
+
local verifier_nudge_count=0
|
|
3137
|
+
ITER_VERIFIER_START=""
|
|
3138
|
+
ITER_VERIFIER_END=""
|
|
3139
|
+
|
|
3140
|
+
# --- US-004: detect PRD changes for live update + re-split ---
|
|
3141
|
+
check_prd_update
|
|
3142
|
+
|
|
3143
|
+
# AC1: capture worker start timestamp (still set for downstream telemetry
|
|
3144
|
+
# even when the worker dispatch is skipped — recovery still consumes time).
|
|
3145
|
+
ITER_WORKER_START=$(date +%s)
|
|
3146
|
+
|
|
3147
|
+
local worker_launch=""
|
|
3148
|
+
if (( ! SKIP_NEXT_WORKER )); then
|
|
3149
|
+
# --- governance.md s7 step 4: Build worker prompt + trigger ---
|
|
3150
|
+
write_worker_trigger "$ITERATION"
|
|
3151
|
+
local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
3152
|
+
|
|
3153
|
+
update_status "worker" "running"
|
|
3154
|
+
|
|
3155
|
+
# --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
|
|
3156
|
+
log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
3157
|
+
|
|
3158
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
3159
|
+
worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
|
|
3160
|
+
if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
3161
|
+
write_blocked_sentinel "Worker codex failed to start in pane" "" "infra_failure"
|
|
3162
|
+
update_status "blocked" "worker_start_failed"
|
|
3163
|
+
return 1
|
|
3164
|
+
fi
|
|
3165
|
+
else
|
|
3166
|
+
worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
|
|
3167
|
+
if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
3168
|
+
write_blocked_sentinel "Worker claude failed to start in pane" "" "infra_failure"
|
|
3169
|
+
update_status "blocked" "worker_start_failed"
|
|
3170
|
+
return 1
|
|
3171
|
+
fi
|
|
3172
|
+
fi
|
|
3173
|
+
else
|
|
3174
|
+
# PR-A (Bug #10): one-shot recovery path. The operator's iter-signal.json
|
|
3175
|
+
# is already on disk; polling below picks it up immediately and the loop
|
|
3176
|
+
# transitions cleanly into the verifier phase. Persist phase=verify so a
|
|
3177
|
+
# subsequent crash-and-relaunch sees the same contract. SKIP_NEXT_WORKER
|
|
3178
|
+
# is local to this iteration so iter-N+1 dispatches the worker normally.
|
|
3179
|
+
update_status "verify" "running"
|
|
3180
|
+
log "[recovery] Skipping worker dispatch for iter=$ITERATION (one-shot, honoring operator manual recovery)"
|
|
3181
|
+
fi
|
|
3182
|
+
|
|
3183
|
+
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|
|
3184
|
+
# US-024 R12 P0: lifecycle check site #3 — verify panes alive after worker dispatch, before wait-loop.
|
|
3185
|
+
_r12_check_lifecycle "post_send"
|
|
3186
|
+
log " Polling for iter-signal.json..."
|
|
3187
|
+
local worker_poll_done=0
|
|
3188
|
+
while (( ! worker_poll_done )); do
|
|
3189
|
+
local worker_poll_rc=0
|
|
3190
|
+
if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
|
|
3191
|
+
worker_poll_done=1
|
|
3192
|
+
log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
|
|
3193
|
+
# Bug #7 Fix-Q/R: reap worker pane immediately so claude/codex cannot
|
|
3194
|
+
# self-review and rewrite iter-signal.json (1m43s drift observed).
|
|
3195
|
+
_kill_pane_process "$WORKER_PANE" "worker"
|
|
3196
|
+
_lock_sentinel "$SIGNAL_FILE"
|
|
3197
|
+
# v0.15.4 PR-B2-FIX: same worker pass also produced done-claim. Freeze
|
|
3198
|
+
# it alongside iter-signal so Bug #8 gates and the iter-NNN-done-claim
|
|
3199
|
+
# archive (lib_ralph_desk.zsh:602) read a snapshot the worker can no
|
|
3200
|
+
# longer revise. Symmetric with iter-signal/verdict lock contract.
|
|
3201
|
+
_lock_sentinel "$DONE_CLAIM_FILE"
|
|
3202
|
+
# PR-0b-narrow: stamp leader handshake ack on the iter-signal (audit-only).
|
|
3203
|
+
_stamp_ack_field "$SIGNAL_FILE"
|
|
3204
|
+
else
|
|
3205
|
+
worker_poll_rc=$?
|
|
3206
|
+
if (( worker_poll_rc == 2 )); then
|
|
3207
|
+
return 1
|
|
3208
|
+
fi
|
|
3209
|
+
# Check if Worker is still actively running (not stuck)
|
|
3210
|
+
local worker_cmd
|
|
3211
|
+
worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
3212
|
+
if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
|
|
3213
|
+
# Process alive — extend indefinitely (no hard ceiling kill)
|
|
3214
|
+
# Stale-context breaker and nudge system handle truly stuck workers
|
|
3215
|
+
local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
|
|
3216
|
+
local ceiling_exceeded=""
|
|
3217
|
+
if (( iter_elapsed >= HARD_CEILING )); then
|
|
3218
|
+
ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s — not enforced, logged only]"
|
|
3219
|
+
log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
|
|
3220
|
+
log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
|
|
3221
|
+
fi
|
|
3222
|
+
log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
|
|
3223
|
+
log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
|
|
3224
|
+
log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
|
|
3225
|
+
update_status "worker" "slow"
|
|
3226
|
+
# Loop continues — re-poll same iteration
|
|
3227
|
+
else
|
|
3228
|
+
# Worker is truly dead/stuck
|
|
3229
|
+
(( MONITOR_FAILURE_COUNT++ ))
|
|
3230
|
+
log_debug "[GOV] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
|
|
3231
|
+
if (( MONITOR_FAILURE_COUNT >= 3 )); then
|
|
3232
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
|
|
3233
|
+
write_blocked_sentinel "3 consecutive monitor failures (worker not active)" "" "infra_failure"
|
|
3234
|
+
update_status "blocked" "monitor_failures"
|
|
3235
|
+
return 1
|
|
3236
|
+
fi
|
|
3237
|
+
log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3) — will retry"
|
|
3238
|
+
update_status "worker" "poll_failed"
|
|
3239
|
+
log_debug "[FLOW] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd retry=true"
|
|
3240
|
+
# v0.14.3 P0-5 (Bug Report #5): previously this branch wrote BLOCKED
|
|
3241
|
+
# unconditionally even at counter 1/3, so a single transient
|
|
3242
|
+
# worker-dead detection halted the campaign in 5s instead of
|
|
3243
|
+
# honoring the 3-strike circuit breaker above (the MONITOR_FAILURE_COUNT >= 3 check). Removed
|
|
3244
|
+
# the unconditional sentinel write; the loop now continues so the
|
|
3245
|
+
# next polling tick can either confirm the dead state (counter
|
|
3246
|
+
# eventually reaches 3 → BLOCKED) or recover (worker resumes →
|
|
3247
|
+
# MONITOR_FAILURE_COUNT is reset to 0 right after this poll loop once a signal file exists).
|
|
3248
|
+
fi
|
|
3249
|
+
fi
|
|
3250
|
+
done
|
|
3251
|
+
|
|
3252
|
+
if [[ ! -f "$SIGNAL_FILE" ]]; then
|
|
3253
|
+
log_debug "[FLOW] iter=$ITERATION no_signal_after_poll=true continuing"
|
|
3254
|
+
# No signal — monitor failure, go to next iteration
|
|
3255
|
+
continue
|
|
3256
|
+
fi
|
|
3257
|
+
|
|
3258
|
+
# Reset monitor failure count on success
|
|
3259
|
+
MONITOR_FAILURE_COUNT=0
|
|
3260
|
+
|
|
3261
|
+
# AC1: capture worker end timestamp; reset consensus timing
|
|
3262
|
+
ITER_WORKER_END=$(date +%s)
|
|
3263
|
+
ITER_VERIFIER_CLAUDE_DURATION_S=""
|
|
3264
|
+
ITER_VERIFIER_CODEX_DURATION_S=""
|
|
3265
|
+
|
|
3266
|
+
# --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
|
|
3267
|
+
local signal_status
|
|
3268
|
+
signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
|
|
3269
|
+
local signal_summary
|
|
3270
|
+
signal_summary=$(jq -r '.summary // "no summary"' "$SIGNAL_FILE" 2>/dev/null)
|
|
3271
|
+
|
|
3272
|
+
log " Worker signal: status=$signal_status summary=\"$signal_summary\""
|
|
3273
|
+
|
|
3274
|
+
# Read us_id early for EXEC logging (also used later in verify branch)
|
|
3275
|
+
local signal_us_id_early=""
|
|
3276
|
+
signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
3277
|
+
log_debug "[FLOW] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
|
|
3278
|
+
|
|
3279
|
+
case "$signal_status" in
|
|
3280
|
+
continue)
|
|
3281
|
+
# --- governance.md s7 step 6: continue -> go to step 8 ---
|
|
3282
|
+
log " Worker requests continue. Moving to next iteration."
|
|
3283
|
+
update_status "worker" "continue"
|
|
3284
|
+
;;
|
|
3285
|
+
verify_partial)
|
|
3286
|
+
# US-019 R7 P1-G: Worker explicitly verified a subset of ACs and deferred the rest.
|
|
3287
|
+
# Verifier evaluates only verified_acs. Malformed (empty verified_acs) downgrades to blocked.
|
|
3288
|
+
local vp_count
|
|
3289
|
+
vp_count=$(jq -r '.verified_acs // [] | length' "$SIGNAL_FILE" 2>/dev/null || echo 0)
|
|
3290
|
+
if [[ "$vp_count" -eq 0 ]]; then
|
|
3291
|
+
log " Worker signal verify_partial but verified_acs is empty — downgrading to blocked (verify_partial_malformed)."
|
|
3292
|
+
local vp_us_id
|
|
3293
|
+
vp_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
3294
|
+
write_blocked_sentinel "verify_partial_malformed: empty verified_acs" "${vp_us_id:-${CURRENT_US:-ALL}}" "mission_abort"
|
|
3295
|
+
update_status "blocked" "verify_partial_malformed"
|
|
3296
|
+
break
|
|
3297
|
+
fi
|
|
3298
|
+
log " Worker signal verify_partial (verified_acs count=$vp_count). Routing to verify path."
|
|
3299
|
+
signal_status="verify"
|
|
3300
|
+
;&
|
|
3301
|
+
verify)
|
|
3302
|
+
# --- governance.md s7 step 7: Execute Verifier ---
|
|
3303
|
+
# Read us_id from signal for per-US scoping
|
|
3304
|
+
local signal_us_id=""
|
|
3305
|
+
signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
3306
|
+
log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
|
|
3307
|
+
|
|
3308
|
+
# AC1: capture verifier start timestamp
|
|
3309
|
+
ITER_VERIFIER_START=$(date +%s)
|
|
3310
|
+
|
|
3311
|
+
update_status "verifier" "running"
|
|
3312
|
+
|
|
3313
|
+
# --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
|
|
3314
|
+
if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
|
|
3315
|
+
log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
|
|
3316
|
+
local seq_rc=0
|
|
3317
|
+
run_sequential_final_verify "$ITERATION" || seq_rc=$?
|
|
3318
|
+
if (( seq_rc == 0 )); then
|
|
3319
|
+
write_complete_sentinel "Sequential final verify passed (all US verified individually)"
|
|
3320
|
+
update_status "complete" "pass"
|
|
3321
|
+
write_campaign_jsonl "$ITERATION" "ALL" "pass"
|
|
3322
|
+
return 0
|
|
3323
|
+
else
|
|
3324
|
+
# Sequential verify failed — fall through to fix loop with failed US
|
|
3325
|
+
log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
|
|
3326
|
+
signal_us_id="${FAILED_US:-ALL}"
|
|
3327
|
+
# Synthesize a fail verdict for the fix loop
|
|
3328
|
+
echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
|
|
3329
|
+
fi
|
|
3330
|
+
fi
|
|
3331
|
+
|
|
3332
|
+
# --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
|
|
3333
|
+
local use_consensus=0
|
|
3334
|
+
_should_use_consensus "$signal_us_id" && use_consensus=1
|
|
3335
|
+
|
|
3336
|
+
# --- Consensus vs single verification ---
|
|
3337
|
+
if (( use_consensus )); then
|
|
3338
|
+
# US-004: Run consensus verification (claude + codex sequentially)
|
|
3339
|
+
local consensus_rc=0
|
|
3340
|
+
run_consensus_verification "$ITERATION" || consensus_rc=$?
|
|
3341
|
+
|
|
3342
|
+
if (( consensus_rc == 2 )); then
|
|
3343
|
+
# Consensus disagreement — treat as fail, fix loop will handle
|
|
3344
|
+
log " Consensus disagreement, treating as fail."
|
|
3345
|
+
elif (( consensus_rc != 0 )); then
|
|
3346
|
+
# Consensus verification failed entirely
|
|
3347
|
+
log_error "Consensus verification failed"
|
|
3348
|
+
write_blocked_sentinel "Consensus verification failed after max rounds" "" "repeat_axis"
|
|
3349
|
+
update_status "blocked" "consensus_failed"
|
|
3350
|
+
return 1
|
|
3351
|
+
fi
|
|
3352
|
+
else
|
|
3353
|
+
# Standard single-engine verification
|
|
3354
|
+
write_verifier_trigger "$ITERATION"
|
|
3355
|
+
local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
|
|
3356
|
+
|
|
3357
|
+
# Step 7a: Clean previous Verifier session (with dead pane detection)
|
|
3358
|
+
local verifier_cmd
|
|
3359
|
+
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
3360
|
+
if [[ -z "$verifier_cmd" ]]; then
|
|
3361
|
+
log " Verifier pane $VERIFIER_PANE is gone — replacing..."
|
|
3362
|
+
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
|
|
3363
|
+
replace_worker_pane "$VERIFIER_PANE" "verifier"
|
|
3364
|
+
VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
|
|
3365
|
+
log " New verifier pane: $VERIFIER_PANE"
|
|
3366
|
+
elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
|
|
3367
|
+
log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
|
|
3368
|
+
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
3369
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
3370
|
+
sleep 0.2
|
|
3371
|
+
tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
|
|
3372
|
+
sleep 0.3
|
|
3373
|
+
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
3374
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
3375
|
+
sleep 0.5
|
|
3376
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
3377
|
+
sleep 2
|
|
3378
|
+
fi
|
|
3379
|
+
wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
|
|
3380
|
+
|
|
3381
|
+
local verifier_launch
|
|
3382
|
+
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
3383
|
+
verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
|
|
3384
|
+
else
|
|
3385
|
+
verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
|
|
3386
|
+
fi
|
|
3387
|
+
log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
|
|
3388
|
+
|
|
3389
|
+
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
3390
|
+
launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
|
|
3391
|
+
else
|
|
3392
|
+
if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
|
|
3393
|
+
update_status "verifier" "start_failed"
|
|
3394
|
+
continue
|
|
3395
|
+
fi
|
|
3396
|
+
fi
|
|
3397
|
+
|
|
3398
|
+
# Poll for verify-verdict.json
|
|
3399
|
+
log " Polling for verify-verdict.json..."
|
|
3400
|
+
# Wait for the Verifier to produce verify-verdict.json.
#
# The exit status is captured with `|| rc=$?` rather than reading `$?` inside
# `if ! cmd; then`: in the then-branch of a negated condition `$?` holds the
# status of the negation itself (always 0), so the rc==2 branch below could
# never be taken and every failure fell through to the BLOCKED path.
local verifier_poll_rc=0
poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier" || verifier_poll_rc=$?
if (( verifier_poll_rc != 0 )); then
  if (( verifier_poll_rc == 2 )); then
    # Mirrors the Worker poll loop: rc 2 aborts the run without writing a
    # BLOCKED sentinel from here.
    # NOTE(review): presumably poll_for_signal already recorded the terminal
    # state when it returns 2 — confirm against its definition.
    return 1
  fi
  log_error "Verifier poll failed"
  # Verifier is dead/stuck — BLOCK and let user decide
  write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection." "" "infra_failure"
  update_status "blocked" "verifier_dead"
  return 1
fi
|
|
3411
|
+
# Bug #7 Fix-Q/R: reap verifier pane immediately so codex cannot
|
|
3412
|
+
# rewrite verify-verdict.json post-detect (mtime drift fix).
|
|
3413
|
+
_kill_pane_process "$VERIFIER_PANE" "verifier"
|
|
3414
|
+
_lock_sentinel "$VERDICT_FILE"
|
|
3415
|
+
# PR-0b-narrow: stamp leader handshake ack on the verdict (audit-only).
|
|
3416
|
+
_stamp_ack_field "$VERDICT_FILE"
|
|
3417
|
+
fi
|
|
3418
|
+
|
|
3419
|
+
# AC1: capture verifier end timestamp
|
|
3420
|
+
ITER_VERIFIER_END=$(date +%s)
|
|
3421
|
+
|
|
3422
|
+
# --- governance.md s7 step 7: Read verdict via jq ---
|
|
3423
|
+
local verdict
|
|
3424
|
+
verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
|
|
3425
|
+
local recommended
|
|
3426
|
+
recommended=$(jq -r '.recommended_state_transition' "$VERDICT_FILE" 2>/dev/null)
|
|
3427
|
+
local verdict_summary
|
|
3428
|
+
verdict_summary=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
3429
|
+
|
|
3430
|
+
log " Verifier: verdict=$verdict recommended=$recommended"
|
|
3431
|
+
log " Verifier summary: \"$verdict_summary\""
|
|
3432
|
+
local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
|
|
3433
|
+
log_debug "[GOV] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
|
|
3434
|
+
|
|
3435
|
+
case "$verdict" in
|
|
3436
|
+
pass)
|
|
3437
|
+
CONSECUTIVE_FAILURES=0
|
|
3438
|
+
CONSENSUS_ROUND=0
|
|
3439
|
+
_SAME_US_FAIL_COUNT=0
|
|
3440
|
+
_LAST_FAILED_US=""
|
|
3441
|
+
if (( _MODEL_UPGRADED )); then
|
|
3442
|
+
log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
|
|
3443
|
+
log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
|
|
3444
|
+
WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
|
|
3445
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
3446
|
+
WORKER_CODEX_MODEL="$WORKER_MODEL"
|
|
3447
|
+
WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
|
|
3448
|
+
fi
|
|
3449
|
+
_MODEL_UPGRADED=0
|
|
3450
|
+
fi
|
|
3451
|
+
|
|
3452
|
+
# --- Verified US tracking (both per-us and batch modes) ---
|
|
3453
|
+
# Handle a `pass` verdict: either record one verified story, finish the
# campaign, or keep iterating.
if [[ -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
  # Per-story pass. Append the story to the comma-separated VERIFIED_US list
  # only when it is not already present, so a story restored at startup or
  # re-verified after a fix is not listed twice. Uses the same
  # ",$VERIFIED_US," membership check as the partial-progress handling in the
  # fail branch.
  if [[ ",$VERIFIED_US," != *",$signal_us_id,"* ]]; then
    if [[ -n "$VERIFIED_US" ]]; then
      VERIFIED_US="${VERIFIED_US},${signal_us_id}"
    else
      VERIFIED_US="$signal_us_id"
    fi
  fi
  log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
  log_debug "[FLOW] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
  update_status "verifier" "pass_us"
  # Worker will do next US on next iteration
elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
  # Final full verify passed or complete recommended
  write_complete_sentinel "$verdict_summary"
  update_status "complete" "pass"
  write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
  return 0
else
  # Pass with no story id and no `complete` recommendation: keep looping.
  log " Verifier passed but did not recommend complete. Continuing."
  update_status "verifier" "pass_continue"
fi
|
|
3474
|
+
;;
|
|
3475
|
+
fail)
|
|
3476
|
+
# --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
|
|
3477
|
+
|
|
3478
|
+
# Parse per_us_results from verdict to track partial progress (batch + per-us)
|
|
3479
|
+
local _prev_verified="$VERIFIED_US"
|
|
3480
|
+
if jq -e '.per_us_results' "$VERDICT_FILE" &>/dev/null; then
|
|
3481
|
+
local _newly_passed
|
|
3482
|
+
_newly_passed=$(jq -r '.per_us_results | to_entries[] | select(.value == "pass") | .key' "$VERDICT_FILE" 2>/dev/null)
|
|
3483
|
+
for _pus in $(echo "$_newly_passed"); do
|
|
3484
|
+
if ! echo ",$VERIFIED_US," | grep -q ",$_pus,"; then
|
|
3485
|
+
if [[ -n "$VERIFIED_US" ]]; then
|
|
3486
|
+
VERIFIED_US="${VERIFIED_US},${_pus}"
|
|
3487
|
+
else
|
|
3488
|
+
VERIFIED_US="$_pus"
|
|
3489
|
+
fi
|
|
3490
|
+
log " Partial progress: $_pus passed (overall FAIL). Verified so far: $VERIFIED_US"
|
|
3491
|
+
fi
|
|
3492
|
+
done
|
|
3493
|
+
log_debug "[FLOW] iter=$ITERATION partial_progress prev=$_prev_verified now=$VERIFIED_US"
|
|
3494
|
+
fi
|
|
3495
|
+
|
|
3496
|
+
# Partial progress resets consecutive failures (progress was made)
|
|
3497
|
+
if [[ "$VERIFIED_US" != "$_prev_verified" ]]; then
|
|
3498
|
+
CONSECUTIVE_FAILURES=0
|
|
3499
|
+
log " Progress detected — consecutive_failures reset to 0"
|
|
3500
|
+
log_debug "[GOV] iter=$ITERATION consecutive_failures_reset=partial_progress"
|
|
3501
|
+
fi
|
|
3502
|
+
|
|
3503
|
+
(( CONSECUTIVE_FAILURES++ ))
|
|
3504
|
+
record_us_failure "${signal_us_id:-unknown}"
|
|
3505
|
+
check_model_upgrade "${signal_us_id:-unknown}"
|
|
3506
|
+
|
|
3507
|
+
# Mid-CB warning: alert at halfway point (governance §8 early warning)
|
|
3508
|
+
if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
|
|
3509
|
+
log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
|
|
3510
|
+
log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
|
|
3511
|
+
fi
|
|
3512
|
+
local verdict_summary_fail
|
|
3513
|
+
verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
3514
|
+
log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
|
|
3515
|
+
|
|
3516
|
+
# Extract issues from verdict for next Worker's fix contract
|
|
3517
|
+
local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).fix-contract.md"
|
|
3518
|
+
{
|
|
3519
|
+
echo "# Fix Contract (from Verifier iteration $ITERATION)"
|
|
3520
|
+
echo ""
|
|
3521
|
+
if [[ -n "$VERIFIED_US" ]]; then
|
|
3522
|
+
echo "## Verified US (do NOT re-implement these)"
|
|
3523
|
+
echo "$VERIFIED_US" | tr ',' '\n' | sed 's/^/- /'
|
|
3524
|
+
echo ""
|
|
3525
|
+
echo "**Focus ONLY on unverified user stories. The above are already verified.**"
|
|
3526
|
+
echo ""
|
|
3527
|
+
fi
|
|
3528
|
+
echo "## Summary"
|
|
3529
|
+
echo "$verdict_summary_fail"
|
|
3530
|
+
echo ""
|
|
3531
|
+
echo "## Issues (from verify-verdict.json)"
|
|
3532
|
+
jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$VERDICT_FILE" 2>/dev/null || echo "- (no structured issues available)"
|
|
3533
|
+
echo ""
|
|
3534
|
+
echo "## Next Iteration Contract"
|
|
3535
|
+
jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
|
|
3536
|
+
} | atomic_write "$fix_contract"
|
|
3537
|
+
log " Fix contract: $fix_contract"
|
|
3538
|
+
log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
|
|
3539
|
+
|
|
3540
|
+
# Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
|
|
3541
|
+
if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
|
|
3542
|
+
# For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
|
|
3543
|
+
_ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
|
|
3544
|
+
if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
|
|
3545
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
|
|
3546
|
+
log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
|
|
3547
|
+
write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures" "" "repeat_axis"
|
|
3548
|
+
else
|
|
3549
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
|
|
3550
|
+
log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
3551
|
+
write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures" "" "repeat_axis"
|
|
3552
|
+
fi
|
|
3553
|
+
update_status "blocked" "consecutive_failures"
|
|
3554
|
+
return 1
|
|
3555
|
+
fi
|
|
3556
|
+
|
|
3557
|
+
update_status "verifier" "fail"
|
|
3558
|
+
;;
|
|
3559
|
+
request_info)
|
|
3560
|
+
# --- governance.md s7 step 7: request_info (degraded in tmux mode) ---
|
|
3561
|
+
local verdict_summary_ri
|
|
3562
|
+
verdict_summary_ri=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
3563
|
+
log " Verifier requests info (degraded in tmux lean mode)."
|
|
3564
|
+
log " Questions: \"$verdict_summary_ri\""
|
|
3565
|
+
log " Treating as soft fail — Worker will see verdict in next iteration."
|
|
3566
|
+
update_status "verifier" "request_info"
|
|
3567
|
+
;;
|
|
3568
|
+
blocked)
|
|
3569
|
+
local _verdict_cat
|
|
3570
|
+
_verdict_cat=$(_classify_cross_us_or_metric "$verdict_summary")
|
|
3571
|
+
write_blocked_sentinel "Verifier verdict: blocked - $verdict_summary" "" "$_verdict_cat"
|
|
3572
|
+
update_status "blocked" "verifier_blocked"
|
|
3573
|
+
return 1
|
|
3574
|
+
;;
|
|
3575
|
+
*)
|
|
3576
|
+
log_error "Unknown verdict: $verdict"
|
|
3577
|
+
update_status "verifier" "unknown_verdict"
|
|
3578
|
+
;;
|
|
3579
|
+
esac
|
|
3580
|
+
;;
|
|
3581
|
+
blocked)
|
|
3582
|
+
# --- governance.md s7 step 6: blocked -> write sentinel ---
|
|
3583
|
+
local _signal_cat
|
|
3584
|
+
_signal_cat=$(_classify_cross_us_or_metric "$signal_summary")
|
|
3585
|
+
write_blocked_sentinel "Worker reported blocked: $signal_summary" "" "$_signal_cat"
|
|
3586
|
+
update_status "blocked" "worker_blocked"
|
|
3587
|
+
return 1
|
|
3588
|
+
;;
|
|
3589
|
+
*)
|
|
3590
|
+
log_error "Unknown signal status: $signal_status"
|
|
3591
|
+
update_status "worker" "unknown_status"
|
|
3592
|
+
;;
|
|
3593
|
+
esac
|
|
3594
|
+
|
|
3595
|
+
# --- step 7d: Archive iteration artifacts before cleanup ---
|
|
3596
|
+
archive_iter_artifacts "$ITERATION"
|
|
3597
|
+
|
|
3598
|
+
# --- AC5: Write per-iteration cost estimate ---
|
|
3599
|
+
write_cost_log "$ITERATION"
|
|
3600
|
+
write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
|
|
3601
|
+
|
|
3602
|
+
# --- governance.md s7 step 8: Write result log ---
|
|
3603
|
+
write_result_log "$ITERATION" "$signal_status"
|
|
3604
|
+
|
|
3605
|
+
# --- governance.md s7 step 8: Circuit breaker - stale context check ---
|
|
3606
|
+
if ! check_stale_context; then
|
|
3607
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
|
|
3608
|
+
write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)" "" "context_limit"
|
|
3609
|
+
update_status "blocked" "stale_context"
|
|
3610
|
+
return 1
|
|
3611
|
+
fi
|
|
3612
|
+
|
|
3613
|
+
# --- governance.md s7 step 8: Update status ---
|
|
3614
|
+
update_status "idle" "${signal_status:-unknown}"
|
|
3615
|
+
done
|
|
3616
|
+
|
|
3617
|
+
# Max iterations reached
|
|
3618
|
+
log "Max iterations ($MAX_ITER) reached."
|
|
3619
|
+
update_status "timeout" "max_iter"
|
|
3620
|
+
return 1
|
|
3621
|
+
}
|
|
3622
|
+
|
|
3623
|
+
# =============================================================================
|
|
3624
|
+
# Entry Point
|
|
3625
|
+
# =============================================================================
|
|
3626
|
+
|
|
3627
|
+
# --- CLI: parse --worker-model / --verifier-model flags ---
# These flags override env-var defaults (WORKER_ENGINE, WORKER_MODEL, etc.)
# Format: "model:reasoning" → codex engine; "model-name" → claude engine

# Helper: parse one model flag value and store the result in <PREFIX>_* globals.
#   $1 - variable prefix: WORKER, VERIFIER or FINAL_VERIFIER
#   $2 - role label passed through to parse_model_flag
#   $3 - raw flag value (empty when the flag was the last argument)
# The stdout of parse_model_flag is consumed as space-separated fields:
#   "<engine> <model> [<reasoning-or-effort>]"
# Sets <PREFIX>_ENGINE and <PREFIX>_MODEL always; for the codex engine also
# <PREFIX>_CODEX_MODEL / <PREFIX>_CODEX_REASONING; for any other engine
# <PREFIX>_EFFORT, but only when a third field is present.
# A non-zero status from parse_model_flag terminates the script with exit 1.
_cli_apply_model_flag() {
  local _prefix="$1" _role="$2" _raw="$3"
  local _parsed _rest _engine _model

  _parsed=$(parse_model_flag "$_raw" "$_role") || exit 1
  _engine="${_parsed%% *}"   # first field
  _rest="${_parsed#* }"      # everything after the first space
  _model="${_rest%% *}"      # second field

  # typeset -g assigns the dynamically named variable at global scope.
  typeset -g "${_prefix}_ENGINE=${_engine}"
  typeset -g "${_prefix}_MODEL=${_model}"
  if [[ "$_engine" = "codex" ]]; then
    typeset -g "${_prefix}_CODEX_MODEL=${_model}"
    typeset -g "${_prefix}_CODEX_REASONING=${_rest##* }"
  elif [[ "$_rest" == *" "* ]]; then
    # Non-codex engines carry an optional trailing effort field.
    typeset -g "${_prefix}_EFFORT=${_rest##* }"
  fi
}

# Walk the positional parameters by index so value-taking flags can consume
# the following argument. Arguments matching no arm are skipped silently.
_cli_i=1
while (( _cli_i <= $# )); do
  case "${@[$_cli_i]}" in
    --worker-model)
      (( _cli_i++ ))
      _cli_apply_model_flag "WORKER" "worker" "${@[$_cli_i]:-}"
      ;;
    --verifier-model)
      (( _cli_i++ ))
      _cli_apply_model_flag "VERIFIER" "verifier" "${@[$_cli_i]:-}"
      ;;
    --lock-worker-model)
      LOCK_WORKER_MODEL=1
      ;;
    --autonomous)
      AUTONOMOUS_MODE=1
      ;;
    --lane-strict)
      # P1-E opt-in: lane mtime audit escalates to BLOCKED instead of WARN.
      # See governance §7¾.
      LANE_MODE="strict"
      ;;
    --test-density-strict)
      # US-018 R6 P1-F opt-in: AC with < 3 tests fails init (exit 1) instead of WARN.
      # See governance §7f.
      TEST_DENSITY_MODE="strict"
      ;;
    --final-verifier-model)
      (( _cli_i++ ))
      _cli_apply_model_flag "FINAL_VERIFIER" "final-verifier" "${@[$_cli_i]:-}"
      ;;
    --consensus)
      # Missing value falls back to "off".
      (( _cli_i++ ))
      CONSENSUS_MODE="${@[$_cli_i]:-off}"
      ;;
    --consensus-model)
      (( _cli_i++ ))
      CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.5:medium}"
      ;;
    --final-consensus-model)
      (( _cli_i++ ))
      FINAL_CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.5:high}"
      ;;
    --final-consensus)
      # Legacy: map to new --consensus final-only
      CONSENSUS_MODE="final-only"
      ;;
    --verify-consensus)
      # Legacy: map to new --consensus all
      CONSENSUS_MODE="all"
      ;;
  esac
  (( _cli_i++ ))
done

# Scratch cleanup: no parser state or helper should leak into the rest of the script.
unset _cli_i _cli_parsed _cli_rest
unset -f _cli_apply_model_flag
|
|
3712
|
+
|
|
3713
|
+
# Require tmux — tmux mode only works inside an active tmux session.
# Guard clause: when $TMUX is unset or empty, print recovery instructions
# (start tmux and retry, or switch to Agent() mode) and exit with status 1.
[[ -n "${TMUX:-}" ]] || {
  echo "ERROR: tmux mode requires running inside a tmux session."
  echo ""
  echo " Start tmux first, then retry:"
  echo " tmux"
  echo " LOOP_NAME=$SLUG $0"
  echo ""
  echo " Or use Agent() mode instead (no tmux needed):"
  echo " /rlp-desk run $SLUG"
  exit 1
}
|
|
3725
|
+
|