@ai-dev-methodologies/rlp-desk 0.7.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -0
- package/docs/blueprints/blueprint-pivot-step.md +137 -0
- package/docs/plans/validated-snacking-crayon.md +407 -0
- package/package.json +5 -2
- package/scripts/postinstall.js +91 -51
- package/scripts/uninstall.js +18 -9
- package/src/commands/rlp-desk.md +10 -3
- package/src/governance.md +2 -1
- package/src/node/cli/command-builder.mjs +96 -0
- package/src/node/init/campaign-initializer.mjs +235 -0
- package/src/node/polling/signal-poller.mjs +106 -0
- package/src/node/prompts/prompt-assembler.mjs +213 -0
- package/src/node/reporting/campaign-reporting.mjs +257 -0
- package/src/node/run.mjs +234 -0
- package/src/node/runner/campaign-main-loop.mjs +624 -0
- package/src/node/shared/fs.mjs +23 -0
- package/src/node/shared/paths.mjs +28 -0
- package/src/node/tmux/pane-manager.mjs +77 -0
- package/docs/blueprints/blueprint-v0.4-evolution.md +0 -347
- package/docs/prompts/ralplan-codex-review.md +0 -55
- package/docs/superpowers/plans/2026-04-06-worker-verifier-prompt-restructure.md +0 -179
- package/src/scripts/init_ralph_desk.zsh +0 -885
- package/src/scripts/lib_ralph_desk.zsh +0 -904
- package/src/scripts/run_ralph_desk.zsh +0 -2750
|
@@ -1,2750 +0,0 @@
|
|
|
1
|
-
#!/bin/zsh
|
|
2
|
-
set -uo pipefail
|
|
3
|
-
# NOTE: We use set -u (undefined var check) and pipefail, but NOT set -e
|
|
4
|
-
# because the main loop uses explicit error checks throughout.
|
|
5
|
-
|
|
6
|
-
# =============================================================================
|
|
7
|
-
# Ralph Desk Tmux Runner
|
|
8
|
-
#
|
|
9
|
-
# Implements the Leader loop from governance.md section 7 as a shell script.
|
|
10
|
-
# Uses tmux proven patterns: write-then-notify, pane IDs (%N),
|
|
11
|
-
# copy-mode guards, verification-based retry, heartbeat monitoring,
|
|
12
|
-
# idle pane nudging, exponential backoff restarts, atomic file writes.
|
|
13
|
-
#
|
|
14
|
-
# Usage:
|
|
15
|
-
# LOOP_NAME=<slug> ./run_ralph_desk.zsh
|
|
16
|
-
#
|
|
17
|
-
# Required env:
|
|
18
|
-
# LOOP_NAME - slug identifier for the campaign
|
|
19
|
-
#
|
|
20
|
-
# Optional env:
|
|
21
|
-
# ROOT - project root (default: $PWD)
|
|
22
|
-
# MAX_ITER - max iterations (default: 20)
|
|
23
|
-
# WORKER_MODEL - claude model for Worker (default: haiku)
|
|
24
|
-
# VERIFIER_MODEL - claude model for Verifier (default: sonnet)
|
|
25
|
-
# POLL_INTERVAL - seconds between signal checks (default: 5)
|
|
26
|
-
# ITER_TIMEOUT - per-iteration timeout in seconds (default: 600)
|
|
27
|
-
# HEARTBEAT_STALE_THRESHOLD - seconds before heartbeat is stale (default: 120)
|
|
28
|
-
# MAX_RESTARTS - max restart attempts per worker (default: 3)
|
|
29
|
-
# IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
|
|
30
|
-
# MAX_NUDGES - max nudges per pane per iteration (default: 3)
|
|
31
|
-
#
|
|
32
|
-
# Per-role codex config:
|
|
33
|
-
# WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.4)
|
|
34
|
-
# WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
|
|
35
|
-
# VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.4)
|
|
36
|
-
# VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
|
|
37
|
-
#
|
|
38
|
-
# Consensus scope:
|
|
39
|
-
# CONSENSUS_SCOPE - when consensus applies (default: all)
|
|
40
|
-
# all=every verify, final-only=final ALL only
|
|
41
|
-
#
|
|
42
|
-
# Dependencies: tmux, claude CLI, jq
|
|
43
|
-
# Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
|
|
44
|
-
# =============================================================================
|
|
45
|
-
|
|
46
|
-
# --- Environment Variables ---
|
|
47
|
-
SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
|
|
48
|
-
# Runtime tunables. Each keeps a non-empty value inherited from the
# environment; otherwise it receives the default shown here.
: "${ROOT:=$PWD}"
: "${MAX_ITER:=20}"
: "${WORKER_MODEL:=haiku}"
: "${VERIFIER_MODEL:=sonnet}"
: "${FINAL_VERIFIER_MODEL:=opus}"
: "${POLL_INTERVAL:=5}"
: "${ITER_TIMEOUT:=600}"
: "${HEARTBEAT_STALE_THRESHOLD:=120}"
: "${MAX_RESTARTS:=3}"
: "${IDLE_NUDGE_THRESHOLD:=30}"
: "${MAX_NUDGES:=3}"
: "${WITH_SELF_VERIFICATION:=0}"
# 1 = don't stop on ambiguity, PRD is authoritative
: "${AUTONOMOUS_MODE:=0}"
|
|
61
|
-
|
|
62
|
-
# --- Engine Selection (auto-detect from model format) ---
|
|
63
|
-
# claude models (haiku/sonnet/opus) with :effort → claude engine + effort
|
|
64
|
-
# codex models (gpt-*/spark) with :reasoning → codex engine + reasoning
|
|
65
|
-
# plain name → claude engine (no effort/reasoning)
|
|
66
|
-
_auto_detect_engine() {
  # Split a "model:level" spec held in the variable named by $1 and store the
  # pieces back into the caller-named global variables.
  #
  # Args (all are variable NAMES, not values):
  #   $1 model_var            - holds the spec; rewritten to the bare model name
  #   $2 engine_var           - receives "claude" or "codex"
  #   $3 codex_model_var      - receives the codex model (may be "")
  #   $4 codex_reasoning_var  - receives the codex reasoning level (may be "")
  #   $5 effort_var           - receives the claude effort level (optional)
  #
  # A value without ":" is left untouched. haiku/sonnet/opus select the claude
  # engine; any other model name selects codex ("spark" is expanded to
  # "gpt-5.3-codex-spark"). Always returns 0.
  local model_var="$1" engine_var="$2" codex_model_var="$3" codex_reasoning_var="$4" effort_var="${5:-}"
  local model_val model_part level_part

  # Indirect read. Only the variable NAME (chosen by callers inside this
  # script) reaches eval; the user-controlled value is never evaluated.
  eval "model_val=\"\${${model_var}-}\""

  [[ "$model_val" == *:* ]] || return 0

  model_part="${model_val%%:*}"   # text before the first ':'
  level_part="${model_val##*:}"   # text after the last ':'

  # Results are stored with `typeset -g "name=value"` rather than eval so that
  # a value such as "high; some-command" is kept as literal text instead of
  # being executed.
  case "$model_part" in
    haiku|sonnet|opus)
      # Claude model with effort — engine is claude, effort stored if requested
      typeset -g "${engine_var}=claude"
      typeset -g "${model_var}=${model_part}"
      if [[ -n "$effort_var" ]]; then
        typeset -g "${effort_var}=${level_part}"
      fi
      ;;
    *)
      # Codex model with reasoning
      if [[ "$model_part" == "spark" ]]; then
        model_part="gpt-5.3-codex-spark"
      fi
      typeset -g "${engine_var}=codex"
      typeset -g "${model_var}=${model_part}"
      if [[ -n "$codex_model_var" ]]; then
        typeset -g "${codex_model_var}=${model_part}"
      fi
      if [[ -n "$codex_reasoning_var" ]]; then
        typeset -g "${codex_reasoning_var}=${level_part}"
      fi
      ;;
  esac
  return 0
}
|
|
90
|
-
|
|
91
|
-
# Engine per role. Defaults to claude; may be re-derived below when the
# matching *_MODEL value carries a ":level" suffix.
: "${WORKER_ENGINE:=claude}"
: "${VERIFIER_ENGINE:=claude}"
: "${FINAL_VERIFIER_ENGINE:=claude}"

# Effort levels for Claude models (set by _auto_detect_engine or CLI --worker-model opus:max)
: "${WORKER_EFFORT:=}"
: "${VERIFIER_EFFORT:=}"
: "${FINAL_VERIFIER_EFFORT:=}"

# Auto-detect engine from model format for env var path (CLI path uses parse_model_flag)
_auto_detect_engine WORKER_MODEL WORKER_ENGINE WORKER_CODEX_MODEL WORKER_CODEX_REASONING WORKER_EFFORT
_auto_detect_engine VERIFIER_MODEL VERIFIER_ENGINE VERIFIER_CODEX_MODEL VERIFIER_CODEX_REASONING VERIFIER_EFFORT
_auto_detect_engine FINAL_VERIFIER_MODEL FINAL_VERIFIER_ENGINE "" "" FINAL_VERIFIER_EFFORT

# Codex settings per role. Applied after auto-detection so values derived from
# a "model:level" spec are kept. Reasoning is one of low|medium|high.
: "${WORKER_CODEX_MODEL:=gpt-5.4}"
: "${WORKER_CODEX_REASONING:=high}"
: "${VERIFIER_CODEX_MODEL:=gpt-5.4}"
: "${VERIFIER_CODEX_REASONING:=high}"
# resolved by check_dependencies when engine=codex
CODEX_BIN=""
|
|
109
|
-
|
|
110
|
-
# --- Verify Mode ---
|
|
111
|
-
# Verification granularity: per-us|batch
: "${VERIFY_MODE:=per-us}"

# Consensus: off|all|final-only (replaces VERIFY_CONSENSUS + FINAL_CONSENSUS + CONSENSUS_SCOPE)
: "${CONSENSUS_MODE:=off}"
# per-US cross-verifier (lighter)
: "${CONSENSUS_MODEL:=gpt-5.4:medium}"
# final cross-verifier (stricter)
: "${FINAL_CONSENSUS_MODEL:=gpt-5.4:high}"

# Legacy compat: map old flags to CONSENSUS_MODE. VERIFY_CONSENSUS wins over
# FINAL_CONSENSUS when both are set to 1.
case "${VERIFY_CONSENSUS:-0}/${FINAL_CONSENSUS:-0}" in
  1/*) CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}" ;;
  */1) CONSENSUS_MODE="final-only" ;;
esac
: "${CONSENSUS_SCOPE:=${CONSENSUS_MODE}}"

# consecutive failures before BLOCKED (default: 6)
: "${CB_THRESHOLD:=6}"

# Effective CB threshold: doubled when consensus mode active
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
if [[ "$CONSENSUS_MODE" != "off" ]]; then
  EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
fi

: "${_API_MAX_RETRIES:=5}"
: "${_API_RETRY_INTERVAL_S:=30}"
|
|
132
|
-
|
|
133
|
-
# --- Derived Paths ---
|
|
134
|
-
# Project-local desk layout, rooted at $ROOT/.claude/ralph-desk.
DESK="${ROOT}/.claude/ralph-desk"
PROMPTS_DIR="${DESK}/prompts"
CONTEXT_DIR="${DESK}/context"
MEMOS_DIR="${DESK}/memos"
LOGS_DIR="${DESK}/logs/${SLUG}"
RUNTIME_DIR="${LOGS_DIR}/runtime"
PRD_FILE="${DESK}/plans/prd-${SLUG}.md"
TEST_SPEC_FILE="${DESK}/plans/test-spec-${SLUG}.md"

# --- Analytics Directory (user-level, cross-project) ---
# Directory name is "<slug>--<first 8 hex chars of an MD5 of $ROOT>".
# Tries `md5 -q` first and falls back to `md5sum`.
ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
ANALYTICS_DIR="${HOME}/.claude/ralph-desk/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
CAMPAIGN_JSONL="${ANALYTICS_DIR}/campaign.jsonl"
METADATA_FILE="${ANALYTICS_DIR}/metadata.json"

# Per-campaign prompt, context and memo files.
WORKER_PROMPT_BASE="${PROMPTS_DIR}/${SLUG}.worker.prompt.md"
VERIFIER_PROMPT_BASE="${PROMPTS_DIR}/${SLUG}.verifier.prompt.md"
CONTEXT_FILE="${CONTEXT_DIR}/${SLUG}-latest.md"
MEMORY_FILE="${MEMOS_DIR}/${SLUG}-memory.md"
SIGNAL_FILE="${MEMOS_DIR}/${SLUG}-iter-signal.json"
DONE_CLAIM_FILE="${MEMOS_DIR}/${SLUG}-done-claim.json"
VERDICT_FILE="${MEMOS_DIR}/${SLUG}-verify-verdict.json"
COMPLETE_SENTINEL="${MEMOS_DIR}/${SLUG}-complete.md"
BLOCKED_SENTINEL="${MEMOS_DIR}/${SLUG}-blocked.md"

# Lock, runtime status and cost files.
LOCKFILE_PATH="${DESK}/logs/.rlp-desk-${SLUG}.lock"
STATUS_FILE="${RUNTIME_DIR}/status.json"
SESSION_CONFIG="${RUNTIME_DIR}/session-config.json"
WORKER_HEARTBEAT="${RUNTIME_DIR}/worker-heartbeat.json"
VERIFIER_HEARTBEAT="${RUNTIME_DIR}/verifier-heartbeat.json"
COST_LOG="${LOGS_DIR}/cost-log.jsonl"
|
|
162
|
-
|
|
163
|
-
# --- Session Naming ---
|
|
164
|
-
# Session name: rlp-desk-<slug>-<YYYYMMDD-HHMMSS>
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"

# --- State Tracking ---
# Associative maps used by the monitoring logic.
typeset -A LAST_PANE_CONTENT PANE_IDLE_SINCE WORKER_RESTARTS US_FAIL_HISTORY

# Counters
STALE_CONTEXT_COUNT=0
HEARTBEAT_STALE_COUNT=0
MONITOR_FAILURE_COUNT=0
CONSECUTIVE_FAILURES=0

# Change detection for context and PRD
PREV_CONTEXT_HASH=""
PREV_PRD_HASH=""
PREV_PRD_US_LIST=""
_PRD_CHANGED=0

ITERATION=0
START_TIME=$(date +%s)

# git HEAD at campaign start (captured before loop)
BASELINE_COMMIT=""
# guard against double-generation in cleanup trap
CAMPAIGN_REPORT_GENERATED=0
# guard against double-generation in generate_sv_report
SV_REPORT_GENERATED=0
# comma-separated list of verified US IDs (per-us mode)
VERIFIED_US=""
# current consensus round for current US
CONSENSUS_ROUND=0
# comma-separated US IDs from PRD (per-us mode)
US_LIST=""
LOCKFILE_ACQUIRED=0
# 0|1 — set by --lock-worker-model; disables progressive upgrade
: "${LOCK_WORKER_MODEL:=0}"
# consecutive same-US fail counter (upgrade trigger at >= 2)
_SAME_US_FAIL_COUNT=0
# last failed US ID (same-US tracking for upgrade logic)
_LAST_FAILED_US=""
# 1 if Worker model was auto-upgraded during campaign
_MODEL_UPGRADED=0
# WORKER_MODEL saved before first upgrade (for restore on pass)
_ORIGINAL_WORKER_MODEL=""
# WORKER_CODEX_REASONING saved before first upgrade
_ORIGINAL_WORKER_CODEX_REASONING=""
|
|
195
|
-
|
|
196
|
-
# =============================================================================
|
|
197
|
-
# Utility Functions
|
|
198
|
-
# =============================================================================
|
|
199
|
-
|
|
200
|
-
# Debug switch and log location (debug.log lives in the analytics directory).
DEBUG="${DEBUG:-0}"
DEBUG_LOG="$ANALYTICS_DIR/debug.log"

# Source shared business logic from the directory containing this script.
LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
# Fail fast with a clear message when the library is missing or unreadable,
# instead of continuing with its definitions absent. Readability is tested up
# front rather than the status of `source`, because `source` reports the
# status of the last command in the sourced file.
if [[ ! -r "$LIB_DIR/lib_ralph_desk.zsh" ]]; then
  echo "ERROR: cannot read shared library: $LIB_DIR/lib_ralph_desk.zsh" >&2
  exit 1
fi
source "$LIB_DIR/lib_ralph_desk.zsh"

# A16: Warn if running in foreground (may conflict with Claude Code pane)
if [[ -z "${RLP_BACKGROUND:-}" ]]; then
  echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
  echo " Recommended: launch via Bash tool with run_in_background: true" >&2
  echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
fi
|
|
213
|
-
|
|
214
|
-
# check_dead_pane() — determine if pane command indicates a dead/exited process
|
|
215
|
-
# Engine-aware: bash is normal for codex workers (trigger runs in bash),
|
|
216
|
-
# but indicates dead pane for claude workers.
|
|
217
|
-
# Args: $1=pane_current_command $2=engine (claude|codex) $3=role (worker|verifier)
|
|
218
|
-
# Returns: 0 if dead, 1 if alive
|
|
219
|
-
check_dead_pane() {
  # Decide from a pane's current command whether its process has exited.
  #   $1 - pane_current_command as reported by tmux
  #   $2 - engine: claude|codex (default: claude)
  #   $3 - role: worker|verifier (default: worker); accepted but not consulted
  # Returns 0 when the pane is considered dead, 1 when alive.
  local poll_cmd="$1"
  local engine="${2:-claude}"
  local role="${3:-worker}"

  case "$poll_cmd" in
    ""|zsh)
      # empty or bare zsh = dead
      return 0
      ;;
    bash)
      # bash = dead for claude (codex uses bash trigger)
      [[ "$engine" == "codex" ]] || return 0
      ;;
  esac
  return 1
}
|
|
233
|
-
|
|
234
|
-
# launch_worker_codex() — launch codex Worker TUI, send instruction, verify submission
|
|
235
|
-
# Matches launch_worker_claude() pattern for consistent tmux-visible execution.
|
|
236
|
-
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
|
|
237
|
-
# Returns: 0 on success, 1 on fatal failure
|
|
238
|
-
launch_worker_codex() {
  # Start the codex Worker TUI in a pane, send it the prompt-file instruction,
  # then poll the pane for signs that it began working.
  #   $1 - pane id   $2 - prompt file   $3 - iteration   $4 - launch command
  # Returns 1 if the pane never becomes ready after launch, otherwise 0 (also
  # when no activity was detected within 15 checks).
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  local activity_re='working|thinking|Exploring|Running|reading|searching|editing|writing'
  local worker_instruction screen attempt

  log " Launching Worker codex TUI in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for codex TUI to be ready
  wait_for_pane_ready "$pane_id" 30 || {
    log_error "Worker codex failed to start"
    return 1
  }

  # Send instruction to codex TUI
  sleep 3
  worker_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker codex instruction sent (${#worker_instruction} chars)"

  # Submit loop: up to 15 checks, 2 seconds apart
  for (( attempt = 0; attempt < 15; attempt++ )); do
    sleep 2
    screen=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    if grep -qiE "$activity_re" <<< "$screen" 2>/dev/null; then
      log_debug "Worker codex started working after $((attempt + 1)) checks"
      break
    fi
    # On the 9th failed check, clear the input line and type the instruction again
    if (( attempt == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Press Enter twice in case the submission was not consumed
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
  done
  return 0
}
|
|
285
|
-
|
|
286
|
-
# launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
|
|
287
|
-
# Handles: TUI startup, wait_for_pane_ready, instruction send, 15-iteration submit loop,
|
|
288
|
-
# restart recovery on submit failure.
|
|
289
|
-
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
|
|
290
|
-
# Returns: 0 on success, 1 on fatal failure (caller writes BLOCKED)
|
|
291
|
-
launch_worker_claude() {
  # Start the claude Worker TUI in a pane, send it the prompt-file instruction,
  # and confirm via pane captures that it began working. If 15 checks pass
  # without activity, interrupt claude, relaunch it and re-send the instruction.
  #   $1 - pane id   $2 - prompt file   $3 - iteration   $4 - launch command
  # Returns 1 if the pane never becomes ready after the first launch, else 0.
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"

  log " Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  local instruction_prefix="Read and execute the instructions in"
  local worker_instruction="$instruction_prefix $prompt_file"
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working
  local submit_attempts=0
  local pane_check
  while (( submit_attempts < 15 )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Mask our own instruction before looking for activity keywords. The match
    # is case-insensitive and includes "exec", which would otherwise match the
    # word "execute" in the instruction itself and make this check always pass.
    # The full instruction is removed first, then the fixed phrase alone in
    # case the path was wrapped onto another line.
    pane_check=${pane_check//"$worker_instruction"/}
    pane_check=${pane_check//"$instruction_prefix"/}
    if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob" 2>/dev/null; then
      log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" C-m
      sleep 1
    fi
    # Press Enter twice in case the submission was not consumed
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    (( submit_attempts++ ))
  done

  # If 15 attempts failed, restart claude and retry
  if (( submit_attempts >= 15 )); then
    log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
    tmux send-keys -t "$pane_id" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
    sleep 2
    wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
    paste_to_pane "$pane_id" "$worker_launch"
    tmux send-keys -t "$pane_id" C-m
    if wait_for_pane_ready "$pane_id" 30; then
      sleep 3
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
      log " Worker restarted and instruction re-sent"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
    else
      log_error "Worker restart failed — pane not ready"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
    fi
  fi

  return 0
}
|
|
366
|
-
|
|
367
|
-
# launch_verifier_codex() — launch codex Verifier TUI, send instruction, verify submission
|
|
368
|
-
# Matches launch_verifier_claude() pattern for consistent tmux-visible execution.
|
|
369
|
-
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
|
|
370
|
-
# Returns: 0 on success
|
|
371
|
-
launch_verifier_codex() {
  # Start the codex Verifier TUI in a pane, send it the prompt-file
  # instruction, then poll the pane for signs that it began working.
  #   $1 - pane id   $2 - prompt file   $3 - iteration   $4 - launch command
  # Returns 1 if the pane never becomes ready after launch, otherwise 0 (also
  # when no activity was detected within 15 checks).
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  local activity_re='working|thinking|Exploring|Running|reading|searching|editing|writing'
  local verifier_instruction screen attempt

  log " Launching Verifier codex TUI in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  wait_for_pane_ready "$pane_id" 30 || {
    log_error "Verifier codex failed to start"
    return 1
  }

  sleep 3
  verifier_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier codex instruction sent"

  # Submit loop: up to 15 checks, 2 seconds apart
  for (( attempt = 0; attempt < 15; attempt++ )); do
    sleep 2
    screen=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    if grep -qiE "$activity_re" <<< "$screen" 2>/dev/null; then
      log_debug "Verifier codex started working after $((attempt + 1)) checks"
      break
    fi
    # On the 9th failed check, clear the input line and type the instruction again
    if (( attempt == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Press Enter twice in case the submission was not consumed
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
  done
  return 0
}
|
|
416
|
-
|
|
417
|
-
# launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
|
|
418
|
-
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
|
|
419
|
-
# Returns: 0 on success
|
|
420
|
-
launch_verifier_claude() {
  # Start the claude Verifier TUI in a pane, send it the prompt-file
  # instruction, and poll pane captures for signs that it began working.
  #   $1 - pane id   $2 - prompt file   $3 - iteration   $4 - launch command
  # Returns 1 if the pane never becomes ready after launch, otherwise 0 (also
  # when no activity was detected within 15 checks).
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"

  log " Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  sleep 3
  local instruction_prefix="Read and execute the instructions in"
  local verifier_instruction="$instruction_prefix $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working
  local submit_attempts=0
  local vs_check
  while (( submit_attempts < 15 )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Mask our own instruction before looking for activity keywords. The match
    # is case-insensitive and includes "exec", which would otherwise match the
    # word "execute" in the instruction itself and make this check always pass.
    # The full instruction is removed first, then the fixed phrase alone in
    # case the path was wrapped onto another line.
    vs_check=${vs_check//"$verifier_instruction"/}
    vs_check=${vs_check//"$instruction_prefix"/}
    if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
      log_debug "Verifier started working after $((submit_attempts + 1)) checks"
      break
    fi
    # On the 9th failed check, clear the input line and type the instruction again
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Press Enter twice in case the submission was not consumed
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    (( submit_attempts++ ))
  done
  return 0
}
|
|
465
|
-
|
|
466
|
-
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec)
|
|
467
|
-
# On exit: check done-claim, auto-generate iter-signal.
|
|
468
|
-
# Args: $1=iteration $2=signal_file
|
|
469
|
-
# Returns: 0 (signal generated), 1 (error)
|
|
470
|
-
handle_worker_exit_codex() {
  # Called after the codex worker process has exited: writes a "verify"
  # iteration signal so the loop can proceed to verification.
  #   $1 - iteration number   $2 - signal file to write
  # The us_id comes from the done-claim file when it exists. Otherwise it is
  # read from the current iter-signal file (default US-001) and overridden by
  # the first "Next ... US-<n>" line found in the campaign memory file.
  # Returns 0 when the signal was written, 1 when writing it failed.
  local iter="$1"
  local signal_file="$2"
  local us_id summary mem_us

  log " Codex worker process exited. Checking for done-claim..."
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
    # An unreadable or malformed done-claim yields no output; fall back to the
    # same placeholder jq uses for a missing field.
    [[ -n "$us_id" ]] || us_id="unknown"
    log " Codex worker completed with done-claim (us_id=$us_id). Auto-generating signal."
    summary="auto-generated after codex exit"
  else
    log " WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
    us_id=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null || echo "US-001")
    mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
    if [[ -n "$mem_us" ]]; then
      us_id="$mem_us"
    fi
    summary="auto-generated after codex exit (no done-claim)"
  fi

  # Build the JSON with jq so that quotes or backslashes in us_id are escaped
  # properly. iteration is emitted as a number when it parses as one.
  if ! jq -n -c \
      --arg iter "$iter" \
      --arg us_id "$us_id" \
      --arg summary "$summary" \
      --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '{iteration: ($iter | tonumber? // $iter), status: "verify", us_id: $us_id, summary: $summary, timestamp: $ts}' \
      > "$signal_file"; then
    return 1
  fi
  return 0
}
|
|
491
|
-
|
|
492
|
-
# handle_worker_exit_claude() — handle claude worker process exit (restart with backoff)
|
|
493
|
-
# Args: $1=pane_id $2=iteration $3=trigger_file
|
|
494
|
-
# Returns: 0 (restarted), 1 (max restarts exceeded)
|
|
495
|
-
handle_worker_exit_claude() {
  # Called when the claude worker exited without writing its signal file:
  # logs the error and delegates recovery to restart_worker.
  #   $1 - pane id   $2 - iteration   $3 - trigger file
  # Returns 0 when restart_worker succeeds, 1 otherwise.
  local pane_id="$1" iter="$2" trigger_file="$3"

  log_error "Worker exited without writing signal file"
  restart_worker "$pane_id" "$iter" "$trigger_file" && return 0
  return 1
}
|
|
507
|
-
|
|
508
|
-
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
|
|
509
|
-
replace_worker_pane() {
  # Kill a dead/stuck pane and create a fresh one in the same layout slot:
  # worker(top-right) / verifier(bottom-right).
  #   $1 - pane id to replace   $2 - role: "worker" or "verifier"
  # Prints the new pane id on stdout and records it in session-config.json
  # when that file exists.
  # NOTE(review): if split-window fails, new_pane is empty and is still written
  # to session-config.json — confirm whether callers guard against that.
  local old_pane="$1"
  local role="$2"
  local new_pane sibling_pane
  local -a split_opts

  log " Replacing dead $role pane $old_pane..."
  tmux kill-pane -t "$old_pane" 2>/dev/null

  # Pick the sibling pane to split from and the split direction:
  #   verifier -> below the worker pane
  #   worker   -> above the verifier pane (-b = before)
  if [[ "$role" == "verifier" ]]; then
    sibling_pane="$WORKER_PANE"
    split_opts=(-v -d)
  else
    sibling_pane="$VERIFIER_PANE"
    split_opts=(-v -b -d)
  fi

  if tmux display-message -t "$sibling_pane" -p '#{pane_id}' &>/dev/null; then
    new_pane=$(tmux split-window "${split_opts[@]}" -t "$sibling_pane" -P -F '#{pane_id}' -c "$ROOT")
  else
    # Fallback: sibling pane also dead, split horizontally from leader
    new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  fi

  log " New $role pane: $new_pane (replaced $old_pane)"
  log_debug "[FLOW] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"

  # Update session-config.json with new pane ID
  if [[ -f "$SESSION_CONFIG" ]]; then
    jq --arg role "$role" --arg pane "$new_pane" \
      '.panes[$role] = $pane' "$SESSION_CONFIG" | atomic_write "$SESSION_CONFIG"
    log_debug "Updated session-config.json: $role pane → $new_pane"
  fi

  echo "$new_pane"
}
|
|
548
|
-
|
|
549
|
-
# =============================================================================
|
|
550
|
-
# Dependency Checks
|
|
551
|
-
# =============================================================================
|
|
552
|
-
|
|
553
|
-
# --- governance.md s7 step 1: Validate prerequisites before starting ---
|
|
554
|
-
check_dependencies() {
  # Verify the external tools this runner needs and resolve the engine
  # binaries. Logs one error per missing tool and exits the script with
  # status 1 if anything is missing. On success sets CLAUDE_BIN and/or
  # CODEX_BIN for the engines in use.
  local missing=0
  local need_claude=1 need_codex=0

  # claude is skipped only for codex-only campaigns (Worker AND Verifier on
  # codex) — generate_sv_report degrades gracefully
  if [[ "$WORKER_ENGINE" == "codex" && "$VERIFIER_ENGINE" == "codex" ]]; then
    need_claude=0
  fi
  # Codex binary required only when engine=codex or consensus verification is enabled
  # NOTE(review): FINAL_VERIFIER_ENGINE is not consulted by either condition —
  # confirm whether a codex final verifier needs this check as well.
  if [[ "$WORKER_ENGINE" == "codex" || "$VERIFIER_ENGINE" == "codex" || "$CONSENSUS_MODE" != "off" ]]; then
    need_codex=1
  fi

  command -v tmux >/dev/null 2>&1 || {
    log_error "tmux is required but not found. Install with: brew install tmux"
    missing=1
  }

  if (( need_claude )); then
    command -v claude >/dev/null 2>&1 || {
      log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
      missing=1
    }
  fi

  command -v jq >/dev/null 2>&1 || {
    log_error "jq is required but not found. Install with: brew install jq"
    missing=1
  }

  if (( need_codex )); then
    command -v codex >/dev/null 2>&1 || {
      log_error "codex CLI not found. Install: npm install -g @openai/codex"
      missing=1
    }
  fi

  if (( missing )); then
    exit 1
  fi

  # Resolve full paths to the engine binaries in use
  if (( need_claude )); then
    CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
    log " Claude binary: $CLAUDE_BIN"
  fi

  if (( need_codex )); then
    CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
    log " Codex binary: $CODEX_BIN"
  fi
}
|
|
600
|
-
|
|
601
|
-
# =============================================================================
|
|
602
|
-
# Session Management (tmux pattern: pane IDs)
|
|
603
|
-
# =============================================================================
|
|
604
|
-
|
|
605
|
-
# --- governance.md s7 step 1: Check for existing sessions ---
|
|
606
|
-
# Abort the run when another tmux session for this campaign slug exists.
# Globals:   SLUG (read)
# Outputs:   log_error message, then the conflicting session names and a
#            kill hint on stdout
# Returns:   0 when no conflicting session is found; otherwise exits the
#            script with status 1
check_existing_sessions() {
  local current_session session_name
  local -a existing=()

  current_session=$(tmux display-message -p '#{session_name}' 2>/dev/null || echo "")

  # Names are compared literally (quoted glob prefix, plain string equality)
  # so regex metacharacters in SLUG or in the current session name cannot
  # widen or narrow the match.
  while IFS= read -r session_name; do
    [[ "$session_name" == "rlp-desk-${SLUG}-"* ]] || continue
    [[ "$session_name" == "$current_session" ]] && continue
    existing+=("$session_name")
  done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null)

  if (( ${#existing[@]} > 0 )); then
    log_error "Existing tmux session(s) found for slug '$SLUG':"
    printf ' - %s\n' "${existing[@]}"
    echo ""
    echo "Kill existing session first:"
    echo " tmux kill-session -t <session-name>"
    exit 1
  fi
}
|
|
622
|
-
|
|
623
|
-
# --- governance.md s7 step 1: Create tmux session with pane IDs (%N) ---
|
|
624
|
-
# Create the leader/worker/verifier pane layout and persist the session config.
# Globals read:    TMUX, ROOT, SLUG, COST_LOG, SESSION_CONFIG, the
#                  WORKER_*/VERIFIER_* engine and model settings,
#                  CONSENSUS_MODE, VERIFY_MODE and the numeric loop settings
#                  written under "config" below
# Globals written: SESSION_NAME (when already inside tmux), LEADER_PANE,
#                  WORKER_PANE, VERIFIER_PANE, BASELINE_COMMIT
# Outputs:         progress via log; session JSON piped to atomic_write
create_session() {
  log "Creating tmux session: $SESSION_NAME"

  if [[ -n "${TMUX:-}" ]]; then
    # Inside tmux: the current pane stays the leader and we adopt its session.
    LEADER_PANE=$(tmux display-message -p '#{pane_id}')
    SESSION_NAME=$(tmux display-message -p '#{session_name}')
    log " Splitting current pane in session: $SESSION_NAME"
  else
    # Outside tmux: start a detached 200x50 session rooted at $ROOT.
    tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT"
    LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
  fi

  # -h off the leader gives the worker pane; -v off the worker gives the verifier pane.
  WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")

  # Pane titles and border labels for visual distinction.
  local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
  local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
  if [[ "$CONSENSUS_MODE" != "off" ]]; then
    verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
  fi
  tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
  tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
  tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
  # Color-coded pane borders: green=leader, blue=worker, yellow=verifier.
  tmux set-option -p -t "$LEADER_PANE" pane-border-style "fg=green" 2>/dev/null
  tmux set-option -p -t "$WORKER_PANE" pane-border-style "fg=blue" 2>/dev/null
  tmux set-option -p -t "$VERIFIER_PANE" pane-border-style "fg=yellow" 2>/dev/null
  # Show pane titles in the border.
  tmux set-option pane-border-status top 2>/dev/null
  tmux set-option pane-border-format "#{?pane_active,#[fg=white bold],#[fg=grey]} #{pane_title} " 2>/dev/null

  log " Leader pane: $LEADER_PANE"
  log " Worker pane: $WORKER_PANE"
  log " Verifier pane: $VERIFIER_PANE"

  # AC12: capture the baseline commit before writing the session config.
  BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")

  # Truncate the cost log for a fresh run. The explicit ':' matters in zsh:
  # a redirection with no command runs $NULLCMD (cat by default) and would
  # read from stdin instead of just truncating the file.
  : > "$COST_LOG"

  if (( WITH_SELF_VERIFICATION )); then
    log " NOTE: --with-self-verification recorded but SV report generation is Agent-mode only"
  fi

  # Build the session config with jq so every string value is JSON-escaped
  # (paths or model names containing quotes/backslashes stay valid JSON).
  # Numeric settings are emitted as numbers when they parse as such and fall
  # back to strings otherwise, so the document is always well-formed.
  jq -n \
    --arg session_name "$SESSION_NAME" \
    --arg slug "$SLUG" \
    --arg created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg baseline_commit "$BASELINE_COMMIT" \
    --arg leader_pane "$LEADER_PANE" \
    --arg worker_pane "$WORKER_PANE" \
    --arg verifier_pane "$VERIFIER_PANE" \
    --arg pid "$$" \
    --arg root "$ROOT" \
    --arg worker_model "$WORKER_MODEL" \
    --arg verifier_model "$VERIFIER_MODEL" \
    --arg worker_engine "$WORKER_ENGINE" \
    --arg verifier_engine "$VERIFIER_ENGINE" \
    --arg worker_codex_model "$WORKER_CODEX_MODEL" \
    --arg worker_codex_reasoning "$WORKER_CODEX_REASONING" \
    --arg verifier_codex_model "$VERIFIER_CODEX_MODEL" \
    --arg verifier_codex_reasoning "$VERIFIER_CODEX_REASONING" \
    --arg verify_mode "$VERIFY_MODE" \
    --arg consensus_mode "$CONSENSUS_MODE" \
    --arg max_iter "$MAX_ITER" \
    --arg poll_interval "$POLL_INTERVAL" \
    --arg iter_timeout "$ITER_TIMEOUT" \
    --arg heartbeat_stale_threshold "$HEARTBEAT_STALE_THRESHOLD" \
    --arg max_restarts "$MAX_RESTARTS" \
    --arg idle_nudge_threshold "$IDLE_NUDGE_THRESHOLD" \
    --arg max_nudges "$MAX_NUDGES" \
    --arg cb_threshold "$CB_THRESHOLD" \
    --arg effective_cb_threshold "$EFFECTIVE_CB_THRESHOLD" \
    --arg with_self_verification "$WITH_SELF_VERIFICATION" \
    --arg autonomous_mode "$AUTONOMOUS_MODE" \
    'def num: (tonumber? // .);
     {
       session_name: $session_name,
       slug: $slug,
       created_at: $created_at,
       baseline_commit: $baseline_commit,
       panes: {
         leader: $leader_pane,
         worker: $worker_pane,
         verifier: $verifier_pane
       },
       pid: ($pid | num),
       root: $root,
       models: {
         worker: $worker_model,
         verifier: $verifier_model
       },
       engines: {
         worker: $worker_engine,
         verifier: $verifier_engine,
         worker_codex_model: $worker_codex_model,
         worker_codex_reasoning: $worker_codex_reasoning,
         verifier_codex_model: $verifier_codex_model,
         verifier_codex_reasoning: $verifier_codex_reasoning
       },
       verification: {
         verify_mode: $verify_mode,
         consensus_mode: $consensus_mode
       },
       config: {
         max_iter: ($max_iter | num),
         poll_interval: ($poll_interval | num),
         iter_timeout: ($iter_timeout | num),
         heartbeat_stale_threshold: ($heartbeat_stale_threshold | num),
         max_restarts: ($max_restarts | num),
         idle_nudge_threshold: ($idle_nudge_threshold | num),
         max_nudges: ($max_nudges | num),
         cb_threshold: ($cb_threshold | num),
         effective_cb_threshold: ($effective_cb_threshold | num),
         with_self_verification: ($with_self_verification | num),
         autonomous_mode: ($autonomous_mode | num)
       }
     }' | atomic_write "$SESSION_CONFIG"

  log " Session config: $SESSION_CONFIG"
}
|
|
726
|
-
|
|
727
|
-
# =============================================================================
|
|
728
|
-
# Copy-Mode Guard (tmux pattern)
|
|
729
|
-
# =============================================================================
|
|
730
|
-
|
|
731
|
-
# --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
|
|
732
|
-
# Tell the caller whether keys can be sent to a pane right now.
# Arguments: $1 - tmux pane id
# Returns:   0 when tmux reports the pane is not in a mode;
#            1 when pane_in_mode is 1 or the tmux query itself fails
check_copy_mode() {
  local target_pane="$1"
  local mode_flag

  if ! mode_flag=$(tmux display-message -p -t "$target_pane" '#{pane_in_mode}' 2>/dev/null); then
    return 1
  fi

  # In copy mode the pane captures keystrokes, so sending must be skipped.
  [[ "$mode_flag" -eq 1 ]] && return 1
  return 0
}
|
|
741
|
-
|
|
742
|
-
# =============================================================================
|
|
743
|
-
# Verification-Based Send Retry (tmux pattern)
|
|
744
|
-
# =============================================================================
|
|
745
|
-
|
|
746
|
-
# --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
|
|
747
|
-
# Paste text into a pane through a named tmux buffer (best effort).
# Arguments: $1 - tmux pane id
#            $2 - text to paste (written verbatim, no trailing newline)
# Returns:   1 if the temporary file cannot be created; otherwise the status
#            of the final cleanup (tmux failures are deliberately ignored)
paste_to_pane() {
  local pane_id="$1"
  local text="$2"
  local tmpbuf

  # mktemp gives an unpredictable, exclusively created file instead of a
  # guessable /tmp name derived from the PID.
  tmpbuf=$(mktemp "${TMPDIR:-/tmp}/rlp-desk-paste.XXXXXX") || return 1

  # printf '%s' writes the text byte-for-byte; echo -n would swallow text
  # such as "-n" and, in zsh, interpret backslash escapes.
  printf '%s' "$text" > "$tmpbuf"

  tmux load-buffer -b rlp-paste "$tmpbuf" 2>/dev/null
  # -d deletes the buffer after pasting so it cannot be reused by accident.
  tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null
  rm -f -- "$tmpbuf"
}
|
|
756
|
-
|
|
757
|
-
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
|
|
758
|
-
# Paste text into a pane and press Enter until the text is no longer visible
# in the last lines of the pane (verification-based send with retries).
# Arguments: $1 - tmux pane id
#            $2 - text to submit; an empty string sends a bare Enter
# Returns:   1 when the pane is (or enters) copy mode; 0 otherwise, including
#            the fail-open case where submission could not be confirmed
safe_send_keys() {
  local pane_id="$1"
  local text="$2"

  # Guard: copy mode captures keys; skip entirely.
  if ! check_copy_mode "$pane_id"; then
    log_debug " Pane $pane_id in copy mode, skipping send"
    return 1
  fi

  # Inspect recent pane output once for busy state and known prompts.
  local initial_capture
  initial_capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

  local pane_busy=0
  if grep -q "esc to interrupt" <<< "$initial_capture" 2>/dev/null; then
    pane_busy=1
  fi
  if grep -q "Do you trust" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Trust prompt detected, dismissing"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.12
  fi
  # Auto-approve permission prompts ("Do you want to create/overwrite X?").
  if grep -q "Do you want to" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Permission prompt detected, auto-approving"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.3
  fi
  # Auto-dismiss codex update prompt (select Skip).
  if grep -qi "new version\|update.*codex\|codex.*update" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Codex update prompt detected, selecting Skip"
    tmux send-keys -t "$pane_id" "2" C-m
    sleep 0.2
  fi

  # Empty text (used for idle nudges): there is nothing to paste and nothing
  # to verify. grep -F "" matches every line, so the "text consumed" check
  # below could never succeed and would burn through every retry round.
  if [[ -z "$text" ]]; then
    log_debug " Empty text for pane $pane_id, sending bare Enter"
    tmux send-keys -t "$pane_id" C-m
    if (( ! pane_busy )); then
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    return 0
  fi

  # Send text via buffer paste (reliable for long strings).
  log_debug " Pasting text to pane $pane_id (${#text} chars)"
  paste_to_pane "$pane_id" "$text"

  # Allow the input buffer to settle.
  sleep 0.15

  # Submit: up to 6 rounds of C-m presses.
  local round=0
  local check_capture
  while (( round < 6 )); do
    sleep 0.1
    if (( round == 0 && pane_busy )); then
      # Busy pane: a single C-m (never Tab, it toggles Claude Code permission mode).
      tmux send-keys -t "$pane_id" C-m
    else
      tmux send-keys -t "$pane_id" C-m
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    sleep 0.14

    # Text no longer visible in the last lines means it was consumed.
    # '--' keeps text starting with '-' from being parsed as a grep option.
    check_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$check_capture" 2>/dev/null; then
      log_debug " Text consumed after round $((round + 1))"
      return 0
    fi
    sleep 0.14
    (( round++ ))
  done

  # Safety gate: copy mode may have been entered while we were sending.
  if ! check_copy_mode "$pane_id"; then
    log_debug " Copy mode activated during send, aborting"
    return 1
  fi

  # Adaptive fallback: clear the input line with C-u, then paste again.
  log_debug " Adaptive retry — clearing line and resending"
  tmux send-keys -t "$pane_id" C-u
  sleep 0.08
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  paste_to_pane "$pane_id" "$text"
  sleep 0.12

  local retry_round=0
  local retry_capture
  while (( retry_round < 4 )); do
    tmux send-keys -t "$pane_id" C-m
    sleep 0.18
    tmux send-keys -t "$pane_id" C-m
    sleep 0.14
    retry_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$retry_capture" 2>/dev/null; then
      log_debug " Text consumed after adaptive retry round $((retry_round + 1))"
      return 0
    fi
    (( retry_round++ ))
  done

  # Fail-open: one last nudge.
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" C-m
  sleep 0.12
  tmux send-keys -t "$pane_id" C-m
  log_debug " Fail-open — text may or may not have been submitted"
  return 0
}
|
|
866
|
-
|
|
867
|
-
# =============================================================================
|
|
868
|
-
# Wait for Pane Ready (tmux pattern: paneLooksReady)
|
|
869
|
-
# =============================================================================
|
|
870
|
-
|
|
871
|
-
# Poll a pane until it shows an input prompt and no running task, dismissing
# known interactive prompts along the way.
# Arguments: $1 - tmux pane id
#            $2 - timeout in seconds (default 10)
# Returns:   always 0; on timeout it logs and proceeds (fail-open)
wait_for_pane_ready() {
  local pane_id="$1"
  local timeout="${2:-10}"
  local began=$(date +%s)
  local screen prompt_seen row stripped

  log " Waiting for pane $pane_id ready..."

  until (( $(date +%s) - began >= timeout )); do
    screen=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

    # Trust prompt: press Enter twice, give the TUI time to redraw, re-poll.
    if echo "$screen" | grep -q "Do you trust" 2>/dev/null; then
      log " Trust prompt detected, auto-dismissing..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.12
      tmux send-keys -t "$pane_id" C-m
      sleep 2
      continue
    fi

    # Permission prompt ("Do you want to create/overwrite X?"): approve.
    if echo "$screen" | grep -q "Do you want to" 2>/dev/null; then
      log " Permission prompt detected, auto-approving..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.5
      continue
    fi

    # Codex update prompt: choose option 2 (Skip).
    if echo "$screen" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
      log " Codex update prompt detected, selecting Skip..."
      tmux send-keys -t "$pane_id" "2" C-m
      sleep 0.5
      continue
    fi

    # Look for a prompt character at the start of any captured line.
    # zsh runs the last stage of a pipeline in the current shell, so the
    # assignment inside the loop is visible after it.
    prompt_seen=0
    echo "$screen" | while IFS= read -r row; do
      stripped="${row## }"
      case "$stripped" in
        ❯*|\>*|›*|»*)
          prompt_seen=1
          break
          ;;
      esac
    done 2>/dev/null

    # Fallbacks: prompt glyph in the last 5 lines, or glyph/'>' in the last 3.
    if (( ! prompt_seen )); then
      if echo "$screen" | tail -5 | grep -qE '^\s*[❯›]' 2>/dev/null; then
        prompt_seen=1
      elif echo "$screen" | tail -3 | grep -qE '^\s*[❯›>]' 2>/dev/null; then
        prompt_seen=1
      fi
    fi

    # A visible prompt only counts when no task is currently running.
    if (( prompt_seen )) && ! echo "$screen" | grep -q "esc to interrupt" 2>/dev/null; then
      log " Pane $pane_id is ready."
      return 0
    fi

    sleep 0.25
  done

  # Timeout: report and let safe_send_keys deal with the pane state.
  log " Pane $pane_id ready timeout after ${timeout}s (proceeding anyway)"
  return 0
}
|
|
934
|
-
|
|
935
|
-
# =============================================================================
|
|
936
|
-
# Heartbeat Monitoring (tmux pattern)
|
|
937
|
-
# =============================================================================
|
|
938
|
-
|
|
939
|
-
# --- governance.md s7 step 5+6: Check heartbeat freshness ---
|
|
940
|
-
# Check whether a heartbeat file is fresh.
# Globals:   HEARTBEAT_STALE_THRESHOLD (read) - maximum age in seconds
# Arguments: $1 - path to the heartbeat JSON file (expects an "epoch" field)
# Returns:   0 when the recorded epoch is younger than the threshold;
#            non-zero when the file is missing, unreadable, has no integer
#            epoch, or is stale
check_heartbeat() {
  local hb_file="$1"
  local threshold="$HEARTBEAT_STALE_THRESHOLD"
  local hb_epoch now_epoch

  [[ -f "$hb_file" ]] || return 1

  # Read epoch seconds directly (avoids timezone parsing bugs).
  hb_epoch=$(jq -r '.epoch // empty' "$hb_file" 2>/dev/null) || return 1

  # Only a plain non-negative integer may reach the arithmetic below.
  # Anything else (empty, text, expressions) would be evaluated by (( ))
  # as shell arithmetic, so it is rejected as "no valid heartbeat".
  [[ "$hb_epoch" =~ ^[0-9]+$ ]] || return 1

  now_epoch=$(date +%s)
  (( now_epoch - hb_epoch < threshold ))
}
|
|
959
|
-
|
|
960
|
-
# Check if heartbeat indicates process has exited
|
|
961
|
-
# Report whether a heartbeat file records that its process has exited.
# Arguments: $1 - path to the heartbeat JSON file
# Returns:   0 when the file exists and its "status" field is "exited";
#            1 otherwise
check_heartbeat_exited() {
  local heartbeat_path="$1"
  local reported_status

  [[ -f "$heartbeat_path" ]] || return 1

  reported_status=$(jq -r '.status // empty' "$heartbeat_path" 2>/dev/null)
  if [[ "$reported_status" == "exited" ]]; then
    return 0
  fi
  return 1
}
|
|
970
|
-
|
|
971
|
-
# =============================================================================
|
|
972
|
-
# Idle Pane Nudging (tmux pattern)
|
|
973
|
-
# =============================================================================
|
|
974
|
-
|
|
975
|
-
# --- governance.md s7 step 5+6: Nudge idle panes ---
|
|
976
|
-
# Nudge a pane whose visible tail has not changed for too long.
# Globals:   LAST_PANE_CONTENT, PANE_IDLE_SINCE (read/written, keyed by pane id),
#            IDLE_NUDGE_THRESHOLD, MAX_NUDGES (read)
# Arguments: $1 - tmux pane id
#            $2 - NAME of the variable holding this pane's nudge counter;
#                 it is incremented in place after a nudge
check_and_nudge_idle_pane() {
  local pane_id="$1"
  local nudge_count_var="$2"
  local snapshot
  snapshot=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -3)

  # Output changed since the last poll: remember it and restart the idle clock.
  if [[ "$snapshot" != "${LAST_PANE_CONTENT[$pane_id]:-}" ]]; then
    LAST_PANE_CONTENT[$pane_id]="$snapshot"
    PANE_IDLE_SINCE[$pane_id]=$(date +%s)
    return
  fi

  # Unchanged output: only act once the idle period exceeds the threshold.
  local idle_since="${PANE_IDLE_SINCE[$pane_id]:-$(date +%s)}"
  local now
  now=$(date +%s)
  (( now - idle_since > IDLE_NUDGE_THRESHOLD )) || return 0

  # A12 fix: NEVER nudge a pane that looks busy — a nudge interrupts claude.
  local recent_output
  recent_output=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
  if echo "$recent_output" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|razzle\|bunning\|zesting\|fermenting\|actualizing\|composing\|evaporating\|churning" 2>/dev/null; then
    log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
    return
  fi

  # (P) dereferences the counter variable whose name was passed in.
  local count=${(P)nudge_count_var}
  (( count < MAX_NUDGES )) || return 0

  log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
  safe_send_keys "$pane_id" ""
  (( count++ ))
  # Write the new count back to the caller's variable by name.
  eval "$nudge_count_var=$count"
}
|
|
1007
|
-
|
|
1008
|
-
# =============================================================================
|
|
1009
|
-
# Exponential Backoff Restart (tmux pattern)
|
|
1010
|
-
# =============================================================================
|
|
1011
|
-
|
|
1012
|
-
# --- governance.md s7 step 5: Restart dead workers with backoff ---
|
|
1013
|
-
# Restart a dead claude worker in its pane, with a growing delay per attempt.
# Globals:   WORKER_ENGINE, MAX_RESTARTS, WORKER_MODEL, WORKER_EFFORT (read),
#            WORKER_RESTARTS (read/written, keyed by iteration)
# Arguments: $1 - tmux pane id of the worker
#            $2 - iteration number
#            $3 - trigger file path (accepted for caller compatibility, unused)
# Returns:   0 after a relaunch was sent; 1 for codex workers (no restart) or
#            when the per-iteration restart budget is exhausted
restart_worker() {
  local pane_id="$1"
  local iter="$2"

  # Codex workers are 1-shot exec; restart is not applicable.
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
    return 1
  fi

  local restart_count="${WORKER_RESTARTS[$iter]:-0}"
  if (( restart_count >= MAX_RESTARTS )); then
    log_error "Worker exceeded max restarts ($MAX_RESTARTS) for iteration $iter"
    return 1 # caller writes BLOCKED
  fi

  # Backoff schedule: 5s, 10s, 20s, then 60s for any later attempt.
  # zsh arrays are 1-based, so attempt number (restart_count + 1) selects
  # the matching entry; indexes past the end fall back to 60.
  local -a delays=(5 10 20 60)
  local delay=${delays[$((restart_count + 1))]:-60}
  log " Restarting worker (attempt $((restart_count + 1))/$MAX_RESTARTS) after ${delay}s backoff..."
  sleep "$delay"

  # Interrupt and exit whatever is still running in the pane.
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
  sleep 2

  # Re-launch the interactive claude worker. Only the claude engine can reach
  # this point because codex returned early above.
  safe_send_keys "$pane_id" "$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
  return 0
}
|
|
1051
|
-
|
|
1052
|
-
# =============================================================================
|
|
1053
|
-
# Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
|
|
1054
|
-
# =============================================================================
|
|
1055
|
-
|
|
1056
|
-
# Per-US PRD injection helper
|
|
1057
|
-
# Substitutes the full PRD path with a per-US split path in the Worker prompt base.
|
|
1058
|
-
# Falls back to the full PRD with a stderr warning if the split file is missing.
|
|
1059
|
-
# Args: $1=prompt_base_file $2=full_prd_path $3=per_us_prd_path (empty = no substitution)
|
|
1060
|
-
# Print the worker prompt base, pointing it at a per-US PRD when one exists.
# Arguments: $1 - prompt base file
#            $2 - full PRD path as it appears in the prompt base
#            $3 - per-US PRD path (optional; empty means no substitution)
# Outputs:   the (possibly rewritten) prompt on stdout; a warning on stderr
#            when a per-US path was given but the file does not exist
inject_per_us_prd() {
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
    # Literal substring replacement. The paths are passed through the
    # environment and matched with index(), so regex metacharacters in the
    # PRD path ('.', '[', ...) and '&' or '|' in the replacement are treated
    # as plain characters.
    FULL_PRD="$full_prd" PER_US_PRD="$per_us_prd" awk '
      BEGIN {
        from = ENVIRON["FULL_PRD"]
        to = ENVIRON["PER_US_PRD"]
        from_len = length(from)
      }
      {
        rest = $0
        out = ""
        while (from_len > 0 && (pos = index(rest, from)) > 0) {
          out = out substr(rest, 1, pos - 1) to
          rest = substr(rest, pos + from_len)
        }
        print out rest
      }
    ' < "$prompt_base"
  else
    if [[ -n "$per_us_prd" ]]; then
      echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
|
|
1074
|
-
|
|
1075
|
-
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
|
|
1076
|
-
# NEVER send prompt content through tmux send-keys.
|
|
1077
|
-
# Write payloads to files, send only short trigger commands (<200 chars).
|
|
1078
|
-
# Write the worker prompt and its trigger script for one iteration.
# Prompt content never travels through tmux send-keys; it is written to a
# file and the pane only receives a short trigger command.
# Globals read: LOGS_DIR, MEMORY_FILE, VERIFY_MODE, US_LIST, VERIFIED_US,
#               DESK, SLUG, WORKER_PROMPT_BASE, _PRD_CHANGED, AUTONOMOUS_MODE,
#               WORKER_ENGINE, CODEX_BIN, WORKER_CODEX_MODEL,
#               WORKER_CODEX_REASONING, WORKER_MODEL, WORKER_EFFORT,
#               WORKER_HEARTBEAT
# Arguments:    $1 - iteration number
# Outputs:      iter-NNN.worker-prompt.md and an executable
#               iter-NNN.worker-trigger.sh under LOGS_DIR (via atomic_write)
write_worker_trigger() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.worker-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.worker-output.log"

  # First lines (at most 5) of the "Next Iteration Contract" memory section.
  local contract
  contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -5)

  # Fix contract left behind by a failed verification of the previous iteration.
  local prev_iter=$((iter - 1))
  local fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' "$prev_iter").fix-contract.md"

  # Next unverified US, needed before prompt assembly for per-US PRD injection.
  # 'us' is local so the loop variable does not leak into the global scope;
  # membership is a literal comma-delimited match rather than a regex.
  local next_us=""
  local us
  if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
    for us in $(printf '%s' "$US_LIST" | tr ',' ' '); do
      if [[ ",$VERIFIED_US," != *",$us,"* ]]; then
        next_us="$us"
        break
      fi
    done
  fi

  {
    # Per-US PRD injection: substitute the full PRD path with the per-US split path when available.
    local per_us_prd=""
    [[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
    inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
    echo ""
    echo "---"
    echo "## Iteration Context"
    echo "- **Iteration**: $iter"
    echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
    echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
    if (( _PRD_CHANGED )); then
      echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
    fi

    # Include the fix contract if the previous verifier run failed.
    if [[ -f "$fix_contract_file" ]]; then
      echo ""
      echo "---"
      echo "## IMPORTANT: Fix Contract from Verifier (iteration $prev_iter)"
      echo "The Verifier REJECTED your previous work. You MUST fix the issues below."
      echo "Do NOT just resubmit — actually change the code to address each issue."
      echo ""
      cat "$fix_contract_file"
    fi

    # Per-US mode: tell the Worker exactly which US to work on.
    if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
      if [[ -n "$next_us" ]]; then
        echo ""
        echo "---"
        echo "## PER-US SCOPE LOCK (this iteration) — OVERRIDES memory contract"
        echo "**IGNORE the 'Next Iteration Contract' from memory if it references a different story.**"
        echo "The Leader has determined that **${next_us}** is the next unverified story."
        echo "You MUST implement ONLY **${next_us}** in this iteration."
        echo "Do NOT implement any other user stories."
        # Per-US test-spec injection: point the Worker to the scoped test-spec if available.
        local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
        if [[ -f "$per_us_test_spec" ]]; then
          echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
        else
          echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
        fi
        echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
        echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
        echo ""
        echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
      elif [[ -n "$VERIFIED_US" ]]; then
        # All individual US verified — this is the final full verify iteration.
        echo ""
        echo "---"
        echo "## FINAL VERIFICATION ITERATION"
        echo "All individual US have been verified: $VERIFIED_US"
        echo "Run all tests and verification commands to confirm everything works together."
        echo "Signal verify with us_id=\"ALL\" for the final full verification."
      fi
    elif [[ "$VERIFY_MODE" = "batch" ]]; then
      echo ""
      echo "---"
      if [[ -n "$VERIFIED_US" ]]; then
        echo "## BATCH MODE — CONTINUE FROM PARTIAL PROGRESS"
        echo "The following US have already been verified: **$VERIFIED_US**"
        echo "- Do NOT re-implement these — they are done."
        echo "- Focus ONLY on the remaining unverified user stories."
        echo '- Signal verify with us_id="ALL" when the remaining stories are complete.'
      else
        echo "## BATCH MODE OVERRIDE"
        echo "Ignore any per-US signal instructions above. In batch mode:"
        echo "- Implement ALL user stories in this iteration"
        echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
        echo "- Do NOT signal verify after individual stories"
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative.
    if (( AUTONOMOUS_MODE )); then
      echo ""
      echo "---"
      echo "## AUTONOMOUS MODE"
      echo "Do NOT stop or ask questions when encountering ambiguity or document conflicts."
      echo "**Resolution priority**: PRD > test-spec > context > memory"
      echo "If documents disagree, follow PRD and proceed. Log any conflict you find by"
      echo "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:"
      echo ' {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}'
      echo "Do NOT wait for human input. Keep working."
    fi
  } | atomic_write "$prompt_file"

  # Engine-specific launch command, expanded now (at write time).
  local engine_cmd engine_comment
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    # The prompt path is embedded in generated shell code, so it is
    # shell-quoted to keep paths with spaces or metacharacters intact.
    local quoted_prompt
    quoted_prompt=$(printf '%q' "$prompt_file")
    engine_cmd="${CODEX_BIN:-codex} \\
  -m $WORKER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $quoted_prompt)\""
    engine_comment="# Run codex with fresh context (fallback trigger — TUI primary launch via launch_worker_codex)"
  else
    engine_cmd=$(build_claude_cmd print "$WORKER_MODEL" "$prompt_file" "$output_log" "$WORKER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 5)"
  fi

  # Trigger script. The heredoc is unquoted on purpose: \$-escaped parts are
  # evaluated when the trigger runs, everything else is expanded now.
  # The engine is run as a child (no exec) so the heartbeat cleanup after it
  # still executes.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter worker - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$WORKER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
while true; do
echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
sleep 15
done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Worker prompt: $prompt_file"
  log " Worker trigger: $trigger_file"
}
|
|
1239
|
-
|
|
1240
|
-
# Generate the verifier prompt file and the executable trigger script for one
# iteration.
#
# Arguments:
#   $1 - iteration number (zero-padded to three digits in the file names)
#   $2 - verifier engine; defaults to $VERIFIER_ENGINE. "codex" selects the
#        codex launch line, any other value the claude launch line.
#   $3 - verifier model; defaults to $VERIFIER_MODEL (passed to build_claude_cmd)
#   $4 - optional file-name suffix for consensus runs (e.g. "-claude", "-codex")
# Outputs:
#   $LOGS_DIR/iter-NNN.verifier<suffix>-prompt.md
#   $LOGS_DIR/iter-NNN.verifier<suffix>-trigger.sh (made executable)
#   Both are written through atomic_write.
write_verifier_trigger() {
  local iter="$1"
  local verifier_engine="${2:-$VERIFIER_ENGINE}"  # allow override for consensus
  local verifier_model="${3:-$VERIFIER_MODEL}"
  local suffix="${4:-}"  # optional suffix for consensus (e.g., "-claude", "-codex")

  # Compute the zero-padded iteration tag once; all three paths share it.
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-output.log"

  # Read us_id from iter-signal.json for per-US scoping
  local us_id=""
  if [[ -f "$SIGNAL_FILE" ]]; then
    us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
  fi

  # Build verifier prompt from base with US scope
  {
    cat "$VERIFIER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Verification Context"
    echo "- **Iteration**: $iter"
    echo "- **Done Claim**: $DONE_CLAIM_FILE"
    echo "- **Verify Mode**: $VERIFY_MODE"
    if [[ -n "$us_id" ]]; then
      if [[ "$us_id" = "ALL" ]]; then
        echo "- **Scope**: FULL VERIFY — check ALL acceptance criteria from the PRD"
      else
        echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
      fi
      if [[ -n "$VERIFIED_US" ]]; then
        echo "- **Previously verified US**: $VERIFIED_US"
        echo "- **Note**: Skip re-verifying the above US. Focus on unverified stories."
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative
    if (( AUTONOMOUS_MODE )); then
      echo ""
      echo "---"
      echo "## AUTONOMOUS MODE"
      echo "Do NOT stop or ask questions when encountering ambiguity or document conflicts."
      echo "**Resolution priority**: PRD > test-spec > context > memory"
      echo "If documents disagree, follow PRD and proceed. Log any conflict by"
      echo "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:"
      echo '  {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}'
      echo "Do NOT wait for human input. Keep verifying."
    fi
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
  # Engine-specific launch command (expanded at write time)
  local engine_cmd engine_comment
  if [[ "$verifier_engine" = "codex" ]]; then
    # The paths are pasted into the generated script as source text, so they
    # must be shell-escaped; otherwise a LOGS_DIR containing spaces or glob
    # characters would split the `cat`/`tee` arguments when the trigger runs.
    local quoted_prompt quoted_log
    quoted_prompt=$(printf '%q' "$prompt_file")
    quoted_log=$(printf '%q' "$output_log")
    engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $quoted_prompt)\" \\
  > >(tee $quoted_log) 2>&1"
    engine_comment="# Run codex with fresh context (governance.md s7 step 7) — process substitution preserves tty"
  else
    engine_cmd=$(build_claude_cmd print "$verifier_model" "$prompt_file" "$output_log" "$VERIFIER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 7)"
  fi

  # Unquoted heredoc: $iter, $suffix, $VERIFIER_HEARTBEAT, $engine_comment and
  # $engine_cmd expand now; every \$ is deferred to trigger run time.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log "  Verifier prompt: $prompt_file"
  log "  Verifier trigger: $trigger_file"
}
|
|
1338
|
-
|
|
1339
|
-
# =============================================================================
|
|
1340
|
-
# Cleanup (trap handler)
|
|
1341
|
-
# =============================================================================
|
|
1342
|
-
|
|
1343
|
-
# Trap handler: release the lock, shut down the worker/verifier panes, remove
# temp files, generate the reports, record the final status in metadata.json
# and print the session summary.
#
# Final status is derived from the sentinel files: COMPLETE if
# $COMPLETE_SENTINEL exists, BLOCKED if $BLOCKED_SENTINEL exists, otherwise
# TIMEOUT (shown to the user as "STOPPED (interrupted or timeout)").
cleanup() {
  log "Cleaning up..."

  # Remove lockfile
  if (( LOCKFILE_ACQUIRED )); then
    rm -f "$LOCKFILE_PATH" 2>/dev/null
  else
    log_debug "cleanup: lockfile not owned by this process, skipping removal"
  fi

  # Kill claude processes then kill panes
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
  fi
  sleep 2
  # Kill panes on completion
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
  fi
  log "  Panes cleaned up."

  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
  setopt local_options nonomatch 2>/dev/null
  rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null

  # AC4: Generate campaign report on all terminal states (always-on)
  generate_campaign_report

  # US-001: Generate SV report after campaign report (tmux mode)
  generate_sv_report

  # Print summary
  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))
  local minutes=$(( elapsed / 60 ))
  local seconds=$(( elapsed % 60 ))

  local final_status="UNKNOWN"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
  else
    final_status="TIMEOUT"
  fi

  # --- Update metadata.json with final status ---
  if [[ -f "$METADATA_FILE" ]]; then
    # Write to a temp file first; drop it if jq fails so a bad run neither
    # clobbers metadata.json nor leaves a stray .tmp behind.
    if jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '.campaign_status = $status | .end_time = $end_time' \
      "$METADATA_FILE" > "${METADATA_FILE}.tmp"; then
      mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
    else
      rm -f "${METADATA_FILE}.tmp" 2>/dev/null
    fi
  fi

  if (( DEBUG )); then
    local end_ts
    end_ts=$(date +%s)
    elapsed=$(( end_ts - START_TIME ))

    log_debug "[FLOW] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"

    # --- Validation ---
    log_debug "[FLOW] === Execution Validation ==="

    # 1. Did the correct verify mode run?
    log_debug "[FLOW] verify_mode=$VERIFY_MODE configured=true"

    # 2. Per-US: were all US individually verified?
    if [[ "$VERIFY_MODE" = "per-us" ]]; then
      local prd_file="$DESK/plans/prd-$SLUG.md"
      local expected_us=""
      if [[ -f "$prd_file" ]]; then
        expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
      fi
      # grep -c already prints "0" (and exits 1) when nothing matches, so a
      # "|| echo 0" fallback would produce "0<newline>0" and break the
      # arithmetic comparison below. Swallow the status and default only when
      # grep printed nothing at all.
      local verified_count expected_count
      verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
      verified_count=${verified_count:-0}
      expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
      expected_count=${expected_count:-0}

      if [[ "$final_status" = "COMPLETE" ]]; then
        if (( verified_count >= expected_count )); then
          log_debug "[FLOW] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
        else
          log_debug "[FLOW] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
        fi
      else
        log_debug "[FLOW] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
      fi
    fi

    # 3. Consensus: were both engines used?
    if [[ "$CONSENSUS_MODE" != "off" ]]; then
      if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
        log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
      else
        log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
      fi
    fi

    # 4. Engine match: did the configured engines actually run?
    # (Same grep -c handling as above; a missing $DEBUG_LOG yields 0.)
    local worker_dispatches verifier_dispatches
    worker_dispatches=$(grep -c '\[FLOW\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || true)
    worker_dispatches=${worker_dispatches:-0}
    verifier_dispatches=$(grep -c '\[FLOW\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || true)
    verifier_dispatches=${verifier_dispatches:-0}
    log_debug "[FLOW] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"

    # 5. Fix loops: how many fix contracts were generated?
    local fix_count
    fix_count=$(grep -c '\[DECIDE\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || true)
    fix_count=${fix_count:-0}
    log_debug "[FLOW] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"

    # 6. Circuit breakers: any triggered?
    local cb_count
    cb_count=$(grep -c '\[GOV\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || true)
    cb_count=${cb_count:-0}
    log_debug "[FLOW] circuit_breakers_triggered=$cb_count"

    # 7. Overall result
    log_debug "[FLOW] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
  fi

  echo ""
  echo "============================================================"
  echo "  Ralph Desk Tmux Runner - Session Complete"
  echo "============================================================"
  echo "  Session:    $SESSION_NAME"
  echo "  Slug:       $SLUG"
  echo "  Iterations: $ITERATION / $MAX_ITER"
  echo "  Elapsed:    ${minutes}m ${seconds}s"
  echo ""

  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    echo "  Final State: COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    echo "  Final State: BLOCKED"
  else
    echo "  Final State: STOPPED (interrupted or timeout)"
  fi

  echo ""
  echo "  Tmux session left alive for inspection:"
  echo "    tmux attach -t $SESSION_NAME"
  echo "    tmux kill-session -t $SESSION_NAME"
  echo "============================================================"
}
|
|
1485
|
-
|
|
1486
|
-
# =============================================================================
|
|
1487
|
-
# Poll Loop (used for both Worker and Verifier)
|
|
1488
|
-
# =============================================================================
|
|
1489
|
-
|
|
1490
|
-
# --- governance.md s7 step 5+6: Poll for signal file with heartbeat monitoring ---
|
|
1491
|
-
# Poll until a signal file appears, while monitoring the pane that should
# produce it.
#
# Arguments:
#   $1 - signal file to wait for
#   $2 - heartbeat file written by the trigger script
#   $3 - tmux pane id being watched
#   $4 - trigger file / launch command handed to the restart helpers
#   $5 - role label ("worker" or "verifier"; any value matching *erifier*
#        disables the worker-only paths)
# Returns:
#   0 - signal file present (including the auto-generated A4 fallback and the
#       codex worker-exit handler path)
#   1 - per-iteration timeout, restart limit exceeded, heartbeat circuit
#       breaker, or dead pane
#   2 - API unavailable after $_API_MAX_RETRIES retries (blocked sentinel written)
poll_for_signal() {
  local signal_file="$1"
  local heartbeat_file="$2"
  local pane_id="$3"
  local trigger_file="$4"
  local role="$5"  # "worker" or "verifier"
  local nudge_count=0
  local api_retry_count=0
  local poll_start
  poll_start=$(date +%s)

  # Loop-scoped variables are declared once, up front. In zsh a value-less
  # `local name` on an already-set parameter prints "name=value" (unless
  # TYPESET_SILENT is set), so declaring them inside the loop would write
  # noise to stdout on every pass after the first.
  local now elapsed dc_us_id pane_output_for_retry is_api_text_retry
  local poll_cmd poll_capture

  # Initialize idle tracking for this pane
  LAST_PANE_CONTENT[$pane_id]=""
  PANE_IDLE_SINCE[$pane_id]=$(date +%s)

  while true; do
    now=$(date +%s)
    elapsed=$(( now - poll_start ))

    # Per-iteration timeout check
    if (( elapsed >= ITER_TIMEOUT )); then
      log_error "$role timed out after ${ITER_TIMEOUT}s for iteration $ITERATION"
      return 1  # timeout
    fi

    # Check if signal file appeared
    if [[ -f "$signal_file" ]]; then
      log "  Signal file detected: $signal_file"
      return 0  # success
    fi

    # A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
    # ONLY for Worker polling — Verifier waits for verdict file, not done-claim
    if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
      dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
      if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
        log "  WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
        log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
        echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim without signal)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
        return 0
      fi
    fi

    # API transient-error recovery with bounded backoff
    pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    is_api_text_retry=0
    # One case-insensitive ERE replaces five separate echo|grep pipelines:
    # a standalone 500 or 529, or one of the known overload phrases. The
    # here-string avoids zsh echo's backslash processing and a SIGPIPE-induced
    # pipefail when grep -q exits early.
    if [[ -n "$pane_output_for_retry" ]] &&
      grep -qiE '(^|[^[:digit:]])(500|529)([^[:digit:]]|$)|overloaded|too many requests|service unavailable' \
        <<<"$pane_output_for_retry"; then
      is_api_text_retry=1
    fi

    if (( is_api_text_retry )) || is_api_error "$pane_id"; then
      (( api_retry_count++ ))
      log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
      if (( api_retry_count >= _API_MAX_RETRIES )); then
        log_error "API unavailable after ${_API_MAX_RETRIES} retries"
        write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries"
        return 2
      fi
      # A5: If pane shows "queued messages" or rate-limit corruption, restart pane
      if grep -qi 'queued messages' <<<"$pane_output_for_retry"; then
        log "  A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
        log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
        tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
        tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
        wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
      fi
      sleep "$_API_RETRY_INTERVAL_S"
      continue
    else
      api_retry_count=0
    fi

    # Check heartbeat freshness (tmux pattern)
    if [[ -f "$heartbeat_file" ]]; then
      if check_heartbeat_exited "$heartbeat_file"; then
        # Process exited but no signal file -- give a brief grace period
        sleep 3
        if [[ -f "$signal_file" ]]; then
          log "  Signal file detected after process exit: $signal_file"
          return 0
        fi
        # Dispatch to engine-specific exit handler
        if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
          handle_worker_exit_codex "$ITERATION" "$signal_file"
          return 0
        fi
        # Claude path (or verifier of any engine)
        if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
          # Reset poll timer for the restart
          poll_start=$(date +%s)
          nudge_count=0
          LAST_PANE_CONTENT[$pane_id]=""
          PANE_IDLE_SINCE[$pane_id]=$(date +%s)
          sleep "$POLL_INTERVAL"
          continue
        else
          return 1  # max restarts exceeded
        fi
      fi

      if ! check_heartbeat "$heartbeat_file"; then
        log "  WARNING: $role heartbeat stale (>${HEARTBEAT_STALE_THRESHOLD}s)"
        (( HEARTBEAT_STALE_COUNT++ ))
        # Circuit breaker: 3 consecutive heartbeat stale events
        if (( HEARTBEAT_STALE_COUNT >= 3 )); then
          log_debug "[GOV] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
          log_error "Circuit breaker: 3 consecutive heartbeat stale events"
          return 1
        fi
        # Attempt restart
        if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          poll_start=$(date +%s)
          nudge_count=0
          continue
        else
          return 1
        fi
      else
        # Heartbeat is fresh, reset stale counter
        HEARTBEAT_STALE_COUNT=0
      fi
    fi

    # Dead pane detection during poll: check if claude/codex process died.
    # Delegates to check_dead_pane() for engine-aware logic.
    poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
    if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
      log "  WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
      log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
      # Return failure so caller can handle recovery
      return 1
    fi

    # Auto-approve permission prompts during poll
    poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    if [[ "$poll_capture" == *"Do you want to"* ]]; then
      log "  Permission prompt detected during poll, auto-approving..."
      log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
      tmux send-keys -t "$pane_id" C-m
      sleep 0.5
    fi

    # Idle pane nudging (tmux pattern); the counter is passed by name.
    check_and_nudge_idle_pane "$pane_id" "nudge_count"

    sleep "$POLL_INTERVAL"
  done
}
|
|
1649
|
-
|
|
1650
|
-
# =============================================================================
|
|
1651
|
-
# Consensus Verification (run two verifiers sequentially in same pane)
|
|
1652
|
-
# =============================================================================
|
|
1653
|
-
|
|
1654
|
-
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
|
|
1655
|
-
# Run one verifier in the Verifier pane and wait for its verdict.
#
# Arguments:
#   $1 - iteration number
#   $2 - engine: claude|codex
#   $3 - model for this verifier
#   $4 - file suffix ("-claude" or "-codex")
#   $5 - destination path the verdict file is copied to
# Returns:
#   0 - verdict produced and copied to $5
#   1 - verifier failed to start, timed out, or polling failed
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Write trigger for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-trigger.sh"
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"

  # Clean previous Verifier session (with dead pane detection)
  local verifier_cmd
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ -z "$verifier_cmd" ]]; then
    log "  Verifier pane $VERIFIER_PANE is gone — replacing..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
    replace_worker_pane "$VERIFIER_PANE" "verifier"
    VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
    log "  New verifier pane: $VERIFIER_PANE"
  elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
    log "  Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
    tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
    sleep 0.2
    tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
    sleep 0.3
  elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier — dispatch to engine-specific function
  local verifier_launch
  if [[ "$engine" = "codex" ]]; then
    verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
    launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
    log_debug "Verifier$suffix codex TUI dispatched"
  else
    verifier_launch="$(build_claude_cmd tui "$model" "" "" "$VERIFIER_EFFORT")"
    if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi
    log_debug "Verifier$suffix claude dispatched"
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex exec: file poll + wait for process exit to avoid reading partial results
    log "  Polling for verify-verdict.json ($suffix, codex TUI)..."
    local codex_poll_start
    codex_poll_start=$(date +%s)
    local _verdict_detected=0
    # Declared once outside the loop: a value-less `local` on an already-set
    # parameter makes zsh print "name=value" on every later pass.
    local _pane_cmd codex_elapsed
    while true; do
      # Phase 1: wait for verdict file
      if (( ! _verdict_detected )) && [[ -f "$VERDICT_FILE" ]]; then
        if jq . "$VERDICT_FILE" >/dev/null 2>&1; then
          log "  Verdict file detected, waiting for codex process to finish..."
          _verdict_detected=1
        fi
      fi
      # Phase 2: verdict exists, wait for codex to exit (pane returns to shell)
      if (( _verdict_detected )); then
        _pane_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null || echo "")
        if [[ "$_pane_cmd" = "zsh" || "$_pane_cmd" = "bash" || -z "$_pane_cmd" ]]; then
          log "  Codex verifier$suffix process exited. Proceeding."
          # Re-read verdict in case codex updated it before exiting
          if jq . "$VERDICT_FILE" >/dev/null 2>&1; then
            break
          fi
        fi
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        if (( _verdict_detected )); then
          log "  Codex verifier$suffix timed out waiting for exit, but verdict exists. Proceeding."
          break
        fi
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log "  Polling for verify-verdict.json ($suffix)..."
    # Capture the real exit status. Inside `if ! cmd; then`, $? is the negated
    # status (always 0), which would make the rc==2 check unreachable.
    local verifier_poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix" || verifier_poll_rc=$?
    if (( verifier_poll_rc != 0 )); then
      # rc 2 = API unavailable: poll_for_signal already logged the error and
      # wrote the blocked sentinel, so do not log a second error here.
      if (( verifier_poll_rc == 2 )); then
        return 1
      fi
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Copy verdict to destination
  cp "$VERDICT_FILE" "$verdict_dest"
  log "  Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
|
|
1769
|
-
|
|
1770
|
-
# --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
|
|
1771
|
-
# Returns 0 if all US pass + integration check pass, 1 if any US fails, 2 if integration fails.
|
|
1772
|
-
# Sets FAILED_US global on failure.
|
|
1773
|
-
# Sequential final verify: run one scoped verifier per user story in $US_LIST
# (comma-separated), then an optional integration check.
#
# Arguments:
#   $1 - iteration number
# Globals written:
#   FAILED_US - the failing US id, or "integration" if $VERIFICATION_CMD fails
# Returns:
#   0 - every US passed and the integration check (if configured) passed
#   1 - a verifier failed to launch, polling failed, or a verdict was not "pass"
#   2 - the integration command failed
run_sequential_final_verify() {
  local iter="$1"
  FAILED_US=""

  log "  Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
  log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"

  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local verifier_prompt="$LOGS_DIR/iter-${iter_tag}.verifier-prompt.md"

  # Declared once outside the loop: a value-less `local` on an already-set
  # parameter makes zsh print "name=value" from the second US onward.
  local us verifier_cmd verifier_launch verdict poll_rc

  for us in $(echo "$US_LIST" | tr ',' ' '); do
    log "  Final verify: checking $us..."

    # Override the signal file so write_verifier_trigger scopes the prompt to
    # this US.
    # NOTE(review): the previous signal content is not restored afterwards;
    # confirm callers do not rely on the original iter-signal after this runs.
    echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"

    # Write scoped verifier trigger
    write_verifier_trigger "$iter"

    # Clean verifier pane
    verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
    if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
      tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
    fi
    wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

    # Remove the previous US's verdict BEFORE launching (the same order
    # run_single_verifier uses): a stale verdict must not be visible while the
    # new verifier starts, and a fresh one must never be deleted after launch.
    rm -f "$VERDICT_FILE"

    # Launch verifier
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
    else
      verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
      if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"; then
        log_error "Failed to launch verifier for $us"
        FAILED_US="$us"
        return 1
      fi
    fi

    # Poll for verdict
    poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
    if (( poll_rc != 0 )); then
      log_error "Verifier poll failed for $us (rc=$poll_rc)"
      FAILED_US="$us"
      return 1
    fi

    # Check verdict
    verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
    if [[ "$verdict" != "pass" ]]; then
      FAILED_US="$us"
      log "  Sequential final verify FAILED at $us"
      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
      return 1
    fi
    log "  Sequential final verify: $us PASSED"

    # Archive per-US final verdict
    cp "$VERDICT_FILE" "$LOGS_DIR/iter-${iter_tag}.final-verdict-${us}.json" 2>/dev/null
  done

  # Integration check: run tests if VERIFICATION_CMD is set
  if [[ -n "${VERIFICATION_CMD:-}" ]]; then
    log "  Running integration test suite after sequential verify..."
    log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
    # VERIFICATION_CMD is an operator-supplied command string, hence eval.
    if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
      log "  Integration test suite FAILED"
      FAILED_US="integration"
      return 2
    fi
    log "  Integration test suite PASSED"
  fi

  log "  Sequential final verify: ALL PASSED"
  return 0
}
|
|
1855
|
-
|
|
1856
|
-
# --- US-005: Determine whether consensus verification should run for this signal ---
|
|
1857
|
-
# Returns 0 (use consensus) or 1 (single engine).
|
|
1858
|
-
# Uses unified CONSENSUS_MODE: off|all|final-only
|
|
1859
|
-
# Decide whether consensus verification applies to the given signal.
#
# Arguments:
#   $1 - us_id from the signal (optional; "ALL" marks the final full verify)
# Globals read:
#   CONSENSUS_MODE - off|all|final-only
# Returns:
#   0 - use consensus ("all", or "final-only" with us_id "ALL")
#   1 - single engine (every other combination, including unknown modes)
_should_use_consensus() {
  local us_scope="${1:-}"

  if [[ "$CONSENSUS_MODE" == "all" ]]; then
    return 0
  fi

  if [[ "$CONSENSUS_MODE" == "final-only" && "$us_scope" == "ALL" ]]; then
    return 0
  fi

  return 1
}
|
|
1867
|
-
|
|
1868
|
-
# --- US-004: Run consensus verification (claude + codex sequentially) ---
|
|
1869
|
-
run_consensus_verification() {
|
|
1870
|
-
local iter="$1"
|
|
1871
|
-
local claude_verdict_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verify-verdict-claude.json"
|
|
1872
|
-
local codex_verdict_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verify-verdict-codex.json"
|
|
1873
|
-
|
|
1874
|
-
CONSENSUS_ROUND=0
|
|
1875
|
-
CLAUDE_VERDICT=""
|
|
1876
|
-
CODEX_VERDICT=""
|
|
1877
|
-
|
|
1878
|
-
while (( CONSENSUS_ROUND < 6 )); do
|
|
1879
|
-
(( CONSENSUS_ROUND++ ))
|
|
1880
|
-
log " Consensus round $CONSENSUS_ROUND/6..."
|
|
1881
|
-
|
|
1882
|
-
# Run claude verifier first
|
|
1883
|
-
local _claude_t0=$(date +%s)
|
|
1884
|
-
if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
|
|
1885
|
-
log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
|
|
1886
|
-
return 1
|
|
1887
|
-
fi
|
|
1888
|
-
ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
|
|
1889
|
-
CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
|
|
1890
|
-
# A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
|
|
1891
|
-
if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
|
|
1892
|
-
log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
|
|
1893
|
-
log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
|
|
1894
|
-
rm -f "$claude_verdict_file" 2>/dev/null
|
|
1895
|
-
if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
|
|
1896
|
-
log_error "Claude verifier retry also failed"
|
|
1897
|
-
return 1
|
|
1898
|
-
fi
|
|
1899
|
-
CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
|
|
1900
|
-
if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
|
|
1901
|
-
log_error "Claude verdict still null after retry — consensus cannot proceed"
|
|
1902
|
-
return 1
|
|
1903
|
-
fi
|
|
1904
|
-
fi
|
|
1905
|
-
log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
|
|
1906
|
-
|
|
1907
|
-
# consensus-fail-fast removed (complexity vs value too low)
|
|
1908
|
-
|
|
1909
|
-
# Run codex verifier second
|
|
1910
|
-
local _codex_t0=$(date +%s)
|
|
1911
|
-
if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
|
|
1912
|
-
log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
|
|
1913
|
-
return 1
|
|
1914
|
-
fi
|
|
1915
|
-
ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
|
|
1916
|
-
CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
|
|
1917
|
-
log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"
|
|
1918
|
-
|
|
1919
|
-
log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
|
|
1920
|
-
local _combined_action="retry"
|
|
1921
|
-
if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then _combined_action="pass"
|
|
1922
|
-
elif (( CONSENSUS_ROUND >= 6 )); then _combined_action="blocked"
|
|
1923
|
-
fi
|
|
1924
|
-
log_debug "[GOV] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"
|
|
1925
|
-
|
|
1926
|
-
# Both pass → success
|
|
1927
|
-
if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
|
|
1928
|
-
# Create merged verdict with per-engine details
|
|
1929
|
-
{
|
|
1930
|
-
echo '{'
|
|
1931
|
-
echo ' "verdict": "pass",'
|
|
1932
|
-
echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
|
|
1933
|
-
echo ' "summary": "Consensus PASS: both claude and codex verified independently",'
|
|
1934
|
-
echo ' "recommended_state_transition": "complete",'
|
|
1935
|
-
echo ' "consensus": {'
|
|
1936
|
-
echo ' "claude": { "verdict": "pass", "file": "'"$claude_verdict_file"'" },'
|
|
1937
|
-
echo ' "codex": { "verdict": "pass", "file": "'"$codex_verdict_file"'" },'
|
|
1938
|
-
echo ' "round": '"$CONSENSUS_ROUND"
|
|
1939
|
-
echo ' }'
|
|
1940
|
-
echo '}'
|
|
1941
|
-
} | atomic_write "$VERDICT_FILE"
|
|
1942
|
-
return 0
|
|
1943
|
-
fi
|
|
1944
|
-
|
|
1945
|
-
# Consensus disagreement
|
|
1946
|
-
log_debug "[GOV] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"
|
|
1947
|
-
|
|
1948
|
-
# NOTE: pre_existing_failure heuristic was removed (v0.3.5).
|
|
1949
|
-
# It used unreliable grep-in-description string matching to classify
|
|
1950
|
-
# consensus failures as "pre-existing", bypassing the consensus rule.
|
|
1951
|
-
# Consensus disagreement now ALWAYS flows to fix contract.
|
|
1952
|
-
# Codex CLI crash (no verdict file) is handled upstream via run_single_verifier return 1 → BLOCKED.
|
|
1953
|
-
|
|
1954
|
-
# --- Consensus disagreement: build fix contract ---
|
|
1955
|
-
local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $iter).fix-contract.md"
|
|
1956
|
-
{
|
|
1957
|
-
echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
|
|
1958
|
-
echo ""
|
|
1959
|
-
echo "## Claude Verdict: $CLAUDE_VERDICT"
|
|
1960
|
-
if [[ "$CLAUDE_VERDICT" = "fail" ]]; then
|
|
1961
|
-
echo "### Claude Issues"
|
|
1962
|
-
jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$claude_verdict_file" 2>/dev/null || echo "- (no structured issues)"
|
|
1963
|
-
fi
|
|
1964
|
-
echo ""
|
|
1965
|
-
echo "## Codex Verdict: $CODEX_VERDICT"
|
|
1966
|
-
if [[ "$CODEX_VERDICT" = "fail" ]]; then
|
|
1967
|
-
echo "### Codex Issues"
|
|
1968
|
-
jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$codex_verdict_file" 2>/dev/null || echo "- (no structured issues)"
|
|
1969
|
-
fi
|
|
1970
|
-
echo ""
|
|
1971
|
-
echo "## Traceability"
|
|
1972
|
-
echo "Only changes that resolve a listed issue are allowed."
|
|
1973
|
-
} | atomic_write "$fix_contract"
|
|
1974
|
-
|
|
1975
|
-
log " Combined fix contract: $fix_contract"
|
|
1976
|
-
|
|
1977
|
-
# If this is not the last round, the caller will dispatch the Worker with the fix contract
|
|
1978
|
-
# For now, write a fail verdict so the main loop can handle the fix loop
|
|
1979
|
-
if (( CONSENSUS_ROUND < 6 )); then
|
|
1980
|
-
# Create a merged fail verdict for the main loop — include issues from BOTH verdicts
|
|
1981
|
-
local merged_issues="[]"
|
|
1982
|
-
local claude_issues codex_issues
|
|
1983
|
-
claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_verdict_file" 2>/dev/null || echo '[]')
|
|
1984
|
-
codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_verdict_file" 2>/dev/null || echo '[]')
|
|
1985
|
-
merged_issues=$(echo "$claude_issues $codex_issues" | jq -s 'add // []')
|
|
1986
|
-
{
|
|
1987
|
-
echo '{'
|
|
1988
|
-
echo ' "verdict": "fail",'
|
|
1989
|
-
echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
|
|
1990
|
-
echo ' "summary": "Consensus disagreement (round '"$CONSENSUS_ROUND"'/6): claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
|
|
1991
|
-
echo ' "issues": '"$merged_issues"','
|
|
1992
|
-
echo ' "recommended_state_transition": "continue",'
|
|
1993
|
-
echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": '"$CONSENSUS_ROUND"' }'
|
|
1994
|
-
echo '}'
|
|
1995
|
-
} | atomic_write "$VERDICT_FILE"
|
|
1996
|
-
return 2 # special return: consensus disagreement, needs retry
|
|
1997
|
-
fi
|
|
1998
|
-
done
|
|
1999
|
-
|
|
2000
|
-
# Max consensus rounds exceeded — include issues from both verdicts
|
|
2001
|
-
log_error "Consensus failed after 6 rounds"
|
|
2002
|
-
local final_claude_issues final_codex_issues final_merged_issues
|
|
2003
|
-
final_claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_verdict_file" 2>/dev/null || echo '[]')
|
|
2004
|
-
final_codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_verdict_file" 2>/dev/null || echo '[]')
|
|
2005
|
-
final_merged_issues=$(echo "$final_claude_issues $final_codex_issues" | jq -s 'add // []')
|
|
2006
|
-
{
|
|
2007
|
-
echo '{'
|
|
2008
|
-
echo ' "verdict": "fail",'
|
|
2009
|
-
echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
|
|
2010
|
-
echo ' "summary": "Consensus failed after 6 rounds: claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
|
|
2011
|
-
echo ' "issues": '"$final_merged_issues"','
|
|
2012
|
-
echo ' "recommended_state_transition": "blocked",'
|
|
2013
|
-
echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": 6 }'
|
|
2014
|
-
echo '}'
|
|
2015
|
-
} | atomic_write "$VERDICT_FILE"
|
|
2016
|
-
return 1
|
|
2017
|
-
}
|
|
2018
|
-
|
|
2019
|
-
# =============================================================================
|
|
2020
|
-
# Main Leader Loop
|
|
2021
|
-
# =============================================================================
|
|
2022
|
-
|
|
2023
|
-
main() {
|
|
2024
|
-
# --- Lockfile: prevent duplicate execution ---
|
|
2025
|
-
local lockfile="$LOCKFILE_PATH"
|
|
2026
|
-
mkdir -p "$(dirname "$lockfile")" 2>/dev/null
|
|
2027
|
-
if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
|
|
2028
|
-
local lock_pid
|
|
2029
|
-
lock_pid=$(cat "$lockfile" 2>/dev/null)
|
|
2030
|
-
if kill -0 "$lock_pid" 2>/dev/null; then
|
|
2031
|
-
log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
|
|
2032
|
-
exit 1
|
|
2033
|
-
fi
|
|
2034
|
-
# Stale lock — overwrite
|
|
2035
|
-
log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
|
|
2036
|
-
echo $$ > "$lockfile"
|
|
2037
|
-
LOCKFILE_ACQUIRED=1
|
|
2038
|
-
else
|
|
2039
|
-
LOCKFILE_ACQUIRED=1
|
|
2040
|
-
fi
|
|
2041
|
-
trap cleanup EXIT INT TERM
|
|
2042
|
-
mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
|
|
2043
|
-
|
|
2044
|
-
# --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
|
|
2045
|
-
mkdir -p "$ANALYTICS_DIR" 2>/dev/null
|
|
2046
|
-
|
|
2047
|
-
# --- debug.log versioning (in analytics dir, --debug only) ---
|
|
2048
|
-
if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
|
|
2049
|
-
local dbg_n=1
|
|
2050
|
-
while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
|
|
2051
|
-
(( dbg_n++ ))
|
|
2052
|
-
done
|
|
2053
|
-
mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
|
|
2054
|
-
fi
|
|
2055
|
-
|
|
2056
|
-
# --- campaign.jsonl versioning (always-on) ---
|
|
2057
|
-
if [[ -f "$CAMPAIGN_JSONL" ]]; then
|
|
2058
|
-
local cj_n=1
|
|
2059
|
-
while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
|
|
2060
|
-
(( cj_n++ ))
|
|
2061
|
-
done
|
|
2062
|
-
mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
|
|
2063
|
-
fi
|
|
2064
|
-
|
|
2065
|
-
# --- metadata.json: always write at campaign start (cross-project identification) ---
|
|
2066
|
-
jq -n \
|
|
2067
|
-
--arg slug "$SLUG" \
|
|
2068
|
-
--arg project_root "$ROOT" \
|
|
2069
|
-
--arg project_name "$(basename "$ROOT")" \
|
|
2070
|
-
--arg campaign_status "running" \
|
|
2071
|
-
--arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
2072
|
-
--arg end_time "" \
|
|
2073
|
-
--arg worker_model "$WORKER_MODEL" \
|
|
2074
|
-
--arg verifier_model "$VERIFIER_MODEL" \
|
|
2075
|
-
--argjson debug "$DEBUG" \
|
|
2076
|
-
--argjson with_sv "$WITH_SELF_VERIFICATION" \
|
|
2077
|
-
--argjson consensus "${VERIFY_CONSENSUS:-0}" \
|
|
2078
|
-
'{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
|
|
2079
|
-
> "$METADATA_FILE"
|
|
2080
|
-
|
|
2081
|
-
# --- Startup ---
|
|
2082
|
-
log "Ralph Desk Tmux Runner starting..."
|
|
2083
|
-
log " Slug: $SLUG"
|
|
2084
|
-
log " Root: $ROOT"
|
|
2085
|
-
log " Max iterations: $MAX_ITER"
|
|
2086
|
-
log " Worker model: $WORKER_MODEL"
|
|
2087
|
-
log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
|
|
2088
|
-
log " Verify mode: $VERIFY_MODE"
|
|
2089
|
-
log " Consensus mode: $CONSENSUS_MODE"
|
|
2090
|
-
log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
|
|
2091
|
-
log " Poll interval: ${POLL_INTERVAL}s"
|
|
2092
|
-
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
2093
|
-
# --- Debug: Log execution plan ---
|
|
2094
|
-
if (( DEBUG )); then
|
|
2095
|
-
# Extract US IDs from PRD
|
|
2096
|
-
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
2097
|
-
local us_list=""
|
|
2098
|
-
if [[ -f "$prd_file" ]]; then
|
|
2099
|
-
us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
2100
|
-
fi
|
|
2101
|
-
local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
|
|
2102
|
-
|
|
2103
|
-
log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
2104
|
-
log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
2105
|
-
log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
2106
|
-
log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
|
|
2107
|
-
log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION debug=$DEBUG"
|
|
2108
|
-
|
|
2109
|
-
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
2110
|
-
# Build expected flow
|
|
2111
|
-
local expected_flow=""
|
|
2112
|
-
for us in $(echo "$us_list" | tr ',' ' '); do
|
|
2113
|
-
expected_flow="${expected_flow}worker->verify($us)->"
|
|
2114
|
-
done
|
|
2115
|
-
expected_flow="${expected_flow}verify(ALL)->COMPLETE"
|
|
2116
|
-
log_debug "[OPTION] expected_flow=$expected_flow"
|
|
2117
|
-
else
|
|
2118
|
-
log_debug "[OPTION] expected_flow=worker(all)->verify(ALL)->COMPLETE"
|
|
2119
|
-
fi
|
|
2120
|
-
|
|
2121
|
-
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
|
|
2122
|
-
log_debug "[OPTION] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
|
|
2123
|
-
fi
|
|
2124
|
-
fi
|
|
2125
|
-
|
|
2126
|
-
# Extract US list for per-US sequencing
|
|
2127
|
-
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
2128
|
-
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
2129
|
-
if [[ -f "$prd_file" ]]; then
|
|
2130
|
-
US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
2131
|
-
fi
|
|
2132
|
-
|
|
2133
|
-
# Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
|
|
2134
|
-
local memory_file="$DESK/memos/${SLUG}-memory.md"
|
|
2135
|
-
if [[ -f "$memory_file" ]]; then
|
|
2136
|
-
local completed_us
|
|
2137
|
-
completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
2138
|
-
if [[ -n "$completed_us" ]]; then
|
|
2139
|
-
VERIFIED_US="$completed_us"
|
|
2140
|
-
log " Loaded completed stories from memory: $VERIFIED_US"
|
|
2141
|
-
log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
|
|
2142
|
-
fi
|
|
2143
|
-
fi
|
|
2144
|
-
|
|
2145
|
-
# D1: Fallback — restore verified_us from status.json if memory had none
|
|
2146
|
-
if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
|
|
2147
|
-
local status_verified
|
|
2148
|
-
status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
|
|
2149
|
-
if [[ -n "$status_verified" ]]; then
|
|
2150
|
-
VERIFIED_US="$status_verified"
|
|
2151
|
-
log " Restored verified_us from status.json: $VERIFIED_US"
|
|
2152
|
-
log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
|
|
2153
|
-
fi
|
|
2154
|
-
fi
|
|
2155
|
-
fi
|
|
2156
|
-
|
|
2157
|
-
# Initialize PRD snapshot state for live update detection
|
|
2158
|
-
PREV_PRD_HASH=$(compute_prd_hash)
|
|
2159
|
-
PREV_PRD_US_LIST=$(count_prd_us)
|
|
2160
|
-
|
|
2161
|
-
# Dependency checks
|
|
2162
|
-
check_dependencies
|
|
2163
|
-
|
|
2164
|
-
# Print security warning (governance.md s7: --dangerously-skip-permissions)
|
|
2165
|
-
print_security_warning
|
|
2166
|
-
|
|
2167
|
-
# Validate scaffold
|
|
2168
|
-
validate_scaffold
|
|
2169
|
-
|
|
2170
|
-
# Check for existing sessions
|
|
2171
|
-
check_existing_sessions
|
|
2172
|
-
|
|
2173
|
-
# Create tmux session with pane IDs (governance.md s7 step 1)
|
|
2174
|
-
create_session
|
|
2175
|
-
|
|
2176
|
-
# Set trap for cleanup on exit/error
|
|
2177
|
-
trap cleanup EXIT
|
|
2178
|
-
|
|
2179
|
-
# Initialize context hash for stale detection
|
|
2180
|
-
PREV_CONTEXT_HASH=$(compute_context_hash)
|
|
2181
|
-
|
|
2182
|
-
# --- governance.md s7: Leader Loop ---
|
|
2183
|
-
local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active
|
|
2184
|
-
|
|
2185
|
-
for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
|
|
2186
|
-
log ""
|
|
2187
|
-
log "========== Iteration $ITERATION / $MAX_ITER =========="
|
|
2188
|
-
local ITER_START_TIME
|
|
2189
|
-
ITER_START_TIME=$(date +%s)
|
|
2190
|
-
local _iter_contract=""
|
|
2191
|
-
_iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
|
|
2192
|
-
log_debug "[FLOW] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
|
|
2193
|
-
|
|
2194
|
-
# --- governance.md s7 step 1: Check sentinels ---
|
|
2195
|
-
if [[ -f "$COMPLETE_SENTINEL" ]]; then
|
|
2196
|
-
log "COMPLETE sentinel found. Campaign succeeded."
|
|
2197
|
-
update_status "complete" "complete"
|
|
2198
|
-
return 0
|
|
2199
|
-
fi
|
|
2200
|
-
if [[ -f "$BLOCKED_SENTINEL" ]]; then
|
|
2201
|
-
log "BLOCKED sentinel found. Campaign blocked."
|
|
2202
|
-
update_status "blocked" "blocked"
|
|
2203
|
-
return 1
|
|
2204
|
-
fi
|
|
2205
|
-
|
|
2206
|
-
# --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
|
|
2207
|
-
rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
|
|
2208
|
-
rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null
|
|
2209
|
-
|
|
2210
|
-
# --- Clean previous claude session in panes (one-shot lifecycle) ---
|
|
2211
|
-
# Only needed from iteration 2 onwards (iteration 1 has fresh panes)
|
|
2212
|
-
if (( ITERATION > 1 )); then
|
|
2213
|
-
# Send C-c first (in case claude is mid-task), then /exit
|
|
2214
|
-
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
2215
|
-
sleep 1
|
|
2216
|
-
tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
|
|
2217
|
-
sleep 2
|
|
2218
|
-
# Wait for shell prompt before proceeding
|
|
2219
|
-
wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
|
|
2220
|
-
fi
|
|
2221
|
-
|
|
2222
|
-
# Reset per-iteration state
|
|
2223
|
-
local worker_nudge_count=0
|
|
2224
|
-
local verifier_nudge_count=0
|
|
2225
|
-
ITER_VERIFIER_START=""
|
|
2226
|
-
ITER_VERIFIER_END=""
|
|
2227
|
-
|
|
2228
|
-
# --- US-004: detect PRD changes for live update + re-split ---
|
|
2229
|
-
check_prd_update
|
|
2230
|
-
|
|
2231
|
-
# --- governance.md s7 step 4: Build worker prompt + trigger ---
|
|
2232
|
-
write_worker_trigger "$ITERATION"
|
|
2233
|
-
local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
2234
|
-
|
|
2235
|
-
# AC1: capture worker start timestamp
|
|
2236
|
-
ITER_WORKER_START=$(date +%s)
|
|
2237
|
-
|
|
2238
|
-
update_status "worker" "running"
|
|
2239
|
-
|
|
2240
|
-
# --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
|
|
2241
|
-
log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
2242
|
-
|
|
2243
|
-
local worker_launch
|
|
2244
|
-
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
2245
|
-
worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
|
|
2246
|
-
if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
2247
|
-
write_blocked_sentinel "Worker codex failed to start in pane"
|
|
2248
|
-
update_status "blocked" "worker_start_failed"
|
|
2249
|
-
return 1
|
|
2250
|
-
fi
|
|
2251
|
-
else
|
|
2252
|
-
worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
|
|
2253
|
-
if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
2254
|
-
write_blocked_sentinel "Worker claude failed to start in pane"
|
|
2255
|
-
update_status "blocked" "worker_start_failed"
|
|
2256
|
-
return 1
|
|
2257
|
-
fi
|
|
2258
|
-
fi
|
|
2259
|
-
|
|
2260
|
-
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|
|
2261
|
-
log " Polling for iter-signal.json..."
|
|
2262
|
-
local worker_poll_done=0
|
|
2263
|
-
while (( ! worker_poll_done )); do
|
|
2264
|
-
local worker_poll_rc=0
|
|
2265
|
-
if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
|
|
2266
|
-
worker_poll_done=1
|
|
2267
|
-
log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
|
|
2268
|
-
else
|
|
2269
|
-
worker_poll_rc=$?
|
|
2270
|
-
if (( worker_poll_rc == 2 )); then
|
|
2271
|
-
return 1
|
|
2272
|
-
fi
|
|
2273
|
-
# Check if Worker is still actively running (not stuck)
|
|
2274
|
-
local worker_cmd
|
|
2275
|
-
worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
2276
|
-
if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
|
|
2277
|
-
# Process alive — extend indefinitely (no hard ceiling kill)
|
|
2278
|
-
# Stale-context breaker and nudge system handle truly stuck workers
|
|
2279
|
-
local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
|
|
2280
|
-
local ceiling_exceeded=""
|
|
2281
|
-
if (( iter_elapsed >= HARD_CEILING )); then
|
|
2282
|
-
ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s — not enforced, logged only]"
|
|
2283
|
-
log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
|
|
2284
|
-
log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
|
|
2285
|
-
fi
|
|
2286
|
-
log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
|
|
2287
|
-
log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
|
|
2288
|
-
log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
|
|
2289
|
-
update_status "worker" "slow"
|
|
2290
|
-
# Loop continues — re-poll same iteration
|
|
2291
|
-
else
|
|
2292
|
-
# Worker is truly dead/stuck
|
|
2293
|
-
(( MONITOR_FAILURE_COUNT++ ))
|
|
2294
|
-
log_debug "[GOV] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
|
|
2295
|
-
if (( MONITOR_FAILURE_COUNT >= 3 )); then
|
|
2296
|
-
log_debug "[GOV] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
|
|
2297
|
-
write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
|
|
2298
|
-
update_status "blocked" "monitor_failures"
|
|
2299
|
-
return 1
|
|
2300
|
-
fi
|
|
2301
|
-
log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
|
|
2302
|
-
update_status "worker" "poll_failed"
|
|
2303
|
-
log_debug "[FLOW] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
|
|
2304
|
-
# Worker is truly dead/stuck — BLOCK and let user decide
|
|
2305
|
-
write_blocked_sentinel "Worker process dead/stuck (poll failed). Pane preserved for inspection."
|
|
2306
|
-
update_status "blocked" "worker_dead"
|
|
2307
|
-
return 1
|
|
2308
|
-
fi
|
|
2309
|
-
fi
|
|
2310
|
-
done
|
|
2311
|
-
|
|
2312
|
-
if [[ ! -f "$SIGNAL_FILE" ]]; then
|
|
2313
|
-
log_debug "[FLOW] iter=$ITERATION no_signal_after_poll=true continuing"
|
|
2314
|
-
# No signal — monitor failure, go to next iteration
|
|
2315
|
-
continue
|
|
2316
|
-
fi
|
|
2317
|
-
|
|
2318
|
-
# Reset monitor failure count on success
|
|
2319
|
-
MONITOR_FAILURE_COUNT=0
|
|
2320
|
-
|
|
2321
|
-
# AC1: capture worker end timestamp; reset consensus timing
|
|
2322
|
-
ITER_WORKER_END=$(date +%s)
|
|
2323
|
-
ITER_VERIFIER_CLAUDE_DURATION_S=""
|
|
2324
|
-
ITER_VERIFIER_CODEX_DURATION_S=""
|
|
2325
|
-
|
|
2326
|
-
# --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
|
|
2327
|
-
local signal_status
|
|
2328
|
-
signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
|
|
2329
|
-
local signal_summary
|
|
2330
|
-
signal_summary=$(jq -r '.summary // "no summary"' "$SIGNAL_FILE" 2>/dev/null)
|
|
2331
|
-
|
|
2332
|
-
log " Worker signal: status=$signal_status summary=\"$signal_summary\""
|
|
2333
|
-
|
|
2334
|
-
# Read us_id early for EXEC logging (also used later in verify branch)
|
|
2335
|
-
local signal_us_id_early=""
|
|
2336
|
-
signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
2337
|
-
log_debug "[FLOW] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
|
|
2338
|
-
|
|
2339
|
-
case "$signal_status" in
|
|
2340
|
-
continue)
|
|
2341
|
-
# --- governance.md s7 step 6: continue -> go to step 8 ---
|
|
2342
|
-
log " Worker requests continue. Moving to next iteration."
|
|
2343
|
-
update_status "worker" "continue"
|
|
2344
|
-
;;
|
|
2345
|
-
verify)
|
|
2346
|
-
# --- governance.md s7 step 7: Execute Verifier ---
|
|
2347
|
-
# Read us_id from signal for per-US scoping
|
|
2348
|
-
local signal_us_id=""
|
|
2349
|
-
signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
2350
|
-
log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
|
|
2351
|
-
|
|
2352
|
-
# AC1: capture verifier start timestamp
|
|
2353
|
-
ITER_VERIFIER_START=$(date +%s)
|
|
2354
|
-
|
|
2355
|
-
update_status "verifier" "running"
|
|
2356
|
-
|
|
2357
|
-
# --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
|
|
2358
|
-
if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
|
|
2359
|
-
log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
|
|
2360
|
-
local seq_rc=0
|
|
2361
|
-
run_sequential_final_verify "$ITERATION" || seq_rc=$?
|
|
2362
|
-
if (( seq_rc == 0 )); then
|
|
2363
|
-
write_complete_sentinel "Sequential final verify passed (all US verified individually)"
|
|
2364
|
-
update_status "complete" "pass"
|
|
2365
|
-
write_campaign_jsonl "$ITERATION" "ALL" "pass"
|
|
2366
|
-
return 0
|
|
2367
|
-
else
|
|
2368
|
-
# Sequential verify failed — fall through to fix loop with failed US
|
|
2369
|
-
log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
|
|
2370
|
-
signal_us_id="${FAILED_US:-ALL}"
|
|
2371
|
-
# Synthesize a fail verdict for the fix loop
|
|
2372
|
-
echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
|
|
2373
|
-
fi
|
|
2374
|
-
fi
|
|
2375
|
-
|
|
2376
|
-
# --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
|
|
2377
|
-
local use_consensus=0
|
|
2378
|
-
_should_use_consensus "$signal_us_id" && use_consensus=1
|
|
2379
|
-
|
|
2380
|
-
# --- Consensus vs single verification ---
|
|
2381
|
-
if (( use_consensus )); then
|
|
2382
|
-
# US-004: Run consensus verification (claude + codex sequentially)
|
|
2383
|
-
local consensus_rc=0
|
|
2384
|
-
run_consensus_verification "$ITERATION" || consensus_rc=$?
|
|
2385
|
-
|
|
2386
|
-
if (( consensus_rc == 2 )); then
|
|
2387
|
-
# Consensus disagreement — treat as fail, fix loop will handle
|
|
2388
|
-
log " Consensus disagreement, treating as fail."
|
|
2389
|
-
elif (( consensus_rc != 0 )); then
|
|
2390
|
-
# Consensus verification failed entirely
|
|
2391
|
-
log_error "Consensus verification failed"
|
|
2392
|
-
write_blocked_sentinel "Consensus verification failed after max rounds"
|
|
2393
|
-
update_status "blocked" "consensus_failed"
|
|
2394
|
-
return 1
|
|
2395
|
-
fi
|
|
2396
|
-
else
|
|
2397
|
-
# Standard single-engine verification
|
|
2398
|
-
write_verifier_trigger "$ITERATION"
|
|
2399
|
-
local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
|
|
2400
|
-
|
|
2401
|
-
# Step 7a: Clean previous Verifier session (with dead pane detection)
|
|
2402
|
-
local verifier_cmd
|
|
2403
|
-
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
2404
|
-
if [[ -z "$verifier_cmd" ]]; then
|
|
2405
|
-
log " Verifier pane $VERIFIER_PANE is gone — replacing..."
|
|
2406
|
-
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
|
|
2407
|
-
replace_worker_pane "$VERIFIER_PANE" "verifier"
|
|
2408
|
-
VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
|
|
2409
|
-
log " New verifier pane: $VERIFIER_PANE"
|
|
2410
|
-
elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
|
|
2411
|
-
log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
|
|
2412
|
-
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
2413
|
-
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
2414
|
-
sleep 0.2
|
|
2415
|
-
tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
|
|
2416
|
-
sleep 0.3
|
|
2417
|
-
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
2418
|
-
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
2419
|
-
sleep 0.5
|
|
2420
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
2421
|
-
sleep 2
|
|
2422
|
-
fi
|
|
2423
|
-
wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
|
|
2424
|
-
|
|
2425
|
-
local verifier_launch
|
|
2426
|
-
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
2427
|
-
verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
|
|
2428
|
-
else
|
|
2429
|
-
verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
|
|
2430
|
-
fi
|
|
2431
|
-
log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
|
|
2432
|
-
|
|
2433
|
-
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
2434
|
-
launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
|
|
2435
|
-
else
|
|
2436
|
-
if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
|
|
2437
|
-
update_status "verifier" "start_failed"
|
|
2438
|
-
continue
|
|
2439
|
-
fi
|
|
2440
|
-
fi
|
|
2441
|
-
|
|
2442
|
-
# Poll for verify-verdict.json
|
|
2443
|
-
log " Polling for verify-verdict.json..."
|
|
2444
|
-
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
|
|
2445
|
-
local verifier_poll_rc=$?
|
|
2446
|
-
if (( verifier_poll_rc == 2 )); then
|
|
2447
|
-
return 1
|
|
2448
|
-
fi
|
|
2449
|
-
log_error "Verifier poll failed"
|
|
2450
|
-
# Verifier is dead/stuck — BLOCK and let user decide
|
|
2451
|
-
write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection."
|
|
2452
|
-
update_status "blocked" "verifier_dead"
|
|
2453
|
-
return 1
|
|
2454
|
-
fi
|
|
2455
|
-
fi
|
|
2456
|
-
|
|
2457
|
-
# AC1: capture verifier end timestamp
|
|
2458
|
-
ITER_VERIFIER_END=$(date +%s)
|
|
2459
|
-
|
|
2460
|
-
# --- governance.md s7 step 7: Read verdict via jq ---
|
|
2461
|
-
local verdict
|
|
2462
|
-
verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
|
|
2463
|
-
local recommended
|
|
2464
|
-
recommended=$(jq -r '.recommended_state_transition' "$VERDICT_FILE" 2>/dev/null)
|
|
2465
|
-
local verdict_summary
|
|
2466
|
-
verdict_summary=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
2467
|
-
|
|
2468
|
-
log " Verifier: verdict=$verdict recommended=$recommended"
|
|
2469
|
-
log " Verifier summary: \"$verdict_summary\""
|
|
2470
|
-
local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
|
|
2471
|
-
log_debug "[GOV] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
|
|
2472
|
-
|
|
2473
|
-
case "$verdict" in
|
|
2474
|
-
pass)
|
|
2475
|
-
CONSECUTIVE_FAILURES=0
|
|
2476
|
-
CONSENSUS_ROUND=0
|
|
2477
|
-
_SAME_US_FAIL_COUNT=0
|
|
2478
|
-
_LAST_FAILED_US=""
|
|
2479
|
-
if (( _MODEL_UPGRADED )); then
|
|
2480
|
-
log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
|
|
2481
|
-
log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
|
|
2482
|
-
WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
|
|
2483
|
-
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
2484
|
-
WORKER_CODEX_MODEL="$WORKER_MODEL"
|
|
2485
|
-
WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
|
|
2486
|
-
fi
|
|
2487
|
-
_MODEL_UPGRADED=0
|
|
2488
|
-
fi
|
|
2489
|
-
|
|
2490
|
-
# --- Verified US tracking (both per-us and batch modes) ---
|
|
2491
|
-
if [[ -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
|
|
2492
|
-
# Add this US to verified list
|
|
2493
|
-
if [[ -n "$VERIFIED_US" ]]; then
|
|
2494
|
-
VERIFIED_US="${VERIFIED_US},${signal_us_id}"
|
|
2495
|
-
else
|
|
2496
|
-
VERIFIED_US="$signal_us_id"
|
|
2497
|
-
fi
|
|
2498
|
-
log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
|
|
2499
|
-
log_debug "[FLOW] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
|
|
2500
|
-
update_status "verifier" "pass_us"
|
|
2501
|
-
# Worker will do next US on next iteration
|
|
2502
|
-
elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
|
|
2503
|
-
# Final full verify passed or complete recommended
|
|
2504
|
-
write_complete_sentinel "$verdict_summary"
|
|
2505
|
-
update_status "complete" "pass"
|
|
2506
|
-
write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
|
|
2507
|
-
return 0
|
|
2508
|
-
else
|
|
2509
|
-
log " Verifier passed but did not recommend complete. Continuing."
|
|
2510
|
-
update_status "verifier" "pass_continue"
|
|
2511
|
-
fi
|
|
2512
|
-
;;
|
|
2513
|
-
fail)
|
|
2514
|
-
# --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
|
|
2515
|
-
|
|
2516
|
-
# Parse per_us_results from verdict to track partial progress (batch + per-us)
|
|
2517
|
-
local _prev_verified="$VERIFIED_US"
|
|
2518
|
-
if jq -e '.per_us_results' "$VERDICT_FILE" &>/dev/null; then
|
|
2519
|
-
local _newly_passed
|
|
2520
|
-
_newly_passed=$(jq -r '.per_us_results | to_entries[] | select(.value == "pass") | .key' "$VERDICT_FILE" 2>/dev/null)
|
|
2521
|
-
for _pus in $(echo "$_newly_passed"); do
|
|
2522
|
-
if ! echo ",$VERIFIED_US," | grep -q ",$_pus,"; then
|
|
2523
|
-
if [[ -n "$VERIFIED_US" ]]; then
|
|
2524
|
-
VERIFIED_US="${VERIFIED_US},${_pus}"
|
|
2525
|
-
else
|
|
2526
|
-
VERIFIED_US="$_pus"
|
|
2527
|
-
fi
|
|
2528
|
-
log " Partial progress: $_pus passed (overall FAIL). Verified so far: $VERIFIED_US"
|
|
2529
|
-
fi
|
|
2530
|
-
done
|
|
2531
|
-
log_debug "[FLOW] iter=$ITERATION partial_progress prev=$_prev_verified now=$VERIFIED_US"
|
|
2532
|
-
fi
|
|
2533
|
-
|
|
2534
|
-
# Partial progress resets consecutive failures (progress was made)
|
|
2535
|
-
if [[ "$VERIFIED_US" != "$_prev_verified" ]]; then
|
|
2536
|
-
CONSECUTIVE_FAILURES=0
|
|
2537
|
-
log " Progress detected — consecutive_failures reset to 0"
|
|
2538
|
-
log_debug "[GOV] iter=$ITERATION consecutive_failures_reset=partial_progress"
|
|
2539
|
-
fi
|
|
2540
|
-
|
|
2541
|
-
(( CONSECUTIVE_FAILURES++ ))
|
|
2542
|
-
record_us_failure "${signal_us_id:-unknown}"
|
|
2543
|
-
check_model_upgrade "${signal_us_id:-unknown}"
|
|
2544
|
-
|
|
2545
|
-
# Mid-CB warning: alert at halfway point (governance §8 early warning)
|
|
2546
|
-
if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
|
|
2547
|
-
log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
|
|
2548
|
-
log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
|
|
2549
|
-
fi
|
|
2550
|
-
local verdict_summary_fail
|
|
2551
|
-
verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
2552
|
-
log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
|
|
2553
|
-
|
|
2554
|
-
# Extract issues from verdict for next Worker's fix contract
|
|
2555
|
-
local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).fix-contract.md"
|
|
2556
|
-
{
|
|
2557
|
-
echo "# Fix Contract (from Verifier iteration $ITERATION)"
|
|
2558
|
-
echo ""
|
|
2559
|
-
if [[ -n "$VERIFIED_US" ]]; then
|
|
2560
|
-
echo "## Verified US (do NOT re-implement these)"
|
|
2561
|
-
echo "$VERIFIED_US" | tr ',' '\n' | sed 's/^/- /'
|
|
2562
|
-
echo ""
|
|
2563
|
-
echo "**Focus ONLY on unverified user stories. The above are already verified.**"
|
|
2564
|
-
echo ""
|
|
2565
|
-
fi
|
|
2566
|
-
echo "## Summary"
|
|
2567
|
-
echo "$verdict_summary_fail"
|
|
2568
|
-
echo ""
|
|
2569
|
-
echo "## Issues (from verify-verdict.json)"
|
|
2570
|
-
jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$VERDICT_FILE" 2>/dev/null || echo "- (no structured issues available)"
|
|
2571
|
-
echo ""
|
|
2572
|
-
echo "## Next Iteration Contract"
|
|
2573
|
-
jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
|
|
2574
|
-
} | atomic_write "$fix_contract"
|
|
2575
|
-
log " Fix contract: $fix_contract"
|
|
2576
|
-
log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
|
|
2577
|
-
|
|
2578
|
-
# Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
|
|
2579
|
-
if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
|
|
2580
|
-
# For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
|
|
2581
|
-
_ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
|
|
2582
|
-
if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
|
|
2583
|
-
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
|
|
2584
|
-
log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
|
|
2585
|
-
write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2586
|
-
else
|
|
2587
|
-
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
|
|
2588
|
-
log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2589
|
-
write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2590
|
-
fi
|
|
2591
|
-
update_status "blocked" "consecutive_failures"
|
|
2592
|
-
return 1
|
|
2593
|
-
fi
|
|
2594
|
-
|
|
2595
|
-
update_status "verifier" "fail"
|
|
2596
|
-
;;
|
|
2597
|
-
request_info)
|
|
2598
|
-
# --- governance.md s7 step 7: request_info (degraded in tmux mode) ---
|
|
2599
|
-
local verdict_summary_ri
|
|
2600
|
-
verdict_summary_ri=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
2601
|
-
log " Verifier requests info (degraded in tmux lean mode)."
|
|
2602
|
-
log " Questions: \"$verdict_summary_ri\""
|
|
2603
|
-
log " Treating as soft fail — Worker will see verdict in next iteration."
|
|
2604
|
-
update_status "verifier" "request_info"
|
|
2605
|
-
;;
|
|
2606
|
-
blocked)
|
|
2607
|
-
write_blocked_sentinel "Verifier verdict: blocked - $verdict_summary"
|
|
2608
|
-
update_status "blocked" "verifier_blocked"
|
|
2609
|
-
return 1
|
|
2610
|
-
;;
|
|
2611
|
-
*)
|
|
2612
|
-
log_error "Unknown verdict: $verdict"
|
|
2613
|
-
update_status "verifier" "unknown_verdict"
|
|
2614
|
-
;;
|
|
2615
|
-
esac
|
|
2616
|
-
;;
|
|
2617
|
-
blocked)
|
|
2618
|
-
# --- governance.md s7 step 6: blocked -> write sentinel ---
|
|
2619
|
-
write_blocked_sentinel "Worker reported blocked: $signal_summary"
|
|
2620
|
-
update_status "blocked" "worker_blocked"
|
|
2621
|
-
return 1
|
|
2622
|
-
;;
|
|
2623
|
-
*)
|
|
2624
|
-
log_error "Unknown signal status: $signal_status"
|
|
2625
|
-
update_status "worker" "unknown_status"
|
|
2626
|
-
;;
|
|
2627
|
-
esac
|
|
2628
|
-
|
|
2629
|
-
# --- step 7d: Archive iteration artifacts before cleanup ---
|
|
2630
|
-
archive_iter_artifacts "$ITERATION"
|
|
2631
|
-
|
|
2632
|
-
# --- AC5: Write per-iteration cost estimate ---
|
|
2633
|
-
write_cost_log "$ITERATION"
|
|
2634
|
-
write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
|
|
2635
|
-
|
|
2636
|
-
# --- governance.md s7 step 8: Write result log ---
|
|
2637
|
-
write_result_log "$ITERATION" "$signal_status"
|
|
2638
|
-
|
|
2639
|
-
# --- governance.md s7 step 8: Circuit breaker - stale context check ---
|
|
2640
|
-
if ! check_stale_context; then
|
|
2641
|
-
log_debug "[GOV] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
|
|
2642
|
-
write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
|
|
2643
|
-
update_status "blocked" "stale_context"
|
|
2644
|
-
return 1
|
|
2645
|
-
fi
|
|
2646
|
-
|
|
2647
|
-
# --- governance.md s7 step 8: Update status ---
|
|
2648
|
-
update_status "idle" "${signal_status:-unknown}"
|
|
2649
|
-
done
|
|
2650
|
-
|
|
2651
|
-
# Max iterations reached
|
|
2652
|
-
log "Max iterations ($MAX_ITER) reached."
|
|
2653
|
-
update_status "timeout" "max_iter"
|
|
2654
|
-
return 1
|
|
2655
|
-
}
|
|
2656
|
-
|
|
2657
|
-
# =============================================================================
|
|
2658
|
-
# Entry Point
|
|
2659
|
-
# =============================================================================
|
|
2660
|
-
|
|
2661
|
-
# --- CLI: parse --worker-model / --verifier-model flags ---
# Values supplied on the command line replace the env-var defaults
# (WORKER_ENGINE, WORKER_MODEL, etc.).
# Format: "model:reasoning" → codex engine; "model-name" → claude engine
#
# The positional parameters are walked by index and never shifted, so the
# original "$@" is still intact when this section finishes. Arguments that
# match no arm below are skipped without error.
#
# parse_model_flag's stdout is consumed as space-separated words:
#   first word  → engine
#   second word → model
#   last word   → codex reasoning (codex engine), or effort (any other
#                 engine, only when a word follows the model)
# A non-zero status from parse_model_flag terminates the script with exit 1.
_cli_i=1
while (( _cli_i <= $# )); do
  case "${argv[_cli_i]}" in
    # ---- model selection flags (each consumes the following argument) ----
    --worker-model)
      (( _cli_i += 1 ))
      _cli_parsed=$(parse_model_flag "${argv[_cli_i]:-}" "worker") || exit 1
      WORKER_ENGINE=${_cli_parsed%% *}
      _cli_rest=${_cli_parsed#* }
      WORKER_MODEL=${_cli_rest%% *}
      if [[ $WORKER_ENGINE == codex ]]; then
        WORKER_CODEX_MODEL=$WORKER_MODEL
        WORKER_CODEX_REASONING=${_cli_rest##* }
      elif [[ $_cli_rest == *' '* ]]; then
        WORKER_EFFORT=${_cli_rest##* }
      fi
      ;;
    --verifier-model)
      (( _cli_i += 1 ))
      _cli_parsed=$(parse_model_flag "${argv[_cli_i]:-}" "verifier") || exit 1
      VERIFIER_ENGINE=${_cli_parsed%% *}
      _cli_rest=${_cli_parsed#* }
      VERIFIER_MODEL=${_cli_rest%% *}
      if [[ $VERIFIER_ENGINE == codex ]]; then
        VERIFIER_CODEX_MODEL=$VERIFIER_MODEL
        VERIFIER_CODEX_REASONING=${_cli_rest##* }
      elif [[ $_cli_rest == *' '* ]]; then
        VERIFIER_EFFORT=${_cli_rest##* }
      fi
      ;;
    --final-verifier-model)
      (( _cli_i += 1 ))
      _cli_parsed=$(parse_model_flag "${argv[_cli_i]:-}" "final-verifier") || exit 1
      FINAL_VERIFIER_ENGINE=${_cli_parsed%% *}
      _cli_rest=${_cli_parsed#* }
      FINAL_VERIFIER_MODEL=${_cli_rest%% *}
      if [[ $FINAL_VERIFIER_ENGINE == codex ]]; then
        FINAL_VERIFIER_CODEX_MODEL=$FINAL_VERIFIER_MODEL
        FINAL_VERIFIER_CODEX_REASONING=${_cli_rest##* }
      elif [[ $_cli_rest == *' '* ]]; then
        FINAL_VERIFIER_EFFORT=${_cli_rest##* }
      fi
      ;;

    # ---- consensus flags (each consumes the following argument; a missing
    #      or empty value falls back to the default shown) ----
    --consensus)
      (( _cli_i += 1 ))
      CONSENSUS_MODE="${argv[_cli_i]:-off}"
      ;;
    --consensus-model)
      (( _cli_i += 1 ))
      CONSENSUS_MODEL="${argv[_cli_i]:-gpt-5.4:medium}"
      ;;
    --final-consensus-model)
      (( _cli_i += 1 ))
      FINAL_CONSENSUS_MODEL="${argv[_cli_i]:-gpt-5.4:high}"
      ;;

    # ---- legacy consensus switches (no value) ----
    --final-consensus)
      # Legacy: map to new --consensus final-only
      CONSENSUS_MODE="final-only"
      ;;
    --verify-consensus)
      # Legacy: map to new --consensus all
      CONSENSUS_MODE="all"
      ;;

    # ---- boolean switches (no value) ----
    --lock-worker-model)
      LOCK_WORKER_MODEL=1
      ;;
    --autonomous)
      AUTONOMOUS_MODE=1
      ;;
  esac
  # Step past the flag itself (or past its value, if one was consumed above).
  (( _cli_i += 1 ))
done
# Drop the scratch variables so they do not leak into the rest of the script.
unset _cli_i _cli_parsed _cli_rest
|
|
2736
|
-
|
|
2737
|
-
# Require tmux — tmux mode only works inside an active tmux session.
# An unset or empty $TMUX is treated as "not inside tmux": usage hints are
# printed and the script exits with status 1 before main is invoked.
if [[ -z "${TMUX:-}" ]]; then
  # Diagnostics go to stderr so they never mix with regular stdout output.
  {
    echo "ERROR: tmux mode requires running inside a tmux session."
    echo ""
    echo "  Start tmux first, then retry:"
    echo "    tmux"
    echo "    LOOP_NAME=$SLUG $0"
    echo ""
    echo "  Or use Agent() mode instead (no tmux needed):"
    echo "    /rlp-desk run $SLUG"
  } >&2
  exit 1
fi

# Hand the original, unshifted command-line arguments to main.
main "$@"
|