@ai-dev-methodologies/rlp-desk 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +145 -69
- package/docs/plans/cozy-gliding-trinket.md +53 -0
- package/docs/plans/keen-sauteeing-snowflake.md +245 -0
- package/docs/plans/toasty-whistling-diffie-agent-a6814625642e956da.md +201 -0
- package/docs/plans/toasty-whistling-diffie.md +117 -0
- package/docs/prompts/ralplan-codex-review.md +1 -1
- package/install.sh +5 -0
- package/package.json +1 -1
- package/scripts/postinstall.js +5 -0
- package/scripts/uninstall.js +1 -0
- package/src/commands/rlp-desk.md +193 -51
- package/src/governance.md +28 -10
- package/src/model-upgrade-table.md +50 -0
- package/src/scripts/init_ralph_desk.zsh +200 -19
- package/src/scripts/lib_ralph_desk.zsh +838 -0
- package/src/scripts/run_ralph_desk.zsh +821 -608
|
@@ -69,14 +69,18 @@ CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
|
69
69
|
# --- Verify Mode ---
|
|
70
70
|
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
71
71
|
VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
|
|
72
|
+
FINAL_CONSENSUS="${FINAL_CONSENSUS:-0}" # 0|1 — consensus for final ALL verify only (independent of VERIFY_CONSENSUS)
|
|
72
73
|
CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
|
|
73
|
-
|
|
74
|
+
CONSENSUS_FAIL_FAST="${CONSENSUS_FAIL_FAST:-0}" # 0|1 — skip second verifier if first fails
|
|
75
|
+
CB_THRESHOLD="${CB_THRESHOLD:-6}" # consecutive failures before BLOCKED (default: 6)
|
|
74
76
|
# Effective CB threshold: doubled when consensus mode active (AC2 auto-double)
|
|
75
77
|
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
|
|
76
78
|
EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
|
|
77
79
|
else
|
|
78
80
|
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
|
|
79
81
|
fi
|
|
82
|
+
_API_MAX_RETRIES="${_API_MAX_RETRIES:-5}"
|
|
83
|
+
_API_RETRY_INTERVAL_S="${_API_RETRY_INTERVAL_S:-30}"
|
|
80
84
|
|
|
81
85
|
# --- Derived Paths ---
|
|
82
86
|
DESK="$ROOT/.claude/ralph-desk"
|
|
@@ -84,6 +88,14 @@ PROMPTS_DIR="$DESK/prompts"
|
|
|
84
88
|
CONTEXT_DIR="$DESK/context"
|
|
85
89
|
MEMOS_DIR="$DESK/memos"
|
|
86
90
|
LOGS_DIR="$DESK/logs/$SLUG"
|
|
91
|
+
RUNTIME_DIR="$LOGS_DIR/runtime"
|
|
92
|
+
PRD_FILE="$DESK/plans/prd-$SLUG.md"
|
|
93
|
+
TEST_SPEC_FILE="$DESK/plans/test-spec-$SLUG.md"
|
|
94
|
+
# --- Analytics Directory (user-level, cross-project) ---
|
|
95
|
+
ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
|
|
96
|
+
ANALYTICS_DIR="$HOME/.claude/ralph-desk/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
|
|
97
|
+
CAMPAIGN_JSONL="$ANALYTICS_DIR/campaign.jsonl"
|
|
98
|
+
METADATA_FILE="$ANALYTICS_DIR/metadata.json"
|
|
87
99
|
WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
|
|
88
100
|
VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
|
|
89
101
|
CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
|
|
@@ -93,10 +105,11 @@ DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
|
|
|
93
105
|
VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
|
|
94
106
|
COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
|
|
95
107
|
BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
108
|
+
LOCKFILE_PATH="$DESK/logs/.rlp-desk-${SLUG}.lock"
|
|
109
|
+
STATUS_FILE="$RUNTIME_DIR/status.json"
|
|
110
|
+
SESSION_CONFIG="$RUNTIME_DIR/session-config.json"
|
|
111
|
+
WORKER_HEARTBEAT="$RUNTIME_DIR/worker-heartbeat.json"
|
|
112
|
+
VERIFIER_HEARTBEAT="$RUNTIME_DIR/verifier-heartbeat.json"
|
|
100
113
|
COST_LOG="$LOGS_DIR/cost-log.jsonl"
|
|
101
114
|
|
|
102
115
|
# --- Session Naming ---
|
|
@@ -112,43 +125,265 @@ HEARTBEAT_STALE_COUNT=0
|
|
|
112
125
|
MONITOR_FAILURE_COUNT=0
|
|
113
126
|
CONSECUTIVE_FAILURES=0
|
|
114
127
|
PREV_CONTEXT_HASH=""
|
|
128
|
+
PREV_PRD_HASH=""
|
|
129
|
+
PREV_PRD_US_LIST=""
|
|
130
|
+
_PRD_CHANGED=0
|
|
115
131
|
ITERATION=0
|
|
116
132
|
START_TIME=$(date +%s)
|
|
117
133
|
BASELINE_COMMIT="" # git HEAD at campaign start (captured before loop)
|
|
118
134
|
CAMPAIGN_REPORT_GENERATED=0 # guard against double-generation in cleanup trap
|
|
135
|
+
SV_REPORT_GENERATED=0 # guard against double-generation in generate_sv_report
|
|
119
136
|
VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
|
|
120
137
|
CONSENSUS_ROUND=0 # current consensus round for current US
|
|
121
138
|
US_LIST="" # comma-separated US IDs from PRD (per-us mode)
|
|
139
|
+
LOCKFILE_ACQUIRED=0
|
|
140
|
+
LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}" # 0|1 — set by --lock-worker-model; disables progressive upgrade
|
|
141
|
+
_SAME_US_FAIL_COUNT=0 # consecutive same-US fail counter (upgrade trigger at >= 2)
|
|
142
|
+
_LAST_FAILED_US="" # last failed US ID (same-US tracking for upgrade logic)
|
|
143
|
+
_MODEL_UPGRADED=0 # 1 if Worker model was auto-upgraded during campaign
|
|
144
|
+
_ORIGINAL_WORKER_MODEL="" # WORKER_MODEL saved before first upgrade (for restore on pass)
|
|
145
|
+
_ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade
|
|
122
146
|
|
|
123
147
|
# =============================================================================
|
|
124
148
|
# Utility Functions
|
|
125
149
|
# =============================================================================
|
|
126
150
|
|
|
127
151
|
DEBUG="${DEBUG:-0}"
|
|
128
|
-
DEBUG_LOG="$
|
|
152
|
+
DEBUG_LOG="$ANALYTICS_DIR/debug.log"
|
|
129
153
|
|
|
130
|
-
|
|
131
|
-
|
|
154
|
+
# Source shared business logic
|
|
155
|
+
LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
156
|
+
source "$LIB_DIR/lib_ralph_desk.zsh"
|
|
157
|
+
|
|
158
|
+
# A16: Warn if running in foreground (may conflict with Claude Code pane)
|
|
159
|
+
if [[ -z "${RLP_BACKGROUND:-}" ]]; then
|
|
160
|
+
echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
|
|
161
|
+
echo " Recommended: launch via Bash tool with run_in_background: true" >&2
|
|
162
|
+
echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
|
|
163
|
+
fi
|
|
164
|
+
|
|
165
|
+
# check_dead_pane() — decide whether a pane's foreground command means the
# agent process is gone and only a bare shell is left.
# Engine-aware: "bash" is the normal foreground command for codex (its trigger
# script runs under bash), but for claude it means the TUI has exited.
# Args: $1=pane_current_command
#       $2=engine (claude|codex, default: claude)
#       $3=role (worker|verifier) — accepted for call-site compatibility; not
#          used by the decision
# Returns: 0 if dead, 1 if alive
check_dead_pane() {
  local poll_cmd="$1"
  local engine="${2:-claude}"

  case "$poll_cmd" in
    ""|zsh)
      # Nothing reported, or a bare zsh prompt: the agent is no longer running.
      return 0
      ;;
    bash)
      # bash counts as dead for every engine except codex.
      if [[ "$engine" != "codex" ]]; then
        return 0
      fi
      ;;
  esac
  return 1  # alive
}
|
|
133
184
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
185
|
+
# launch_worker_codex() — launch codex Worker via trigger script (non-interactive exec)
# Pastes "bash <trigger_file>" into the pane and presses Enter. The trigger path
# is shell-escaped first, because the pane's shell re-parses the pasted text and
# an unescaped path containing spaces or metacharacters would be split.
# Args: $1=pane_id $2=trigger_file $3=iteration (accepted for call-site
#       compatibility; not used here)
# Returns: 0 always (codex failures detected by poll_for_signal)
launch_worker_codex() {
  local pane_id="$1"
  local trigger_file="$2"
  local quoted_trigger

  # %q yields a form that is safe to re-read as a single shell word.
  quoted_trigger=$(printf '%q' "$trigger_file")

  log " Launching Worker codex via trigger script in pane $pane_id..."
  paste_to_pane "$pane_id" "bash $quoted_trigger"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker codex trigger sent: $trigger_file"
  sleep 3  # brief wait for codex to start
  return 0
}
|
|
200
|
+
|
|
201
|
+
# launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
# Flow: paste the launch command, wait for the TUI, paste the one-line
# instruction, then check the pane up to 15 times for signs that claude started
# working. If it never does, restart claude once and re-send the instruction.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 on success, 1 on fatal failure (caller writes BLOCKED) — i.e. the
#          TUI never became ready, either initially or after the restart.
launch_worker_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  local instruction_prefix="Read and execute the instructions in"
  local worker_instruction="$instruction_prefix $prompt_file"
  # Substrings (matched case-insensitively) that indicate claude is busy.
  local busy_pattern="esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob"
  local submit_attempts=0
  local pane_check

  log " Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working
  while (( submit_attempts < 15 )); do
    sleep 2
    # Drop the echoed instruction line before matching: its word "execute"
    # contains "exec", which would otherwise satisfy the busy pattern even
    # when the instruction is still sitting unsubmitted in the input box.
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | grep -vF -- "$instruction_prefix")
    if grep -qi -- "$busy_pattern" <<< "$pane_check" 2>/dev/null; then
      log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" C-m
      sleep 1
    fi
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # If 15 attempts failed, restart claude and retry
  if (( submit_attempts >= 15 )); then
    log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
    tmux send-keys -t "$pane_id" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
    sleep 2
    wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
    paste_to_pane "$pane_id" "$worker_launch"
    tmux send-keys -t "$pane_id" C-m
    if wait_for_pane_ready "$pane_id" 30; then
      sleep 3
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
      log " Worker restarted and instruction re-sent"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
    else
      log_error "Worker restart failed — pane not ready"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
      # Recovery failed: report the fatal failure instead of claiming success.
      return 1
    fi
  fi

  return 0
}
|
|
140
281
|
|
|
141
|
-
|
|
142
|
-
|
|
282
|
+
# launch_verifier_codex() — launch codex Verifier in pane (non-interactive)
# Pastes the prepared launch command into the pane and presses Enter.
# Args: $1=pane_id
#       $2=prompt_file, $3=iteration — accepted so the signature stays parallel
#          to launch_verifier_claude; not used here
#       $4=launch_cmd
# Returns: 0 always
launch_verifier_codex() {
  local pane_id="$1"
  local verifier_launch="$4"

  log " Launching Verifier codex in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m
  sleep 3  # brief wait for codex to start
  return 0
}
|
|
144
297
|
|
|
145
|
-
#
|
|
146
|
-
#
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
local
|
|
150
|
-
|
|
151
|
-
|
|
298
|
+
# launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
# Flow: paste the launch command, wait for the TUI, paste the one-line
# instruction, then check the pane up to 15 times for signs that claude started
# working, pressing Enter between checks and re-typing the instruction once.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction has been sent (even if no busy indicator was
#          ever seen — that case is logged), 1 if the TUI never became ready.
launch_verifier_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  local instruction_prefix="Read and execute the instructions in"
  local verifier_instruction="$instruction_prefix $prompt_file"
  # Substrings (matched case-insensitively) that indicate claude is busy.
  local busy_pattern="esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored"
  local submit_attempts=0
  local vs_check

  log " Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  sleep 3
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working
  while (( submit_attempts < 15 )); do
    sleep 2
    # Drop the echoed instruction line before matching: its word "execute"
    # contains "exec", which would otherwise satisfy the busy pattern even
    # when the instruction is still sitting unsubmitted in the input box.
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | grep -vF -- "$instruction_prefix")
    if grep -qi -- "$busy_pattern" <<< "$vs_check" 2>/dev/null; then
      log_debug "Verifier started working after $((submit_attempts + 1)) checks"
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # Previously silent: record that no busy indicator was ever observed.
  if (( submit_attempts >= 15 )); then
    log " WARNING: Verifier instruction not confirmed after 15 checks — continuing without confirmation"
    log_debug "[FLOW] iter=$iter verifier_submit_check=UNCONFIRMED attempts=15"
  fi
  return 0
}
|
|
346
|
+
|
|
347
|
+
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec)
# On exit: check done-claim, auto-generate iter-signal.
# The us_id comes from the done-claim when it exists; otherwise from the
# previous iter-signal (default US-001), overridden by the first "Next ... US-NNN"
# line found in the campaign memory file.
# Globals read: DONE_CLAIM_FILE, DESK, SLUG
# Args: $1=iteration $2=signal_file
# Returns: 0 (signal generated), 1 (signal file could not be written)
handle_worker_exit_codex() {
  local iter="$1"
  local signal_file="$2"
  local us_id summary mem_us us_id_json

  log " Codex worker process exited. Checking for done-claim..."
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
    # jq prints nothing when the done-claim is unreadable or not valid JSON.
    [[ -n "$us_id" ]] || us_id="unknown"
    log " Codex worker completed with done-claim (us_id=$us_id). Auto-generating signal."
    summary="auto-generated after codex exec exit"
  else
    log " WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
    us_id=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null || echo "US-001")
    [[ -n "$us_id" ]] || us_id="US-001"
    mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
    [[ -n "$mem_us" ]] && us_id="$mem_us"
    summary="auto-generated after codex exec exit (no done-claim)"
  fi

  # Escape backslashes and double quotes so the value cannot break the JSON string.
  us_id_json=$(printf '%s' "$us_id" | sed 's/\\/\\\\/g; s/"/\\"/g')

  if ! printf '{"iteration":%s,"status":"verify","us_id":"%s","summary":"%s","timestamp":"%s"}\n' \
    "$iter" "$us_id_json" "$summary" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$signal_file"; then
    log_error "Failed to write auto-generated signal file: $signal_file"
    return 1
  fi
  return 0
}
|
|
372
|
+
|
|
373
|
+
# handle_worker_exit_claude() — react to a claude worker that exited without a signal
# Logs the unexpected exit and delegates recovery to restart_worker.
# Args: $1=pane_id $2=iteration $3=trigger_file
# Returns: 0 when restart_worker succeeds, 1 when it fails
handle_worker_exit_claude() {
  local pane="$1" iteration="$2" trigger="$3"

  log_error "Worker exited without writing signal file"
  # Normalize whatever restart_worker returns to a plain 0/1.
  restart_worker "$pane" "$iteration" "$trigger" || return 1
  return 0
}
|
|
153
388
|
|
|
154
389
|
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
|
|
@@ -205,9 +440,13 @@ check_dependencies() {
|
|
|
205
440
|
missing=1
|
|
206
441
|
fi
|
|
207
442
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
443
|
+
# claude required only when claude engine is used for Worker or Verifier execution;
|
|
444
|
+
# codex-only campaigns can run without claude — generate_sv_report degrades gracefully
|
|
445
|
+
if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
|
|
446
|
+
if ! command -v claude >/dev/null 2>&1; then
|
|
447
|
+
log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
|
|
448
|
+
missing=1
|
|
449
|
+
fi
|
|
211
450
|
fi
|
|
212
451
|
|
|
213
452
|
if ! command -v jq >/dev/null 2>&1; then
|
|
@@ -216,14 +455,9 @@ check_dependencies() {
|
|
|
216
455
|
fi
|
|
217
456
|
|
|
218
457
|
# Codex binary required only when engine=codex or consensus verification is enabled
|
|
219
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
458
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
|
|
220
459
|
if ! command -v codex >/dev/null 2>&1; then
|
|
221
|
-
|
|
222
|
-
log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
|
|
223
|
-
else
|
|
224
|
-
log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
|
|
225
|
-
fi
|
|
226
|
-
log_error "Install with: npm install -g @openai/codex"
|
|
460
|
+
log_error "codex CLI not found. Install: npm install -g @openai/codex"
|
|
227
461
|
missing=1
|
|
228
462
|
fi
|
|
229
463
|
fi
|
|
@@ -232,52 +466,19 @@ check_dependencies() {
|
|
|
232
466
|
exit 1
|
|
233
467
|
fi
|
|
234
468
|
|
|
235
|
-
# Resolve full path to claude binary
|
|
236
|
-
|
|
237
|
-
|
|
469
|
+
# Resolve full path to claude binary when claude engine is in use
|
|
470
|
+
if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
|
|
471
|
+
CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
|
|
472
|
+
log " Claude binary: $CLAUDE_BIN"
|
|
473
|
+
fi
|
|
238
474
|
|
|
239
475
|
# Resolve codex binary if needed
|
|
240
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
476
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
|
|
241
477
|
CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
|
|
242
478
|
log " Codex binary: $CODEX_BIN"
|
|
243
479
|
fi
|
|
244
480
|
}
|
|
245
481
|
|
|
246
|
-
# =============================================================================
|
|
247
|
-
# Scaffold Validation
|
|
248
|
-
# =============================================================================
|
|
249
|
-
|
|
250
|
-
validate_scaffold() {
|
|
251
|
-
local errors=0
|
|
252
|
-
|
|
253
|
-
if [[ ! -f "$WORKER_PROMPT_BASE" ]]; then
|
|
254
|
-
log_error "Worker prompt not found: $WORKER_PROMPT_BASE"
|
|
255
|
-
errors=1
|
|
256
|
-
fi
|
|
257
|
-
|
|
258
|
-
if [[ ! -f "$VERIFIER_PROMPT_BASE" ]]; then
|
|
259
|
-
log_error "Verifier prompt not found: $VERIFIER_PROMPT_BASE"
|
|
260
|
-
errors=1
|
|
261
|
-
fi
|
|
262
|
-
|
|
263
|
-
if [[ ! -f "$CONTEXT_FILE" ]]; then
|
|
264
|
-
log_error "Context file not found: $CONTEXT_FILE"
|
|
265
|
-
errors=1
|
|
266
|
-
fi
|
|
267
|
-
|
|
268
|
-
if [[ ! -f "$MEMORY_FILE" ]]; then
|
|
269
|
-
log_error "Memory file not found: $MEMORY_FILE"
|
|
270
|
-
errors=1
|
|
271
|
-
fi
|
|
272
|
-
|
|
273
|
-
if (( errors )); then
|
|
274
|
-
log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
|
|
275
|
-
exit 1
|
|
276
|
-
fi
|
|
277
|
-
|
|
278
|
-
mkdir -p "$LOGS_DIR"
|
|
279
|
-
}
|
|
280
|
-
|
|
281
482
|
# =============================================================================
|
|
282
483
|
# Session Management (tmux pattern: pane IDs)
|
|
283
484
|
# =============================================================================
|
|
@@ -423,6 +624,17 @@ check_copy_mode() {
|
|
|
423
624
|
# Verification-Based Send Retry (tmux pattern)
|
|
424
625
|
# =============================================================================
|
|
425
626
|
|
|
627
|
+
# --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
# paste_to_pane() — write text to a private temp file, load it into the named
# tmux buffer "rlp-paste", and paste that buffer into the pane (the buffer is
# deleted by paste-buffer -d). No trailing newline / Enter is sent.
# Args: $1=pane_id $2=text
# Returns: 0 on success, 1 if the temp file, load-buffer or paste-buffer step failed
paste_to_pane() {
  local pane_id="$1"
  local text="$2"
  local tmpbuf
  local rc=1

  # mktemp gives an unpredictable, exclusively created file instead of a
  # guessable /tmp name derived from the PID.
  tmpbuf=$(mktemp "${TMPDIR:-/tmp}/rlp-desk-paste.XXXXXX") || return 1

  # printf '%s' writes the text verbatim; echo would interpret option-like
  # text ("-n") and, in zsh, backslash escapes inside the text.
  if printf '%s' "$text" > "$tmpbuf" \
    && tmux load-buffer -b rlp-paste "$tmpbuf" 2>/dev/null \
    && tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null; then
    rc=0
  fi

  rm -f -- "$tmpbuf"
  return $rc
}
|
|
637
|
+
|
|
426
638
|
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
|
|
427
639
|
safe_send_keys() {
|
|
428
640
|
local pane_id="$1"
|
|
@@ -451,18 +663,18 @@ safe_send_keys() {
|
|
|
451
663
|
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
452
664
|
if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
453
665
|
log_debug " Permission prompt detected, auto-approving"
|
|
454
|
-
tmux send-keys -t "$pane_id"
|
|
666
|
+
tmux send-keys -t "$pane_id" C-m
|
|
455
667
|
sleep 0.3
|
|
456
668
|
fi
|
|
457
669
|
# Auto-dismiss codex update prompt (select Skip)
|
|
458
670
|
if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
459
671
|
log_debug " Codex update prompt detected, selecting Skip"
|
|
460
|
-
tmux send-keys -t "$pane_id" "2"
|
|
672
|
+
tmux send-keys -t "$pane_id" "2" C-m
|
|
461
673
|
sleep 0.2
|
|
462
674
|
fi
|
|
463
|
-
# Send text
|
|
464
|
-
log_debug "
|
|
465
|
-
|
|
675
|
+
# Send text via buffer paste (reliable for long strings)
|
|
676
|
+
log_debug " Pasting text to pane $pane_id (${#text} chars)"
|
|
677
|
+
paste_to_pane "$pane_id" "$text"
|
|
466
678
|
|
|
467
679
|
# Allow input buffer to settle (tmux: 150ms)
|
|
468
680
|
sleep 0.15
|
|
@@ -472,9 +684,7 @@ safe_send_keys() {
|
|
|
472
684
|
while (( round < 6 )); do
|
|
473
685
|
sleep 0.1
|
|
474
686
|
if (( round == 0 && pane_busy )); then
|
|
475
|
-
# Busy pane:
|
|
476
|
-
tmux send-keys -t "$pane_id" Tab
|
|
477
|
-
sleep 0.08
|
|
687
|
+
# Busy pane: just C-m (DO NOT send Tab — it toggles Claude Code permission mode)
|
|
478
688
|
tmux send-keys -t "$pane_id" C-m
|
|
479
689
|
else
|
|
480
690
|
tmux send-keys -t "$pane_id" C-m
|
|
@@ -507,7 +717,7 @@ safe_send_keys() {
|
|
|
507
717
|
if ! check_copy_mode "$pane_id"; then
|
|
508
718
|
return 1
|
|
509
719
|
fi
|
|
510
|
-
|
|
720
|
+
paste_to_pane "$pane_id" "$text"
|
|
511
721
|
sleep 0.12
|
|
512
722
|
local retry_round=0
|
|
513
723
|
while (( retry_round < 4 )); do
|
|
@@ -551,9 +761,9 @@ wait_for_pane_ready() {
|
|
|
551
761
|
# Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
|
|
552
762
|
if echo "$captured" | grep -q "Do you trust" 2>/dev/null; then
|
|
553
763
|
log " Trust prompt detected, auto-dismissing..."
|
|
554
|
-
tmux send-keys -t "$pane_id"
|
|
764
|
+
tmux send-keys -t "$pane_id" C-m
|
|
555
765
|
sleep 0.12
|
|
556
|
-
tmux send-keys -t "$pane_id"
|
|
766
|
+
tmux send-keys -t "$pane_id" C-m
|
|
557
767
|
sleep 2
|
|
558
768
|
continue
|
|
559
769
|
fi
|
|
@@ -561,7 +771,7 @@ wait_for_pane_ready() {
|
|
|
561
771
|
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
562
772
|
if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
|
|
563
773
|
log " Permission prompt detected, auto-approving..."
|
|
564
|
-
tmux send-keys -t "$pane_id"
|
|
774
|
+
tmux send-keys -t "$pane_id" C-m
|
|
565
775
|
sleep 0.5
|
|
566
776
|
continue
|
|
567
777
|
fi
|
|
@@ -569,7 +779,7 @@ wait_for_pane_ready() {
|
|
|
569
779
|
# Auto-dismiss codex update prompt (select Skip = option 2)
|
|
570
780
|
if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
571
781
|
log " Codex update prompt detected, selecting Skip..."
|
|
572
|
-
tmux send-keys -t "$pane_id" "2"
|
|
782
|
+
tmux send-keys -t "$pane_id" "2" C-m
|
|
573
783
|
sleep 0.5
|
|
574
784
|
continue
|
|
575
785
|
fi
|
|
@@ -655,12 +865,19 @@ check_and_nudge_idle_pane() {
|
|
|
655
865
|
local now
|
|
656
866
|
now=$(date +%s)
|
|
657
867
|
if (( now - idle_since > IDLE_NUDGE_THRESHOLD )); then
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
(
|
|
663
|
-
|
|
868
|
+
# A12 fix: NEVER nudge if pane is busy (thinking/working) — nudge interrupts claude
|
|
869
|
+
local _nudge_capture
|
|
870
|
+
_nudge_capture=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
|
|
871
|
+
if echo "$_nudge_capture" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|razzle\|bunning\|zesting\|fermenting\|actualizing\|composing\|evaporating\|churning" 2>/dev/null; then
|
|
872
|
+
log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
|
|
873
|
+
else
|
|
874
|
+
local count=${(P)nudge_count_var}
|
|
875
|
+
if (( count < MAX_NUDGES )); then
|
|
876
|
+
log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
|
|
877
|
+
safe_send_keys "$pane_id" ""
|
|
878
|
+
(( count++ ))
|
|
879
|
+
eval "$nudge_count_var=$count"
|
|
880
|
+
fi
|
|
664
881
|
fi
|
|
665
882
|
fi
|
|
666
883
|
else
|
|
@@ -678,6 +895,13 @@ restart_worker() {
|
|
|
678
895
|
local pane_id="$1"
|
|
679
896
|
local iter="$2"
|
|
680
897
|
local trigger_file="$3"
|
|
898
|
+
|
|
899
|
+
# Codex workers are 1-shot exec; restart is not applicable
|
|
900
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
901
|
+
log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
|
|
902
|
+
return 1
|
|
903
|
+
fi
|
|
904
|
+
|
|
681
905
|
local restart_count="${WORKER_RESTARTS[$iter]:-0}"
|
|
682
906
|
|
|
683
907
|
if (( restart_count >= MAX_RESTARTS )); then
|
|
@@ -693,7 +917,7 @@ restart_worker() {
|
|
|
693
917
|
|
|
694
918
|
# Kill existing claude, wait for shell prompt
|
|
695
919
|
tmux send-keys -t "$pane_id" C-c 2>/dev/null
|
|
696
|
-
tmux send-keys -t "$pane_id" "/exit"
|
|
920
|
+
tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
|
|
697
921
|
sleep 2
|
|
698
922
|
|
|
699
923
|
# Re-launch worker (tmux interactive pattern)
|
|
@@ -710,6 +934,25 @@ restart_worker() {
|
|
|
710
934
|
# Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
|
|
711
935
|
# =============================================================================
|
|
712
936
|
|
|
937
|
+
# Per-US PRD injection helper
# Prints the Worker prompt base to stdout with every occurrence of the full PRD
# path replaced by the per-US split path. The replacement is a literal substring
# substitution: regex metacharacters in the paths (".", "*", "[") and
# replacement metacharacters ("&", "\", "|") are treated as plain text.
# Falls back to the unmodified prompt, with a stderr warning, if a split path
# was given but the file is missing.
# Args: $1=prompt_base_file $2=full_prd_path $3=per_us_prd_path (empty = no substitution)
# Returns: exit status of awk (substitution path) or cat (fallback path)
inject_per_us_prd() {
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
    # Paths are handed to awk through the environment: unlike -v assignments,
    # ENVIRON values are not subject to backslash-escape processing.
    RLP_PRD_FROM="$full_prd" RLP_PRD_TO="$per_us_prd" awk '
      BEGIN {
        from = ENVIRON["RLP_PRD_FROM"]
        to = ENVIRON["RLP_PRD_TO"]
        from_len = length(from)
      }
      {
        rest = $0
        out = ""
        # index() finds literal substrings, so no regex is involved.
        while (from_len > 0 && (pos = index(rest, from)) > 0) {
          out = out substr(rest, 1, pos - 1) to
          rest = substr(rest, pos + from_len)
        }
        print out rest
      }
    ' "$prompt_base"
  else
    if [[ -n "$per_us_prd" ]]; then
      echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
|
|
955
|
+
|
|
713
956
|
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
|
|
714
957
|
# NEVER send prompt content through tmux send-keys.
|
|
715
958
|
# Write payloads to files, send only short trigger commands (<200 chars).
|
|
@@ -727,14 +970,31 @@ write_worker_trigger() {
|
|
|
727
970
|
local prev_iter=$((iter - 1))
|
|
728
971
|
local fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' $prev_iter).fix-contract.md"
|
|
729
972
|
|
|
973
|
+
# Compute next unverified US before prompt assembly (required for per-US PRD injection)
|
|
974
|
+
local next_us=""
|
|
975
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
|
|
976
|
+
for us in $(echo "$US_LIST" | tr ',' ' '); do
|
|
977
|
+
if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
|
|
978
|
+
next_us="$us"
|
|
979
|
+
break
|
|
980
|
+
fi
|
|
981
|
+
done
|
|
982
|
+
fi
|
|
983
|
+
|
|
730
984
|
{
|
|
731
|
-
|
|
985
|
+
# Per-US PRD injection: substitute full PRD path with per-US split path when available
|
|
986
|
+
local per_us_prd=""
|
|
987
|
+
[[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
|
|
988
|
+
inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
|
|
732
989
|
echo ""
|
|
733
990
|
echo "---"
|
|
734
991
|
echo "## Iteration Context"
|
|
735
992
|
echo "- **Iteration**: $iter"
|
|
736
993
|
echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
|
|
737
994
|
echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
|
|
995
|
+
if (( _PRD_CHANGED )); then
|
|
996
|
+
echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
|
|
997
|
+
fi
|
|
738
998
|
|
|
739
999
|
# Include fix contract if previous verifier failed
|
|
740
1000
|
if [[ -f "$fix_contract_file" ]]; then
|
|
@@ -749,15 +1009,6 @@ write_worker_trigger() {
|
|
|
749
1009
|
|
|
750
1010
|
# Per-US mode: tell Worker exactly which US to work on
|
|
751
1011
|
if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
|
|
752
|
-
# Find next unverified US
|
|
753
|
-
local next_us=""
|
|
754
|
-
for us in $(echo "$US_LIST" | tr ',' ' '); do
|
|
755
|
-
if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
|
|
756
|
-
next_us="$us"
|
|
757
|
-
break
|
|
758
|
-
fi
|
|
759
|
-
done
|
|
760
|
-
|
|
761
1012
|
if [[ -n "$next_us" ]]; then
|
|
762
1013
|
echo ""
|
|
763
1014
|
echo "---"
|
|
@@ -766,6 +1017,13 @@ write_worker_trigger() {
|
|
|
766
1017
|
echo "The Leader has determined that **${next_us}** is the next unverified story."
|
|
767
1018
|
echo "You MUST implement ONLY **${next_us}** in this iteration."
|
|
768
1019
|
echo "Do NOT implement any other user stories."
|
|
1020
|
+
# Per-US test-spec injection: point Worker to scoped test-spec if available
|
|
1021
|
+
local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
|
|
1022
|
+
if [[ -f "$per_us_test_spec" ]]; then
|
|
1023
|
+
echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
|
|
1024
|
+
else
|
|
1025
|
+
echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
|
|
1026
|
+
fi
|
|
769
1027
|
echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
|
|
770
1028
|
echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
|
|
771
1029
|
echo ""
|
|
@@ -793,12 +1051,12 @@ write_worker_trigger() {
|
|
|
793
1051
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
794
1052
|
# Engine-specific launch command (expanded at write time)
|
|
795
1053
|
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
796
|
-
local engine_cmd="${CODEX_BIN:-codex}
|
|
1054
|
+
local engine_cmd="${CODEX_BIN:-codex} exec \\
|
|
1055
|
+
-m $WORKER_CODEX_MODEL \\
|
|
797
1056
|
-c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
|
|
798
1057
|
--dangerously-bypass-approvals-and-sandbox \\
|
|
799
|
-
\"\$(cat $prompt_file)\"
|
|
800
|
-
|
|
801
|
-
local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
|
|
1058
|
+
\"\$(cat $prompt_file)\""
|
|
1059
|
+
local engine_comment="# Run codex exec with fresh context (no pipe — codex requires terminal)"
|
|
802
1060
|
else
|
|
803
1061
|
local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
|
|
804
1062
|
--model $WORKER_MODEL \\
|
|
@@ -929,282 +1187,6 @@ TRIGGER_EOF
|
|
|
929
1187
|
log " Verifier trigger: $trigger_file"
|
|
930
1188
|
}
|
|
931
1189
|
|
|
932
|
-
# =============================================================================
|
|
933
|
-
# Status Updates
|
|
934
|
-
# =============================================================================
|
|
935
|
-
|
|
936
|
-
# --- governance.md s7 step 8: Update status.json ---
|
|
937
|
-
update_status() {
|
|
938
|
-
local phase="$1"
|
|
939
|
-
local last_result="$2"
|
|
940
|
-
|
|
941
|
-
# Build verified_us as JSON array
|
|
942
|
-
local verified_us_json="[]"
|
|
943
|
-
if [[ -n "$VERIFIED_US" ]]; then
|
|
944
|
-
verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
|
|
945
|
-
fi
|
|
946
|
-
|
|
947
|
-
# Build consensus fields
|
|
948
|
-
local consensus_json=""
|
|
949
|
-
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
950
|
-
consensus_json=',
|
|
951
|
-
"consensus_scope": "'"$CONSENSUS_SCOPE"'",
|
|
952
|
-
"consensus_round": '"$CONSENSUS_ROUND"',
|
|
953
|
-
"claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
|
|
954
|
-
"codex_verdict": "'"${CODEX_VERDICT:-}"'"'
|
|
955
|
-
fi
|
|
956
|
-
|
|
957
|
-
echo '{
|
|
958
|
-
"slug": "'"$SLUG"'",
|
|
959
|
-
"baseline_commit": "'"${BASELINE_COMMIT:-none}"'",
|
|
960
|
-
"iteration": '"$ITERATION"',
|
|
961
|
-
"max_iter": '"$MAX_ITER"',
|
|
962
|
-
"phase": "'"$phase"'",
|
|
963
|
-
"worker_model": "'"$WORKER_MODEL"'",
|
|
964
|
-
"verifier_model": "'"$VERIFIER_MODEL"'",
|
|
965
|
-
"worker_engine": "'"$WORKER_ENGINE"'",
|
|
966
|
-
"verifier_engine": "'"$VERIFIER_ENGINE"'",
|
|
967
|
-
"worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
|
|
968
|
-
"worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
|
|
969
|
-
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
970
|
-
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
|
|
971
|
-
"verify_mode": "'"$VERIFY_MODE"'",
|
|
972
|
-
"verify_consensus": '"$VERIFY_CONSENSUS"',
|
|
973
|
-
"last_result": "'"$last_result"'",
|
|
974
|
-
"consecutive_failures": '"$CONSECUTIVE_FAILURES"',
|
|
975
|
-
"verified_us": '"$verified_us_json"''"$consensus_json"',
|
|
976
|
-
"updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
|
|
977
|
-
}' | atomic_write "$STATUS_FILE"
|
|
978
|
-
}
|
|
979
|
-
|
|
980
|
-
# --- governance.md s7 step 8: Write result log ---
|
|
981
|
-
write_result_log() {
|
|
982
|
-
local iter="$1"
|
|
983
|
-
local result="$2"
|
|
984
|
-
local result_file="$LOGS_DIR/iter-$(printf '%03d' $iter).result.md"
|
|
985
|
-
|
|
986
|
-
local git_diff=""
|
|
987
|
-
if git -C "$ROOT" rev-parse HEAD &>/dev/null; then
|
|
988
|
-
git_diff=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(no git diff available)")
|
|
989
|
-
else
|
|
990
|
-
git_diff="(no commits in repo — cannot diff)"
|
|
991
|
-
fi
|
|
992
|
-
# Include untracked new files in result log
|
|
993
|
-
local result_untracked
|
|
994
|
-
result_untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
|
|
995
|
-
if [[ -n "$result_untracked" ]]; then
|
|
996
|
-
git_diff="${git_diff}
|
|
997
|
-
|
|
998
|
-
Untracked new files:
|
|
999
|
-
${result_untracked}"
|
|
1000
|
-
fi
|
|
1001
|
-
|
|
1002
|
-
{
|
|
1003
|
-
echo "# Iteration $iter Result"
|
|
1004
|
-
echo ""
|
|
1005
|
-
echo "## Status"
|
|
1006
|
-
echo "$result [leader-measured]"
|
|
1007
|
-
echo ""
|
|
1008
|
-
echo "## Files Changed"
|
|
1009
|
-
echo '```'
|
|
1010
|
-
echo "$git_diff"
|
|
1011
|
-
echo '```'
|
|
1012
|
-
echo "[git-measured]"
|
|
1013
|
-
echo ""
|
|
1014
|
-
echo "## Timestamp"
|
|
1015
|
-
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
|
1016
|
-
} | atomic_write "$result_file"
|
|
1017
|
-
}
|
|
1018
|
-
|
|
1019
|
-
# --- step 7d: Archive iteration artifacts (done-claim + verdict) to logs/ ---
|
|
1020
|
-
archive_iter_artifacts() {
|
|
1021
|
-
local iter="$1"
|
|
1022
|
-
local iter_padded
|
|
1023
|
-
iter_padded=$(printf '%03d' "$iter")
|
|
1024
|
-
if [[ -f "$DONE_CLAIM_FILE" ]]; then
|
|
1025
|
-
cp "$DONE_CLAIM_FILE" "$LOGS_DIR/iter-${iter_padded}-done-claim.json" 2>/dev/null
|
|
1026
|
-
fi
|
|
1027
|
-
if [[ -f "$VERDICT_FILE" ]]; then
|
|
1028
|
-
cp "$VERDICT_FILE" "$LOGS_DIR/iter-${iter_padded}-verify-verdict.json" 2>/dev/null
|
|
1029
|
-
fi
|
|
1030
|
-
}
|
|
1031
|
-
|
|
1032
|
-
# --- AC5: Write per-iteration cost estimate to cost-log.jsonl ---
|
|
1033
|
-
write_cost_log() {
|
|
1034
|
-
local iter="$1"
|
|
1035
|
-
local iter_padded
|
|
1036
|
-
iter_padded=$(printf '%03d' "$iter")
|
|
1037
|
-
|
|
1038
|
-
local prompt_bytes=0 claim_bytes=0 verdict_bytes=0
|
|
1039
|
-
local worker_prompt_file="$LOGS_DIR/iter-${iter_padded}.worker-prompt.md"
|
|
1040
|
-
[[ -f "$worker_prompt_file" ]] && prompt_bytes=$(wc -c < "$worker_prompt_file" 2>/dev/null || echo 0)
|
|
1041
|
-
[[ -f "$DONE_CLAIM_FILE" ]] && claim_bytes=$(wc -c < "$DONE_CLAIM_FILE" 2>/dev/null || echo 0)
|
|
1042
|
-
[[ -f "$VERDICT_FILE" ]] && verdict_bytes=$(wc -c < "$VERDICT_FILE" 2>/dev/null || echo 0)
|
|
1043
|
-
|
|
1044
|
-
local estimated_tokens=$(( (prompt_bytes + claim_bytes + verdict_bytes) / 4 ))
|
|
1045
|
-
|
|
1046
|
-
echo '{"iteration":'"$iter"',"estimated_tokens":'"$estimated_tokens"',"token_source":"estimated","prompt_bytes":'"$prompt_bytes"',"claim_bytes":'"$claim_bytes"',"verdict_bytes":'"$verdict_bytes"',"timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$COST_LOG"
|
|
1047
|
-
}
|
|
1048
|
-
|
|
1049
|
-
# --- AC4: Generate campaign-report.md on all terminal states ---
|
|
1050
|
-
generate_campaign_report() {
|
|
1051
|
-
# Guard: idempotent — only generate once per campaign run
|
|
1052
|
-
if (( CAMPAIGN_REPORT_GENERATED )); then return 0; fi
|
|
1053
|
-
CAMPAIGN_REPORT_GENERATED=1
|
|
1054
|
-
|
|
1055
|
-
local final_status="UNKNOWN"
|
|
1056
|
-
if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
|
|
1057
|
-
elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
|
|
1058
|
-
else final_status="TIMEOUT"; fi
|
|
1059
|
-
|
|
1060
|
-
local report_file="$LOGS_DIR/campaign-report.md"
|
|
1061
|
-
|
|
1062
|
-
# AC9: Version existing report before writing new one
|
|
1063
|
-
if [[ -f "$report_file" ]]; then
|
|
1064
|
-
local v=1
|
|
1065
|
-
while [[ -f "${report_file%.md}-v${v}.md" ]]; do (( v++ )); done
|
|
1066
|
-
mv "$report_file" "${report_file%.md}-v${v}.md"
|
|
1067
|
-
fi
|
|
1068
|
-
|
|
1069
|
-
local end_time
|
|
1070
|
-
end_time=$(date +%s)
|
|
1071
|
-
local elapsed=$(( end_time - START_TIME ))
|
|
1072
|
-
|
|
1073
|
-
local baseline_commit_val="${BASELINE_COMMIT:-none}"
|
|
1074
|
-
local files_changed=""
|
|
1075
|
-
if [[ "$baseline_commit_val" != "none" ]]; then
|
|
1076
|
-
files_changed=$(git -C "$ROOT" diff --stat "${baseline_commit_val}" 2>/dev/null || echo "(git diff unavailable)")
|
|
1077
|
-
elif git -C "$ROOT" rev-parse HEAD &>/dev/null; then
|
|
1078
|
-
files_changed=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(git diff unavailable)")
|
|
1079
|
-
else
|
|
1080
|
-
files_changed="(no commits in repo — cannot diff)"
|
|
1081
|
-
fi
|
|
1082
|
-
# Include untracked new files
|
|
1083
|
-
local untracked
|
|
1084
|
-
untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
|
|
1085
|
-
if [[ -n "$untracked" ]]; then
|
|
1086
|
-
files_changed="${files_changed}
|
|
1087
|
-
|
|
1088
|
-
Untracked new files:
|
|
1089
|
-
${untracked}"
|
|
1090
|
-
fi
|
|
1091
|
-
|
|
1092
|
-
local sv_summary=""
|
|
1093
|
-
if (( WITH_SELF_VERIFICATION )); then
|
|
1094
|
-
local sv_report
|
|
1095
|
-
sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
|
|
1096
|
-
if [[ -n "$sv_report" ]]; then
|
|
1097
|
-
sv_summary="See: $(basename "$sv_report")"
|
|
1098
|
-
else
|
|
1099
|
-
sv_summary="SV report generation requires Agent mode. Flag recorded in session-config."
|
|
1100
|
-
fi
|
|
1101
|
-
else
|
|
1102
|
-
sv_summary="N/A — --with-self-verification not enabled"
|
|
1103
|
-
fi
|
|
1104
|
-
|
|
1105
|
-
{
|
|
1106
|
-
echo "# Campaign Report: $SLUG"
|
|
1107
|
-
echo ""
|
|
1108
|
-
echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) | Status: $final_status | Iterations: $ITERATION"
|
|
1109
|
-
echo ""
|
|
1110
|
-
echo "## Objective"
|
|
1111
|
-
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
1112
|
-
if [[ -f "$prd_file" ]]; then
|
|
1113
|
-
grep '^## Objective' -A3 "$prd_file" 2>/dev/null | tail -n +2 | head -3
|
|
1114
|
-
else
|
|
1115
|
-
echo "(PRD not found)"
|
|
1116
|
-
fi
|
|
1117
|
-
echo ""
|
|
1118
|
-
echo "## Execution Summary"
|
|
1119
|
-
echo "- Terminal state: $final_status"
|
|
1120
|
-
echo "- Iterations run: $ITERATION / $MAX_ITER"
|
|
1121
|
-
echo "- Elapsed: ${elapsed}s"
|
|
1122
|
-
echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
|
|
1123
|
-
echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
|
|
1124
|
-
echo ""
|
|
1125
|
-
echo "## US Status"
|
|
1126
|
-
echo "- Verified: ${VERIFIED_US:-none}"
|
|
1127
|
-
echo "- Consecutive failures at end: $CONSECUTIVE_FAILURES"
|
|
1128
|
-
echo ""
|
|
1129
|
-
echo "## Verification Results"
|
|
1130
|
-
local ri=1
|
|
1131
|
-
while (( ri <= ITERATION )); do
|
|
1132
|
-
local iter_dc="$LOGS_DIR/iter-$(printf '%03d' $ri)-done-claim.json"
|
|
1133
|
-
if [[ -f "$iter_dc" ]]; then
|
|
1134
|
-
local us_id
|
|
1135
|
-
us_id=$(jq -r '.us_id // "unknown"' "$iter_dc" 2>/dev/null)
|
|
1136
|
-
echo "- $(basename "$iter_dc"): us_id=$us_id"
|
|
1137
|
-
fi
|
|
1138
|
-
(( ri++ ))
|
|
1139
|
-
done
|
|
1140
|
-
echo ""
|
|
1141
|
-
echo "## Issues Encountered"
|
|
1142
|
-
local fi_found=0
|
|
1143
|
-
local fi_i=1
|
|
1144
|
-
while (( fi_i <= ITERATION )); do
|
|
1145
|
-
local fix_f="$LOGS_DIR/iter-$(printf '%03d' $fi_i).fix-contract.md"
|
|
1146
|
-
if [[ -f "$fix_f" ]]; then
|
|
1147
|
-
echo "- $(basename "$fix_f")"
|
|
1148
|
-
fi_found=1
|
|
1149
|
-
fi
|
|
1150
|
-
(( fi_i++ ))
|
|
1151
|
-
done
|
|
1152
|
-
(( fi_found == 0 )) && echo "- None"
|
|
1153
|
-
echo ""
|
|
1154
|
-
echo "## Cost & Performance"
|
|
1155
|
-
if [[ -f "$COST_LOG" ]]; then
|
|
1156
|
-
local total_tokens=0
|
|
1157
|
-
while IFS= read -r line; do
|
|
1158
|
-
local t
|
|
1159
|
-
t=$(echo "$line" | jq -r '.estimated_tokens // 0' 2>/dev/null || echo 0)
|
|
1160
|
-
total_tokens=$(( total_tokens + t ))
|
|
1161
|
-
done < "$COST_LOG"
|
|
1162
|
-
echo "- Total estimated tokens: $total_tokens (source: estimated, tmux mode)"
|
|
1163
|
-
echo "- See: cost-log.jsonl for per-iteration breakdown"
|
|
1164
|
-
else
|
|
1165
|
-
echo "- No cost data available"
|
|
1166
|
-
fi
|
|
1167
|
-
echo ""
|
|
1168
|
-
echo "## SV Summary"
|
|
1169
|
-
echo "$sv_summary"
|
|
1170
|
-
echo ""
|
|
1171
|
-
echo "## Files Changed"
|
|
1172
|
-
echo '```'
|
|
1173
|
-
echo "$files_changed"
|
|
1174
|
-
echo '```'
|
|
1175
|
-
echo "Note: Files Changed may include pre-existing uncommitted changes if the campaign started in a dirty worktree."
|
|
1176
|
-
} | atomic_write "$report_file"
|
|
1177
|
-
|
|
1178
|
-
log "Campaign report written: $report_file"
|
|
1179
|
-
}
|
|
1180
|
-
|
|
1181
|
-
# =============================================================================
|
|
1182
|
-
# Sentinel Writers
|
|
1183
|
-
# =============================================================================
|
|
1184
|
-
|
|
1185
|
-
# --- governance.md s7: Only the Leader writes sentinels ---
|
|
1186
|
-
write_complete_sentinel() {
|
|
1187
|
-
local summary="$1"
|
|
1188
|
-
echo "# Campaign Complete
|
|
1189
|
-
|
|
1190
|
-
Completed at iteration $ITERATION.
|
|
1191
|
-
$summary
|
|
1192
|
-
|
|
1193
|
-
Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$COMPLETE_SENTINEL"
|
|
1194
|
-
log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
|
|
1195
|
-
}
|
|
1196
|
-
|
|
1197
|
-
write_blocked_sentinel() {
|
|
1198
|
-
local reason="$1"
|
|
1199
|
-
echo "# Campaign Blocked
|
|
1200
|
-
|
|
1201
|
-
Blocked at iteration $ITERATION.
|
|
1202
|
-
Reason: $reason
|
|
1203
|
-
|
|
1204
|
-
Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
|
|
1205
|
-
log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
|
|
1206
|
-
}
|
|
1207
|
-
|
|
1208
1190
|
# =============================================================================
|
|
1209
1191
|
# Cleanup (trap handler)
|
|
1210
1192
|
# =============================================================================
|
|
@@ -1213,17 +1195,21 @@ cleanup() {
|
|
|
1213
1195
|
log "Cleaning up..."
|
|
1214
1196
|
|
|
1215
1197
|
# Remove lockfile
|
|
1216
|
-
|
|
1198
|
+
if (( LOCKFILE_ACQUIRED )); then
|
|
1199
|
+
rm -f "$LOCKFILE_PATH" 2>/dev/null
|
|
1200
|
+
else
|
|
1201
|
+
log_debug "cleanup: lockfile not owned by this process, skipping removal"
|
|
1202
|
+
fi
|
|
1217
1203
|
|
|
1218
1204
|
# Kill claude processes then kill panes
|
|
1219
1205
|
log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
|
|
1220
1206
|
if [[ -n "${WORKER_PANE:-}" ]]; then
|
|
1221
1207
|
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
1222
|
-
tmux send-keys -t "$WORKER_PANE" "/exit"
|
|
1208
|
+
tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
|
|
1223
1209
|
fi
|
|
1224
1210
|
if [[ -n "${VERIFIER_PANE:-}" ]]; then
|
|
1225
1211
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1226
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit"
|
|
1212
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
1227
1213
|
fi
|
|
1228
1214
|
sleep 2
|
|
1229
1215
|
# Kill panes on completion
|
|
@@ -1242,6 +1228,9 @@ cleanup() {
|
|
|
1242
1228
|
# AC4: Generate campaign report on all terminal states (always-on)
|
|
1243
1229
|
generate_campaign_report
|
|
1244
1230
|
|
|
1231
|
+
# US-001: Generate SV report after campaign report (tmux mode)
|
|
1232
|
+
generate_sv_report
|
|
1233
|
+
|
|
1245
1234
|
# Print summary
|
|
1246
1235
|
local end_time
|
|
1247
1236
|
end_time=$(date +%s)
|
|
@@ -1254,6 +1243,13 @@ cleanup() {
|
|
|
1254
1243
|
elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
|
|
1255
1244
|
else final_status="TIMEOUT"; fi
|
|
1256
1245
|
|
|
1246
|
+
# --- Update metadata.json with final status ---
|
|
1247
|
+
if [[ -f "$METADATA_FILE" ]]; then
|
|
1248
|
+
jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
1249
|
+
'.campaign_status = $status | .end_time = $end_time' \
|
|
1250
|
+
"$METADATA_FILE" > "${METADATA_FILE}.tmp" && mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
|
|
1251
|
+
fi
|
|
1252
|
+
|
|
1257
1253
|
if (( DEBUG )); then
|
|
1258
1254
|
local end_ts=$(date +%s)
|
|
1259
1255
|
local elapsed=$((end_ts - START_TIME))
|
|
@@ -1350,6 +1346,7 @@ poll_for_signal() {
|
|
|
1350
1346
|
local trigger_file="$4"
|
|
1351
1347
|
local role="$5" # "worker" or "verifier"
|
|
1352
1348
|
local nudge_count=0
|
|
1349
|
+
local api_retry_count=0
|
|
1353
1350
|
local poll_start
|
|
1354
1351
|
poll_start=$(date +%s)
|
|
1355
1352
|
|
|
@@ -1374,6 +1371,54 @@ poll_for_signal() {
|
|
|
1374
1371
|
return 0 # success
|
|
1375
1372
|
fi
|
|
1376
1373
|
|
|
1374
|
+
# A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
|
|
1375
|
+
# ONLY for Worker polling — Verifier waits for verdict file, not done-claim
|
|
1376
|
+
if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
|
|
1377
|
+
local dc_us_id
|
|
1378
|
+
dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
|
|
1379
|
+
if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
|
|
1380
|
+
log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
|
|
1381
|
+
log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
|
|
1382
|
+
echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim without signal)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
|
|
1383
|
+
return 0
|
|
1384
|
+
fi
|
|
1385
|
+
fi
|
|
1386
|
+
|
|
1387
|
+
# API transient-error recovery with bounded backoff
|
|
1388
|
+
local pane_output_for_retry
|
|
1389
|
+
pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
|
|
1390
|
+
local is_api_text_retry=0
|
|
1391
|
+
if [[ -n "$pane_output_for_retry" ]] &&
|
|
1392
|
+
( echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])500([^[:digit:]]|$)' \
|
|
1393
|
+
|| echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])529([^[:digit:]]|$)' \
|
|
1394
|
+
|| echo "$pane_output_for_retry" | grep -qi 'overloaded' \
|
|
1395
|
+
|| echo "$pane_output_for_retry" | grep -qi 'too many requests' \
|
|
1396
|
+
|| echo "$pane_output_for_retry" | grep -qi 'service unavailable' ); then
|
|
1397
|
+
is_api_text_retry=1
|
|
1398
|
+
fi
|
|
1399
|
+
|
|
1400
|
+
if (( is_api_text_retry )) || is_api_error "$pane_id"; then
|
|
1401
|
+
(( api_retry_count++ ))
|
|
1402
|
+
log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
|
|
1403
|
+
if (( api_retry_count >= _API_MAX_RETRIES )); then
|
|
1404
|
+
log_error "API unavailable after ${_API_MAX_RETRIES} retries"
|
|
1405
|
+
write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries"
|
|
1406
|
+
return 2
|
|
1407
|
+
fi
|
|
1408
|
+
# A5: If pane shows "queued messages" or rate-limit corruption, restart pane
|
|
1409
|
+
if echo "$pane_output_for_retry" | grep -qi 'queued messages'; then
|
|
1410
|
+
log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
|
|
1411
|
+
log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
|
|
1412
|
+
tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
|
|
1413
|
+
tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
|
|
1414
|
+
wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
|
|
1415
|
+
fi
|
|
1416
|
+
sleep "$_API_RETRY_INTERVAL_S"
|
|
1417
|
+
continue
|
|
1418
|
+
else
|
|
1419
|
+
api_retry_count=0
|
|
1420
|
+
fi
|
|
1421
|
+
|
|
1377
1422
|
# Check heartbeat freshness (tmux pattern)
|
|
1378
1423
|
if [[ -f "$heartbeat_file" ]]; then
|
|
1379
1424
|
if check_heartbeat_exited "$heartbeat_file"; then
|
|
@@ -1383,9 +1428,13 @@ poll_for_signal() {
|
|
|
1383
1428
|
log " Signal file detected after process exit: $signal_file"
|
|
1384
1429
|
return 0
|
|
1385
1430
|
fi
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1431
|
+
# Dispatch to engine-specific exit handler
|
|
1432
|
+
if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
|
|
1433
|
+
handle_worker_exit_codex "$ITERATION" "$signal_file"
|
|
1434
|
+
return 0
|
|
1435
|
+
fi
|
|
1436
|
+
# Claude path (or verifier of any engine)
|
|
1437
|
+
if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
|
|
1389
1438
|
# Reset poll timer for the restart
|
|
1390
1439
|
poll_start=$(date +%s)
|
|
1391
1440
|
nudge_count=0
|
|
@@ -1421,13 +1470,24 @@ poll_for_signal() {
|
|
|
1421
1470
|
fi
|
|
1422
1471
|
fi
|
|
1423
1472
|
|
|
1473
|
+
# Dead pane detection during poll: check if claude/codex process died
|
|
1474
|
+
local poll_cmd
|
|
1475
|
+
poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
|
|
1476
|
+
# Dead pane detection — delegates to check_dead_pane() for engine-aware logic
|
|
1477
|
+
if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
|
|
1478
|
+
log " WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
|
|
1479
|
+
log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
|
|
1480
|
+
# Return failure so caller can handle recovery
|
|
1481
|
+
return 1
|
|
1482
|
+
fi
|
|
1483
|
+
|
|
1424
1484
|
# Auto-approve permission prompts during poll
|
|
1425
1485
|
local poll_capture
|
|
1426
1486
|
poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
|
|
1427
1487
|
if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
1428
1488
|
log " Permission prompt detected during poll, auto-approving..."
|
|
1429
1489
|
log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
|
|
1430
|
-
tmux send-keys -t "$pane_id"
|
|
1490
|
+
tmux send-keys -t "$pane_id" C-m
|
|
1431
1491
|
sleep 0.5
|
|
1432
1492
|
fi
|
|
1433
1493
|
|
|
@@ -1438,38 +1498,6 @@ poll_for_signal() {
|
|
|
1438
1498
|
done
|
|
1439
1499
|
}
|
|
1440
1500
|
|
|
1441
|
-
# =============================================================================
|
|
1442
|
-
# Circuit Breaker: Stale Context Detection
|
|
1443
|
-
# =============================================================================
|
|
1444
|
-
|
|
1445
|
-
# --- governance.md s7 step 8: Stale context detection ---
|
|
1446
|
-
compute_context_hash() {
|
|
1447
|
-
if [[ -f "$CONTEXT_FILE" ]]; then
|
|
1448
|
-
md5 -q "$CONTEXT_FILE" 2>/dev/null || md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1
|
|
1449
|
-
else
|
|
1450
|
-
echo "no-context"
|
|
1451
|
-
fi
|
|
1452
|
-
}
|
|
1453
|
-
|
|
1454
|
-
check_stale_context() {
|
|
1455
|
-
local current_hash
|
|
1456
|
-
current_hash=$(compute_context_hash)
|
|
1457
|
-
|
|
1458
|
-
if [[ "$current_hash" == "$PREV_CONTEXT_HASH" ]]; then
|
|
1459
|
-
(( STALE_CONTEXT_COUNT++ ))
|
|
1460
|
-
log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
|
|
1461
|
-
if (( STALE_CONTEXT_COUNT >= 3 )); then
|
|
1462
|
-
log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
|
|
1463
|
-
return 1
|
|
1464
|
-
fi
|
|
1465
|
-
else
|
|
1466
|
-
STALE_CONTEXT_COUNT=0
|
|
1467
|
-
fi
|
|
1468
|
-
|
|
1469
|
-
PREV_CONTEXT_HASH="$current_hash"
|
|
1470
|
-
return 0
|
|
1471
|
-
}
|
|
1472
|
-
|
|
1473
1501
|
# =============================================================================
|
|
1474
1502
|
# Consensus Verification (run two verifiers sequentially in same pane)
|
|
1475
1503
|
# =============================================================================
|
|
@@ -1487,13 +1515,26 @@ run_single_verifier() {
|
|
|
1487
1515
|
local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
|
|
1488
1516
|
local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
|
|
1489
1517
|
|
|
1490
|
-
# Clean previous Verifier session
|
|
1518
|
+
# Clean previous Verifier session (with dead pane detection)
|
|
1491
1519
|
local verifier_cmd
|
|
1492
1520
|
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1493
|
-
if [[
|
|
1521
|
+
if [[ -z "$verifier_cmd" ]]; then
|
|
1522
|
+
log " Verifier pane $VERIFIER_PANE is gone — replacing..."
|
|
1523
|
+
log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
|
|
1524
|
+
replace_worker_pane "$VERIFIER_PANE" "verifier"
|
|
1525
|
+
VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
|
|
1526
|
+
log " New verifier pane: $VERIFIER_PANE"
|
|
1527
|
+
elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
|
|
1528
|
+
log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
|
|
1529
|
+
log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
1530
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
1531
|
+
sleep 0.2
|
|
1532
|
+
tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
|
|
1533
|
+
sleep 0.3
|
|
1534
|
+
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1494
1535
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1495
1536
|
sleep 0.5
|
|
1496
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit"
|
|
1537
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
1497
1538
|
sleep 2
|
|
1498
1539
|
fi
|
|
1499
1540
|
# Always ensure clean shell state before launching new verifier
|
|
@@ -1505,55 +1546,19 @@ run_single_verifier() {
|
|
|
1505
1546
|
# Remove previous verdict file
|
|
1506
1547
|
rm -f "$VERDICT_FILE" 2>/dev/null
|
|
1507
1548
|
|
|
1508
|
-
# Launch verifier
|
|
1549
|
+
# Launch verifier — dispatch to engine-specific function
|
|
1550
|
+
local verifier_launch
|
|
1509
1551
|
if [[ "$engine" = "codex" ]]; then
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
|
|
1514
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1515
|
-
log_debug "Verifier$suffix codex exec sent directly"
|
|
1552
|
+
verifier_launch="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1553
|
+
launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
|
|
1554
|
+
log_debug "Verifier$suffix codex exec dispatched"
|
|
1516
1555
|
else
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
log " Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
|
|
1520
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
|
|
1521
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1522
|
-
|
|
1523
|
-
if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
|
|
1556
|
+
verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
|
|
1557
|
+
if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
|
|
1524
1558
|
log_error "Verifier$suffix failed to start"
|
|
1525
1559
|
return 1
|
|
1526
1560
|
fi
|
|
1527
|
-
|
|
1528
|
-
sleep 3
|
|
1529
|
-
local verifier_instruction="Read and execute the instructions in $prompt_file"
|
|
1530
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1531
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1532
|
-
log_debug "Verifier$suffix instruction sent directly"
|
|
1533
|
-
|
|
1534
|
-
# Verify claude actually started working
|
|
1535
|
-
local v_submit=0
|
|
1536
|
-
while (( v_submit < 15 )); do
|
|
1537
|
-
sleep 2
|
|
1538
|
-
local v_check
|
|
1539
|
-
v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
|
|
1540
|
-
if echo "$v_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" 2>/dev/null; then
|
|
1541
|
-
log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
|
|
1542
|
-
break
|
|
1543
|
-
fi
|
|
1544
|
-
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1545
|
-
if (( v_submit == 8 )); then
|
|
1546
|
-
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1547
|
-
tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
|
|
1548
|
-
sleep 0.1
|
|
1549
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1550
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1551
|
-
fi
|
|
1552
|
-
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1553
|
-
sleep 0.3
|
|
1554
|
-
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1555
|
-
(( v_submit++ ))
|
|
1556
|
-
done
|
|
1561
|
+
log_debug "Verifier$suffix claude dispatched"
|
|
1557
1562
|
fi
|
|
1558
1563
|
|
|
1559
1564
|
# Poll for verdict
|
|
@@ -1581,6 +1586,10 @@ run_single_verifier() {
|
|
|
1581
1586
|
# Claude: use full poll_for_signal with heartbeat/nudge
|
|
1582
1587
|
log " Polling for verify-verdict.json ($suffix)..."
|
|
1583
1588
|
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
|
|
1589
|
+
local verifier_poll_rc=$?
|
|
1590
|
+
if (( verifier_poll_rc == 2 )); then
|
|
1591
|
+
return 1
|
|
1592
|
+
fi
|
|
1584
1593
|
log_error "Verifier$suffix poll failed"
|
|
1585
1594
|
return 1
|
|
1586
1595
|
fi
|
|
@@ -1592,6 +1601,110 @@ run_single_verifier() {
|
|
|
1592
1601
|
return 0
|
|
1593
1602
|
}
|
|
1594
1603
|
|
|
1604
|
+
# --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
|
|
1605
|
+
# Returns 0 if all US pass + integration check pass, 1 if any US fails, 2 if integration fails.
|
|
1606
|
+
# Sets FAILED_US global on failure.
|
|
1607
|
+
run_sequential_final_verify() {
|
|
1608
|
+
local iter="$1"
|
|
1609
|
+
FAILED_US=""
|
|
1610
|
+
|
|
1611
|
+
log " Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
|
|
1612
|
+
log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"
|
|
1613
|
+
|
|
1614
|
+
for us in $(echo "$US_LIST" | tr ',' ' '); do
|
|
1615
|
+
log " Final verify: checking $us..."
|
|
1616
|
+
|
|
1617
|
+
# Temporarily override signal file to scope verifier to this US
|
|
1618
|
+
local orig_signal
|
|
1619
|
+
orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
|
|
1620
|
+
echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"
|
|
1621
|
+
|
|
1622
|
+
# Write scoped verifier trigger
|
|
1623
|
+
write_verifier_trigger "$iter"
|
|
1624
|
+
local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"
|
|
1625
|
+
|
|
1626
|
+
# Clean verifier pane
|
|
1627
|
+
local verifier_cmd
|
|
1628
|
+
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1629
|
+
if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1630
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
|
|
1631
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
|
|
1632
|
+
fi
|
|
1633
|
+
wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
|
|
1634
|
+
|
|
1635
|
+
# Launch verifier
|
|
1636
|
+
local verifier_launch
|
|
1637
|
+
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
1638
|
+
verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1639
|
+
launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
|
|
1640
|
+
else
|
|
1641
|
+
verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
|
|
1642
|
+
launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || {
|
|
1643
|
+
log_error "Failed to launch verifier for $us"
|
|
1644
|
+
FAILED_US="$us"
|
|
1645
|
+
return 1
|
|
1646
|
+
}
|
|
1647
|
+
fi
|
|
1648
|
+
|
|
1649
|
+
# Poll for verdict
|
|
1650
|
+
rm -f "$VERDICT_FILE"
|
|
1651
|
+
local poll_rc=0
|
|
1652
|
+
poll_for_signal "$VERDICT_FILE" "$ITER_TIMEOUT" "verdict" || poll_rc=$?
|
|
1653
|
+
if (( poll_rc != 0 )); then
|
|
1654
|
+
log_error "Verifier poll failed for $us (rc=$poll_rc)"
|
|
1655
|
+
FAILED_US="$us"
|
|
1656
|
+
return 1
|
|
1657
|
+
fi
|
|
1658
|
+
|
|
1659
|
+
# Check verdict
|
|
1660
|
+
local verdict
|
|
1661
|
+
verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
|
|
1662
|
+
if [[ "$verdict" != "pass" ]]; then
|
|
1663
|
+
FAILED_US="$us"
|
|
1664
|
+
log " Sequential final verify FAILED at $us"
|
|
1665
|
+
log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
|
|
1666
|
+
return 1
|
|
1667
|
+
fi
|
|
1668
|
+
log " Sequential final verify: $us PASSED"
|
|
1669
|
+
|
|
1670
|
+
# Archive per-US final verdict
|
|
1671
|
+
cp "$VERDICT_FILE" "$LOGS_DIR/iter-$(printf '%03d' $iter).final-verdict-${us}.json" 2>/dev/null
|
|
1672
|
+
done
|
|
1673
|
+
|
|
1674
|
+
# Integration check: run tests if VERIFICATION_CMD is set
|
|
1675
|
+
if [[ -n "${VERIFICATION_CMD:-}" ]]; then
|
|
1676
|
+
log " Running integration test suite after sequential verify..."
|
|
1677
|
+
log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
|
|
1678
|
+
if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
|
|
1679
|
+
log " Integration test suite FAILED"
|
|
1680
|
+
FAILED_US="integration"
|
|
1681
|
+
return 2
|
|
1682
|
+
fi
|
|
1683
|
+
log " Integration test suite PASSED"
|
|
1684
|
+
fi
|
|
1685
|
+
|
|
1686
|
+
log " Sequential final verify: ALL PASSED"
|
|
1687
|
+
return 0
|
|
1688
|
+
}
|
|
1689
|
+
|
|
1690
|
+
# --- US-005: Determine whether consensus verification should run for this signal ---
|
|
1691
|
+
# Returns 0 (use consensus) or 1 (single engine).
|
|
1692
|
+
# VERIFY_CONSENSUS + CONSENSUS_SCOPE handles per-US consensus.
|
|
1693
|
+
# FINAL_CONSENSUS independently enables consensus for the final ALL verify only.
|
|
1694
|
+
_should_use_consensus() {
|
|
1695
|
+
local signal_us_id="${1:-}"
|
|
1696
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1697
|
+
case "$CONSENSUS_SCOPE" in
|
|
1698
|
+
all) return 0 ;;
|
|
1699
|
+
final-only) [[ "$signal_us_id" == "ALL" ]] && return 0 ;;
|
|
1700
|
+
esac
|
|
1701
|
+
fi
|
|
1702
|
+
if [[ "$FINAL_CONSENSUS" = "1" && "$signal_us_id" == "ALL" ]]; then
|
|
1703
|
+
return 0
|
|
1704
|
+
fi
|
|
1705
|
+
return 1
|
|
1706
|
+
}
|
|
1707
|
+
|
|
1595
1708
|
# --- US-004: Run consensus verification (claude + codex sequentially) ---
|
|
1596
1709
|
run_consensus_verification() {
|
|
1597
1710
|
local iter="$1"
|
|
@@ -1607,18 +1720,45 @@ run_consensus_verification() {
|
|
|
1607
1720
|
log " Consensus round $CONSENSUS_ROUND/6..."
|
|
1608
1721
|
|
|
1609
1722
|
# Run claude verifier first
|
|
1723
|
+
local _claude_t0=$(date +%s)
|
|
1610
1724
|
if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
|
|
1611
1725
|
log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
|
|
1612
1726
|
return 1
|
|
1613
1727
|
fi
|
|
1728
|
+
ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
|
|
1614
1729
|
CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
|
|
1730
|
+
# A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
|
|
1731
|
+
if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
|
|
1732
|
+
log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
|
|
1733
|
+
log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
|
|
1734
|
+
rm -f "$claude_verdict_file" 2>/dev/null
|
|
1735
|
+
if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
|
|
1736
|
+
log_error "Claude verifier retry also failed"
|
|
1737
|
+
return 1
|
|
1738
|
+
fi
|
|
1739
|
+
CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
|
|
1740
|
+
if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
|
|
1741
|
+
log_error "Claude verdict still null after retry — consensus cannot proceed"
|
|
1742
|
+
return 1
|
|
1743
|
+
fi
|
|
1744
|
+
fi
|
|
1615
1745
|
log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
|
|
1616
1746
|
|
|
1747
|
+
# F8: --consensus-fail-fast — skip second verifier if first fails
|
|
1748
|
+
if [[ "$CONSENSUS_FAIL_FAST" = "1" && "$CLAUDE_VERDICT" = "fail" ]]; then
|
|
1749
|
+
log " Consensus fail-fast: claude=fail, skipping codex verifier"
|
|
1750
|
+
log_debug "[GOV] iter=$iter phase=consensus_fail_fast claude=fail codex=skipped"
|
|
1751
|
+
CODEX_VERDICT="skipped"
|
|
1752
|
+
return 2 # disagreement/fail signal
|
|
1753
|
+
fi
|
|
1754
|
+
|
|
1617
1755
|
# Run codex verifier second
|
|
1756
|
+
local _codex_t0=$(date +%s)
|
|
1618
1757
|
if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
|
|
1619
1758
|
log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
|
|
1620
1759
|
return 1
|
|
1621
1760
|
fi
|
|
1761
|
+
ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
|
|
1622
1762
|
CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
|
|
1623
1763
|
log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"
|
|
1624
1764
|
|
|
@@ -1722,43 +1862,35 @@ run_consensus_verification() {
|
|
|
1722
1862
|
return 1
|
|
1723
1863
|
}
|
|
1724
1864
|
|
|
1725
|
-
# =============================================================================
|
|
1726
|
-
# Security Warning
|
|
1727
|
-
# =============================================================================
|
|
1728
|
-
|
|
1729
|
-
print_security_warning() {
|
|
1730
|
-
echo ""
|
|
1731
|
-
echo "================================================================"
|
|
1732
|
-
echo " WARNING: Running with --dangerously-skip-permissions"
|
|
1733
|
-
echo ""
|
|
1734
|
-
echo " The claude CLI will execute tools (file writes, shell commands)"
|
|
1735
|
-
echo " without asking for confirmation. Only run this on code you"
|
|
1736
|
-
echo " trust in an environment you control."
|
|
1737
|
-
echo "================================================================"
|
|
1738
|
-
echo ""
|
|
1739
|
-
}
|
|
1740
|
-
|
|
1741
1865
|
# =============================================================================
|
|
1742
1866
|
# Main Leader Loop
|
|
1743
1867
|
# =============================================================================
|
|
1744
1868
|
|
|
1745
1869
|
main() {
|
|
1746
1870
|
# --- Lockfile: prevent duplicate execution ---
|
|
1747
|
-
local lockfile="$
|
|
1871
|
+
local lockfile="$LOCKFILE_PATH"
|
|
1748
1872
|
mkdir -p "$(dirname "$lockfile")" 2>/dev/null
|
|
1749
1873
|
if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
|
|
1750
1874
|
local lock_pid
|
|
1751
1875
|
lock_pid=$(cat "$lockfile" 2>/dev/null)
|
|
1752
1876
|
if kill -0 "$lock_pid" 2>/dev/null; then
|
|
1753
|
-
log_error "Another instance is already running (PID $lock_pid)"
|
|
1877
|
+
log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
|
|
1754
1878
|
exit 1
|
|
1755
1879
|
fi
|
|
1756
1880
|
# Stale lock — overwrite
|
|
1881
|
+
log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
|
|
1757
1882
|
echo $$ > "$lockfile"
|
|
1883
|
+
LOCKFILE_ACQUIRED=1
|
|
1884
|
+
else
|
|
1885
|
+
LOCKFILE_ACQUIRED=1
|
|
1758
1886
|
fi
|
|
1759
|
-
|
|
1887
|
+
trap cleanup EXIT INT TERM
|
|
1888
|
+
mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
|
|
1889
|
+
|
|
1890
|
+
# --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
|
|
1891
|
+
mkdir -p "$ANALYTICS_DIR" 2>/dev/null
|
|
1760
1892
|
|
|
1761
|
-
# ---
|
|
1893
|
+
# --- debug.log versioning (in analytics dir, --debug only) ---
|
|
1762
1894
|
if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
|
|
1763
1895
|
local dbg_n=1
|
|
1764
1896
|
while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
|
|
@@ -1767,6 +1899,31 @@ main() {
|
|
|
1767
1899
|
mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
|
|
1768
1900
|
fi
|
|
1769
1901
|
|
|
1902
|
+
# --- campaign.jsonl versioning (always-on) ---
|
|
1903
|
+
if [[ -f "$CAMPAIGN_JSONL" ]]; then
|
|
1904
|
+
local cj_n=1
|
|
1905
|
+
while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
|
|
1906
|
+
(( cj_n++ ))
|
|
1907
|
+
done
|
|
1908
|
+
mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
|
|
1909
|
+
fi
|
|
1910
|
+
|
|
1911
|
+
# --- metadata.json: always write at campaign start (cross-project identification) ---
|
|
1912
|
+
jq -n \
|
|
1913
|
+
--arg slug "$SLUG" \
|
|
1914
|
+
--arg project_root "$ROOT" \
|
|
1915
|
+
--arg project_name "$(basename "$ROOT")" \
|
|
1916
|
+
--arg campaign_status "running" \
|
|
1917
|
+
--arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
1918
|
+
--arg end_time "" \
|
|
1919
|
+
--arg worker_model "$WORKER_MODEL" \
|
|
1920
|
+
--arg verifier_model "$VERIFIER_MODEL" \
|
|
1921
|
+
--argjson debug "$DEBUG" \
|
|
1922
|
+
--argjson with_sv "$WITH_SELF_VERIFICATION" \
|
|
1923
|
+
--argjson consensus "$VERIFY_CONSENSUS" \
|
|
1924
|
+
'{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
|
|
1925
|
+
> "$METADATA_FILE"
|
|
1926
|
+
|
|
1770
1927
|
# --- Startup ---
|
|
1771
1928
|
log "Ralph Desk Tmux Runner starting..."
|
|
1772
1929
|
log " Slug: $SLUG"
|
|
@@ -1776,6 +1933,7 @@ main() {
|
|
|
1776
1933
|
log " Verifier model: $VERIFIER_MODEL"
|
|
1777
1934
|
log " Verify mode: $VERIFY_MODE"
|
|
1778
1935
|
log " Verify consensus:$VERIFY_CONSENSUS"
|
|
1936
|
+
log " Final consensus: $FINAL_CONSENSUS"
|
|
1779
1937
|
log " Consensus scope: $CONSENSUS_SCOPE"
|
|
1780
1938
|
log " Poll interval: ${POLL_INTERVAL}s"
|
|
1781
1939
|
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
@@ -1819,9 +1977,9 @@ main() {
|
|
|
1819
1977
|
US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1820
1978
|
fi
|
|
1821
1979
|
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1980
|
+
# Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
|
|
1981
|
+
local memory_file="$DESK/memos/${SLUG}-memory.md"
|
|
1982
|
+
if [[ -f "$memory_file" ]]; then
|
|
1825
1983
|
local completed_us
|
|
1826
1984
|
completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1827
1985
|
if [[ -n "$completed_us" ]]; then
|
|
@@ -1830,8 +1988,23 @@ main() {
|
|
|
1830
1988
|
log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
|
|
1831
1989
|
fi
|
|
1832
1990
|
fi
|
|
1991
|
+
|
|
1992
|
+
# D1: Fallback — restore verified_us from status.json if memory had none
|
|
1993
|
+
if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
|
|
1994
|
+
local status_verified
|
|
1995
|
+
status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
|
|
1996
|
+
if [[ -n "$status_verified" ]]; then
|
|
1997
|
+
VERIFIED_US="$status_verified"
|
|
1998
|
+
log " Restored verified_us from status.json: $VERIFIED_US"
|
|
1999
|
+
log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
|
|
2000
|
+
fi
|
|
2001
|
+
fi
|
|
1833
2002
|
fi
|
|
1834
2003
|
|
|
2004
|
+
# Initialize PRD snapshot state for live update detection
|
|
2005
|
+
PREV_PRD_HASH=$(compute_prd_hash)
|
|
2006
|
+
PREV_PRD_US_LIST=$(count_prd_us)
|
|
2007
|
+
|
|
1835
2008
|
# Dependency checks
|
|
1836
2009
|
check_dependencies
|
|
1837
2010
|
|
|
@@ -1854,7 +2027,7 @@ main() {
|
|
|
1854
2027
|
PREV_CONTEXT_HASH=$(compute_context_hash)
|
|
1855
2028
|
|
|
1856
2029
|
# --- governance.md s7: Leader Loop ---
|
|
1857
|
-
local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) #
|
|
2030
|
+
local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active
|
|
1858
2031
|
|
|
1859
2032
|
for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
|
|
1860
2033
|
log ""
|
|
@@ -1887,7 +2060,7 @@ main() {
|
|
|
1887
2060
|
# Send C-c first (in case claude is mid-task), then /exit
|
|
1888
2061
|
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
1889
2062
|
sleep 1
|
|
1890
|
-
tmux send-keys -t "$WORKER_PANE" "/exit"
|
|
2063
|
+
tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
|
|
1891
2064
|
sleep 2
|
|
1892
2065
|
# Wait for shell prompt before proceeding
|
|
1893
2066
|
wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
|
|
@@ -1896,96 +2069,66 @@ main() {
|
|
|
1896
2069
|
# Reset per-iteration state
|
|
1897
2070
|
local worker_nudge_count=0
|
|
1898
2071
|
local verifier_nudge_count=0
|
|
2072
|
+
ITER_VERIFIER_START=""
|
|
2073
|
+
ITER_VERIFIER_END=""
|
|
2074
|
+
|
|
2075
|
+
# --- US-004: detect PRD changes for live update + re-split ---
|
|
2076
|
+
check_prd_update
|
|
1899
2077
|
|
|
1900
2078
|
# --- governance.md s7 step 4: Build worker prompt + trigger ---
|
|
1901
2079
|
write_worker_trigger "$ITERATION"
|
|
1902
2080
|
local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
1903
2081
|
|
|
2082
|
+
# AC1: capture worker start timestamp
|
|
2083
|
+
ITER_WORKER_START=$(date +%s)
|
|
2084
|
+
|
|
1904
2085
|
update_status "worker" "running"
|
|
1905
2086
|
|
|
1906
|
-
# --- governance.md s7 step 5: Execute Worker (
|
|
1907
|
-
|
|
2087
|
+
# --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
|
|
2088
|
+
log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
2089
|
+
|
|
1908
2090
|
local worker_launch
|
|
1909
2091
|
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
1910
|
-
|
|
1911
|
-
|
|
2092
|
+
local worker_trigger="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-trigger.sh"
|
|
2093
|
+
worker_launch="bash $worker_trigger"
|
|
2094
|
+
launch_worker_codex "$WORKER_PANE" "$worker_trigger" "$ITERATION"
|
|
1912
2095
|
else
|
|
1913
2096
|
worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
1919
|
-
|
|
1920
|
-
# Step 5b: Wait for claude TUI to be ready (tmux pattern)
|
|
1921
|
-
if ! wait_for_pane_ready "$WORKER_PANE" 30; then
|
|
1922
|
-
log_error "Worker claude failed to start"
|
|
1923
|
-
write_blocked_sentinel "Worker claude failed to start in pane"
|
|
1924
|
-
update_status "blocked" "worker_start_failed"
|
|
1925
|
-
return 1
|
|
1926
|
-
fi
|
|
1927
|
-
|
|
1928
|
-
# Step 5c: Wait for claude to fully initialize, then send instruction directly
|
|
1929
|
-
sleep 3
|
|
1930
|
-
local worker_instruction="Read and execute the instructions in $worker_prompt"
|
|
1931
|
-
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1932
|
-
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1933
|
-
log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
|
|
1934
|
-
|
|
1935
|
-
# Verify claude actually started working — keep sending C-m until activity detected
|
|
1936
|
-
local submit_attempts=0
|
|
1937
|
-
while (( submit_attempts < 15 )); do
|
|
1938
|
-
sleep 2
|
|
1939
|
-
local pane_check
|
|
1940
|
-
pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
|
|
1941
|
-
if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
1942
|
-
log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
|
|
1943
|
-
log_debug "[FLOW] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
|
|
1944
|
-
break
|
|
1945
|
-
fi
|
|
1946
|
-
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1947
|
-
if (( submit_attempts == 8 )); then
|
|
1948
|
-
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1949
|
-
tmux send-keys -t "$WORKER_PANE" C-u 2>/dev/null
|
|
1950
|
-
sleep 0.1
|
|
1951
|
-
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1952
|
-
tmux send-keys -t "$WORKER_PANE" Enter
|
|
2097
|
+
if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
2098
|
+
write_blocked_sentinel "Worker claude failed to start in pane"
|
|
2099
|
+
update_status "blocked" "worker_start_failed"
|
|
2100
|
+
return 1
|
|
1953
2101
|
fi
|
|
1954
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1955
|
-
sleep 0.3
|
|
1956
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1957
|
-
(( submit_attempts++ ))
|
|
1958
|
-
done
|
|
1959
|
-
if (( submit_attempts >= 15 )); then
|
|
1960
|
-
log " WARNING: Could not confirm Worker started working after 15 attempts"
|
|
1961
|
-
log_debug "[FLOW] iter=$ITERATION worker_submit_check=FAILED attempts=15"
|
|
1962
2102
|
fi
|
|
1963
2103
|
|
|
1964
2104
|
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|
|
1965
2105
|
log " Polling for iter-signal.json..."
|
|
1966
2106
|
local worker_poll_done=0
|
|
1967
2107
|
while (( ! worker_poll_done )); do
|
|
2108
|
+
local worker_poll_rc=0
|
|
1968
2109
|
if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
|
|
1969
2110
|
worker_poll_done=1
|
|
1970
2111
|
log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
|
|
1971
2112
|
else
|
|
2113
|
+
worker_poll_rc=$?
|
|
2114
|
+
if (( worker_poll_rc == 2 )); then
|
|
2115
|
+
return 1
|
|
2116
|
+
fi
|
|
1972
2117
|
# Check if Worker is still actively running (not stuck)
|
|
1973
2118
|
local worker_cmd
|
|
1974
2119
|
worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1975
2120
|
if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
|
|
1976
|
-
#
|
|
2121
|
+
# Process alive — extend indefinitely (no hard ceiling kill)
|
|
2122
|
+
# Stale-context breaker and nudge system handle truly stuck workers
|
|
1977
2123
|
local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
|
|
2124
|
+
local ceiling_exceeded=""
|
|
1978
2125
|
if (( iter_elapsed >= HARD_CEILING )); then
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
sleep 1
|
|
1983
|
-
write_blocked_sentinel "Worker hit hard ceiling (${HARD_CEILING}s). Pane preserved for inspection."
|
|
1984
|
-
update_status "blocked" "hard_timeout"
|
|
1985
|
-
return 1
|
|
2126
|
+
ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s — not enforced, logged only]"
|
|
2127
|
+
log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
|
|
2128
|
+
log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
|
|
1986
2129
|
fi
|
|
1987
|
-
log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s
|
|
1988
|
-
log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s
|
|
2130
|
+
log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
|
|
2131
|
+
log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
|
|
1989
2132
|
log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
|
|
1990
2133
|
update_status "worker" "slow"
|
|
1991
2134
|
# Loop continues — re-poll same iteration
|
|
@@ -2019,6 +2162,11 @@ main() {
|
|
|
2019
2162
|
# Reset monitor failure count on success
|
|
2020
2163
|
MONITOR_FAILURE_COUNT=0
|
|
2021
2164
|
|
|
2165
|
+
# AC1: capture worker end timestamp; reset consensus timing
|
|
2166
|
+
ITER_WORKER_END=$(date +%s)
|
|
2167
|
+
ITER_VERIFIER_CLAUDE_DURATION_S=""
|
|
2168
|
+
ITER_VERIFIER_CODEX_DURATION_S=""
|
|
2169
|
+
|
|
2022
2170
|
# --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
|
|
2023
2171
|
local signal_status
|
|
2024
2172
|
signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
|
|
@@ -2045,17 +2193,34 @@ main() {
|
|
|
2045
2193
|
signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
2046
2194
|
log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
|
|
2047
2195
|
|
|
2196
|
+
# AC1: capture verifier start timestamp
|
|
2197
|
+
ITER_VERIFIER_START=$(date +%s)
|
|
2198
|
+
|
|
2048
2199
|
update_status "verifier" "running"
|
|
2049
2200
|
|
|
2050
|
-
# ---
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2201
|
+
# --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
|
|
2202
|
+
if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
|
|
2203
|
+
log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
|
|
2204
|
+
local seq_rc=0
|
|
2205
|
+
run_sequential_final_verify "$ITERATION" || seq_rc=$?
|
|
2206
|
+
if (( seq_rc == 0 )); then
|
|
2207
|
+
write_complete_sentinel "Sequential final verify passed (all US verified individually)"
|
|
2208
|
+
update_status "complete" "pass"
|
|
2209
|
+
write_campaign_jsonl "$ITERATION" "ALL" "pass"
|
|
2210
|
+
return 0
|
|
2211
|
+
else
|
|
2212
|
+
# Sequential verify failed — fall through to fix loop with failed US
|
|
2213
|
+
log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
|
|
2214
|
+
signal_us_id="${FAILED_US:-ALL}"
|
|
2215
|
+
# Synthesize a fail verdict for the fix loop
|
|
2216
|
+
echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
|
|
2217
|
+
fi
|
|
2057
2218
|
fi
|
|
2058
2219
|
|
|
2220
|
+
# --- Consensus scope check (US-005: _should_use_consensus handles VERIFY_CONSENSUS + FINAL_CONSENSUS) ---
|
|
2221
|
+
local use_consensus=0
|
|
2222
|
+
_should_use_consensus "$signal_us_id" && use_consensus=1
|
|
2223
|
+
|
|
2059
2224
|
# --- Consensus vs single verification ---
|
|
2060
2225
|
if (( use_consensus )); then
|
|
2061
2226
|
# US-004: Run consensus verification (claude + codex sequentially)
|
|
@@ -2077,70 +2242,54 @@ main() {
|
|
|
2077
2242
|
write_verifier_trigger "$ITERATION"
|
|
2078
2243
|
local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
|
|
2079
2244
|
|
|
2080
|
-
# Step 7a: Clean previous Verifier session
|
|
2245
|
+
# Step 7a: Clean previous Verifier session (with dead pane detection)
|
|
2081
2246
|
local verifier_cmd
|
|
2082
2247
|
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
2083
|
-
if [[
|
|
2248
|
+
if [[ -z "$verifier_cmd" ]]; then
|
|
2249
|
+
log " Verifier pane $VERIFIER_PANE is gone — replacing..."
|
|
2250
|
+
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
|
|
2251
|
+
replace_worker_pane "$VERIFIER_PANE" "verifier"
|
|
2252
|
+
VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
|
|
2253
|
+
log " New verifier pane: $VERIFIER_PANE"
|
|
2254
|
+
elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
|
|
2255
|
+
log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
|
|
2256
|
+
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
2257
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
2258
|
+
sleep 0.2
|
|
2259
|
+
tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
|
|
2260
|
+
sleep 0.3
|
|
2261
|
+
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
2084
2262
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
2085
2263
|
sleep 0.5
|
|
2086
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit"
|
|
2264
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
2087
2265
|
sleep 2
|
|
2088
|
-
wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
|
|
2089
2266
|
fi
|
|
2267
|
+
wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
|
|
2090
2268
|
|
|
2091
2269
|
local verifier_launch
|
|
2092
2270
|
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
2093
2271
|
verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
2094
|
-
log " Launching Verifier codex in pane $VERIFIER_PANE..."
|
|
2095
2272
|
else
|
|
2096
2273
|
verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
|
|
2097
|
-
log " Launching Verifier claude in pane $VERIFIER_PANE..."
|
|
2098
2274
|
fi
|
|
2099
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
|
|
2100
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
2101
2275
|
log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
|
|
2102
2276
|
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
# Step 7c: Send instruction
|
|
2111
|
-
sleep 3
|
|
2112
|
-
local verifier_instruction="Read and execute the instructions in $verifier_prompt"
|
|
2113
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
2114
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
2115
|
-
log_debug "Verifier instruction sent directly"
|
|
2116
|
-
|
|
2117
|
-
# Verify verifier actually started working
|
|
2118
|
-
local vs_submit=0
|
|
2119
|
-
while (( vs_submit < 15 )); do
|
|
2120
|
-
sleep 2
|
|
2121
|
-
local vs_check
|
|
2122
|
-
vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
|
|
2123
|
-
if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
2124
|
-
log_debug "Verifier started working after $((vs_submit + 1)) checks"
|
|
2125
|
-
break
|
|
2126
|
-
fi
|
|
2127
|
-
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
2128
|
-
if (( vs_submit == 8 )); then
|
|
2129
|
-
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
2130
|
-
tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
|
|
2131
|
-
sleep 0.1
|
|
2132
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
2133
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
2277
|
+
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
2278
|
+
launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
|
|
2279
|
+
else
|
|
2280
|
+
if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
|
|
2281
|
+
update_status "verifier" "start_failed"
|
|
2282
|
+
continue
|
|
2134
2283
|
fi
|
|
2135
|
-
|
|
2136
|
-
sleep 0.3
|
|
2137
|
-
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
2138
|
-
(( vs_submit++ ))
|
|
2139
|
-
done
|
|
2284
|
+
fi
|
|
2140
2285
|
|
|
2141
2286
|
# Poll for verify-verdict.json
|
|
2142
2287
|
log " Polling for verify-verdict.json..."
|
|
2143
2288
|
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
|
|
2289
|
+
local verifier_poll_rc=$?
|
|
2290
|
+
if (( verifier_poll_rc == 2 )); then
|
|
2291
|
+
return 1
|
|
2292
|
+
fi
|
|
2144
2293
|
log_error "Verifier poll failed"
|
|
2145
2294
|
# Verifier is dead/stuck — BLOCK and let user decide
|
|
2146
2295
|
write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection."
|
|
@@ -2149,6 +2298,9 @@ main() {
|
|
|
2149
2298
|
fi
|
|
2150
2299
|
fi
|
|
2151
2300
|
|
|
2301
|
+
# AC1: capture verifier end timestamp
|
|
2302
|
+
ITER_VERIFIER_END=$(date +%s)
|
|
2303
|
+
|
|
2152
2304
|
# --- governance.md s7 step 7: Read verdict via jq ---
|
|
2153
2305
|
local verdict
|
|
2154
2306
|
verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
|
|
@@ -2166,6 +2318,18 @@ main() {
|
|
|
2166
2318
|
pass)
|
|
2167
2319
|
CONSECUTIVE_FAILURES=0
|
|
2168
2320
|
CONSENSUS_ROUND=0
|
|
2321
|
+
_SAME_US_FAIL_COUNT=0
|
|
2322
|
+
_LAST_FAILED_US=""
|
|
2323
|
+
if (( _MODEL_UPGRADED )); then
|
|
2324
|
+
log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
|
|
2325
|
+
log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
|
|
2326
|
+
WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
|
|
2327
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
2328
|
+
WORKER_CODEX_MODEL="$WORKER_MODEL"
|
|
2329
|
+
WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
|
|
2330
|
+
fi
|
|
2331
|
+
_MODEL_UPGRADED=0
|
|
2332
|
+
fi
|
|
2169
2333
|
|
|
2170
2334
|
# --- Per-US tracking ---
|
|
2171
2335
|
if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
|
|
@@ -2183,6 +2347,7 @@ main() {
|
|
|
2183
2347
|
# Final full verify passed or complete recommended
|
|
2184
2348
|
write_complete_sentinel "$verdict_summary"
|
|
2185
2349
|
update_status "complete" "pass"
|
|
2350
|
+
write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
|
|
2186
2351
|
return 0
|
|
2187
2352
|
else
|
|
2188
2353
|
log " Verifier passed but did not recommend complete. Continuing."
|
|
@@ -2192,6 +2357,7 @@ main() {
|
|
|
2192
2357
|
fail)
|
|
2193
2358
|
# --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
|
|
2194
2359
|
(( CONSECUTIVE_FAILURES++ ))
|
|
2360
|
+
check_model_upgrade "${signal_us_id:-unknown}"
|
|
2195
2361
|
local verdict_summary_fail
|
|
2196
2362
|
verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
2197
2363
|
log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
|
|
@@ -2213,11 +2379,19 @@ main() {
|
|
|
2213
2379
|
log " Fix contract: $fix_contract"
|
|
2214
2380
|
log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
|
|
2215
2381
|
|
|
2216
|
-
# Circuit breaker: consecutive failures
|
|
2382
|
+
# Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
|
|
2217
2383
|
if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
|
|
2218
|
-
|
|
2219
|
-
|
|
2220
|
-
|
|
2384
|
+
# For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
|
|
2385
|
+
_ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
|
|
2386
|
+
if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
|
|
2387
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
|
|
2388
|
+
log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
|
|
2389
|
+
write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2390
|
+
else
|
|
2391
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
|
|
2392
|
+
log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2393
|
+
write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2394
|
+
fi
|
|
2221
2395
|
update_status "blocked" "consecutive_failures"
|
|
2222
2396
|
return 1
|
|
2223
2397
|
fi
|
|
@@ -2261,6 +2435,7 @@ main() {
|
|
|
2261
2435
|
|
|
2262
2436
|
# --- AC5: Write per-iteration cost estimate ---
|
|
2263
2437
|
write_cost_log "$ITERATION"
|
|
2438
|
+
write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
|
|
2264
2439
|
|
|
2265
2440
|
# --- governance.md s7 step 8: Write result log ---
|
|
2266
2441
|
write_result_log "$ITERATION" "$signal_status"
|
|
@@ -2279,7 +2454,6 @@ main() {
|
|
|
2279
2454
|
|
|
2280
2455
|
# Max iterations reached
|
|
2281
2456
|
log "Max iterations ($MAX_ITER) reached."
|
|
2282
|
-
generate_campaign_report # AC4: TIMEOUT terminal path
|
|
2283
2457
|
update_status "timeout" "max_iter"
|
|
2284
2458
|
return 1
|
|
2285
2459
|
}
|
|
@@ -2288,6 +2462,45 @@ main() {
|
|
|
2288
2462
|
# Entry Point
|
|
2289
2463
|
# =============================================================================
|
|
2290
2464
|
|
|
2465
|
+
# --- CLI: parse --worker-model / --verifier-model flags ---
# Flags given on the command line override the env-var defaults
# (WORKER_ENGINE, WORKER_MODEL, etc.).
# Value format: "model:reasoning" → codex engine; "model-name" → claude engine
# Also recognized: --lock-worker-model (sets LOCK_WORKER_MODEL=1) and
# --final-consensus (sets FINAL_CONSENSUS=1). Any other argument is skipped.
# Arguments are read by index; the positional parameters are never shifted.
# The stdout of parse_model_flag is split on spaces: first word = engine,
# second word = model, last word = reasoning (consumed for codex only).
for (( _cli_i = 1; _cli_i <= $#; _cli_i++ )); do
  case "${@[$_cli_i]}" in
    --final-consensus)
      FINAL_CONSENSUS=1
      ;;
    --lock-worker-model)
      LOCK_WORKER_MODEL=1
      ;;
    --verifier-model)
      # Step onto the flag's value; the loop increment then moves past it.
      _cli_i=$(( _cli_i + 1 ))
      if ! _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "verifier"); then
        exit 1
      fi
      _cli_rest="${_cli_parsed#* }"
      VERIFIER_ENGINE="${_cli_parsed%% *}"
      VERIFIER_MODEL="${_cli_rest%% *}"
      if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
        VERIFIER_CODEX_REASONING="${_cli_rest##* }"
        VERIFIER_CODEX_MODEL="$VERIFIER_MODEL"
      fi
      ;;
    --worker-model)
      # Step onto the flag's value; the loop increment then moves past it.
      _cli_i=$(( _cli_i + 1 ))
      if ! _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "worker"); then
        exit 1
      fi
      _cli_rest="${_cli_parsed#* }"
      WORKER_ENGINE="${_cli_parsed%% *}"
      WORKER_MODEL="${_cli_rest%% *}"
      if [[ "$WORKER_ENGINE" = "codex" ]]; then
        WORKER_CODEX_REASONING="${_cli_rest##* }"
        WORKER_CODEX_MODEL="$WORKER_MODEL"
      fi
      ;;
  esac
done
# Drop the scratch variables so they do not leak into the rest of the script.
unset _cli_i _cli_parsed _cli_rest
|
|
2503
|
+
|
|
2291
2504
|
# Require tmux — tmux mode only works inside an active tmux session
|
|
2292
2505
|
if [[ -z "${TMUX:-}" ]]; then
|
|
2293
2506
|
echo "ERROR: tmux mode requires running inside a tmux session."
|