@ai-dev-methodologies/rlp-desk 0.3.6 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +145 -69
- package/docs/blueprints/blueprint-v0.4-evolution.md +347 -0
- package/docs/plans/cozy-gliding-trinket.md +53 -0
- package/docs/plans/toasty-whistling-diffie-agent-a6814625642e956da.md +201 -0
- package/docs/plans/toasty-whistling-diffie.md +117 -0
- package/docs/prompts/ralplan-codex-review.md +55 -0
- package/install.sh +5 -0
- package/package.json +1 -1
- package/scripts/postinstall.js +5 -0
- package/scripts/uninstall.js +1 -0
- package/src/commands/rlp-desk.md +252 -70
- package/src/governance.md +63 -28
- package/src/model-upgrade-table.md +50 -0
- package/src/scripts/init_ralph_desk.zsh +329 -13
- package/src/scripts/lib_ralph_desk.zsh +837 -0
- package/src/scripts/run_ralph_desk.zsh +978 -482
|
@@ -55,6 +55,7 @@ HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
|
|
|
55
55
|
MAX_RESTARTS="${MAX_RESTARTS:-3}"
|
|
56
56
|
IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
|
|
57
57
|
MAX_NUDGES="${MAX_NUDGES:-3}"
|
|
58
|
+
WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
|
|
58
59
|
|
|
59
60
|
# --- Engine Selection ---
|
|
60
61
|
WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
|
|
@@ -68,7 +69,18 @@ CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
|
68
69
|
# --- Verify Mode ---
|
|
69
70
|
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
70
71
|
VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
|
|
72
|
+
FINAL_CONSENSUS="${FINAL_CONSENSUS:-0}" # 0|1 — consensus for final ALL verify only (independent of VERIFY_CONSENSUS)
|
|
71
73
|
CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
|
|
74
|
+
CONSENSUS_FAIL_FAST="${CONSENSUS_FAIL_FAST:-0}" # 0|1 — skip second verifier if first fails
|
|
75
|
+
CB_THRESHOLD="${CB_THRESHOLD:-3}" # consecutive failures before BLOCKED (default: 3)
|
|
76
|
+
# Effective CB threshold: doubled when consensus mode active (AC2 auto-double)
|
|
77
|
+
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
|
|
78
|
+
EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
|
|
79
|
+
else
|
|
80
|
+
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
|
|
81
|
+
fi
|
|
82
|
+
_API_MAX_RETRIES="${_API_MAX_RETRIES:-5}"
|
|
83
|
+
_API_RETRY_INTERVAL_S="${_API_RETRY_INTERVAL_S:-30}"
|
|
72
84
|
|
|
73
85
|
# --- Derived Paths ---
|
|
74
86
|
DESK="$ROOT/.claude/ralph-desk"
|
|
@@ -76,6 +88,14 @@ PROMPTS_DIR="$DESK/prompts"
|
|
|
76
88
|
CONTEXT_DIR="$DESK/context"
|
|
77
89
|
MEMOS_DIR="$DESK/memos"
|
|
78
90
|
LOGS_DIR="$DESK/logs/$SLUG"
|
|
91
|
+
RUNTIME_DIR="$LOGS_DIR/runtime"
|
|
92
|
+
PRD_FILE="$DESK/plans/prd-$SLUG.md"
|
|
93
|
+
TEST_SPEC_FILE="$DESK/plans/test-spec-$SLUG.md"
|
|
94
|
+
# --- Analytics Directory (user-level, cross-project) ---
|
|
95
|
+
ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
|
|
96
|
+
ANALYTICS_DIR="$HOME/.claude/ralph-desk/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
|
|
97
|
+
CAMPAIGN_JSONL="$ANALYTICS_DIR/campaign.jsonl"
|
|
98
|
+
METADATA_FILE="$ANALYTICS_DIR/metadata.json"
|
|
79
99
|
WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
|
|
80
100
|
VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
|
|
81
101
|
CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
|
|
@@ -85,10 +105,12 @@ DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
|
|
|
85
105
|
VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
|
|
86
106
|
COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
|
|
87
107
|
BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
108
|
+
LOCKFILE_PATH="$DESK/logs/.rlp-desk-${SLUG}.lock"
|
|
109
|
+
STATUS_FILE="$RUNTIME_DIR/status.json"
|
|
110
|
+
SESSION_CONFIG="$RUNTIME_DIR/session-config.json"
|
|
111
|
+
WORKER_HEARTBEAT="$RUNTIME_DIR/worker-heartbeat.json"
|
|
112
|
+
VERIFIER_HEARTBEAT="$RUNTIME_DIR/verifier-heartbeat.json"
|
|
113
|
+
COST_LOG="$LOGS_DIR/cost-log.jsonl"
|
|
92
114
|
|
|
93
115
|
# --- Session Naming ---
|
|
94
116
|
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
|
|
@@ -103,41 +125,265 @@ HEARTBEAT_STALE_COUNT=0
|
|
|
103
125
|
MONITOR_FAILURE_COUNT=0
|
|
104
126
|
CONSECUTIVE_FAILURES=0
|
|
105
127
|
PREV_CONTEXT_HASH=""
|
|
128
|
+
PREV_PRD_HASH=""
|
|
129
|
+
PREV_PRD_US_LIST=""
|
|
130
|
+
_PRD_CHANGED=0
|
|
106
131
|
ITERATION=0
|
|
107
132
|
START_TIME=$(date +%s)
|
|
133
|
+
BASELINE_COMMIT="" # git HEAD at campaign start (captured before loop)
|
|
134
|
+
CAMPAIGN_REPORT_GENERATED=0 # guard against double-generation in cleanup trap
|
|
135
|
+
SV_REPORT_GENERATED=0 # guard against double-generation in generate_sv_report
|
|
108
136
|
VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
|
|
109
137
|
CONSENSUS_ROUND=0 # current consensus round for current US
|
|
110
138
|
US_LIST="" # comma-separated US IDs from PRD (per-us mode)
|
|
139
|
+
LOCKFILE_ACQUIRED=0
|
|
140
|
+
LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}" # 0|1 — set by --lock-worker-model; disables progressive upgrade
|
|
141
|
+
_SAME_US_FAIL_COUNT=0 # consecutive same-US fail counter (upgrade trigger at >= 2)
|
|
142
|
+
_LAST_FAILED_US="" # last failed US ID (same-US tracking for upgrade logic)
|
|
143
|
+
_MODEL_UPGRADED=0 # 1 if Worker model was auto-upgraded during campaign
|
|
144
|
+
_ORIGINAL_WORKER_MODEL="" # WORKER_MODEL saved before first upgrade (for restore on pass)
|
|
145
|
+
_ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade
|
|
111
146
|
|
|
112
147
|
# =============================================================================
|
|
113
148
|
# Utility Functions
|
|
114
149
|
# =============================================================================
|
|
115
150
|
|
|
116
151
|
DEBUG="${DEBUG:-0}"
|
|
117
|
-
DEBUG_LOG="$
|
|
152
|
+
DEBUG_LOG="$ANALYTICS_DIR/debug.log"
|
|
153
|
+
|
|
154
|
+
# Source shared business logic
|
|
155
|
+
LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
156
|
+
source "$LIB_DIR/lib_ralph_desk.zsh"
|
|
157
|
+
|
|
158
|
+
# A16: Warn if running in foreground (may conflict with Claude Code pane)
|
|
159
|
+
if [[ -z "${RLP_BACKGROUND:-}" ]]; then
|
|
160
|
+
echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
|
|
161
|
+
echo " Recommended: launch via Bash tool with run_in_background: true" >&2
|
|
162
|
+
echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
|
|
163
|
+
fi
|
|
118
164
|
|
|
119
|
-
|
|
120
|
-
|
|
165
|
+
# check_dead_pane() — decide whether a pane's foreground command means the
# agent process is gone and only an idle shell is left in the pane.
# Engine-aware: bash is normal for codex workers (the trigger script runs
# under bash), but indicates a dead pane for claude workers.
# Args: $1=pane_current_command
#       $2=engine (claude|codex; defaults to claude)
#       $3=role (worker|verifier) — accepted for call-site compatibility, not used
# Returns: 0 if dead, 1 if alive
check_dead_pane() {
  local poll_cmd="$1"
  local engine="${2:-claude}"

  case "$poll_cmd" in
    ''|zsh)
      # Empty command or a bare zsh prompt: nothing is running any more.
      return 0
      ;;
    bash)
      # bash is only a sign of death when the engine does not itself run in bash.
      if [[ "$engine" != "codex" ]]; then
        return 0
      fi
      ;;
  esac

  return 1  # alive
}
|
|
122
184
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
185
|
+
# launch_worker_codex() — launch codex Worker via trigger script (non-interactive exec)
# Pastes "bash <trigger_file>" into the pane and presses Enter.
# Args: $1=pane_id $2=trigger_file
#       $3=iteration — accepted for call-site compatibility, not used
# Returns: 0 always (codex failures detected by poll_for_signal)
launch_worker_codex() {
  local pane_id="$1"
  local trigger_file="$2"
  local quoted_trigger

  # The pasted text is parsed by the shell running inside the pane, so the path
  # must be shell-quoted; otherwise a project path containing spaces or
  # metacharacters is split into several words. Plain paths come out unchanged.
  quoted_trigger=$(printf '%q' "$trigger_file")

  log " Launching Worker codex via trigger script in pane $pane_id..."
  paste_to_pane "$pane_id" "bash $quoted_trigger"
  tmux send-keys -t "$pane_id" Enter
  log_debug "Worker codex trigger sent: $trigger_file"
  sleep 3  # brief wait for codex to start
  return 0
}
|
|
200
|
+
|
|
201
|
+
# launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
# Flow: paste the launch command, wait for the TUI, paste the instruction, then
# poll the pane up to 15 times for signs of activity (re-sending the instruction
# every 3 failed attempts). If the instruction is never picked up, claude is
# restarted once and the instruction is sent again.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 on success, 1 on fatal failure (caller writes BLOCKED) — i.e. the
#          TUI did not become ready on first launch or after the restart.
launch_worker_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"

  # The instruction text itself contains "execute", which the case-insensitive
  # activity pattern below would match via "exec". Lines echoing the instruction
  # are therefore removed from the capture before looking for activity.
  local instruction_prefix="Read and execute the instructions in"
  local worker_instruction="$instruction_prefix $prompt_file"

  # Declared once, outside the loop: re-running `local name` on an already-set
  # parameter makes zsh print its value (unless TYPESET_SILENT is set).
  local submit_attempts=0
  local pane_check=""
  local visible=""

  log " Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" Enter

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" Enter
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working
  while (( submit_attempts < 15 )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Drop the echoed instruction; `|| true` because grep -v exits 1 when every
    # line was filtered out, which is a normal outcome here.
    visible=$(printf '%s\n' "$pane_check" | grep -vF -- "$instruction_prefix" || true)
    if printf '%s\n' "$visible" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob" 2>/dev/null; then
      log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" Enter
      sleep 1
    fi
    # Press Enter twice in case the first keypress was swallowed by the TUI.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # If 15 attempts failed, restart claude and retry
  if (( submit_attempts >= 15 )); then
    log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
    tmux send-keys -t "$pane_id" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
    sleep 2
    wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
    paste_to_pane "$pane_id" "$worker_launch"
    tmux send-keys -t "$pane_id" Enter
    if ! wait_for_pane_ready "$pane_id" 30; then
      # Same condition as a failed first launch: report it as fatal instead of
      # returning success with no instruction delivered.
      log_error "Worker restart failed — pane not ready"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
      return 1
    fi
    sleep 3
    paste_to_pane "$pane_id" "$worker_instruction"
    tmux send-keys -t "$pane_id" Enter
    log " Worker restarted and instruction re-sent"
    log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
  fi

  return 0
}
|
|
281
|
+
|
|
282
|
+
# launch_verifier_codex() — launch codex Verifier in pane (non-interactive)
# Pastes the prepared launch command into the pane and presses Enter.
# Args: $1=pane_id $4=launch_cmd
#       $2=prompt_file and $3=iteration are accepted so the signature matches
#       launch_verifier_claude, but they are not used here.
# Returns: 0 always
launch_verifier_codex() {
  local pane_id="$1"
  local verifier_launch="$4"

  log " Launching Verifier codex in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" Enter
  sleep 3  # brief pause after submitting the command
  return 0
}
|
|
297
|
+
|
|
298
|
+
# launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
# Flow: paste the launch command, wait for the TUI, paste the instruction, then
# poll the pane up to 15 times for signs of activity, re-typing the instruction
# once (at attempt 8) if nothing has happened by then.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction was sent (even if activity could not be
#          confirmed — a warning is logged in that case),
#          1 if the TUI did not become ready.
launch_verifier_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"

  # The instruction text itself contains "execute", which the case-insensitive
  # activity pattern below would match via "exec". Lines echoing the instruction
  # are therefore removed from the capture before looking for activity.
  local instruction_prefix="Read and execute the instructions in"
  local verifier_instruction="$instruction_prefix $prompt_file"

  # Declared once, outside the loop: re-running `local name` on an already-set
  # parameter makes zsh print its value (unless TYPESET_SILENT is set).
  local submit_attempts=0
  local vs_check=""
  local visible=""

  log " Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" Enter

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  sleep 3
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" Enter
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working
  while (( submit_attempts < 15 )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Drop the echoed instruction; `|| true` because grep -v exits 1 when every
    # line was filtered out, which is a normal outcome here.
    visible=$(printf '%s\n' "$vs_check" | grep -vF -- "$instruction_prefix" || true)
    if printf '%s\n' "$visible" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
      log_debug "Verifier started working after $((submit_attempts + 1)) checks"
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" Enter
    fi
    # Press Enter twice in case the first keypress was swallowed by the TUI.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # Best-effort by design: keep returning 0, but do not stay silent when the
  # verifier never showed any activity.
  if (( submit_attempts >= 15 )); then
    log " WARNING: Verifier activity not confirmed after 15 attempts — continuing"
    log_debug "[FLOW] iter=$iter verifier_submit_check=unconfirmed attempts=15"
  fi

  return 0
}
|
|
129
346
|
|
|
130
|
-
|
|
131
|
-
|
|
347
|
+
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec)
# On exit: check done-claim, auto-generate iter-signal with status "verify".
# The US id is taken from the done-claim when present; otherwise from the
# previous iter-signal, overridden by the first "Next ... US-NNN" line found in
# the campaign memory file.
# Globals read: DONE_CLAIM_FILE, DESK, SLUG
# Args: $1=iteration (must be a JSON number) $2=signal_file
# Returns: 0 (signal generated), 1 (signal could not be written)
handle_worker_exit_codex() {
  local iter="$1"
  local signal_file="$2"
  local us_id=""
  local mem_us=""
  local summary=""
  local tmp_signal=""

  log " Codex worker process exited. Checking for done-claim..."
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
    # jq prints nothing when the done-claim is not valid JSON.
    [[ -n "$us_id" ]] || us_id="unknown"
    log " Codex worker completed with done-claim (us_id=$us_id). Auto-generating signal."
    summary="auto-generated after codex exec exit"
  else
    log " WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
    us_id=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null || echo "US-001")
    [[ -n "$us_id" ]] || us_id="US-001"
    mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
    if [[ -n "$mem_us" ]]; then
      us_id="$mem_us"
    fi
    summary="auto-generated after codex exec exit (no done-claim)"
  fi

  # Let jq do the JSON encoding (quotes/backslashes in us_id stay valid JSON),
  # and publish via rename so a poller never sees a half-written signal file.
  tmp_signal="${signal_file}.tmp.$$"
  if ! jq -cn \
      --argjson iteration "$iter" \
      --arg us_id "$us_id" \
      --arg summary "$summary" \
      --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '{iteration: $iteration, status: "verify", us_id: $us_id, summary: $summary, timestamp: $timestamp}' \
      > "$tmp_signal" 2>/dev/null; then
    rm -f "$tmp_signal"
    log_error "Failed to generate iter-signal for iteration $iter"
    return 1
  fi
  if ! mv -f "$tmp_signal" "$signal_file"; then
    rm -f "$tmp_signal"
    log_error "Failed to write iter-signal: $signal_file"
    return 1
  fi
  return 0
}
|
|
133
372
|
|
|
134
|
-
#
|
|
135
|
-
#
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
local
|
|
139
|
-
|
|
140
|
-
|
|
373
|
+
# handle_worker_exit_claude() — react to a claude worker that exited without
# producing its signal file by delegating to restart_worker.
# Args: $1=pane_id $2=iteration $3=trigger_file
# Returns: 0 when restart_worker succeeded, 1 otherwise (any failure status of
#          restart_worker is normalised to 1)
handle_worker_exit_claude() {
  local pane_id="$1" iter="$2" trigger_file="$3"

  log_error "Worker exited without writing signal file"
  restart_worker "$pane_id" "$iter" "$trigger_file" && return 0
  return 1
}
|
|
142
388
|
|
|
143
389
|
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
|
|
@@ -148,12 +394,28 @@ replace_worker_pane() {
|
|
|
148
394
|
log " Replacing dead $role pane $old_pane..."
|
|
149
395
|
tmux kill-pane -t "$old_pane" 2>/dev/null
|
|
150
396
|
|
|
151
|
-
# Create fresh pane
|
|
397
|
+
# Create fresh pane maintaining original layout: worker(top-right) / verifier(bottom-right)
|
|
152
398
|
local new_pane
|
|
153
|
-
|
|
399
|
+
if [[ "$role" == "verifier" ]]; then
|
|
400
|
+
# Verifier goes below worker: split vertically from worker pane
|
|
401
|
+
if tmux display-message -t "$WORKER_PANE" -p '#{pane_id}' &>/dev/null; then
|
|
402
|
+
new_pane=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
|
|
403
|
+
else
|
|
404
|
+
# Fallback: worker pane also dead, split horizontally from leader
|
|
405
|
+
new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
|
|
406
|
+
fi
|
|
407
|
+
else
|
|
408
|
+
# Worker goes above verifier: split vertically before verifier pane
|
|
409
|
+
if tmux display-message -t "$VERIFIER_PANE" -p '#{pane_id}' &>/dev/null; then
|
|
410
|
+
new_pane=$(tmux split-window -v -b -d -t "$VERIFIER_PANE" -P -F '#{pane_id}' -c "$ROOT")
|
|
411
|
+
else
|
|
412
|
+
# Fallback: verifier pane also dead, split horizontally from leader
|
|
413
|
+
new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
|
|
414
|
+
fi
|
|
415
|
+
fi
|
|
154
416
|
|
|
155
417
|
log " New $role pane: $new_pane (replaced $old_pane)"
|
|
156
|
-
log_debug "[
|
|
418
|
+
log_debug "[FLOW] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"
|
|
157
419
|
|
|
158
420
|
# Update session-config.json with new pane ID
|
|
159
421
|
if [[ -f "$SESSION_CONFIG" ]]; then
|
|
@@ -178,9 +440,13 @@ check_dependencies() {
|
|
|
178
440
|
missing=1
|
|
179
441
|
fi
|
|
180
442
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
443
|
+
# claude required only when claude engine is used for Worker or Verifier execution;
|
|
444
|
+
# codex-only campaigns can run without claude — generate_sv_report degrades gracefully
|
|
445
|
+
if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
|
|
446
|
+
if ! command -v claude >/dev/null 2>&1; then
|
|
447
|
+
log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
|
|
448
|
+
missing=1
|
|
449
|
+
fi
|
|
184
450
|
fi
|
|
185
451
|
|
|
186
452
|
if ! command -v jq >/dev/null 2>&1; then
|
|
@@ -189,14 +455,9 @@ check_dependencies() {
|
|
|
189
455
|
fi
|
|
190
456
|
|
|
191
457
|
# Codex binary required only when engine=codex or consensus verification is enabled
|
|
192
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
458
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
|
|
193
459
|
if ! command -v codex >/dev/null 2>&1; then
|
|
194
|
-
|
|
195
|
-
log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
|
|
196
|
-
else
|
|
197
|
-
log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
|
|
198
|
-
fi
|
|
199
|
-
log_error "Install with: npm install -g @openai/codex"
|
|
460
|
+
log_error "codex CLI not found. Install: npm install -g @openai/codex"
|
|
200
461
|
missing=1
|
|
201
462
|
fi
|
|
202
463
|
fi
|
|
@@ -205,52 +466,19 @@ check_dependencies() {
|
|
|
205
466
|
exit 1
|
|
206
467
|
fi
|
|
207
468
|
|
|
208
|
-
# Resolve full path to claude binary
|
|
209
|
-
|
|
210
|
-
|
|
469
|
+
# Resolve full path to claude binary when claude engine is in use
|
|
470
|
+
if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
|
|
471
|
+
CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
|
|
472
|
+
log " Claude binary: $CLAUDE_BIN"
|
|
473
|
+
fi
|
|
211
474
|
|
|
212
475
|
# Resolve codex binary if needed
|
|
213
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
476
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
|
|
214
477
|
CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
|
|
215
478
|
log " Codex binary: $CODEX_BIN"
|
|
216
479
|
fi
|
|
217
480
|
}
|
|
218
481
|
|
|
219
|
-
# =============================================================================
|
|
220
|
-
# Scaffold Validation
|
|
221
|
-
# =============================================================================
|
|
222
|
-
|
|
223
|
-
validate_scaffold() {
|
|
224
|
-
local errors=0
|
|
225
|
-
|
|
226
|
-
if [[ ! -f "$WORKER_PROMPT_BASE" ]]; then
|
|
227
|
-
log_error "Worker prompt not found: $WORKER_PROMPT_BASE"
|
|
228
|
-
errors=1
|
|
229
|
-
fi
|
|
230
|
-
|
|
231
|
-
if [[ ! -f "$VERIFIER_PROMPT_BASE" ]]; then
|
|
232
|
-
log_error "Verifier prompt not found: $VERIFIER_PROMPT_BASE"
|
|
233
|
-
errors=1
|
|
234
|
-
fi
|
|
235
|
-
|
|
236
|
-
if [[ ! -f "$CONTEXT_FILE" ]]; then
|
|
237
|
-
log_error "Context file not found: $CONTEXT_FILE"
|
|
238
|
-
errors=1
|
|
239
|
-
fi
|
|
240
|
-
|
|
241
|
-
if [[ ! -f "$MEMORY_FILE" ]]; then
|
|
242
|
-
log_error "Memory file not found: $MEMORY_FILE"
|
|
243
|
-
errors=1
|
|
244
|
-
fi
|
|
245
|
-
|
|
246
|
-
if (( errors )); then
|
|
247
|
-
log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
|
|
248
|
-
exit 1
|
|
249
|
-
fi
|
|
250
|
-
|
|
251
|
-
mkdir -p "$LOGS_DIR"
|
|
252
|
-
}
|
|
253
|
-
|
|
254
482
|
# =============================================================================
|
|
255
483
|
# Session Management (tmux pattern: pane IDs)
|
|
256
484
|
# =============================================================================
|
|
@@ -300,15 +528,42 @@ create_session() {
|
|
|
300
528
|
|
|
301
529
|
fi
|
|
302
530
|
|
|
531
|
+
# Set pane titles and enable border labels for visual distinction
|
|
532
|
+
local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
|
|
533
|
+
local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
|
|
534
|
+
[[ "$VERIFY_CONSENSUS" = "1" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + codex:$VERIFIER_CODEX_MODEL)"
|
|
535
|
+
tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
|
|
536
|
+
tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
|
|
537
|
+
tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
|
|
538
|
+
# Color-coded pane borders: green=leader, blue=worker, yellow=verifier
|
|
539
|
+
tmux set-option -p -t "$LEADER_PANE" pane-border-style "fg=green" 2>/dev/null
|
|
540
|
+
tmux set-option -p -t "$WORKER_PANE" pane-border-style "fg=blue" 2>/dev/null
|
|
541
|
+
tmux set-option -p -t "$VERIFIER_PANE" pane-border-style "fg=yellow" 2>/dev/null
|
|
542
|
+
# Show pane titles in border
|
|
543
|
+
tmux set-option pane-border-status top 2>/dev/null
|
|
544
|
+
tmux set-option pane-border-format "#{?pane_active,#[fg=white bold],#[fg=grey]} #{pane_title} " 2>/dev/null
|
|
545
|
+
|
|
303
546
|
log " Leader pane: $LEADER_PANE"
|
|
304
547
|
log " Worker pane: $WORKER_PANE"
|
|
305
548
|
log " Verifier pane: $VERIFIER_PANE"
|
|
306
549
|
|
|
550
|
+
# AC12: Capture baseline commit before writing session config
|
|
551
|
+
BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")
|
|
552
|
+
|
|
553
|
+
# Truncate cost-log for fresh run (previous data in versioned campaign reports)
|
|
554
|
+
> "$COST_LOG"
|
|
555
|
+
|
|
556
|
+
# SV flag warning for tmux mode
|
|
557
|
+
if (( WITH_SELF_VERIFICATION )); then
|
|
558
|
+
log " NOTE: --with-self-verification recorded but SV report generation is Agent-mode only"
|
|
559
|
+
fi
|
|
560
|
+
|
|
307
561
|
# Write session config (atomic write)
|
|
308
562
|
echo '{
|
|
309
563
|
"session_name": "'"$SESSION_NAME"'",
|
|
310
564
|
"slug": "'"$SLUG"'",
|
|
311
565
|
"created_at": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",
|
|
566
|
+
"baseline_commit": "'"$BASELINE_COMMIT"'",
|
|
312
567
|
"panes": {
|
|
313
568
|
"leader": "'"$LEADER_PANE"'",
|
|
314
569
|
"worker": "'"$WORKER_PANE"'",
|
|
@@ -340,7 +595,10 @@ create_session() {
|
|
|
340
595
|
"heartbeat_stale_threshold": '"$HEARTBEAT_STALE_THRESHOLD"',
|
|
341
596
|
"max_restarts": '"$MAX_RESTARTS"',
|
|
342
597
|
"idle_nudge_threshold": '"$IDLE_NUDGE_THRESHOLD"',
|
|
343
|
-
"max_nudges": '"$MAX_NUDGES"'
|
|
598
|
+
"max_nudges": '"$MAX_NUDGES"',
|
|
599
|
+
"cb_threshold": '"$CB_THRESHOLD"',
|
|
600
|
+
"effective_cb_threshold": '"$EFFECTIVE_CB_THRESHOLD"',
|
|
601
|
+
"with_self_verification": '"$WITH_SELF_VERIFICATION"'
|
|
344
602
|
}
|
|
345
603
|
}' | atomic_write "$SESSION_CONFIG"
|
|
346
604
|
|
|
@@ -366,6 +624,17 @@ check_copy_mode() {
|
|
|
366
624
|
# Verification-Based Send Retry (tmux pattern)
|
|
367
625
|
# =============================================================================
|
|
368
626
|
|
|
627
|
+
# --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
# Writes the text to a private temp file, loads it into the named tmux buffer
# "rlp-paste" and pastes that buffer (deleting it afterwards) into the pane.
# No trailing newline is added; the caller sends Enter separately.
# Args: $1=pane_id $2=text
# Returns: 1 if no temp file could be created, otherwise 0 (tmux errors are
#          suppressed: pasting is best-effort, callers verify the result)
paste_to_pane() {
  local pane_id="$1"
  local text="$2"
  local tmpbuf

  # mktemp gives an unpredictable, exclusively created file instead of a
  # guessable /tmp name derived from the PID.
  tmpbuf=$(mktemp "${TMPDIR:-/tmp}/.rlp-desk-paste.XXXXXX") || return 1

  # printf '%s' writes the text verbatim; echo -n would treat text such as "-n"
  # as an option and (in zsh) interpret backslash escapes.
  printf '%s' "$text" > "$tmpbuf"

  # Only paste when the buffer was actually loaded, so a stale "rlp-paste"
  # buffer left over from an earlier failure is never pasted by mistake.
  if tmux load-buffer -b rlp-paste "$tmpbuf" 2>/dev/null; then
    tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null
  fi

  rm -f "$tmpbuf"
}
|
|
637
|
+
|
|
369
638
|
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
|
|
370
639
|
safe_send_keys() {
|
|
371
640
|
local pane_id="$1"
|
|
@@ -403,9 +672,9 @@ safe_send_keys() {
|
|
|
403
672
|
tmux send-keys -t "$pane_id" "2" Enter
|
|
404
673
|
sleep 0.2
|
|
405
674
|
fi
|
|
406
|
-
# Send text
|
|
407
|
-
log_debug "
|
|
408
|
-
|
|
675
|
+
# Send text via buffer paste (reliable for long strings)
|
|
676
|
+
log_debug " Pasting text to pane $pane_id (${#text} chars)"
|
|
677
|
+
paste_to_pane "$pane_id" "$text"
|
|
409
678
|
|
|
410
679
|
# Allow input buffer to settle (tmux: 150ms)
|
|
411
680
|
sleep 0.15
|
|
@@ -415,9 +684,7 @@ safe_send_keys() {
|
|
|
415
684
|
while (( round < 6 )); do
|
|
416
685
|
sleep 0.1
|
|
417
686
|
if (( round == 0 && pane_busy )); then
|
|
418
|
-
# Busy pane:
|
|
419
|
-
tmux send-keys -t "$pane_id" Tab
|
|
420
|
-
sleep 0.08
|
|
687
|
+
# Busy pane: just C-m (DO NOT send Tab — it toggles Claude Code permission mode)
|
|
421
688
|
tmux send-keys -t "$pane_id" C-m
|
|
422
689
|
else
|
|
423
690
|
tmux send-keys -t "$pane_id" C-m
|
|
@@ -450,7 +717,7 @@ safe_send_keys() {
|
|
|
450
717
|
if ! check_copy_mode "$pane_id"; then
|
|
451
718
|
return 1
|
|
452
719
|
fi
|
|
453
|
-
|
|
720
|
+
paste_to_pane "$pane_id" "$text"
|
|
454
721
|
sleep 0.12
|
|
455
722
|
local retry_round=0
|
|
456
723
|
while (( retry_round < 4 )); do
|
|
@@ -598,12 +865,19 @@ check_and_nudge_idle_pane() {
|
|
|
598
865
|
local now
|
|
599
866
|
now=$(date +%s)
|
|
600
867
|
if (( now - idle_since > IDLE_NUDGE_THRESHOLD )); then
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
(
|
|
606
|
-
|
|
868
|
+
# A12 fix: NEVER nudge if pane is busy (thinking/working) — nudge interrupts claude
|
|
869
|
+
local _nudge_capture
|
|
870
|
+
_nudge_capture=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
|
|
871
|
+
if echo "$_nudge_capture" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|razzle\|bunning\|zesting\|fermenting\|actualizing\|composing\|evaporating\|churning" 2>/dev/null; then
|
|
872
|
+
log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
|
|
873
|
+
else
|
|
874
|
+
local count=${(P)nudge_count_var}
|
|
875
|
+
if (( count < MAX_NUDGES )); then
|
|
876
|
+
log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
|
|
877
|
+
safe_send_keys "$pane_id" ""
|
|
878
|
+
(( count++ ))
|
|
879
|
+
eval "$nudge_count_var=$count"
|
|
880
|
+
fi
|
|
607
881
|
fi
|
|
608
882
|
fi
|
|
609
883
|
else
|
|
@@ -621,6 +895,13 @@ restart_worker() {
|
|
|
621
895
|
local pane_id="$1"
|
|
622
896
|
local iter="$2"
|
|
623
897
|
local trigger_file="$3"
|
|
898
|
+
|
|
899
|
+
# Codex workers are 1-shot exec; restart is not applicable
|
|
900
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
901
|
+
log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
|
|
902
|
+
return 1
|
|
903
|
+
fi
|
|
904
|
+
|
|
624
905
|
local restart_count="${WORKER_RESTARTS[$iter]:-0}"
|
|
625
906
|
|
|
626
907
|
if (( restart_count >= MAX_RESTARTS )); then
|
|
@@ -653,6 +934,25 @@ restart_worker() {
|
|
|
653
934
|
# Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
|
|
654
935
|
# =============================================================================
|
|
655
936
|
|
|
937
|
+
# Per-US PRD injection helper
# Prints the Worker prompt base to stdout with every occurrence of the full PRD
# path replaced by the per-US split path.
# Falls back to printing the prompt base unchanged (full PRD), with a stderr
# warning, if a split path was given but the file is missing.
# Args: $1=prompt_base_file $2=full_prd_path $3=per_us_prd_path (empty = no substitution)
inject_per_us_prd() {
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
    # Literal (non-regex) replacement: paths routinely contain "." and may
    # contain "&", "|" or "[", all of which are special inside a sed s-command.
    # The paths travel through the environment because awk -v would interpret
    # backslash escapes in them.
    RLP_PRD_FROM="$full_prd" RLP_PRD_TO="$per_us_prd" awk '
      BEGIN {
        from = ENVIRON["RLP_PRD_FROM"]
        to = ENVIRON["RLP_PRD_TO"]
        from_len = length(from)
      }
      {
        rest = $0
        out = ""
        if (from_len > 0) {
          while ((pos = index(rest, from)) > 0) {
            out = out substr(rest, 1, pos - 1) to
            rest = substr(rest, pos + from_len)
          }
        }
        print out rest
      }
    ' "$prompt_base"
  else
    if [[ -n "$per_us_prd" ]]; then
      echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
|
|
955
|
+
|
|
656
956
|
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
|
|
657
957
|
# NEVER send prompt content through tmux send-keys.
|
|
658
958
|
# Write payloads to files, send only short trigger commands (<200 chars).
|
|
@@ -670,14 +970,31 @@ write_worker_trigger() {
|
|
|
670
970
|
local prev_iter=$((iter - 1))
|
|
671
971
|
local fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' $prev_iter).fix-contract.md"
|
|
672
972
|
|
|
973
|
+
# Compute next unverified US before prompt assembly (required for per-US PRD injection)
|
|
974
|
+
local next_us=""
|
|
975
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
|
|
976
|
+
for us in $(echo "$US_LIST" | tr ',' ' '); do
|
|
977
|
+
if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
|
|
978
|
+
next_us="$us"
|
|
979
|
+
break
|
|
980
|
+
fi
|
|
981
|
+
done
|
|
982
|
+
fi
|
|
983
|
+
|
|
673
984
|
{
|
|
674
|
-
|
|
985
|
+
# Per-US PRD injection: substitute full PRD path with per-US split path when available
|
|
986
|
+
local per_us_prd=""
|
|
987
|
+
[[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
|
|
988
|
+
inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
|
|
675
989
|
echo ""
|
|
676
990
|
echo "---"
|
|
677
991
|
echo "## Iteration Context"
|
|
678
992
|
echo "- **Iteration**: $iter"
|
|
679
993
|
echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
|
|
680
994
|
echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
|
|
995
|
+
if (( _PRD_CHANGED )); then
|
|
996
|
+
echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
|
|
997
|
+
fi
|
|
681
998
|
|
|
682
999
|
# Include fix contract if previous verifier failed
|
|
683
1000
|
if [[ -f "$fix_contract_file" ]]; then
|
|
@@ -692,23 +1009,25 @@ write_worker_trigger() {
|
|
|
692
1009
|
|
|
693
1010
|
# Per-US mode: tell Worker exactly which US to work on
|
|
694
1011
|
if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
|
|
695
|
-
# Find next unverified US
|
|
696
|
-
local next_us=""
|
|
697
|
-
for us in $(echo "$US_LIST" | tr ',' ' '); do
|
|
698
|
-
if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
|
|
699
|
-
next_us="$us"
|
|
700
|
-
break
|
|
701
|
-
fi
|
|
702
|
-
done
|
|
703
|
-
|
|
704
1012
|
if [[ -n "$next_us" ]]; then
|
|
705
1013
|
echo ""
|
|
706
1014
|
echo "---"
|
|
707
|
-
echo "## PER-US SCOPE LOCK (this iteration)"
|
|
1015
|
+
echo "## PER-US SCOPE LOCK (this iteration) — OVERRIDES memory contract"
|
|
1016
|
+
echo "**IGNORE the 'Next Iteration Contract' from memory if it references a different story.**"
|
|
1017
|
+
echo "The Leader has determined that **${next_us}** is the next unverified story."
|
|
708
1018
|
echo "You MUST implement ONLY **${next_us}** in this iteration."
|
|
709
1019
|
echo "Do NOT implement any other user stories."
|
|
1020
|
+
# Per-US test-spec injection: point Worker to scoped test-spec if available
|
|
1021
|
+
local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
|
|
1022
|
+
if [[ -f "$per_us_test_spec" ]]; then
|
|
1023
|
+
echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
|
|
1024
|
+
else
|
|
1025
|
+
echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
|
|
1026
|
+
fi
|
|
710
1027
|
echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
|
|
711
1028
|
echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
|
|
1029
|
+
echo ""
|
|
1030
|
+
echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
|
|
712
1031
|
elif [[ -n "$VERIFIED_US" ]]; then
|
|
713
1032
|
# All individual US verified — this is the final full verify iteration
|
|
714
1033
|
echo ""
|
|
@@ -732,12 +1051,12 @@ write_worker_trigger() {
|
|
|
732
1051
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
733
1052
|
# Engine-specific launch command (expanded at write time)
|
|
734
1053
|
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
735
|
-
local engine_cmd="${CODEX_BIN:-codex}
|
|
1054
|
+
local engine_cmd="${CODEX_BIN:-codex} exec \\
|
|
1055
|
+
-m $WORKER_CODEX_MODEL \\
|
|
736
1056
|
-c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
|
|
737
1057
|
--dangerously-bypass-approvals-and-sandbox \\
|
|
738
|
-
\"\$(cat $prompt_file)\"
|
|
739
|
-
|
|
740
|
-
local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
|
|
1058
|
+
\"\$(cat $prompt_file)\""
|
|
1059
|
+
local engine_comment="# Run codex exec with fresh context (no pipe — codex requires terminal)"
|
|
741
1060
|
else
|
|
742
1061
|
local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
|
|
743
1062
|
--model $WORKER_MODEL \\
|
|
@@ -868,106 +1187,6 @@ TRIGGER_EOF
|
|
|
868
1187
|
log " Verifier trigger: $trigger_file"
|
|
869
1188
|
}
|
|
870
1189
|
|
|
871
|
-
# =============================================================================
|
|
872
|
-
# Status Updates
|
|
873
|
-
# =============================================================================
|
|
874
|
-
|
|
875
|
-
# --- governance.md s7 step 8: Update status.json ---
|
|
876
|
-
update_status() {
|
|
877
|
-
local phase="$1"
|
|
878
|
-
local last_result="$2"
|
|
879
|
-
|
|
880
|
-
# Build verified_us as JSON array
|
|
881
|
-
local verified_us_json="[]"
|
|
882
|
-
if [[ -n "$VERIFIED_US" ]]; then
|
|
883
|
-
verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
|
|
884
|
-
fi
|
|
885
|
-
|
|
886
|
-
# Build consensus fields
|
|
887
|
-
local consensus_json=""
|
|
888
|
-
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
889
|
-
consensus_json=',
|
|
890
|
-
"consensus_scope": "'"$CONSENSUS_SCOPE"'",
|
|
891
|
-
"consensus_round": '"$CONSENSUS_ROUND"',
|
|
892
|
-
"claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
|
|
893
|
-
"codex_verdict": "'"${CODEX_VERDICT:-}"'"'
|
|
894
|
-
fi
|
|
895
|
-
|
|
896
|
-
echo '{
|
|
897
|
-
"slug": "'"$SLUG"'",
|
|
898
|
-
"iteration": '"$ITERATION"',
|
|
899
|
-
"max_iter": '"$MAX_ITER"',
|
|
900
|
-
"phase": "'"$phase"'",
|
|
901
|
-
"worker_model": "'"$WORKER_MODEL"'",
|
|
902
|
-
"verifier_model": "'"$VERIFIER_MODEL"'",
|
|
903
|
-
"worker_engine": "'"$WORKER_ENGINE"'",
|
|
904
|
-
"verifier_engine": "'"$VERIFIER_ENGINE"'",
|
|
905
|
-
"worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
|
|
906
|
-
"worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
|
|
907
|
-
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
908
|
-
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
|
|
909
|
-
"verify_mode": "'"$VERIFY_MODE"'",
|
|
910
|
-
"verify_consensus": '"$VERIFY_CONSENSUS"',
|
|
911
|
-
"last_result": "'"$last_result"'",
|
|
912
|
-
"consecutive_failures": '"$CONSECUTIVE_FAILURES"',
|
|
913
|
-
"verified_us": '"$verified_us_json"''"$consensus_json"',
|
|
914
|
-
"updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
|
|
915
|
-
}' | atomic_write "$STATUS_FILE"
|
|
916
|
-
}
|
|
917
|
-
|
|
918
|
-
# --- governance.md s7 step 8: Write result log ---
|
|
919
|
-
write_result_log() {
|
|
920
|
-
local iter="$1"
|
|
921
|
-
local result="$2"
|
|
922
|
-
local result_file="$LOGS_DIR/iter-$(printf '%03d' $iter).result.md"
|
|
923
|
-
|
|
924
|
-
local git_diff=""
|
|
925
|
-
git_diff=$(git diff --stat HEAD~1 HEAD 2>/dev/null || echo "(no git diff available)")
|
|
926
|
-
|
|
927
|
-
{
|
|
928
|
-
echo "# Iteration $iter Result"
|
|
929
|
-
echo ""
|
|
930
|
-
echo "## Status"
|
|
931
|
-
echo "$result [leader-measured]"
|
|
932
|
-
echo ""
|
|
933
|
-
echo "## Files Changed"
|
|
934
|
-
echo '```'
|
|
935
|
-
echo "$git_diff"
|
|
936
|
-
echo '```'
|
|
937
|
-
echo "[git-measured]"
|
|
938
|
-
echo ""
|
|
939
|
-
echo "## Timestamp"
|
|
940
|
-
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
|
941
|
-
} | atomic_write "$result_file"
|
|
942
|
-
}
|
|
943
|
-
|
|
944
|
-
# =============================================================================
|
|
945
|
-
# Sentinel Writers
|
|
946
|
-
# =============================================================================
|
|
947
|
-
|
|
948
|
-
# --- governance.md s7: Only the Leader writes sentinels ---
|
|
949
|
-
write_complete_sentinel() {
|
|
950
|
-
local summary="$1"
|
|
951
|
-
echo "# Campaign Complete
|
|
952
|
-
|
|
953
|
-
Completed at iteration $ITERATION.
|
|
954
|
-
$summary
|
|
955
|
-
|
|
956
|
-
Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$COMPLETE_SENTINEL"
|
|
957
|
-
log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
write_blocked_sentinel() {
|
|
961
|
-
local reason="$1"
|
|
962
|
-
echo "# Campaign Blocked
|
|
963
|
-
|
|
964
|
-
Blocked at iteration $ITERATION.
|
|
965
|
-
Reason: $reason
|
|
966
|
-
|
|
967
|
-
Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
|
|
968
|
-
log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
|
|
969
|
-
}
|
|
970
|
-
|
|
971
1190
|
# =============================================================================
|
|
972
1191
|
# Cleanup (trap handler)
|
|
973
1192
|
# =============================================================================
|
|
@@ -976,7 +1195,11 @@ cleanup() {
|
|
|
976
1195
|
log "Cleaning up..."
|
|
977
1196
|
|
|
978
1197
|
# Remove lockfile
|
|
979
|
-
|
|
1198
|
+
if (( LOCKFILE_ACQUIRED )); then
|
|
1199
|
+
rm -f "$LOCKFILE_PATH" 2>/dev/null
|
|
1200
|
+
else
|
|
1201
|
+
log_debug "cleanup: lockfile not owned by this process, skipping removal"
|
|
1202
|
+
fi
|
|
980
1203
|
|
|
981
1204
|
# Kill claude processes then kill panes
|
|
982
1205
|
log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
|
|
@@ -1002,6 +1225,12 @@ cleanup() {
|
|
|
1002
1225
|
setopt local_options nonomatch 2>/dev/null
|
|
1003
1226
|
rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null
|
|
1004
1227
|
|
|
1228
|
+
# AC4: Generate campaign report on all terminal states (always-on)
|
|
1229
|
+
generate_campaign_report
|
|
1230
|
+
|
|
1231
|
+
# US-001: Generate SV report after campaign report (tmux mode)
|
|
1232
|
+
generate_sv_report
|
|
1233
|
+
|
|
1005
1234
|
# Print summary
|
|
1006
1235
|
local end_time
|
|
1007
1236
|
end_time=$(date +%s)
|
|
@@ -1014,17 +1243,24 @@ cleanup() {
|
|
|
1014
1243
|
elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
|
|
1015
1244
|
else final_status="TIMEOUT"; fi
|
|
1016
1245
|
|
|
1246
|
+
# --- Update metadata.json with final status ---
|
|
1247
|
+
if [[ -f "$METADATA_FILE" ]]; then
|
|
1248
|
+
jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
1249
|
+
'.campaign_status = $status | .end_time = $end_time' \
|
|
1250
|
+
"$METADATA_FILE" > "${METADATA_FILE}.tmp" && mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
|
|
1251
|
+
fi
|
|
1252
|
+
|
|
1017
1253
|
if (( DEBUG )); then
|
|
1018
1254
|
local end_ts=$(date +%s)
|
|
1019
1255
|
local elapsed=$((end_ts - START_TIME))
|
|
1020
1256
|
|
|
1021
|
-
log_debug "[
|
|
1257
|
+
log_debug "[FLOW] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
|
|
1022
1258
|
|
|
1023
1259
|
# --- Validation ---
|
|
1024
|
-
log_debug "[
|
|
1260
|
+
log_debug "[FLOW] === Execution Validation ==="
|
|
1025
1261
|
|
|
1026
1262
|
# 1. Did the correct verify mode run?
|
|
1027
|
-
log_debug "[
|
|
1263
|
+
log_debug "[FLOW] verify_mode=$VERIFY_MODE configured=true"
|
|
1028
1264
|
|
|
1029
1265
|
# 2. Per-US: were all US individually verified?
|
|
1030
1266
|
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
@@ -1038,39 +1274,39 @@ cleanup() {
|
|
|
1038
1274
|
|
|
1039
1275
|
if [[ "$final_status" = "COMPLETE" ]]; then
|
|
1040
1276
|
if (( verified_count >= expected_count )); then
|
|
1041
|
-
log_debug "[
|
|
1277
|
+
log_debug "[FLOW] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
|
|
1042
1278
|
else
|
|
1043
|
-
log_debug "[
|
|
1279
|
+
log_debug "[FLOW] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
|
|
1044
1280
|
fi
|
|
1045
1281
|
else
|
|
1046
|
-
log_debug "[
|
|
1282
|
+
log_debug "[FLOW] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
|
|
1047
1283
|
fi
|
|
1048
1284
|
fi
|
|
1049
1285
|
|
|
1050
1286
|
# 3. Consensus: were both engines used?
|
|
1051
1287
|
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1052
1288
|
if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
|
|
1053
|
-
log_debug "[
|
|
1289
|
+
log_debug "[FLOW] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
|
|
1054
1290
|
else
|
|
1055
|
-
log_debug "[
|
|
1291
|
+
log_debug "[FLOW] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
|
|
1056
1292
|
fi
|
|
1057
1293
|
fi
|
|
1058
1294
|
|
|
1059
1295
|
# 4. Engine match: did the configured engines actually run?
|
|
1060
|
-
local worker_dispatches=$(grep -c '\[
|
|
1061
|
-
local verifier_dispatches=$(grep -c '\[
|
|
1062
|
-
log_debug "[
|
|
1296
|
+
local worker_dispatches=$(grep -c '\[FLOW\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1297
|
+
local verifier_dispatches=$(grep -c '\[FLOW\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1298
|
+
log_debug "[FLOW] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
|
|
1063
1299
|
|
|
1064
1300
|
# 5. Fix loops: how many fix contracts were generated?
|
|
1065
|
-
local fix_count=$(grep -c '\[
|
|
1066
|
-
log_debug "[
|
|
1301
|
+
local fix_count=$(grep -c '\[DECIDE\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1302
|
+
log_debug "[FLOW] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
|
|
1067
1303
|
|
|
1068
1304
|
# 6. Circuit breakers: any triggered?
|
|
1069
|
-
local cb_count=$(grep -c '\[
|
|
1070
|
-
log_debug "[
|
|
1305
|
+
local cb_count=$(grep -c '\[GOV\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1306
|
+
log_debug "[FLOW] circuit_breakers_triggered=$cb_count"
|
|
1071
1307
|
|
|
1072
1308
|
# 7. Overall result
|
|
1073
|
-
log_debug "[
|
|
1309
|
+
log_debug "[FLOW] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
|
|
1074
1310
|
fi
|
|
1075
1311
|
|
|
1076
1312
|
echo ""
|
|
@@ -1110,6 +1346,7 @@ poll_for_signal() {
|
|
|
1110
1346
|
local trigger_file="$4"
|
|
1111
1347
|
local role="$5" # "worker" or "verifier"
|
|
1112
1348
|
local nudge_count=0
|
|
1349
|
+
local api_retry_count=0
|
|
1113
1350
|
local poll_start
|
|
1114
1351
|
poll_start=$(date +%s)
|
|
1115
1352
|
|
|
@@ -1134,6 +1371,54 @@ poll_for_signal() {
|
|
|
1134
1371
|
return 0 # success
|
|
1135
1372
|
fi
|
|
1136
1373
|
|
|
1374
|
+
# A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
|
|
1375
|
+
# ONLY for Worker polling — Verifier waits for verdict file, not done-claim
|
|
1376
|
+
if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
|
|
1377
|
+
local dc_us_id
|
|
1378
|
+
dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
|
|
1379
|
+
if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
|
|
1380
|
+
log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
|
|
1381
|
+
log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
|
|
1382
|
+
echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim without signal)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
|
|
1383
|
+
return 0
|
|
1384
|
+
fi
|
|
1385
|
+
fi
|
|
1386
|
+
|
|
1387
|
+
# API transient-error recovery with bounded backoff
|
|
1388
|
+
local pane_output_for_retry
|
|
1389
|
+
pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
|
|
1390
|
+
local is_api_text_retry=0
|
|
1391
|
+
if [[ -n "$pane_output_for_retry" ]] &&
|
|
1392
|
+
( echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])500([^[:digit:]]|$)' \
|
|
1393
|
+
|| echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])529([^[:digit:]]|$)' \
|
|
1394
|
+
|| echo "$pane_output_for_retry" | grep -qi 'overloaded' \
|
|
1395
|
+
|| echo "$pane_output_for_retry" | grep -qi 'too many requests' \
|
|
1396
|
+
|| echo "$pane_output_for_retry" | grep -qi 'service unavailable' ); then
|
|
1397
|
+
is_api_text_retry=1
|
|
1398
|
+
fi
|
|
1399
|
+
|
|
1400
|
+
if (( is_api_text_retry )) || is_api_error "$pane_id"; then
|
|
1401
|
+
(( api_retry_count++ ))
|
|
1402
|
+
log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
|
|
1403
|
+
if (( api_retry_count >= _API_MAX_RETRIES )); then
|
|
1404
|
+
log_error "API unavailable after ${_API_MAX_RETRIES} retries"
|
|
1405
|
+
write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries"
|
|
1406
|
+
return 2
|
|
1407
|
+
fi
|
|
1408
|
+
# A5: If pane shows "queued messages" or rate-limit corruption, restart pane
|
|
1409
|
+
if echo "$pane_output_for_retry" | grep -qi 'queued messages'; then
|
|
1410
|
+
log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
|
|
1411
|
+
log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
|
|
1412
|
+
tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
|
|
1413
|
+
tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null; sleep 2
|
|
1414
|
+
wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
|
|
1415
|
+
fi
|
|
1416
|
+
sleep "$_API_RETRY_INTERVAL_S"
|
|
1417
|
+
continue
|
|
1418
|
+
else
|
|
1419
|
+
api_retry_count=0
|
|
1420
|
+
fi
|
|
1421
|
+
|
|
1137
1422
|
# Check heartbeat freshness (tmux pattern)
|
|
1138
1423
|
if [[ -f "$heartbeat_file" ]]; then
|
|
1139
1424
|
if check_heartbeat_exited "$heartbeat_file"; then
|
|
@@ -1143,9 +1428,13 @@ poll_for_signal() {
|
|
|
1143
1428
|
log " Signal file detected after process exit: $signal_file"
|
|
1144
1429
|
return 0
|
|
1145
1430
|
fi
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1431
|
+
# Dispatch to engine-specific exit handler
|
|
1432
|
+
if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
|
|
1433
|
+
handle_worker_exit_codex "$ITERATION" "$signal_file"
|
|
1434
|
+
return 0
|
|
1435
|
+
fi
|
|
1436
|
+
# Claude path (or verifier of any engine)
|
|
1437
|
+
if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
|
|
1149
1438
|
# Reset poll timer for the restart
|
|
1150
1439
|
poll_start=$(date +%s)
|
|
1151
1440
|
nudge_count=0
|
|
@@ -1163,7 +1452,7 @@ poll_for_signal() {
|
|
|
1163
1452
|
(( HEARTBEAT_STALE_COUNT++ ))
|
|
1164
1453
|
# Circuit breaker: 3 consecutive heartbeat stale events
|
|
1165
1454
|
if (( HEARTBEAT_STALE_COUNT >= 3 )); then
|
|
1166
|
-
log_debug "[
|
|
1455
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
|
|
1167
1456
|
log_error "Circuit breaker: 3 consecutive heartbeat stale events"
|
|
1168
1457
|
return 1
|
|
1169
1458
|
fi
|
|
@@ -1181,12 +1470,23 @@ poll_for_signal() {
|
|
|
1181
1470
|
fi
|
|
1182
1471
|
fi
|
|
1183
1472
|
|
|
1473
|
+
# Dead pane detection during poll: check if claude/codex process died
|
|
1474
|
+
local poll_cmd
|
|
1475
|
+
poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
|
|
1476
|
+
# Dead pane detection — delegates to check_dead_pane() for engine-aware logic
|
|
1477
|
+
if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
|
|
1478
|
+
log " WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
|
|
1479
|
+
log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
|
|
1480
|
+
# Return failure so caller can handle recovery
|
|
1481
|
+
return 1
|
|
1482
|
+
fi
|
|
1483
|
+
|
|
1184
1484
|
# Auto-approve permission prompts during poll
|
|
1185
1485
|
local poll_capture
|
|
1186
1486
|
poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
|
|
1187
1487
|
if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
1188
1488
|
log " Permission prompt detected during poll, auto-approving..."
|
|
1189
|
-
log_debug "[
|
|
1489
|
+
log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
|
|
1190
1490
|
tmux send-keys -t "$pane_id" Enter
|
|
1191
1491
|
sleep 0.5
|
|
1192
1492
|
fi
|
|
@@ -1198,38 +1498,6 @@ poll_for_signal() {
|
|
|
1198
1498
|
done
|
|
1199
1499
|
}
|
|
1200
1500
|
|
|
1201
|
-
# =============================================================================
|
|
1202
|
-
# Circuit Breaker: Stale Context Detection
|
|
1203
|
-
# =============================================================================
|
|
1204
|
-
|
|
1205
|
-
# --- governance.md s7 step 8: Stale context detection ---
|
|
1206
|
-
compute_context_hash() {
|
|
1207
|
-
if [[ -f "$CONTEXT_FILE" ]]; then
|
|
1208
|
-
md5 -q "$CONTEXT_FILE" 2>/dev/null || md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1
|
|
1209
|
-
else
|
|
1210
|
-
echo "no-context"
|
|
1211
|
-
fi
|
|
1212
|
-
}
|
|
1213
|
-
|
|
1214
|
-
check_stale_context() {
|
|
1215
|
-
local current_hash
|
|
1216
|
-
current_hash=$(compute_context_hash)
|
|
1217
|
-
|
|
1218
|
-
if [[ "$current_hash" == "$PREV_CONTEXT_HASH" ]]; then
|
|
1219
|
-
(( STALE_CONTEXT_COUNT++ ))
|
|
1220
|
-
log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
|
|
1221
|
-
if (( STALE_CONTEXT_COUNT >= 3 )); then
|
|
1222
|
-
log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
|
|
1223
|
-
return 1
|
|
1224
|
-
fi
|
|
1225
|
-
else
|
|
1226
|
-
STALE_CONTEXT_COUNT=0
|
|
1227
|
-
fi
|
|
1228
|
-
|
|
1229
|
-
PREV_CONTEXT_HASH="$current_hash"
|
|
1230
|
-
return 0
|
|
1231
|
-
}
|
|
1232
|
-
|
|
1233
1501
|
# =============================================================================
|
|
1234
1502
|
# Consensus Verification (run two verifiers sequentially in same pane)
|
|
1235
1503
|
# =============================================================================
|
|
@@ -1247,10 +1515,23 @@ run_single_verifier() {
|
|
|
1247
1515
|
local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
|
|
1248
1516
|
local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
|
|
1249
1517
|
|
|
1250
|
-
# Clean previous Verifier session
|
|
1518
|
+
# Clean previous Verifier session (with dead pane detection)
|
|
1251
1519
|
local verifier_cmd
|
|
1252
1520
|
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1253
|
-
if [[
|
|
1521
|
+
if [[ -z "$verifier_cmd" ]]; then
|
|
1522
|
+
log " Verifier pane $VERIFIER_PANE is gone — replacing..."
|
|
1523
|
+
log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
|
|
1524
|
+
replace_worker_pane "$VERIFIER_PANE" "verifier"
|
|
1525
|
+
VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
|
|
1526
|
+
log " New verifier pane: $VERIFIER_PANE"
|
|
1527
|
+
elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
|
|
1528
|
+
log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
|
|
1529
|
+
log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
1530
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
1531
|
+
sleep 0.2
|
|
1532
|
+
tmux send-keys -t "$VERIFIER_PANE" "clear" Enter 2>/dev/null
|
|
1533
|
+
sleep 0.3
|
|
1534
|
+
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1254
1535
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1255
1536
|
sleep 0.5
|
|
1256
1537
|
tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
|
|
@@ -1265,55 +1546,19 @@ run_single_verifier() {
|
|
|
1265
1546
|
# Remove previous verdict file
|
|
1266
1547
|
rm -f "$VERDICT_FILE" 2>/dev/null
|
|
1267
1548
|
|
|
1268
|
-
# Launch verifier
|
|
1549
|
+
# Launch verifier — dispatch to engine-specific function
|
|
1550
|
+
local verifier_launch
|
|
1269
1551
|
if [[ "$engine" = "codex" ]]; then
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
|
|
1274
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1275
|
-
log_debug "Verifier$suffix codex exec sent directly"
|
|
1552
|
+
verifier_launch="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1553
|
+
launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
|
|
1554
|
+
log_debug "Verifier$suffix codex exec dispatched"
|
|
1276
1555
|
else
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
log " Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
|
|
1280
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
|
|
1281
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1282
|
-
|
|
1283
|
-
if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
|
|
1556
|
+
verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
|
|
1557
|
+
if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
|
|
1284
1558
|
log_error "Verifier$suffix failed to start"
|
|
1285
1559
|
return 1
|
|
1286
1560
|
fi
|
|
1287
|
-
|
|
1288
|
-
sleep 3
|
|
1289
|
-
local verifier_instruction="Read and execute the instructions in $prompt_file"
|
|
1290
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1291
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1292
|
-
log_debug "Verifier$suffix instruction sent directly"
|
|
1293
|
-
|
|
1294
|
-
# Verify claude actually started working
|
|
1295
|
-
local v_submit=0
|
|
1296
|
-
while (( v_submit < 15 )); do
|
|
1297
|
-
sleep 2
|
|
1298
|
-
local v_check
|
|
1299
|
-
v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
|
|
1300
|
-
if echo "$v_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" 2>/dev/null; then
|
|
1301
|
-
log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
|
|
1302
|
-
break
|
|
1303
|
-
fi
|
|
1304
|
-
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1305
|
-
if (( v_submit == 8 )); then
|
|
1306
|
-
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1307
|
-
tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
|
|
1308
|
-
sleep 0.1
|
|
1309
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1310
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1311
|
-
fi
|
|
1312
|
-
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1313
|
-
sleep 0.3
|
|
1314
|
-
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1315
|
-
(( v_submit++ ))
|
|
1316
|
-
done
|
|
1561
|
+
log_debug "Verifier$suffix claude dispatched"
|
|
1317
1562
|
fi
|
|
1318
1563
|
|
|
1319
1564
|
# Poll for verdict
|
|
@@ -1341,6 +1586,10 @@ run_single_verifier() {
|
|
|
1341
1586
|
# Claude: use full poll_for_signal with heartbeat/nudge
|
|
1342
1587
|
log " Polling for verify-verdict.json ($suffix)..."
|
|
1343
1588
|
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
|
|
1589
|
+
local verifier_poll_rc=$?
|
|
1590
|
+
if (( verifier_poll_rc == 2 )); then
|
|
1591
|
+
return 1
|
|
1592
|
+
fi
|
|
1344
1593
|
log_error "Verifier$suffix poll failed"
|
|
1345
1594
|
return 1
|
|
1346
1595
|
fi
|
|
@@ -1352,6 +1601,110 @@ run_single_verifier() {
|
|
|
1352
1601
|
return 0
|
|
1353
1602
|
}
|
|
1354
1603
|
|
|
1604
|
+
# --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
#
# For every story in US_LIST (comma separated) this:
#   1. overwrites SIGNAL_FILE with a verify signal scoped to that story,
#   2. writes the verifier trigger/prompt for the iteration,
#   3. cleans the verifier pane and launches the configured verifier engine,
#   4. polls for VERDICT_FILE and requires `.verdict == "pass"`,
#   5. archives the verdict as iter-NNN.final-verdict-<us>.json.
# After all stories pass, VERIFICATION_CMD (if set) is run as an integration check.
#
# Args:    $1 = iteration number
# Globals: reads US_LIST, VERIFY_MODE, SIGNAL_FILE, VERDICT_FILE, LOGS_DIR,
#          VERIFIER_PANE, VERIFIER_HEARTBEAT, VERIFIER_ENGINE, VERIFIER_MODEL,
#          VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING, CODEX_BIN, CLAUDE_BIN,
#          VERIFICATION_CMD; writes FAILED_US.
# Returns: 0 if all US pass + integration check pass, 1 if any US fails,
#          2 if integration fails. Sets FAILED_US global on failure.
#
# NOTE(review): SIGNAL_FILE is overwritten per story and is NOT restored
# afterwards; confirm that no caller relies on the Worker's original signal.
run_sequential_final_verify() {
  local iter="$1"
  # Declared once, outside the loop: in zsh a bare `local name` for an already
  # declared local prints the variable, which would leak into stdout on every
  # iteration after the first.
  local us verifier_prompt verifier_cmd verifier_launch verdict poll_rc
  FAILED_US=""

  log " Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
  log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"

  for us in $(echo "$US_LIST" | tr ',' ' '); do
    log " Final verify: checking $us..."

    # Override signal file to scope verifier to this US
    echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"

    # Write scoped verifier trigger
    write_verifier_trigger "$iter"
    verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"

    # Clean verifier pane: stop any engine still running in it
    verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
    if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
      tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null; sleep 2
    fi
    wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

    # Drop the previous story's verdict BEFORE launching, so a verdict written
    # by the new verifier can never be deleted and a stale one never be read.
    rm -f "$VERDICT_FILE"

    # Launch verifier
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      # NOTE(review): run_single_verifier builds its codex command with
      # `exec "$(cat $prompt_file)"`; this one does not — confirm that
      # launch_verifier_codex supplies the prompt itself.
      verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
    else
      verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
      launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || {
        log_error "Failed to launch verifier for $us"
        FAILED_US="$us"
        return 1
      }
    fi

    # Poll for verdict. poll_for_signal takes
    # (signal_file, heartbeat_file, pane_id, trigger_file, role); the role must
    # contain "erifier" so Worker-only fallbacks are skipped.
    poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier" || poll_rc=$?
    if (( poll_rc != 0 )); then
      log_error "Verifier poll failed for $us (rc=$poll_rc)"
      FAILED_US="$us"
      return 1
    fi

    # Check verdict
    verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
    if [[ "$verdict" != "pass" ]]; then
      FAILED_US="$us"
      log " Sequential final verify FAILED at $us"
      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
      return 1
    fi
    log " Sequential final verify: $us PASSED"

    # Archive per-US final verdict
    cp "$VERDICT_FILE" "$LOGS_DIR/iter-$(printf '%03d' $iter).final-verdict-${us}.json" 2>/dev/null
  done

  # Integration check: run tests if VERIFICATION_CMD is set
  if [[ -n "${VERIFICATION_CMD:-}" ]]; then
    log " Running integration test suite after sequential verify..."
    log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
    # VERIFICATION_CMD is an operator-supplied command line, hence eval.
    if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
      log " Integration test suite FAILED"
      FAILED_US="integration"
      return 2
    fi
    log " Integration test suite PASSED"
  fi

  log " Sequential final verify: ALL PASSED"
  return 0
}
|
|
1689
|
+
|
|
1690
|
+
# --- US-005: Decide whether this verify signal gets consensus verification ---
#
# Args:    $1 = us_id carried by the signal ("ALL" marks the final verify)
# Globals: VERIFY_CONSENSUS, CONSENSUS_SCOPE, FINAL_CONSENSUS (read only)
# Returns: 0 when consensus should run, 1 when a single engine is enough.
#
# Rules:
#   - VERIFY_CONSENSUS=1 with CONSENSUS_SCOPE=all        -> always consensus
#   - VERIFY_CONSENSUS=1 with CONSENSUS_SCOPE=final-only -> consensus for "ALL" only
#   - FINAL_CONSENSUS=1 (independent of the above)       -> consensus for "ALL" only
_should_use_consensus() {
  local us_id="${1:-}"
  local is_final=0

  if [[ "$us_id" == "ALL" ]]; then
    is_final=1
  fi

  if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
    if [[ "$CONSENSUS_SCOPE" == "all" ]]; then
      return 0
    fi
    if [[ "$CONSENSUS_SCOPE" == "final-only" ]] && (( is_final )); then
      return 0
    fi
  fi

  if [[ "$FINAL_CONSENSUS" = "1" ]] && (( is_final )); then
    return 0
  fi

  return 1
}
|
|
1707
|
+
|
|
1355
1708
|
# --- US-004: Run consensus verification (claude + codex sequentially) ---
|
|
1356
1709
|
run_consensus_verification() {
|
|
1357
1710
|
local iter="$1"
|
|
@@ -1362,32 +1715,59 @@ run_consensus_verification() {
|
|
|
1362
1715
|
CLAUDE_VERDICT=""
|
|
1363
1716
|
CODEX_VERDICT=""
|
|
1364
1717
|
|
|
1365
|
-
while (( CONSENSUS_ROUND <
|
|
1718
|
+
while (( CONSENSUS_ROUND < 6 )); do
|
|
1366
1719
|
(( CONSENSUS_ROUND++ ))
|
|
1367
|
-
log " Consensus round $CONSENSUS_ROUND/
|
|
1720
|
+
log " Consensus round $CONSENSUS_ROUND/6..."
|
|
1368
1721
|
|
|
1369
1722
|
# Run claude verifier first
|
|
1723
|
+
local _claude_t0=$(date +%s)
|
|
1370
1724
|
if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
|
|
1371
1725
|
log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
|
|
1372
1726
|
return 1
|
|
1373
1727
|
fi
|
|
1728
|
+
ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
|
|
1374
1729
|
CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
|
|
1375
|
-
|
|
1730
|
+
# A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
|
|
1731
|
+
if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
|
|
1732
|
+
log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
|
|
1733
|
+
log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
|
|
1734
|
+
rm -f "$claude_verdict_file" 2>/dev/null
|
|
1735
|
+
if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
|
|
1736
|
+
log_error "Claude verifier retry also failed"
|
|
1737
|
+
return 1
|
|
1738
|
+
fi
|
|
1739
|
+
CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
|
|
1740
|
+
if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
|
|
1741
|
+
log_error "Claude verdict still null after retry — consensus cannot proceed"
|
|
1742
|
+
return 1
|
|
1743
|
+
fi
|
|
1744
|
+
fi
|
|
1745
|
+
log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
|
|
1746
|
+
|
|
1747
|
+
# F8: --consensus-fail-fast — skip second verifier if first fails
|
|
1748
|
+
if [[ "$CONSENSUS_FAIL_FAST" = "1" && "$CLAUDE_VERDICT" = "fail" ]]; then
|
|
1749
|
+
log " Consensus fail-fast: claude=fail, skipping codex verifier"
|
|
1750
|
+
log_debug "[GOV] iter=$iter phase=consensus_fail_fast claude=fail codex=skipped"
|
|
1751
|
+
CODEX_VERDICT="skipped"
|
|
1752
|
+
return 2 # disagreement/fail signal
|
|
1753
|
+
fi
|
|
1376
1754
|
|
|
1377
1755
|
# Run codex verifier second
|
|
1756
|
+
local _codex_t0=$(date +%s)
|
|
1378
1757
|
if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
|
|
1379
1758
|
log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
|
|
1380
1759
|
return 1
|
|
1381
1760
|
fi
|
|
1761
|
+
ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
|
|
1382
1762
|
CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
|
|
1383
|
-
log_debug "[
|
|
1763
|
+
log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"
|
|
1384
1764
|
|
|
1385
1765
|
log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
|
|
1386
1766
|
local _combined_action="retry"
|
|
1387
1767
|
if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then _combined_action="pass"
|
|
1388
|
-
elif (( CONSENSUS_ROUND >=
|
|
1768
|
+
elif (( CONSENSUS_ROUND >= 6 )); then _combined_action="blocked"
|
|
1389
1769
|
fi
|
|
1390
|
-
log_debug "[
|
|
1770
|
+
log_debug "[GOV] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"
|
|
1391
1771
|
|
|
1392
1772
|
# Both pass → success
|
|
1393
1773
|
if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
|
|
@@ -1409,7 +1789,7 @@ run_consensus_verification() {
|
|
|
1409
1789
|
fi
|
|
1410
1790
|
|
|
1411
1791
|
# Consensus disagreement
|
|
1412
|
-
log_debug "[
|
|
1792
|
+
log_debug "[GOV] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"
|
|
1413
1793
|
|
|
1414
1794
|
# NOTE: pre_existing_failure heuristic was removed (v0.3.5).
|
|
1415
1795
|
# It used unreliable grep-in-description string matching to classify
|
|
@@ -1442,14 +1822,19 @@ run_consensus_verification() {
|
|
|
1442
1822
|
|
|
1443
1823
|
# If this is not the last round, the caller will dispatch the Worker with the fix contract
|
|
1444
1824
|
# For now, write a fail verdict so the main loop can handle the fix loop
|
|
1445
|
-
if (( CONSENSUS_ROUND <
|
|
1446
|
-
# Create a merged fail verdict for the main loop
|
|
1825
|
+
if (( CONSENSUS_ROUND < 6 )); then
|
|
1826
|
+
# Create a merged fail verdict for the main loop — include issues from BOTH verdicts
|
|
1827
|
+
local merged_issues="[]"
|
|
1828
|
+
local claude_issues codex_issues
|
|
1829
|
+
claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_verdict_file" 2>/dev/null || echo '[]')
|
|
1830
|
+
codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_verdict_file" 2>/dev/null || echo '[]')
|
|
1831
|
+
merged_issues=$(echo "$claude_issues $codex_issues" | jq -s 'add // []')
|
|
1447
1832
|
{
|
|
1448
1833
|
echo '{'
|
|
1449
1834
|
echo ' "verdict": "fail",'
|
|
1450
1835
|
echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
|
|
1451
|
-
echo ' "summary": "Consensus disagreement (round '"$CONSENSUS_ROUND"'/
|
|
1452
|
-
echo ' "issues":
|
|
1836
|
+
echo ' "summary": "Consensus disagreement (round '"$CONSENSUS_ROUND"'/6): claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
|
|
1837
|
+
echo ' "issues": '"$merged_issues"','
|
|
1453
1838
|
echo ' "recommended_state_transition": "continue",'
|
|
1454
1839
|
echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": '"$CONSENSUS_ROUND"' }'
|
|
1455
1840
|
echo '}'
|
|
@@ -1458,56 +1843,91 @@ run_consensus_verification() {
|
|
|
1458
1843
|
fi
|
|
1459
1844
|
done
|
|
1460
1845
|
|
|
1461
|
-
# Max consensus rounds exceeded
|
|
1462
|
-
log_error "Consensus failed after
|
|
1846
|
+
# Max consensus rounds exceeded — include issues from both verdicts
|
|
1847
|
+
log_error "Consensus failed after 6 rounds"
|
|
1848
|
+
local final_claude_issues final_codex_issues final_merged_issues
|
|
1849
|
+
final_claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_verdict_file" 2>/dev/null || echo '[]')
|
|
1850
|
+
final_codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_verdict_file" 2>/dev/null || echo '[]')
|
|
1851
|
+
final_merged_issues=$(echo "$final_claude_issues $final_codex_issues" | jq -s 'add // []')
|
|
1463
1852
|
{
|
|
1464
1853
|
echo '{'
|
|
1465
1854
|
echo ' "verdict": "fail",'
|
|
1466
1855
|
echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
|
|
1467
|
-
echo ' "summary": "Consensus failed after
|
|
1468
|
-
echo ' "issues":
|
|
1856
|
+
echo ' "summary": "Consensus failed after 6 rounds: claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
|
|
1857
|
+
echo ' "issues": '"$final_merged_issues"','
|
|
1469
1858
|
echo ' "recommended_state_transition": "blocked",'
|
|
1470
|
-
echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round":
|
|
1859
|
+
echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": 6 }'
|
|
1471
1860
|
echo '}'
|
|
1472
1861
|
} | atomic_write "$VERDICT_FILE"
|
|
1473
1862
|
return 1
|
|
1474
1863
|
}
|
|
1475
1864
|
|
|
1476
|
-
# =============================================================================
|
|
1477
|
-
# Security Warning
|
|
1478
|
-
# =============================================================================
|
|
1479
|
-
|
|
1480
|
-
# Print the banner warning that the claude CLI is launched with
# --dangerously-skip-permissions.
# Arguments: none
# Outputs:   nine lines on stdout (blank, rule, message lines, rule, blank)
print_security_warning() {
  local rule="================================================================"

  # One printf call emits every banner line; the format is reused per argument.
  printf '%s\n' \
    "" \
    "$rule" \
    " WARNING: Running with --dangerously-skip-permissions" \
    "" \
    " The claude CLI will execute tools (file writes, shell commands)" \
    " without asking for confirmation. Only run this on code you" \
    " trust in an environment you control." \
    "$rule" \
    ""
}
|
|
1491
|
-
|
|
1492
1865
|
# =============================================================================
|
|
1493
1866
|
# Main Leader Loop
|
|
1494
1867
|
# =============================================================================
|
|
1495
1868
|
|
|
1496
1869
|
main() {
|
|
1497
1870
|
# --- Lockfile: prevent duplicate execution ---
|
|
1498
|
-
local lockfile="$
|
|
1871
|
+
local lockfile="$LOCKFILE_PATH"
|
|
1499
1872
|
mkdir -p "$(dirname "$lockfile")" 2>/dev/null
|
|
1500
1873
|
if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
|
|
1501
1874
|
local lock_pid
|
|
1502
1875
|
lock_pid=$(cat "$lockfile" 2>/dev/null)
|
|
1503
1876
|
if kill -0 "$lock_pid" 2>/dev/null; then
|
|
1504
|
-
log_error "Another instance is already running (PID $lock_pid)"
|
|
1877
|
+
log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
|
|
1505
1878
|
exit 1
|
|
1506
1879
|
fi
|
|
1507
1880
|
# Stale lock — overwrite
|
|
1881
|
+
log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
|
|
1508
1882
|
echo $$ > "$lockfile"
|
|
1883
|
+
LOCKFILE_ACQUIRED=1
|
|
1884
|
+
else
|
|
1885
|
+
LOCKFILE_ACQUIRED=1
|
|
1886
|
+
fi
|
|
1887
|
+
trap cleanup EXIT INT TERM
|
|
1888
|
+
mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
|
|
1889
|
+
|
|
1890
|
+
# --- Analytics directory: create only when --debug or --with-self-verification ---
|
|
1891
|
+
if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
|
|
1892
|
+
mkdir -p "$ANALYTICS_DIR" 2>/dev/null
|
|
1893
|
+
fi
|
|
1894
|
+
|
|
1895
|
+
# --- debug.log versioning (in analytics dir) ---
|
|
1896
|
+
if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
|
|
1897
|
+
local dbg_n=1
|
|
1898
|
+
while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
|
|
1899
|
+
(( dbg_n++ ))
|
|
1900
|
+
done
|
|
1901
|
+
mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
|
|
1902
|
+
fi
|
|
1903
|
+
|
|
1904
|
+
# --- campaign.jsonl versioning (in analytics dir, after mkdir) ---
|
|
1905
|
+
if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
|
|
1906
|
+
if [[ -f "$CAMPAIGN_JSONL" ]]; then
|
|
1907
|
+
local cj_n=1
|
|
1908
|
+
while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
|
|
1909
|
+
(( cj_n++ ))
|
|
1910
|
+
done
|
|
1911
|
+
mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
|
|
1912
|
+
fi
|
|
1913
|
+
fi
|
|
1914
|
+
|
|
1915
|
+
# --- metadata.json: write at campaign start ---
|
|
1916
|
+
if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
|
|
1917
|
+
jq -n \
|
|
1918
|
+
--arg slug "$SLUG" \
|
|
1919
|
+
--arg project_root "$ROOT" \
|
|
1920
|
+
--arg campaign_status "running" \
|
|
1921
|
+
--arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
1922
|
+
--arg end_time "" \
|
|
1923
|
+
--arg worker_model "$WORKER_MODEL" \
|
|
1924
|
+
--arg verifier_model "$VERIFIER_MODEL" \
|
|
1925
|
+
--argjson debug "$DEBUG" \
|
|
1926
|
+
--argjson with_sv "$WITH_SELF_VERIFICATION" \
|
|
1927
|
+
--argjson consensus "$VERIFY_CONSENSUS" \
|
|
1928
|
+
'{slug: $slug, project_root: $project_root, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
|
|
1929
|
+
> "$METADATA_FILE"
|
|
1509
1930
|
fi
|
|
1510
|
-
mkdir -p "$LOGS_DIR" 2>/dev/null
|
|
1511
1931
|
|
|
1512
1932
|
# --- Startup ---
|
|
1513
1933
|
log "Ralph Desk Tmux Runner starting..."
|
|
@@ -1518,6 +1938,7 @@ main() {
|
|
|
1518
1938
|
log " Verifier model: $VERIFIER_MODEL"
|
|
1519
1939
|
log " Verify mode: $VERIFY_MODE"
|
|
1520
1940
|
log " Verify consensus:$VERIFY_CONSENSUS"
|
|
1941
|
+
log " Final consensus: $FINAL_CONSENSUS"
|
|
1521
1942
|
log " Consensus scope: $CONSENSUS_SCOPE"
|
|
1522
1943
|
log " Poll interval: ${POLL_INTERVAL}s"
|
|
1523
1944
|
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
@@ -1531,10 +1952,11 @@ main() {
|
|
|
1531
1952
|
fi
|
|
1532
1953
|
local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
|
|
1533
1954
|
|
|
1534
|
-
log_debug "[
|
|
1535
|
-
log_debug "[
|
|
1536
|
-
log_debug "[
|
|
1537
|
-
log_debug "[
|
|
1955
|
+
log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
1956
|
+
log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
1957
|
+
log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
1958
|
+
log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE max_iter=$MAX_ITER"
|
|
1959
|
+
log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION debug=$DEBUG"
|
|
1538
1960
|
|
|
1539
1961
|
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
1540
1962
|
# Build expected flow
|
|
@@ -1543,13 +1965,13 @@ main() {
|
|
|
1543
1965
|
expected_flow="${expected_flow}worker->verify($us)->"
|
|
1544
1966
|
done
|
|
1545
1967
|
expected_flow="${expected_flow}verify(ALL)->COMPLETE"
|
|
1546
|
-
log_debug "[
|
|
1968
|
+
log_debug "[OPTION] expected_flow=$expected_flow"
|
|
1547
1969
|
else
|
|
1548
|
-
log_debug "[
|
|
1970
|
+
log_debug "[OPTION] expected_flow=worker(all)->verify(ALL)->COMPLETE"
|
|
1549
1971
|
fi
|
|
1550
1972
|
|
|
1551
1973
|
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1552
|
-
log_debug "[
|
|
1974
|
+
log_debug "[OPTION] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
|
|
1553
1975
|
fi
|
|
1554
1976
|
fi
|
|
1555
1977
|
|
|
@@ -1559,8 +1981,35 @@ main() {
|
|
|
1559
1981
|
if [[ -f "$prd_file" ]]; then
|
|
1560
1982
|
US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1561
1983
|
fi
|
|
1984
|
+
|
|
1985
|
+
# Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
|
|
1986
|
+
local memory_file="$DESK/memos/${SLUG}-memory.md"
|
|
1987
|
+
if [[ -f "$memory_file" ]]; then
|
|
1988
|
+
local completed_us
|
|
1989
|
+
completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1990
|
+
if [[ -n "$completed_us" ]]; then
|
|
1991
|
+
VERIFIED_US="$completed_us"
|
|
1992
|
+
log " Loaded completed stories from memory: $VERIFIED_US"
|
|
1993
|
+
log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
|
|
1994
|
+
fi
|
|
1995
|
+
fi
|
|
1996
|
+
|
|
1997
|
+
# D1: Fallback — restore verified_us from status.json if memory had none
|
|
1998
|
+
if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
|
|
1999
|
+
local status_verified
|
|
2000
|
+
status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
|
|
2001
|
+
if [[ -n "$status_verified" ]]; then
|
|
2002
|
+
VERIFIED_US="$status_verified"
|
|
2003
|
+
log " Restored verified_us from status.json: $VERIFIED_US"
|
|
2004
|
+
log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
|
|
2005
|
+
fi
|
|
2006
|
+
fi
|
|
1562
2007
|
fi
|
|
1563
2008
|
|
|
2009
|
+
# Initialize PRD snapshot state for live update detection
|
|
2010
|
+
PREV_PRD_HASH=$(compute_prd_hash)
|
|
2011
|
+
PREV_PRD_US_LIST=$(count_prd_us)
|
|
2012
|
+
|
|
1564
2013
|
# Dependency checks
|
|
1565
2014
|
check_dependencies
|
|
1566
2015
|
|
|
@@ -1583,7 +2032,7 @@ main() {
|
|
|
1583
2032
|
PREV_CONTEXT_HASH=$(compute_context_hash)
|
|
1584
2033
|
|
|
1585
2034
|
# --- governance.md s7: Leader Loop ---
|
|
1586
|
-
local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) #
|
|
2035
|
+
local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active
|
|
1587
2036
|
|
|
1588
2037
|
for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
|
|
1589
2038
|
log ""
|
|
@@ -1592,7 +2041,7 @@ main() {
|
|
|
1592
2041
|
ITER_START_TIME=$(date +%s)
|
|
1593
2042
|
local _iter_contract=""
|
|
1594
2043
|
_iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
|
|
1595
|
-
log_debug "[
|
|
2044
|
+
log_debug "[FLOW] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
|
|
1596
2045
|
|
|
1597
2046
|
# --- governance.md s7 step 1: Check sentinels ---
|
|
1598
2047
|
if [[ -f "$COMPLETE_SENTINEL" ]]; then
|
|
@@ -1625,122 +2074,92 @@ main() {
|
|
|
1625
2074
|
# Reset per-iteration state
|
|
1626
2075
|
local worker_nudge_count=0
|
|
1627
2076
|
local verifier_nudge_count=0
|
|
2077
|
+
ITER_VERIFIER_START=""
|
|
2078
|
+
ITER_VERIFIER_END=""
|
|
2079
|
+
|
|
2080
|
+
# --- US-004: detect PRD changes for live update + re-split ---
|
|
2081
|
+
check_prd_update
|
|
1628
2082
|
|
|
1629
2083
|
# --- governance.md s7 step 4: Build worker prompt + trigger ---
|
|
1630
2084
|
write_worker_trigger "$ITERATION"
|
|
1631
2085
|
local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
1632
2086
|
|
|
2087
|
+
# AC1: capture worker start timestamp
|
|
2088
|
+
ITER_WORKER_START=$(date +%s)
|
|
2089
|
+
|
|
1633
2090
|
update_status "worker" "running"
|
|
1634
2091
|
|
|
1635
|
-
# --- governance.md s7 step 5: Execute Worker (
|
|
1636
|
-
|
|
2092
|
+
# --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
|
|
2093
|
+
log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
2094
|
+
|
|
1637
2095
|
local worker_launch
|
|
1638
2096
|
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
1639
|
-
|
|
1640
|
-
|
|
2097
|
+
local worker_trigger="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-trigger.sh"
|
|
2098
|
+
worker_launch="bash $worker_trigger"
|
|
2099
|
+
launch_worker_codex "$WORKER_PANE" "$worker_trigger" "$ITERATION"
|
|
1641
2100
|
else
|
|
1642
2101
|
worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
log_debug "[EXEC] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
1648
|
-
|
|
1649
|
-
# Step 5b: Wait for claude TUI to be ready (tmux pattern)
|
|
1650
|
-
if ! wait_for_pane_ready "$WORKER_PANE" 30; then
|
|
1651
|
-
log_error "Worker claude failed to start"
|
|
1652
|
-
write_blocked_sentinel "Worker claude failed to start in pane"
|
|
1653
|
-
update_status "blocked" "worker_start_failed"
|
|
1654
|
-
return 1
|
|
1655
|
-
fi
|
|
1656
|
-
|
|
1657
|
-
# Step 5c: Wait for claude to fully initialize, then send instruction directly
|
|
1658
|
-
sleep 3
|
|
1659
|
-
local worker_instruction="Read and execute the instructions in $worker_prompt"
|
|
1660
|
-
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1661
|
-
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1662
|
-
log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
|
|
1663
|
-
|
|
1664
|
-
# Verify claude actually started working — keep sending C-m until activity detected
|
|
1665
|
-
local submit_attempts=0
|
|
1666
|
-
while (( submit_attempts < 15 )); do
|
|
1667
|
-
sleep 2
|
|
1668
|
-
local pane_check
|
|
1669
|
-
pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
|
|
1670
|
-
if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
1671
|
-
log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
|
|
1672
|
-
log_debug "[EXEC] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
|
|
1673
|
-
break
|
|
1674
|
-
fi
|
|
1675
|
-
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1676
|
-
if (( submit_attempts == 8 )); then
|
|
1677
|
-
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1678
|
-
tmux send-keys -t "$WORKER_PANE" C-u 2>/dev/null
|
|
1679
|
-
sleep 0.1
|
|
1680
|
-
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1681
|
-
tmux send-keys -t "$WORKER_PANE" Enter
|
|
2102
|
+
if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
2103
|
+
write_blocked_sentinel "Worker claude failed to start in pane"
|
|
2104
|
+
update_status "blocked" "worker_start_failed"
|
|
2105
|
+
return 1
|
|
1682
2106
|
fi
|
|
1683
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1684
|
-
sleep 0.3
|
|
1685
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1686
|
-
(( submit_attempts++ ))
|
|
1687
|
-
done
|
|
1688
|
-
if (( submit_attempts >= 15 )); then
|
|
1689
|
-
log " WARNING: Could not confirm Worker started working after 15 attempts"
|
|
1690
|
-
log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=15"
|
|
1691
2107
|
fi
|
|
1692
2108
|
|
|
1693
2109
|
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|
|
1694
2110
|
log " Polling for iter-signal.json..."
|
|
1695
2111
|
local worker_poll_done=0
|
|
1696
2112
|
while (( ! worker_poll_done )); do
|
|
2113
|
+
local worker_poll_rc=0
|
|
1697
2114
|
if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
|
|
1698
2115
|
worker_poll_done=1
|
|
1699
|
-
log_debug "[
|
|
2116
|
+
log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
|
|
1700
2117
|
else
|
|
2118
|
+
worker_poll_rc=$?
|
|
2119
|
+
if (( worker_poll_rc == 2 )); then
|
|
2120
|
+
return 1
|
|
2121
|
+
fi
|
|
1701
2122
|
# Check if Worker is still actively running (not stuck)
|
|
1702
2123
|
local worker_cmd
|
|
1703
2124
|
worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1704
2125
|
if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
|
|
1705
|
-
#
|
|
2126
|
+
# Process alive — extend indefinitely (no hard ceiling kill)
|
|
2127
|
+
# Stale-context breaker and nudge system handle truly stuck workers
|
|
1706
2128
|
local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
|
|
2129
|
+
local ceiling_exceeded=""
|
|
1707
2130
|
if (( iter_elapsed >= HARD_CEILING )); then
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
sleep 1
|
|
1712
|
-
WORKER_PANE=$(replace_worker_pane "$WORKER_PANE" "worker")
|
|
1713
|
-
update_status "worker" "hard_timeout"
|
|
1714
|
-
worker_poll_done=1
|
|
1715
|
-
break
|
|
2131
|
+
ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s — not enforced, logged only]"
|
|
2132
|
+
log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
|
|
2133
|
+
log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
|
|
1716
2134
|
fi
|
|
1717
|
-
log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s
|
|
1718
|
-
log_debug "[
|
|
1719
|
-
log_debug "[
|
|
2135
|
+
log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
|
|
2136
|
+
log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
|
|
2137
|
+
log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
|
|
1720
2138
|
update_status "worker" "slow"
|
|
1721
2139
|
# Loop continues — re-poll same iteration
|
|
1722
2140
|
else
|
|
1723
2141
|
# Worker is truly dead/stuck
|
|
1724
2142
|
(( MONITOR_FAILURE_COUNT++ ))
|
|
1725
|
-
log_debug "[
|
|
2143
|
+
log_debug "[GOV] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
|
|
1726
2144
|
if (( MONITOR_FAILURE_COUNT >= 3 )); then
|
|
1727
|
-
log_debug "[
|
|
2145
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
|
|
1728
2146
|
write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
|
|
1729
2147
|
update_status "blocked" "monitor_failures"
|
|
1730
2148
|
return 1
|
|
1731
2149
|
fi
|
|
1732
2150
|
log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
|
|
1733
2151
|
update_status "worker" "poll_failed"
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
2152
|
+
log_debug "[FLOW] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
|
|
2153
|
+
# Worker is truly dead/stuck — BLOCK and let user decide
|
|
2154
|
+
write_blocked_sentinel "Worker process dead/stuck (poll failed). Pane preserved for inspection."
|
|
2155
|
+
update_status "blocked" "worker_dead"
|
|
2156
|
+
return 1
|
|
1738
2157
|
fi
|
|
1739
2158
|
fi
|
|
1740
2159
|
done
|
|
1741
2160
|
|
|
1742
2161
|
if [[ ! -f "$SIGNAL_FILE" ]]; then
|
|
1743
|
-
log_debug "[
|
|
2162
|
+
log_debug "[FLOW] iter=$ITERATION no_signal_after_poll=true continuing"
|
|
1744
2163
|
# No signal — monitor failure, go to next iteration
|
|
1745
2164
|
continue
|
|
1746
2165
|
fi
|
|
@@ -1748,6 +2167,11 @@ main() {
|
|
|
1748
2167
|
# Reset monitor failure count on success
|
|
1749
2168
|
MONITOR_FAILURE_COUNT=0
|
|
1750
2169
|
|
|
2170
|
+
# AC1: capture worker end timestamp; reset consensus timing
|
|
2171
|
+
ITER_WORKER_END=$(date +%s)
|
|
2172
|
+
ITER_VERIFIER_CLAUDE_DURATION_S=""
|
|
2173
|
+
ITER_VERIFIER_CODEX_DURATION_S=""
|
|
2174
|
+
|
|
1751
2175
|
# --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
|
|
1752
2176
|
local signal_status
|
|
1753
2177
|
signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
|
|
@@ -1759,7 +2183,7 @@ main() {
|
|
|
1759
2183
|
# Read us_id early for EXEC logging (also used later in verify branch)
|
|
1760
2184
|
local signal_us_id_early=""
|
|
1761
2185
|
signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
1762
|
-
log_debug "[
|
|
2186
|
+
log_debug "[FLOW] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
|
|
1763
2187
|
|
|
1764
2188
|
case "$signal_status" in
|
|
1765
2189
|
continue)
|
|
@@ -1774,17 +2198,34 @@ main() {
|
|
|
1774
2198
|
signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
1775
2199
|
log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
|
|
1776
2200
|
|
|
2201
|
+
# AC1: capture verifier start timestamp
|
|
2202
|
+
ITER_VERIFIER_START=$(date +%s)
|
|
2203
|
+
|
|
1777
2204
|
update_status "verifier" "running"
|
|
1778
2205
|
|
|
1779
|
-
# ---
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
2206
|
+
# --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
|
|
2207
|
+
if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
|
|
2208
|
+
log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
|
|
2209
|
+
local seq_rc=0
|
|
2210
|
+
run_sequential_final_verify "$ITERATION" || seq_rc=$?
|
|
2211
|
+
if (( seq_rc == 0 )); then
|
|
2212
|
+
write_complete_sentinel "Sequential final verify passed (all US verified individually)"
|
|
2213
|
+
update_status "complete" "pass"
|
|
2214
|
+
write_campaign_jsonl "$ITERATION" "ALL" "pass"
|
|
2215
|
+
return 0
|
|
2216
|
+
else
|
|
2217
|
+
# Sequential verify failed — fall through to fix loop with failed US
|
|
2218
|
+
log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
|
|
2219
|
+
signal_us_id="${FAILED_US:-ALL}"
|
|
2220
|
+
# Synthesize a fail verdict for the fix loop
|
|
2221
|
+
echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
|
|
2222
|
+
fi
|
|
1786
2223
|
fi
|
|
1787
2224
|
|
|
2225
|
+
# --- Consensus scope check (US-005: _should_use_consensus handles VERIFY_CONSENSUS + FINAL_CONSENSUS) ---
|
|
2226
|
+
local use_consensus=0
|
|
2227
|
+
_should_use_consensus "$signal_us_id" && use_consensus=1
|
|
2228
|
+
|
|
1788
2229
|
# --- Consensus vs single verification ---
|
|
1789
2230
|
if (( use_consensus )); then
|
|
1790
2231
|
# US-004: Run consensus verification (claude + codex sequentially)
|
|
@@ -1806,78 +2247,65 @@ main() {
|
|
|
1806
2247
|
write_verifier_trigger "$ITERATION"
|
|
1807
2248
|
local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
|
|
1808
2249
|
|
|
1809
|
-
# Step 7a: Clean previous Verifier session
|
|
2250
|
+
# Step 7a: Clean previous Verifier session (with dead pane detection)
|
|
1810
2251
|
local verifier_cmd
|
|
1811
2252
|
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1812
|
-
if [[
|
|
2253
|
+
if [[ -z "$verifier_cmd" ]]; then
|
|
2254
|
+
log " Verifier pane $VERIFIER_PANE is gone — replacing..."
|
|
2255
|
+
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
|
|
2256
|
+
replace_worker_pane "$VERIFIER_PANE" "verifier"
|
|
2257
|
+
VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
|
|
2258
|
+
log " New verifier pane: $VERIFIER_PANE"
|
|
2259
|
+
elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
|
|
2260
|
+
log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
|
|
2261
|
+
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
2262
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
2263
|
+
sleep 0.2
|
|
2264
|
+
tmux send-keys -t "$VERIFIER_PANE" "clear" Enter 2>/dev/null
|
|
2265
|
+
sleep 0.3
|
|
2266
|
+
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1813
2267
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1814
2268
|
sleep 0.5
|
|
1815
2269
|
tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
|
|
1816
2270
|
sleep 2
|
|
1817
|
-
wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
|
|
1818
2271
|
fi
|
|
2272
|
+
wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
|
|
1819
2273
|
|
|
1820
2274
|
local verifier_launch
|
|
1821
2275
|
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
1822
2276
|
verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1823
|
-
log " Launching Verifier codex in pane $VERIFIER_PANE..."
|
|
1824
2277
|
else
|
|
1825
2278
|
verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
|
|
1826
|
-
log " Launching Verifier claude in pane $VERIFIER_PANE..."
|
|
1827
2279
|
fi
|
|
1828
|
-
|
|
1829
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1830
|
-
log_debug "[EXEC] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
|
|
1831
|
-
|
|
1832
|
-
# Step 7b: Wait for TUI to be ready
|
|
1833
|
-
if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
|
|
1834
|
-
log_error "Verifier failed to start"
|
|
1835
|
-
update_status "verifier" "start_failed"
|
|
1836
|
-
continue
|
|
1837
|
-
fi
|
|
1838
|
-
|
|
1839
|
-
# Step 7c: Send instruction
|
|
1840
|
-
sleep 3
|
|
1841
|
-
local verifier_instruction="Read and execute the instructions in $verifier_prompt"
|
|
1842
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1843
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1844
|
-
log_debug "Verifier instruction sent directly"
|
|
2280
|
+
log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
|
|
1845
2281
|
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
1853
|
-
log_debug "Verifier started working after $((vs_submit + 1)) checks"
|
|
1854
|
-
break
|
|
1855
|
-
fi
|
|
1856
|
-
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1857
|
-
if (( vs_submit == 8 )); then
|
|
1858
|
-
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1859
|
-
tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
|
|
1860
|
-
sleep 0.1
|
|
1861
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1862
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
2282
|
+
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
2283
|
+
launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
|
|
2284
|
+
else
|
|
2285
|
+
if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
|
|
2286
|
+
update_status "verifier" "start_failed"
|
|
2287
|
+
continue
|
|
1863
2288
|
fi
|
|
1864
|
-
|
|
1865
|
-
sleep 0.3
|
|
1866
|
-
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1867
|
-
(( vs_submit++ ))
|
|
1868
|
-
done
|
|
2289
|
+
fi
|
|
1869
2290
|
|
|
1870
2291
|
# Poll for verify-verdict.json
|
|
1871
2292
|
log " Polling for verify-verdict.json..."
|
|
1872
2293
|
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
|
|
2294
|
+
local verifier_poll_rc=$?
|
|
2295
|
+
if (( verifier_poll_rc == 2 )); then
|
|
2296
|
+
return 1
|
|
2297
|
+
fi
|
|
1873
2298
|
log_error "Verifier poll failed"
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
2299
|
+
# Verifier is dead/stuck — BLOCK and let user decide
|
|
2300
|
+
write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection."
|
|
2301
|
+
update_status "blocked" "verifier_dead"
|
|
2302
|
+
return 1
|
|
1878
2303
|
fi
|
|
1879
2304
|
fi
|
|
1880
2305
|
|
|
2306
|
+
# AC1: capture verifier end timestamp
|
|
2307
|
+
ITER_VERIFIER_END=$(date +%s)
|
|
2308
|
+
|
|
1881
2309
|
# --- governance.md s7 step 7: Read verdict via jq ---
|
|
1882
2310
|
local verdict
|
|
1883
2311
|
verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
|
|
@@ -1889,12 +2317,24 @@ main() {
|
|
|
1889
2317
|
log " Verifier: verdict=$verdict recommended=$recommended"
|
|
1890
2318
|
log " Verifier summary: \"$verdict_summary\""
|
|
1891
2319
|
local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
|
|
1892
|
-
log_debug "[
|
|
2320
|
+
log_debug "[GOV] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
|
|
1893
2321
|
|
|
1894
2322
|
case "$verdict" in
|
|
1895
2323
|
pass)
|
|
1896
2324
|
CONSECUTIVE_FAILURES=0
|
|
1897
2325
|
CONSENSUS_ROUND=0
|
|
2326
|
+
_SAME_US_FAIL_COUNT=0
|
|
2327
|
+
_LAST_FAILED_US=""
|
|
2328
|
+
if (( _MODEL_UPGRADED )); then
|
|
2329
|
+
log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
|
|
2330
|
+
log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
|
|
2331
|
+
WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
|
|
2332
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
2333
|
+
WORKER_CODEX_MODEL="$WORKER_MODEL"
|
|
2334
|
+
WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
|
|
2335
|
+
fi
|
|
2336
|
+
_MODEL_UPGRADED=0
|
|
2337
|
+
fi
|
|
1898
2338
|
|
|
1899
2339
|
# --- Per-US tracking ---
|
|
1900
2340
|
if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
|
|
@@ -1905,13 +2345,14 @@ main() {
|
|
|
1905
2345
|
VERIFIED_US="$signal_us_id"
|
|
1906
2346
|
fi
|
|
1907
2347
|
log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
|
|
1908
|
-
log_debug "[
|
|
2348
|
+
log_debug "[FLOW] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
|
|
1909
2349
|
update_status "verifier" "pass_us"
|
|
1910
2350
|
# Worker will do next US on next iteration
|
|
1911
2351
|
elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
|
|
1912
2352
|
# Final full verify passed or complete recommended
|
|
1913
2353
|
write_complete_sentinel "$verdict_summary"
|
|
1914
2354
|
update_status "complete" "pass"
|
|
2355
|
+
write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
|
|
1915
2356
|
return 0
|
|
1916
2357
|
else
|
|
1917
2358
|
log " Verifier passed but did not recommend complete. Continuing."
|
|
@@ -1921,6 +2362,7 @@ main() {
|
|
|
1921
2362
|
fail)
|
|
1922
2363
|
# --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
|
|
1923
2364
|
(( CONSECUTIVE_FAILURES++ ))
|
|
2365
|
+
check_model_upgrade "${signal_us_id:-unknown}"
|
|
1924
2366
|
local verdict_summary_fail
|
|
1925
2367
|
verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
1926
2368
|
log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
|
|
@@ -1940,13 +2382,21 @@ main() {
|
|
|
1940
2382
|
jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
|
|
1941
2383
|
} | atomic_write "$fix_contract"
|
|
1942
2384
|
log " Fix contract: $fix_contract"
|
|
1943
|
-
log_debug "[
|
|
1944
|
-
|
|
1945
|
-
# Circuit breaker: consecutive failures
|
|
1946
|
-
if (( CONSECUTIVE_FAILURES >=
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
2385
|
+
log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
|
|
2386
|
+
|
|
2387
|
+
# Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
|
|
2388
|
+
if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
|
|
2389
|
+
# For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
|
|
2390
|
+
_ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
|
|
2391
|
+
if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
|
|
2392
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
|
|
2393
|
+
log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
|
|
2394
|
+
write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2395
|
+
else
|
|
2396
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
|
|
2397
|
+
log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2398
|
+
write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
|
|
2399
|
+
fi
|
|
1950
2400
|
update_status "blocked" "consecutive_failures"
|
|
1951
2401
|
return 1
|
|
1952
2402
|
fi
|
|
@@ -1985,12 +2435,19 @@ main() {
|
|
|
1985
2435
|
;;
|
|
1986
2436
|
esac
|
|
1987
2437
|
|
|
2438
|
+
# --- step 7d: Archive iteration artifacts before cleanup ---
|
|
2439
|
+
archive_iter_artifacts "$ITERATION"
|
|
2440
|
+
|
|
2441
|
+
# --- AC5: Write per-iteration cost estimate ---
|
|
2442
|
+
write_cost_log "$ITERATION"
|
|
2443
|
+
write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
|
|
2444
|
+
|
|
1988
2445
|
# --- governance.md s7 step 8: Write result log ---
|
|
1989
2446
|
write_result_log "$ITERATION" "$signal_status"
|
|
1990
2447
|
|
|
1991
2448
|
# --- governance.md s7 step 8: Circuit breaker - stale context check ---
|
|
1992
2449
|
if ! check_stale_context; then
|
|
1993
|
-
log_debug "[
|
|
2450
|
+
log_debug "[GOV] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
|
|
1994
2451
|
write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
|
|
1995
2452
|
update_status "blocked" "stale_context"
|
|
1996
2453
|
return 1
|
|
@@ -2010,6 +2467,45 @@ main() {
|
|
|
2010
2467
|
# Entry Point
|
|
2011
2468
|
# =============================================================================
|
|
2012
2469
|
|
|
2470
|
+
# --- CLI: parse --worker-model / --verifier-model flags ---
# Walks the positional parameters by index; recognised flags override the
# env-var defaults (WORKER_ENGINE, WORKER_MODEL, etc.):
#   --worker-model <spec>    WORKER_ENGINE, WORKER_MODEL (+ WORKER_CODEX_* when codex)
#   --verifier-model <spec>  VERIFIER_ENGINE, VERIFIER_MODEL (+ VERIFIER_CODEX_* when codex)
#   --lock-worker-model      LOCK_WORKER_MODEL=1
#   --final-consensus        FINAL_CONSENSUS=1
# <spec> format: "model:reasoning" → codex engine; "model-name" → claude engine
# Any other argument is stepped over without being interpreted here.
_cli_pos=1
while (( _cli_pos <= $# )); do
  _cli_arg="${@[$_cli_pos]}"

  if [[ "$_cli_arg" = "--worker-model" ]]; then
    # The flag's value is the following argument; the :- fallback hands
    # parse_model_flag an empty string when the flag is the last argument.
    _cli_pos=$(( _cli_pos + 1 ))
    if ! _cli_spec=$(parse_model_flag "${@[$_cli_pos]:-}" "worker"); then
      exit 1
    fi
    # Split the helper's output on spaces: text before the first space is the
    # engine, the next field is the model, the text after the last space is
    # the reasoning level (only consumed for codex).
    _cli_tail="${_cli_spec#* }"
    WORKER_ENGINE="${_cli_spec%% *}"
    WORKER_MODEL="${_cli_tail%% *}"
    if [[ "$WORKER_ENGINE" = "codex" ]]; then
      WORKER_CODEX_REASONING="${_cli_tail##* }"
      WORKER_CODEX_MODEL="$WORKER_MODEL"
    fi

  elif [[ "$_cli_arg" = "--verifier-model" ]]; then
    # Same handling as --worker-model, applied to the VERIFIER_* variables.
    _cli_pos=$(( _cli_pos + 1 ))
    if ! _cli_spec=$(parse_model_flag "${@[$_cli_pos]:-}" "verifier"); then
      exit 1
    fi
    _cli_tail="${_cli_spec#* }"
    VERIFIER_ENGINE="${_cli_spec%% *}"
    VERIFIER_MODEL="${_cli_tail%% *}"
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      VERIFIER_CODEX_REASONING="${_cli_tail##* }"
      VERIFIER_CODEX_MODEL="$VERIFIER_MODEL"
    fi

  elif [[ "$_cli_arg" = "--lock-worker-model" ]]; then
    LOCK_WORKER_MODEL=1

  elif [[ "$_cli_arg" = "--final-consensus" ]]; then
    FINAL_CONSENSUS=1
  fi

  _cli_pos=$(( _cli_pos + 1 ))
done
# Drop the scratch variables so they do not linger in the script's global scope.
unset _cli_pos _cli_arg _cli_spec _cli_tail
|
|
2508
|
+
|
|
2013
2509
|
# Require tmux — tmux mode only works inside an active tmux session
|
|
2014
2510
|
if [[ -z "${TMUX:-}" ]]; then
|
|
2015
2511
|
echo "ERROR: tmux mode requires running inside a tmux session."
|