@ai-dev-methodologies/rlp-desk 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -8
- package/docs/architecture.md +34 -8
- package/docs/getting-started.md +2 -2
- package/docs/protocol-reference.md +267 -14
- package/examples/calculator/.claude/ralph-desk/context/loop-test-latest.md +12 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-output.log +0 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-prompt.md +38 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-trigger.sh +28 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/session-config.json +25 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/status.json +10 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/worker-heartbeat.json +1 -0
- package/examples/calculator/.claude/ralph-desk/memos/loop-test-memory.md +17 -0
- package/examples/calculator/.claude/ralph-desk/prompts/loop-test.worker.prompt.md +1 -1
- package/install.sh +14 -0
- package/package.json +1 -1
- package/scripts/postinstall.js +17 -1
- package/scripts/uninstall.js +1 -0
- package/src/commands/rlp-desk.md +112 -21
- package/src/governance.md +92 -7
- package/src/scripts/init_ralph_desk.zsh +51 -30
- package/src/scripts/run_ralph_desk.zsh +1259 -0
|
@@ -0,0 +1,1259 @@
|
|
|
1
|
+
#!/bin/zsh
# Shell options: fail on references to unset variables (-u) and make a
# pipeline report the failure of any of its stages (pipefail).
# errexit (-e) is deliberately NOT enabled because the main loop performs
# explicit error checks throughout.
set -u
set -o pipefail
|
|
5
|
+
|
|
6
|
+
# =============================================================================
|
|
7
|
+
# Ralph Desk Tmux Runner
|
|
8
|
+
#
|
|
9
|
+
# Implements the Leader loop from governance.md section 7 as a shell script.
|
|
10
|
+
# Uses omc-teams proven patterns: write-then-notify, pane IDs (%N),
|
|
11
|
+
# copy-mode guards, verification-based retry, heartbeat monitoring,
|
|
12
|
+
# idle pane nudging, exponential backoff restarts, atomic file writes.
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# LOOP_NAME=<slug> ./run_ralph_desk.zsh
|
|
16
|
+
#
|
|
17
|
+
# Required env:
|
|
18
|
+
# LOOP_NAME - slug identifier for the campaign
|
|
19
|
+
#
|
|
20
|
+
# Optional env:
|
|
21
|
+
# ROOT - project root (default: $PWD)
|
|
22
|
+
# MAX_ITER - max iterations (default: 20)
|
|
23
|
+
# WORKER_MODEL - claude model for Worker (default: sonnet)
|
|
24
|
+
# VERIFIER_MODEL - claude model for Verifier (default: opus)
|
|
25
|
+
# POLL_INTERVAL - seconds between signal checks (default: 5)
|
|
26
|
+
# ITER_TIMEOUT - per-iteration timeout in seconds (default: 600)
|
|
27
|
+
# HEARTBEAT_STALE_THRESHOLD - seconds before heartbeat is stale (default: 120)
|
|
28
|
+
# MAX_RESTARTS - max restart attempts per worker (default: 3)
|
|
29
|
+
# IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
|
|
30
|
+
# MAX_NUDGES - max nudges per pane per iteration (default: 3)
|
|
31
|
+
#
|
|
32
|
+
# Dependencies: tmux, claude CLI, jq
|
|
33
|
+
# =============================================================================
|
|
34
|
+
|
|
35
|
+
# --- Environment Variables ---
# LOOP_NAME is mandatory (the script aborts with the message below when it is
# missing); every other setting keeps a caller-supplied value or falls back to
# its default.
SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
: "${ROOT:=$PWD}"
: "${MAX_ITER:=20}"
: "${WORKER_MODEL:=sonnet}"
: "${VERIFIER_MODEL:=opus}"
: "${POLL_INTERVAL:=5}"
: "${ITER_TIMEOUT:=600}"
: "${HEARTBEAT_STALE_THRESHOLD:=120}"
: "${MAX_RESTARTS:=3}"
: "${IDLE_NUDGE_THRESHOLD:=30}"
: "${MAX_NUDGES:=3}"

# --- Derived Paths ---
# Everything lives under <ROOT>/.claude/ralph-desk; per-campaign files are
# keyed by the slug.
DESK="${ROOT}/.claude/ralph-desk"
PROMPTS_DIR="${DESK}/prompts"
CONTEXT_DIR="${DESK}/context"
MEMOS_DIR="${DESK}/memos"
LOGS_DIR="${DESK}/logs/${SLUG}"

WORKER_PROMPT_BASE="${PROMPTS_DIR}/${SLUG}.worker.prompt.md"
VERIFIER_PROMPT_BASE="${PROMPTS_DIR}/${SLUG}.verifier.prompt.md"
CONTEXT_FILE="${CONTEXT_DIR}/${SLUG}-latest.md"

MEMORY_FILE="${MEMOS_DIR}/${SLUG}-memory.md"
SIGNAL_FILE="${MEMOS_DIR}/${SLUG}-iter-signal.json"
DONE_CLAIM_FILE="${MEMOS_DIR}/${SLUG}-done-claim.json"
VERDICT_FILE="${MEMOS_DIR}/${SLUG}-verify-verdict.json"
COMPLETE_SENTINEL="${MEMOS_DIR}/${SLUG}-complete.md"
BLOCKED_SENTINEL="${MEMOS_DIR}/${SLUG}-blocked.md"

STATUS_FILE="${LOGS_DIR}/status.json"
SESSION_CONFIG="${LOGS_DIR}/session-config.json"
WORKER_HEARTBEAT="${LOGS_DIR}/worker-heartbeat.json"
VERIFIER_HEARTBEAT="${LOGS_DIR}/verifier-heartbeat.json"

# --- Session Naming ---
# The timestamp suffix keeps session names distinct across runs of one slug.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"

# --- State Tracking ---
# Associative arrays: the first two are keyed by tmux pane id, the third by
# iteration number.
typeset -A LAST_PANE_CONTENT PANE_IDLE_SINCE WORKER_RESTARTS
STALE_CONTEXT_COUNT=0
HEARTBEAT_STALE_COUNT=0
MONITOR_FAILURE_COUNT=0
CONSECUTIVE_FAILURES=0
PREV_CONTEXT_HASH=""
ITERATION=0
START_TIME=$(date +%s)
|
|
83
|
+
|
|
84
|
+
# =============================================================================
|
|
85
|
+
# Utility Functions
|
|
86
|
+
# =============================================================================
|
|
87
|
+
|
|
88
|
+
# Debug logging is off unless the caller exports DEBUG (log_debug tests it
# arithmetically, so any non-zero integer enables it).
: "${DEBUG:=0}"
# File that log_debug appends to.
DEBUG_LOG="${ROOT}/.claude/ralph-desk/logs/${LOOP_NAME:-unknown}/debug.log"
|
|
90
|
+
|
|
91
|
+
# Print a timestamped informational message on stdout.
# Arguments: the message words (joined with single spaces).
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] $*"
}
|
|
94
|
+
|
|
95
|
+
# Append a timestamped DEBUG line to $DEBUG_LOG when DEBUG is non-zero.
# Does nothing (and succeeds) when debugging is disabled.
# Globals:   DEBUG (read), DEBUG_LOG (read)
# Arguments: the message words (joined with single spaces).
log_debug() {
  (( DEBUG )) || return 0

  local log_dir stamp
  log_dir=$(dirname "$DEBUG_LOG")
  # The log directory may not exist yet; creation errors are ignored here and
  # surface through the append below instead.
  mkdir -p "$log_dir" 2>/dev/null
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] DEBUG: $*" >> "$DEBUG_LOG"
}
|
|
101
|
+
|
|
102
|
+
# Print a timestamped ERROR message on stderr.
# Arguments: the message words (joined with single spaces).
log_error() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] ERROR: $*" >&2
}
|
|
105
|
+
|
|
106
|
+
# --- governance.md s7: Atomic file writes (omc-teams pattern) ---
# All file writes by the Leader use tmp+mv to prevent corruption.
#
# Copies stdin into a temporary sibling of the target and then renames it
# over the target, so readers never see a half-written file.
# Arguments: $1 - destination path
# Inputs:    stdin - the content to write
# Returns:   0 on success; 1 when staging or the rename failed. On failure
#            the target is left untouched and the temp file is removed.
atomic_write() {
  local target="$1"
  local tmp="${target}.tmp.$$"

  # Publish only a completely written temp file: a failed or truncated write
  # must never replace the existing target.
  if ! cat > "$tmp"; then
    rm -f -- "$tmp"
    return 1
  fi

  if ! mv -f -- "$tmp" "$target"; then
    rm -f -- "$tmp"
    return 1
  fi
  return 0
}
|
|
114
|
+
|
|
115
|
+
# =============================================================================
|
|
116
|
+
# Dependency Checks
|
|
117
|
+
# =============================================================================
|
|
118
|
+
|
|
119
|
+
# --- governance.md s7 step 1: Validate prerequisites before starting ---
# Verify that tmux, the claude CLI and jq are on PATH. Every missing tool is
# reported before the script exits with status 1, so the user sees the full
# list in one run.
# Globals: CLAUDE_BIN (written) - resolved location of the claude command,
#          or the bare name "claude" if resolution yields nothing.
check_dependencies() {
  local -i absent=0
  local tool

  for tool in tmux claude jq; do
    command -v "$tool" >/dev/null 2>&1 && continue
    absent=1
    case "$tool" in
      tmux)
        log_error "tmux is required but not found. Install with: brew install tmux"
        ;;
      claude)
        log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
        ;;
      jq)
        log_error "jq is required but not found. Install with: brew install jq"
        ;;
    esac
  done

  if (( absent )); then
    exit 1
  fi

  # Resolve full path to claude binary for reliable launches
  CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
  log " Claude binary: $CLAUDE_BIN"
}
|
|
146
|
+
|
|
147
|
+
# =============================================================================
|
|
148
|
+
# Scaffold Validation
|
|
149
|
+
# =============================================================================
|
|
150
|
+
|
|
151
|
+
# Ensure the campaign scaffold exists: worker prompt, verifier prompt, context
# file and memory file. Every missing file is reported; if any is missing the
# script exits with status 1. On success the per-campaign log directory is
# created.
# Globals: WORKER_PROMPT_BASE, VERIFIER_PROMPT_BASE, CONTEXT_FILE,
#          MEMORY_FILE, LOGS_DIR (all read)
validate_scaffold() {
  local -i failed=0
  local entry label required

  # Each entry is "<label>:<path>"; the label never contains a colon, so the
  # first colon is always the separator.
  for entry in \
    "Worker prompt:$WORKER_PROMPT_BASE" \
    "Verifier prompt:$VERIFIER_PROMPT_BASE" \
    "Context file:$CONTEXT_FILE" \
    "Memory file:$MEMORY_FILE"; do
    label="${entry%%:*}"
    required="${entry#*:}"
    [[ -f "$required" ]] && continue
    log_error "${label} not found: ${required}"
    failed=1
  done

  if (( failed )); then
    log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
    exit 1
  fi

  mkdir -p "$LOGS_DIR"
}
|
|
181
|
+
|
|
182
|
+
# =============================================================================
|
|
183
|
+
# Session Management (omc-teams pattern: pane IDs)
|
|
184
|
+
# =============================================================================
|
|
185
|
+
|
|
186
|
+
# --- governance.md s7 step 1: Check for existing sessions ---
# Abort (exit 1) when another tmux session named "rlp-desk-<slug>-..." already
# exists. The session this script is running in, if any, is not counted.
#
# Names are compared as literal strings: the slug and the current session
# name are never used as regular expressions, so characters such as "." in a
# slug cannot match unrelated sessions.
# Globals: SLUG (read)
check_existing_sessions() {
  local current_session session
  local -a existing
  existing=()

  current_session=$(tmux display-message -p '#{session_name}' 2>/dev/null || echo "")

  while IFS= read -r session; do
    [[ "$session" == "rlp-desk-${SLUG}-"* ]] || continue
    [[ "$session" == "$current_session" ]] && continue
    existing+=("$session")
  done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null)

  if (( ${#existing[@]} > 0 )); then
    log_error "Existing tmux session(s) found for slug '$SLUG':"
    for session in "${existing[@]}"; do
      echo " - $session"
    done
    echo ""
    echo "Kill existing session first:"
    echo " tmux kill-session -t <session-name>"
    exit 1
  fi
}
|
|
203
|
+
|
|
204
|
+
# --- governance.md s7 step 1: Create tmux session with pane IDs (%N) ---
# Set up the three-pane layout (leader | worker over verifier) and record it
# in $SESSION_CONFIG.
#
# Inside tmux ($TMUX set) the current pane becomes the leader and is split in
# place; SESSION_NAME is replaced by the name of the current session.
# Outside tmux a new *detached* session named $SESSION_NAME is created; this
# function does not attach to it.
#
# Globals: LEADER_PANE, WORKER_PANE, VERIFIER_PANE (written),
#          SESSION_NAME (read, rewritten when inside tmux),
#          ROOT, SLUG, models and tuning knobs (read), SESSION_CONFIG (read)
# Returns: 1 if the session config could not be rendered as JSON; otherwise
#          the status of the final log call.
create_session() {
  local config_json

  log "Creating tmux session: $SESSION_NAME"

  # omc-teams split-pane pattern
  if [[ -n "${TMUX:-}" ]]; then
    # Inside tmux: the current pane stays as the leader.
    LEADER_PANE=$(tmux display-message -p '#{pane_id}')
    SESSION_NAME=$(tmux display-message -p '#{session_name}')
    log " Splitting current pane in session: $SESSION_NAME"
  else
    # Outside tmux: create a detached session rooted at $ROOT.
    tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT"
    LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
  fi

  # -h off the leader pane -> right column (worker);
  # -v off the worker pane -> stacked below it (verifier).
  WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")

  log " Leader pane: $LEADER_PANE"
  log " Worker pane: $WORKER_PANE"
  log " Verifier pane: $VERIFIER_PANE"

  # Render the config with jq so every string is escaped correctly (paths and
  # names may contain quotes or backslashes). Numeric settings are passed with
  # --argjson and must therefore be valid JSON numbers.
  if ! config_json=$(jq -n \
      --arg session_name "$SESSION_NAME" \
      --arg slug "$SLUG" \
      --arg created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      --arg leader "$LEADER_PANE" \
      --arg worker "$WORKER_PANE" \
      --arg verifier "$VERIFIER_PANE" \
      --argjson pid "$$" \
      --arg root "$ROOT" \
      --arg worker_model "$WORKER_MODEL" \
      --arg verifier_model "$VERIFIER_MODEL" \
      --argjson max_iter "$MAX_ITER" \
      --argjson poll_interval "$POLL_INTERVAL" \
      --argjson iter_timeout "$ITER_TIMEOUT" \
      --argjson heartbeat_stale_threshold "$HEARTBEAT_STALE_THRESHOLD" \
      --argjson max_restarts "$MAX_RESTARTS" \
      --argjson idle_nudge_threshold "$IDLE_NUDGE_THRESHOLD" \
      --argjson max_nudges "$MAX_NUDGES" \
      '{
        session_name: $session_name,
        slug: $slug,
        created_at: $created_at,
        panes: {leader: $leader, worker: $worker, verifier: $verifier},
        pid: $pid,
        root: $root,
        models: {worker: $worker_model, verifier: $verifier_model},
        config: {
          max_iter: $max_iter,
          poll_interval: $poll_interval,
          iter_timeout: $iter_timeout,
          heartbeat_stale_threshold: $heartbeat_stale_threshold,
          max_restarts: $max_restarts,
          idle_nudge_threshold: $idle_nudge_threshold,
          max_nudges: $max_nudges
        }
      }'); then
    log_error "Failed to render session config JSON (are all numeric settings valid numbers?)"
    return 1
  fi

  # Write session config (atomic write)
  printf '%s\n' "$config_json" | atomic_write "$SESSION_CONFIG"

  log " Session config: $SESSION_CONFIG"
}
|
|
264
|
+
|
|
265
|
+
# =============================================================================
|
|
266
|
+
# Copy-Mode Guard (omc-teams pattern)
|
|
267
|
+
# =============================================================================
|
|
268
|
+
|
|
269
|
+
# --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
# Report whether keys may be sent to a pane.
# Arguments: $1 - tmux pane id
# Returns:   0 when the pane is not in a mode; 1 when tmux reports
#            pane_in_mode=1 (keys would be captured by the mode) or when the
#            pane could not be queried at all.
check_copy_mode() {
  local pane_id="$1"
  local mode_flag

  if ! mode_flag=$(tmux display-message -p -t "$pane_id" '#{pane_in_mode}' 2>/dev/null); then
    return 1
  fi

  [[ "$mode_flag" -eq 1 ]] && return 1 # pane is in copy mode, cannot send keys
  return 0
}
|
|
279
|
+
|
|
280
|
+
# =============================================================================
|
|
281
|
+
# Verification-Based Send Retry (omc-teams pattern)
|
|
282
|
+
# =============================================================================
|
|
283
|
+
|
|
284
|
+
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
# Type a short text into a pane and submit it, verifying that the text left
# the input line (omc-teams sendToWorker pattern).
#
# Arguments: $1 - tmux pane id
#            $2 - text to type; an empty string means "just press Enter"
# Returns:   1 if the pane is (or enters) copy mode, in which case nothing
#            more is sent; 0 otherwise. A 0 after all retries is fail-open:
#            the text may or may not have been submitted.
safe_send_keys() {
  local pane_id="$1"
  local text="$2"
  # All locals are declared once here. In zsh, re-executing a bare
  # `local name` inside a loop prints the variable's current value on every
  # later iteration, which would dump pane contents to stdout.
  local initial_capture check_capture retry_capture
  local pane_busy=0 round=0 retry_round=0

  # Guard: copy-mode captures keys; skip entirely
  if ! check_copy_mode "$pane_id"; then
    log_debug " Pane $pane_id in copy mode, skipping send"
    return 1
  fi

  # Inspect the pane once: a running task changes how the first submit is
  # done, and a trust prompt is dismissed before typing.
  initial_capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)
  if printf '%s\n' "$initial_capture" | grep -q "esc to interrupt" 2>/dev/null; then
    pane_busy=1
  fi
  if printf '%s\n' "$initial_capture" | grep -q "Do you trust" 2>/dev/null; then
    log_debug " Trust prompt detected, dismissing"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.12
    tmux send-keys -t "$pane_id" C-m
    sleep 0.2
  fi

  # Send text in literal mode with -- separator
  log_debug " Sending text to pane $pane_id (${#text} chars)"
  tmux send-keys -t "$pane_id" -l -- "$text"

  # Allow input buffer to settle (omc-teams: 150ms)
  sleep 0.15

  # Submit: up to 6 rounds of C-m double-press
  while (( round < 6 )); do
    sleep 0.1
    if (( round == 0 && pane_busy )); then
      # Busy pane: Tab+C-m queue semantics (omc-teams pattern)
      tmux send-keys -t "$pane_id" Tab
      sleep 0.08
      tmux send-keys -t "$pane_id" C-m
    else
      tmux send-keys -t "$pane_id" C-m
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    sleep 0.14

    # An empty text is a bare Enter nudge. There is nothing to look for in
    # the pane (an empty pattern matches every line), so one submit round is
    # all that can meaningfully be done.
    if [[ -z "$text" ]]; then
      log_debug " Empty text: Enter sent, nothing to verify"
      return 0
    fi

    # Check if text was consumed: it should no longer be visible in the last
    # lines of the pane.
    check_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! printf '%s\n' "$check_capture" | grep -qF -- "$text" 2>/dev/null; then
      log_debug " Text consumed after round $((round + 1))"
      return 0
    fi
    sleep 0.14
    (( round++ ))
  done

  # Safety gate: copy-mode check
  if ! check_copy_mode "$pane_id"; then
    log_debug " Copy mode activated during send, aborting"
    return 1
  fi

  # Adaptive fallback: C-u clear line, resend (omc-teams pattern)
  log_debug " Adaptive retry — clearing line and resending"
  tmux send-keys -t "$pane_id" C-u
  sleep 0.08
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" -l -- "$text"
  sleep 0.12
  while (( retry_round < 4 )); do
    tmux send-keys -t "$pane_id" C-m
    sleep 0.18
    tmux send-keys -t "$pane_id" C-m
    sleep 0.14
    retry_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! printf '%s\n' "$retry_capture" | grep -qF -- "$text" 2>/dev/null; then
      log_debug " Text consumed after adaptive retry round $((retry_round + 1))"
      return 0
    fi
    (( retry_round++ ))
  done

  # Fail-open: one last nudge
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" C-m
  sleep 0.12
  tmux send-keys -t "$pane_id" C-m
  log_debug " Fail-open — text may or may not have been submitted"
  return 0
}
|
|
386
|
+
|
|
387
|
+
# =============================================================================
|
|
388
|
+
# Wait for Pane Ready (omc-teams pattern: paneLooksReady)
|
|
389
|
+
# =============================================================================
|
|
390
|
+
|
|
391
|
+
# Poll a pane until it shows an input prompt and no running task
# (omc-teams paneLooksReady pattern), auto-dismissing a trust prompt if one
# appears.
#
# Arguments: $1 - tmux pane id
#            $2 - timeout in seconds (default: 10)
# Returns:   always 0. On timeout it proceeds anyway (fail-open) and leaves
#            delivery problems to safe_send_keys.
wait_for_pane_ready() {
  local pane_id="$1"
  local timeout="${2:-10}" # omc-teams default: 10s
  # Declared once, outside the loop: in zsh a bare `local name` that is
  # executed again for an already-set variable prints its value, which would
  # echo the captured pane content on every poll.
  local start captured line trimmed
  local ready=0

  start=$(date +%s)
  log " Waiting for pane $pane_id ready..."

  while (( $(date +%s) - start < timeout )); do
    captured=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

    # Auto-dismiss trust prompt (omc-teams pattern: paneHasTrustPrompt)
    if printf '%s\n' "$captured" | grep -q "Do you trust" 2>/dev/null; then
      log " Trust prompt detected, auto-dismissing..."
      tmux send-keys -t "$pane_id" Enter
      sleep 0.12
      tmux send-keys -t "$pane_id" Enter
      sleep 2
      continue
    fi

    # Check 1: any captured line that starts with a prompt character. Only a
    # single leading space is stripped here; deeper indentation is handled by
    # the grep checks below.
    ready=0
    while IFS= read -r line; do
      trimmed="${line## }"
      if [[ "$trimmed" == ❯* || "$trimmed" == \>* || "$trimmed" == ›* || "$trimmed" == »* ]]; then
        ready=1
        break
      fi
    done <<< "$captured"

    # Check 2: a prompt character after arbitrary leading whitespace near the
    # bottom of the pane ("❯" or "›" in the last 5 lines, additionally ">" in
    # the last 3).
    if (( ! ready )) && printf '%s\n' "$captured" | tail -5 | grep -qE '^\s*[❯›]' 2>/dev/null; then
      ready=1
    fi
    if (( ! ready )) && printf '%s\n' "$captured" | tail -3 | grep -qE '^\s*[❯›>]' 2>/dev/null; then
      ready=1
    fi

    # A prompt alone is not enough: "esc to interrupt" means a task is still
    # running.
    if (( ready )) && ! printf '%s\n' "$captured" | grep -q "esc to interrupt" 2>/dev/null; then
      log " Pane $pane_id is ready."
      return 0
    fi
    sleep 0.25
  done

  # Timeout — return success anyway (fail-open, let safe_send_keys handle it)
  log " Pane $pane_id ready timeout after ${timeout}s (proceeding anyway)"
  return 0
}
|
|
438
|
+
|
|
439
|
+
# =============================================================================
|
|
440
|
+
# Heartbeat Monitoring (omc-teams pattern)
|
|
441
|
+
# =============================================================================
|
|
442
|
+
|
|
443
|
+
# --- governance.md s7 step 5+6: Check heartbeat freshness ---
# Decide whether a heartbeat file is fresh.
# Arguments: $1 - path to a heartbeat JSON file with an "epoch" field
# Globals:   HEARTBEAT_STALE_THRESHOLD (read) - maximum age in seconds
# Returns:   0 when the recorded epoch is less than the threshold old;
#            1 when the file is missing, unreadable, has no usable integer
#            epoch, or is stale.
check_heartbeat() {
  local hb_file="$1"
  local threshold="$HEARTBEAT_STALE_THRESHOLD"
  local hb_epoch now_epoch

  [[ -f "$hb_file" ]] || return 1

  # Read epoch seconds directly (avoids timezone parsing bugs)
  hb_epoch=$(jq -r '.epoch // empty' "$hb_file" 2>/dev/null) || return 1

  # Accept only a plain non-negative integer. The value goes into an
  # arithmetic expression below, where any other string would be evaluated
  # as an expression (including embedded command substitutions).
  [[ "$hb_epoch" =~ ^[0-9]+$ ]] || return 1

  now_epoch=$(date +%s)
  (( now_epoch - hb_epoch < threshold ))
}
|
|
463
|
+
|
|
464
|
+
# Check if heartbeat indicates process has exited
# Arguments: $1 - path to a heartbeat JSON file
# Returns:   0 only when the file exists and its "status" field is exactly
#            "exited"; 1 otherwise (including a missing or unparsable file).
check_heartbeat_exited() {
  local hb_file="$1"
  local hb_status

  [[ -f "$hb_file" ]] || return 1

  hb_status=$(jq -r '.status // empty' "$hb_file" 2>/dev/null)
  if [[ "$hb_status" == "exited" ]]; then
    return 0
  fi
  return 1
}
|
|
474
|
+
|
|
475
|
+
# =============================================================================
|
|
476
|
+
# Idle Pane Nudging (omc-teams pattern)
|
|
477
|
+
# =============================================================================
|
|
478
|
+
|
|
479
|
+
# --- governance.md s7 step 5+6: Nudge idle panes ---
# Compare the last three lines of a pane with the previous snapshot. When
# they have not changed for more than IDLE_NUDGE_THRESHOLD seconds and the
# nudge budget is not used up, send a bare Enter and count the nudge.
#
# Arguments: $1 - tmux pane id
#            $2 - NAME of the caller's variable holding the nudge count; it
#                 is read and updated by name
# Globals:   LAST_PANE_CONTENT, PANE_IDLE_SINCE (read/written),
#            IDLE_NUDGE_THRESHOLD, MAX_NUDGES (read)
check_and_nudge_idle_pane() {
  local pane_id="$1"
  local nudge_count_var="$2"
  local snapshot idle_start now_ts sent

  snapshot=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -3)

  # Output moved since the last poll: remember it and restart the idle clock.
  if [[ "$snapshot" != "${LAST_PANE_CONTENT[$pane_id]:-}" ]]; then
    LAST_PANE_CONTENT[$pane_id]="$snapshot"
    PANE_IDLE_SINCE[$pane_id]=$(date +%s)
    return
  fi

  # Unchanged output: with no recorded start the pane counts as idle since
  # "now", so no nudge is due yet.
  idle_start="${PANE_IDLE_SINCE[$pane_id]:-$(date +%s)}"
  now_ts=$(date +%s)
  (( now_ts - idle_start > IDLE_NUDGE_THRESHOLD )) || return 0

  # (P) dereferences the variable whose name the caller passed in.
  sent=${(P)nudge_count_var}
  (( sent < MAX_NUDGES )) || return 0

  log " Nudging idle pane $pane_id (nudge $((sent + 1))/$MAX_NUDGES)"
  safe_send_keys "$pane_id" ""
  (( sent++ ))
  # Store the new count back into the caller's variable.
  eval "$nudge_count_var=$sent"
}
|
|
504
|
+
|
|
505
|
+
# =============================================================================
|
|
506
|
+
# Exponential Backoff Restart (omc-teams pattern)
|
|
507
|
+
# =============================================================================
|
|
508
|
+
|
|
509
|
+
# --- governance.md s7 step 5: Restart dead workers with backoff ---
# Restart the claude process in a worker pane, waiting longer before each
# successive attempt within the same iteration.
#
# Arguments: $1 - tmux pane id
#            $2 - iteration number (key into WORKER_RESTARTS)
#            $3 - trigger file path (accepted but not used in this function)
# Globals:   WORKER_RESTARTS (read/written), MAX_RESTARTS, CLAUDE_BIN,
#            WORKER_MODEL (read)
# Returns:   1 when the restart budget for this iteration is exhausted (the
#            caller is expected to write BLOCKED); 0 after a restart was sent.
restart_worker() {
  local pane_id="$1"
  local iter="$2"
  local trigger_file="$3"
  local attempts="${WORKER_RESTARTS[$iter]:-0}"
  local pause

  if (( attempts >= MAX_RESTARTS )); then
    log_error "Worker exceeded max restarts ($MAX_RESTARTS) for iteration $iter"
    return 1 # caller writes BLOCKED
  fi

  # Exponential backoff: 5s, 10s, 20s, 60s (cap)
  case "$attempts" in
    0) pause=5 ;;
    1) pause=10 ;;
    2) pause=20 ;;
    *) pause=60 ;;
  esac
  log " Restarting worker (attempt $((attempts + 1))/$MAX_RESTARTS) after ${pause}s backoff..."
  sleep "$pause"

  # Kill existing claude, wait for shell prompt
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
  sleep 2

  # Re-launch claude (omc-teams interactive pattern)
  safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
  WORKER_RESTARTS[$iter]=$((attempts + 1))
  return 0
}
|
|
537
|
+
|
|
538
|
+
# =============================================================================
|
|
539
|
+
# Write-Then-Notify: Trigger Script Generation (omc-teams CRITICAL pattern)
|
|
540
|
+
# =============================================================================
|
|
541
|
+
|
|
542
|
+
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
# NEVER send prompt content through tmux send-keys.
# Write payloads to files, send only short trigger commands (<200 chars).
#
# Generate the worker's prompt file and an executable trigger script for one
# iteration.
#
# The prompt is the base worker prompt plus an "Iteration Context" section
# taken from the memory file, and the previous iteration's fix contract if
# that file exists. The trigger script maintains a heartbeat file in the
# background, runs `claude -p` on the prompt while tee-ing its output to the
# iteration log, then marks the heartbeat as exited.
#
# Arguments: $1 - iteration number
# Globals:   LOGS_DIR, MEMORY_FILE, WORKER_PROMPT_BASE, WORKER_HEARTBEAT,
#            WORKER_MODEL (read)
# Outputs:   $LOGS_DIR/iter-NNN.worker-prompt.md and
#            $LOGS_DIR/iter-NNN.worker-trigger.sh
write_worker_trigger() {
  local iter="$1"
  local iter_tag prev_tag contract
  local prev_iter=$((iter - 1))

  iter_tag=$(printf '%03d' "$iter")
  prev_tag=$(printf '%03d' "$prev_iter")

  local prompt_file="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.worker-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.worker-output.log"
  # Check for fix contract from previous verifier failure
  local fix_contract_file="$LOGS_DIR/iter-${prev_tag}.fix-contract.md"

  # Build the worker prompt: base prompt + iteration context.
  # The contract is at most the first 5 lines under the
  # "## Next Iteration Contract" heading of the memory file.
  contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -5)

  {
    cat "$WORKER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Iteration Context"
    echo "- **Iteration**: $iter"
    echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
    echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"

    # Include fix contract if previous verifier failed
    if [[ -f "$fix_contract_file" ]]; then
      echo ""
      echo "---"
      echo "## IMPORTANT: Fix Contract from Verifier (iteration $prev_iter)"
      echo "The Verifier REJECTED your previous work. You MUST fix the issues below."
      echo "Do NOT just resubmit — actually change the code to address each issue."
      echo ""
      cat "$fix_contract_file"
    fi
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # The here-document is unquoted on purpose: unescaped $vars are filled in
  # now, while \$-escaped ones are left for the generated script to expand.
  # File paths and the model are emitted inside double quotes so that a ROOT
  # containing spaces still yields a working script.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter worker - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$WORKER_HEARTBEAT"

# Background heartbeat writer (omc-teams pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

# Run claude with fresh context (governance.md s7 step 5)
claude -p "\$(cat "$prompt_file")" \\
  --model "$WORKER_MODEL" \\
  --dangerously-skip-permissions \\
  --output-format text \\
  2>&1 | tee "$output_log"

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Worker prompt: $prompt_file"
  log " Worker trigger: $trigger_file"
}
|
|
618
|
+
|
|
619
|
+
# Generate the verifier's prompt file and an executable trigger script for
# one iteration.
#
# The prompt is the base verifier prompt plus a "Verification Context"
# section naming the iteration and the done-claim file. The trigger script
# maintains a heartbeat file in the background, runs `claude -p` on the
# prompt while tee-ing its output to the iteration log, then marks the
# heartbeat as exited.
#
# Arguments: $1 - iteration number
# Globals:   LOGS_DIR, VERIFIER_PROMPT_BASE, DONE_CLAIM_FILE,
#            VERIFIER_HEARTBEAT, VERIFIER_MODEL (read)
# Outputs:   $LOGS_DIR/iter-NNN.verifier-prompt.md and
#            $LOGS_DIR/iter-NNN.verifier-trigger.sh
write_verifier_trigger() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")

  local prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.verifier-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.verifier-output.log"

  # Build verifier prompt from base
  {
    cat "$VERIFIER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Verification Context"
    echo "- **Iteration**: $iter"
    echo "- **Done Claim**: $DONE_CLAIM_FILE"
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # The here-document is unquoted on purpose: unescaped $vars are filled in
  # now, while \$-escaped ones are left for the generated script to expand.
  # File paths and the model are emitted inside double quotes so that a ROOT
  # containing spaces still yields a working script.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter verifier - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"

# Background heartbeat writer (omc-teams pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

# Run claude with fresh context (governance.md s7 step 7)
claude -p "\$(cat "$prompt_file")" \\
  --model "$VERIFIER_MODEL" \\
  --dangerously-skip-permissions \\
  --output-format text \\
  2>&1 | tee "$output_log"

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Verifier prompt: $prompt_file"
  log " Verifier trigger: $trigger_file"
}
|
|
673
|
+
|
|
674
|
+
# =============================================================================
|
|
675
|
+
# Status Updates
|
|
676
|
+
# =============================================================================
|
|
677
|
+
|
|
678
|
+
# --- governance.md s7 step 8: Update status.json ---
# Write the campaign's current state to $STATUS_FILE as JSON.
#
# The document is rendered with jq so that free-form text in the phase or
# result (quotes, backslashes, newlines) is escaped correctly instead of
# producing invalid JSON.
#
# Arguments: $1 - phase label
#            $2 - last result text
# Globals:   SLUG, ITERATION, MAX_ITER, WORKER_MODEL, VERIFIER_MODEL,
#            CONSECUTIVE_FAILURES, STATUS_FILE (read)
# Returns:   1 if the JSON could not be rendered (the status file is then
#            left untouched); otherwise the status of the atomic write.
update_status() {
  local phase="$1"
  local last_result="$2"
  local status_json

  # Counters are passed with --argjson and must be valid JSON numbers.
  if ! status_json=$(jq -n \
      --arg slug "$SLUG" \
      --argjson iteration "$ITERATION" \
      --argjson max_iter "$MAX_ITER" \
      --arg phase "$phase" \
      --arg worker_model "$WORKER_MODEL" \
      --arg verifier_model "$VERIFIER_MODEL" \
      --arg last_result "$last_result" \
      --argjson consecutive_failures "$CONSECUTIVE_FAILURES" \
      --arg updated_at_utc "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '{
        slug: $slug,
        iteration: $iteration,
        max_iter: $max_iter,
        phase: $phase,
        worker_model: $worker_model,
        verifier_model: $verifier_model,
        last_result: $last_result,
        consecutive_failures: $consecutive_failures,
        updated_at_utc: $updated_at_utc
      }'); then
    log_error "update_status: could not render status JSON"
    return 1
  fi

  printf '%s\n' "$status_json" | atomic_write "$STATUS_FILE"
}
|
|
695
|
+
|
|
696
|
+
# --- governance.md s7 step 8: Write result log ---
# Record the outcome of one iteration in $LOGS_DIR/iter-NNN.result.md,
# together with a `git diff --stat` of the project's most recent commit.
#
# Arguments: $1 - iteration number
#            $2 - result text
# Globals:   LOGS_DIR, ROOT (read)
write_result_log() {
  local iter="$1"
  local result="$2"
  local iter_tag git_diff stamp
  iter_tag=$(printf '%03d' "$iter")
  local result_file="$LOGS_DIR/iter-${iter_tag}.result.md"

  # Diff the project under $ROOT rather than whatever directory the leader
  # happens to be running in. Any git failure (not a repository, fewer than
  # two commits, git missing) falls back to a placeholder.
  git_diff=$(git -C "$ROOT" diff --stat HEAD~1 HEAD 2>/dev/null || echo "(no git diff available)")
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  {
    echo "# Iteration $iter Result"
    echo ""
    echo "## Status"
    echo "$result [leader-measured]"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$git_diff"
    echo '```'
    echo "[git-measured]"
    echo ""
    echo "## Timestamp"
    echo "$stamp"
  } | atomic_write "$result_file"
}
|
|
721
|
+
|
|
722
|
+
# =============================================================================
|
|
723
|
+
# Sentinel Writers
|
|
724
|
+
# =============================================================================
|
|
725
|
+
|
|
726
|
+
# --- governance.md s7: Only the Leader writes sentinels ---
|
|
727
|
+
# Write the COMPLETE sentinel file and log that it was written.
#
# Arguments:
#   $1 - free-text summary included in the sentinel body
# Globals read:
#   ITERATION, COMPLETE_SENTINEL
#
# printf '%s\n' is used instead of echo so the summary is written verbatim:
# zsh's echo interprets backslash escapes (e.g. "\n", or "\c" which stops
# output), which could mangle or truncate the text.
write_complete_sentinel() {
  local summary="$1"

  printf '%s\n' \
    "# Campaign Complete" \
    "" \
    "Completed at iteration $ITERATION." \
    "$summary" \
    "" \
    "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    | atomic_write "$COMPLETE_SENTINEL"

  log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
}
|
|
737
|
+
|
|
738
|
+
# Write the BLOCKED sentinel file and log that it was written.
#
# Arguments:
#   $1 - reason text recorded on the "Reason:" line
# Globals read:
#   ITERATION, BLOCKED_SENTINEL
#
# printf '%s\n' is used instead of echo so the reason is written verbatim:
# zsh's echo interprets backslash escapes, which could mangle or truncate
# text that contains a backslash.
write_blocked_sentinel() {
  local reason="$1"

  printf '%s\n' \
    "# Campaign Blocked" \
    "" \
    "Blocked at iteration $ITERATION." \
    "Reason: $reason" \
    "" \
    "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    | atomic_write "$BLOCKED_SENTINEL"

  log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
}
|
|
748
|
+
|
|
749
|
+
# =============================================================================
|
|
750
|
+
# Cleanup (trap handler)
|
|
751
|
+
# =============================================================================
|
|
752
|
+
|
|
753
|
+
# Trap handler: stop the claude sessions, kill the worker/verifier panes,
# remove leftover temp files and print a session summary.
#
# Globals read:
#   WORKER_PANE, VERIFIER_PANE, LOGS_DIR, MEMOS_DIR, START_TIME,
#   SESSION_NAME, SLUG, ITERATION, MAX_ITER, COMPLETE_SENTINEL,
#   BLOCKED_SENTINEL
#
# Every expansion of WORKER_PANE / VERIFIER_PANE is guarded with ${VAR:-}
# because the script runs under `set -u`; an unguarded expansion of an unset
# pane variable would abort the handler before the summary is printed.
cleanup() {
  local line
  log "Cleaning up..."

  # Kill claude processes then kill panes
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
  fi
  # Give the claude sessions a moment to exit before the panes are killed.
  sleep 2

  # Kill the panes themselves; tmux output is forwarded to the debug log.
  log_debug "cleanup: killing panes ${WORKER_PANE:-} ${VERIFIER_PANE:-}"
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux kill-pane -t "$WORKER_PANE" 2>&1 | while read -r line; do log_debug "kill worker: $line"; done
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux kill-pane -t "$VERIFIER_PANE" 2>&1 | while read -r line; do log_debug "kill verifier: $line"; done
  fi

  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
  setopt local_options nonomatch 2>/dev/null
  rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null

  # Print summary
  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))
  local minutes=$(( elapsed / 60 ))
  local seconds=$(( elapsed % 60 ))

  echo ""
  echo "============================================================"
  echo " Ralph Desk Tmux Runner - Session Complete"
  echo "============================================================"
  echo " Session: $SESSION_NAME"
  echo " Slug: $SLUG"
  echo " Iterations: $ITERATION / $MAX_ITER"
  echo " Elapsed: ${minutes}m ${seconds}s"
  echo ""

  # The final state is derived from which sentinel file exists, if any.
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    echo " Final State: COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    echo " Final State: BLOCKED"
  else
    echo " Final State: STOPPED (interrupted or timeout)"
  fi

  echo ""
  echo " Tmux session left alive for inspection:"
  echo " tmux attach -t $SESSION_NAME"
  echo " tmux kill-session -t $SESSION_NAME"
  echo "============================================================"
}
|
|
807
|
+
|
|
808
|
+
# =============================================================================
|
|
809
|
+
# Poll Loop (used for both Worker and Verifier)
|
|
810
|
+
# =============================================================================
|
|
811
|
+
|
|
812
|
+
# --- governance.md s7 step 5+6: Poll for signal file with heartbeat monitoring ---
|
|
813
|
+
# Poll until a signal file appears, monitoring the role's heartbeat file and
# nudging an idle pane while waiting.
#
# Arguments:
#   $1 - signal file to wait for
#   $2 - heartbeat file to monitor (checks are skipped while it is absent)
#   $3 - tmux pane id of the process being watched
#   $4 - value forwarded to restart_worker as its third argument
#   $5 - role label used in log messages (callers pass "Worker"/"Verifier")
# Globals read:
#   ITER_TIMEOUT, POLL_INTERVAL, ITERATION, HEARTBEAT_STALE_THRESHOLD
# Globals written:
#   LAST_PANE_CONTENT, PANE_IDLE_SINCE, HEARTBEAT_STALE_COUNT
# Returns:
#   0 when the signal file exists; 1 on timeout, when restart_worker fails,
#   or after 3 consecutive stale-heartbeat events.
poll_for_signal() {
  local signal_file="$1"
  local heartbeat_file="$2"
  local pane_id="$3"
  local trigger_file="$4"
  local role="$5" # label for log messages, e.g. "Worker" or "Verifier"
  # NOTE: nudge_count is passed BY NAME to check_and_nudge_idle_pane below,
  # so this local must keep exactly this name.
  local nudge_count=0
  # Declared once, outside the loop: with zsh's default options a bare
  # `local now` repeated inside the loop prints "now=<value>" on every pass
  # after the first (TYPESET_SILENT is unset by default).
  local poll_start now elapsed
  poll_start=$(date +%s)

  # Initialize idle tracking for this pane
  LAST_PANE_CONTENT[$pane_id]=""
  PANE_IDLE_SINCE[$pane_id]=$(date +%s)

  while true; do
    now=$(date +%s)
    elapsed=$(( now - poll_start ))

    # Per-iteration timeout check
    if (( elapsed >= ITER_TIMEOUT )); then
      log_error "$role timed out after ${ITER_TIMEOUT}s for iteration $ITERATION"
      return 1 # timeout
    fi

    # Check if signal file appeared
    if [[ -f "$signal_file" ]]; then
      log " Signal file detected: $signal_file"
      return 0 # success
    fi

    # Check heartbeat freshness (omc-teams pattern)
    if [[ -f "$heartbeat_file" ]]; then
      if check_heartbeat_exited "$heartbeat_file"; then
        # Process exited but no signal file -- give a brief grace period
        sleep 3
        if [[ -f "$signal_file" ]]; then
          log " Signal file detected after process exit: $signal_file"
          return 0
        fi
        log_error "$role exited without writing signal file"
        # Attempt restart with exponential backoff
        if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          # Reset poll timer for the restart
          poll_start=$(date +%s)
          nudge_count=0
          LAST_PANE_CONTENT[$pane_id]=""
          PANE_IDLE_SINCE[$pane_id]=$(date +%s)
          sleep "$POLL_INTERVAL"
          continue
        else
          return 1 # max restarts exceeded
        fi
      fi

      if ! check_heartbeat "$heartbeat_file"; then
        log " WARNING: $role heartbeat stale (>${HEARTBEAT_STALE_THRESHOLD}s)"
        (( HEARTBEAT_STALE_COUNT++ ))
        # Circuit breaker: 3 consecutive heartbeat stale events
        if (( HEARTBEAT_STALE_COUNT >= 3 )); then
          log_error "Circuit breaker: 3 consecutive heartbeat stale events"
          return 1
        fi
        # Attempt restart
        if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          # Same post-restart handling as the exit-restart path above: reset
          # the timers and idle tracking, then wait one poll interval before
          # re-checking. Without the wait the old heartbeat file would be
          # re-evaluated immediately and could trip the circuit breaker
          # before the restarted process has had a chance to refresh it.
          poll_start=$(date +%s)
          nudge_count=0
          LAST_PANE_CONTENT[$pane_id]=""
          PANE_IDLE_SINCE[$pane_id]=$(date +%s)
          sleep "$POLL_INTERVAL"
          continue
        else
          return 1
        fi
      else
        # Heartbeat is fresh, reset stale counter
        HEARTBEAT_STALE_COUNT=0
      fi
    fi

    # Idle pane nudging (omc-teams pattern)
    check_and_nudge_idle_pane "$pane_id" "nudge_count"

    sleep "$POLL_INTERVAL"
  done
}
|
|
896
|
+
|
|
897
|
+
# =============================================================================
|
|
898
|
+
# Circuit Breaker: Stale Context Detection
|
|
899
|
+
# =============================================================================
|
|
900
|
+
|
|
901
|
+
# --- governance.md s7 step 8: Stale context detection ---
|
|
902
|
+
# Print an MD5 digest of $CONTEXT_FILE, or the literal "no-context" when the
# file does not exist. `md5 -q` is tried first; if it fails, the first
# space-separated field of `md5sum` output is printed instead.
compute_context_hash() {
  # Guard: nothing to hash.
  if [[ ! -f "$CONTEXT_FILE" ]]; then
    echo "no-context"
    return
  fi

  md5 -q "$CONTEXT_FILE" 2>/dev/null \
    || md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1
}
|
|
909
|
+
|
|
910
|
+
# Compare the current context hash with the one remembered from the previous
# call and track how many consecutive calls saw no change.
#
# Globals read/written:
#   PREV_CONTEXT_HASH, STALE_CONTEXT_COUNT
# Returns:
#   1 once the hash has been unchanged for 3 consecutive calls, else 0.
check_stale_context() {
  local hash_now
  hash_now=$(compute_context_hash)

  if [[ "$hash_now" != "$PREV_CONTEXT_HASH" ]]; then
    # Context moved on: start counting from scratch.
    STALE_CONTEXT_COUNT=0
  else
    (( STALE_CONTEXT_COUNT += 1 ))
    log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
    if (( STALE_CONTEXT_COUNT >= 3 )); then
      log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
      return 1
    fi
  fi

  # Remember this hash for the next comparison.
  PREV_CONTEXT_HASH="$hash_now"
  return 0
}
|
|
928
|
+
|
|
929
|
+
# =============================================================================
|
|
930
|
+
# Security Warning
|
|
931
|
+
# =============================================================================
|
|
932
|
+
|
|
933
|
+
# Print a banner to stdout warning that claude is launched with
# --dangerously-skip-permissions. Takes no arguments.
print_security_warning() {
  local rule="================================================================"

  # One argument per output line; the empty strings produce the blank lines.
  printf '%s\n' \
    "" \
    "$rule" \
    " WARNING: Running with --dangerously-skip-permissions" \
    "" \
    " The claude CLI will execute tools (file writes, shell commands)" \
    " without asking for confirmation. Only run this on code you" \
    " trust in an environment you control." \
    "$rule" \
    ""
}
|
|
944
|
+
|
|
945
|
+
# =============================================================================
|
|
946
|
+
# Main Leader Loop
|
|
947
|
+
# =============================================================================
|
|
948
|
+
|
|
949
|
+
# Leader loop: start the tmux session, then for each iteration launch the
# Worker, wait for its signal file, optionally launch the Verifier and act on
# its verdict, until a sentinel is written, a circuit breaker trips, or
# MAX_ITER iterations have run.
#
# Globals read (among others): SLUG, ROOT, MAX_ITER, WORKER_MODEL,
#   VERIFIER_MODEL, POLL_INTERVAL, ITER_TIMEOUT, CLAUDE_BIN, LOGS_DIR,
#   WORKER_PANE, VERIFIER_PANE, SIGNAL_FILE, DONE_CLAIM_FILE, VERDICT_FILE,
#   WORKER_HEARTBEAT, VERIFIER_HEARTBEAT, COMPLETE_SENTINEL, BLOCKED_SENTINEL
# Globals written: ITERATION, PREV_CONTEXT_HASH, MONITOR_FAILURE_COUNT,
#   CONSECUTIVE_FAILURES
# Returns:
#   0 when the campaign completes; 1 when it is blocked, a circuit breaker
#   trips, or the iteration limit is reached.
main() {
  # --- Startup ---
  log "Ralph Desk Tmux Runner starting..."
  log " Slug: $SLUG"
  log " Root: $ROOT"
  log " Max iterations: $MAX_ITER"
  log " Worker model: $WORKER_MODEL"
  log " Verifier model: $VERIFIER_MODEL"
  log " Poll interval: ${POLL_INTERVAL}s"
  log " Iter timeout: ${ITER_TIMEOUT}s"

  # Dependency checks
  check_dependencies

  # Print security warning (governance.md s7: --dangerously-skip-permissions)
  print_security_warning

  # Validate scaffold
  validate_scaffold

  # Check for existing sessions
  check_existing_sessions

  # Create tmux session with pane IDs (governance.md s7 step 1)
  create_session

  # Set trap for cleanup on exit/error
  trap cleanup EXIT

  # Initialize context hash for stale detection
  PREV_CONTEXT_HASH=$(compute_context_hash)

  # All loop-scoped locals are declared once here. With zsh's default options
  # (TYPESET_SILENT unset), repeating a bare `local name` inside the loop
  # prints "name=value" to stdout from the second iteration onward.
  local iter_tag
  local worker_prompt worker_launch worker_instruction
  local signal_status signal_summary
  local verifier_prompt verifier_cmd verifier_launch verifier_instruction
  local verdict recommended verdict_summary
  local fix_contract issue_lines

  # --- governance.md s7: Leader Loop ---
  for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
    log ""
    log "========== Iteration $ITERATION / $MAX_ITER =========="

    # --- governance.md s7 step 1: Check sentinels ---
    if [[ -f "$COMPLETE_SENTINEL" ]]; then
      log "COMPLETE sentinel found. Campaign succeeded."
      update_status "complete" "complete"
      return 0
    fi
    if [[ -f "$BLOCKED_SENTINEL" ]]; then
      log "BLOCKED sentinel found. Campaign blocked."
      update_status "blocked" "blocked"
      return 1
    fi

    # Zero-padded iteration number used in every per-iteration log file name.
    iter_tag=$(printf '%03d' "$ITERATION")

    # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
    rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
    rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null

    # --- Clean previous claude session in panes (one-shot lifecycle) ---
    # Only needed from iteration 2 onwards (iteration 1 has fresh panes)
    if (( ITERATION > 1 )); then
      # Send C-c first (in case claude is mid-task), then /exit
      tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
      sleep 1
      tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
      sleep 2
      # Wait for shell prompt before proceeding
      wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
    fi

    # --- governance.md s7 step 4: Build worker prompt + trigger ---
    write_worker_trigger "$ITERATION"
    worker_prompt="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"

    update_status "worker" "running"

    # --- governance.md s7 step 5: Execute Worker (interactive claude, omc-teams pattern) ---
    # Step 5a: Launch interactive claude in Worker pane
    worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
    log " Launching Worker claude in pane $WORKER_PANE..."
    tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
    tmux send-keys -t "$WORKER_PANE" Enter

    # Step 5b: Wait for claude TUI to be ready (omc-teams pattern)
    if ! wait_for_pane_ready "$WORKER_PANE" 30; then
      log_error "Worker claude failed to start"
      write_blocked_sentinel "Worker claude failed to start in pane"
      update_status "blocked" "worker_start_failed"
      return 1
    fi

    # Step 5c: Wait for claude to fully initialize, then send instruction
    sleep 3
    worker_instruction="Read and execute the instructions in $worker_prompt"
    if ! safe_send_keys "$WORKER_PANE" "$worker_instruction"; then
      log_error "Failed to send instruction to Worker"
    fi
    # Extra C-m to ensure submission (long text may false-positive the consumed check)
    sleep 0.5
    tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null

    # --- governance.md s7 step 5+6: Poll for Worker completion ---
    log " Polling for iter-signal.json..."
    if ! poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
      # Monitor failure or timeout
      (( MONITOR_FAILURE_COUNT++ ))
      if (( MONITOR_FAILURE_COUNT >= 3 )); then
        write_blocked_sentinel "3 consecutive monitor failures"
        update_status "blocked" "monitor_failures"
        return 1
      fi
      log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
      update_status "worker" "poll_failed"
      continue
    fi

    # Reset monitor failure count on success
    MONITOR_FAILURE_COUNT=0

    # --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
    signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
    signal_summary=$(jq -r '.summary // "no summary"' "$SIGNAL_FILE" 2>/dev/null)

    log " Worker signal: status=$signal_status summary=\"$signal_summary\""

    case "$signal_status" in
      continue)
        # --- governance.md s7 step 6: continue -> go to step 8 ---
        log " Worker requests continue. Moving to next iteration."
        update_status "worker" "continue"
        ;;
      verify)
        # --- governance.md s7 step 7: Execute Verifier ---
        log " Worker claims done. Dispatching Verifier..."

        write_verifier_trigger "$ITERATION"
        verifier_prompt="$LOGS_DIR/iter-${iter_tag}.verifier-prompt.md"

        update_status "verifier" "running"

        # Step 7a: Clean previous Verifier session if claude is running
        verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
        if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" ]]; then
          tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
          sleep 0.5
          tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
          sleep 2
          wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
        fi

        verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
        log " Launching Verifier claude in pane $VERIFIER_PANE..."
        tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
        tmux send-keys -t "$VERIFIER_PANE" Enter

        # Step 7b: Wait for claude TUI to be ready
        if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
          log_error "Verifier claude failed to start"
          update_status "verifier" "start_failed"
          continue
        fi

        # Step 7c: Wait for claude to fully initialize, then send instruction
        sleep 3
        verifier_instruction="Read and execute the instructions in $verifier_prompt"
        # Log a failed send the same way the Worker path does; polling still
        # proceeds because the extra C-m below may complete the submission.
        if ! safe_send_keys "$VERIFIER_PANE" "$verifier_instruction"; then
          log_error "Failed to send instruction to Verifier"
        fi
        # Extra C-m to ensure submission
        sleep 0.5
        tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
        sleep 0.3
        tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null

        # Poll for verify-verdict.json
        log " Polling for verify-verdict.json..."
        if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
          log_error "Verifier poll failed"
          update_status "verifier" "poll_failed"
          continue
        fi

        # --- governance.md s7 step 7: Read verdict via jq ---
        verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
        recommended=$(jq -r '.recommended_state_transition' "$VERDICT_FILE" 2>/dev/null)
        verdict_summary=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)

        log " Verifier: verdict=$verdict recommended=$recommended"
        log " Verifier summary: \"$verdict_summary\""

        case "$verdict" in
          pass)
            CONSECUTIVE_FAILURES=0
            if [[ "$recommended" == "complete" ]]; then
              # Write COMPLETE sentinel (only Leader writes sentinels)
              write_complete_sentinel "$verdict_summary"
              update_status "complete" "pass"
              return 0
            else
              log " Verifier passed but did not recommend complete. Continuing."
              update_status "verifier" "pass_continue"
            fi
            ;;
          fail)
            # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
            (( CONSECUTIVE_FAILURES++ ))
            log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."

            # Extract issues from verdict for next Worker's fix contract
            fix_contract="$LOGS_DIR/iter-${iter_tag}.fix-contract.md"
            # `.issues[]?` makes jq exit 0 with no output when the verdict has
            # no issues, so the placeholder is chosen on empty output rather
            # than on jq's exit status.
            issue_lines=$(jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$VERDICT_FILE" 2>/dev/null)
            {
              echo "# Fix Contract (from Verifier iteration $ITERATION)"
              echo ""
              echo "## Summary"
              # printf keeps the verifier's text verbatim (zsh echo would
              # interpret backslash escapes in it).
              printf '%s\n' "$verdict_summary"
              echo ""
              echo "## Issues (from verify-verdict.json)"
              if [[ -n "$issue_lines" ]]; then
                printf '%s\n' "$issue_lines"
              else
                echo "- (no structured issues available)"
              fi
              echo ""
              echo "## Next Iteration Contract"
              jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
            } | atomic_write "$fix_contract"
            log " Fix contract: $fix_contract"

            # Circuit breaker: consecutive failures
            if (( CONSECUTIVE_FAILURES >= 3 )); then
              log_error "Circuit breaker: 3 consecutive verification failures"
              write_blocked_sentinel "3 consecutive verification failures"
              update_status "blocked" "consecutive_failures"
              return 1
            fi

            update_status "verifier" "fail"
            ;;
          request_info)
            # --- governance.md s7 step 7: request_info (degraded in tmux mode) ---
            log " Verifier requests info (degraded in tmux lean mode)."
            log " Questions: \"$verdict_summary\""
            log " Treating as soft fail — Worker will see verdict in next iteration."
            update_status "verifier" "request_info"
            ;;
          blocked)
            write_blocked_sentinel "Verifier verdict: blocked - $verdict_summary"
            update_status "blocked" "verifier_blocked"
            return 1
            ;;
          *)
            log_error "Unknown verdict: $verdict"
            update_status "verifier" "unknown_verdict"
            ;;
        esac
        ;;
      blocked)
        # --- governance.md s7 step 6: blocked -> write sentinel ---
        write_blocked_sentinel "Worker reported blocked: $signal_summary"
        update_status "blocked" "worker_blocked"
        return 1
        ;;
      *)
        log_error "Unknown signal status: $signal_status"
        update_status "worker" "unknown_status"
        ;;
    esac

    # --- governance.md s7 step 8: Write result log ---
    write_result_log "$ITERATION" "$signal_status"

    # --- governance.md s7 step 8: Circuit breaker - stale context check ---
    if ! check_stale_context; then
      write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
      update_status "blocked" "stale_context"
      return 1
    fi

    # --- governance.md s7 step 8: Update status ---
    update_status "idle" "${signal_status:-unknown}"
  done

  # Max iterations reached
  log "Max iterations ($MAX_ITER) reached."
  update_status "timeout" "max_iter"
  return 1
}
|
|
1241
|
+
|
|
1242
|
+
# =============================================================================
|
|
1243
|
+
# Entry Point
|
|
1244
|
+
# =============================================================================
|
|
1245
|
+
|
|
1246
|
+
# Require tmux — tmux mode only works inside an active tmux session
|
|
1247
|
+
# Guard: refuse to run outside tmux. $TMUX is empty or unset when the script
# is not started from inside a tmux session; in that case print usage hints
# and exit with status 1.
[[ -n "${TMUX:-}" ]] || {
  echo "ERROR: tmux mode requires running inside a tmux session."
  echo ""
  # Option 1: start tmux and re-run this script.
  echo " Start tmux first, then retry:"
  echo " tmux"
  echo " LOOP_NAME=$SLUG $0"
  echo ""
  # Option 2: use the non-tmux mode.
  echo " Or use Agent() mode instead (no tmux needed):"
  echo " /rlp-desk run $SLUG"
  exit 1
}

# Inside tmux: hand all command-line arguments to the leader loop.
main "$@"
|