@ai-dev-methodologies/rlp-desk 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,838 @@
1
+ # lib_ralph_desk.zsh — Shared business logic for RLP Desk runner
2
+ # SOURCED by run_ralph_desk.zsh. Do NOT execute directly.
3
+ #
4
+ # IMPORTANT: Must be sourced at file scope, not inside a function.
5
+ # typeset -A creates local arrays inside functions, breaking global state.
6
+ # Functions in this file read/write globals defined by the sourcing script.
7
+
8
# Refuse to load when a second funcstack entry exists; per the header above,
# this file must be sourced at file scope rather than from inside a function.
[[ -z "${funcstack[2]:-}" ]] || {
  echo "FATAL: lib_ralph_desk.zsh must be sourced at file scope" >&2
  exit 1
}
12
+
13
+ # =============================================================================
14
+ # Utility Functions
15
+ # =============================================================================
16
+
17
# log — print a timestamped message on stdout.
# Arguments: every argument is joined (via $*) into the message text.
# Output:    "[YYYY-MM-DD HH:MM:SS] <message>" using local time.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] $*"
}
20
+
21
# log_debug — append a timestamped DEBUG line to $DEBUG_LOG.
# Globals:   DEBUG (read; non-zero enables logging), DEBUG_LOG (read; target file)
# Arguments: every argument is joined (via $*) into the message text.
# Does nothing when DEBUG evaluates to zero.
log_debug() {
  (( DEBUG )) || return 0

  # Make sure the log directory exists before appending; failures are silenced.
  mkdir -p "$(dirname "$DEBUG_LOG")" 2>/dev/null

  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] DEBUG: $*" >> "$DEBUG_LOG"
}
27
+
28
# log_error — print a timestamped ERROR message on stderr.
# Arguments: every argument is joined (via $*) into the message text.
log_error() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] ERROR: $*" >&2
}
31
+
32
# parse_model_flag() — parse a unified --worker-model / --verifier-model value.
#
# A value of the form "model:reasoning" selects the codex engine; a value with
# no colon selects the claude engine. Any codex model name containing "spark"
# is normalized to the short alias "spark".
#
# Usage:   parse_model_flag <value> [role]
#   value  the raw flag value
#   role   used only in the error message ("worker" when omitted)
# Output (stdout): "codex <model> <reasoning>" or "claude <value>"
# Returns: 0 on success; 1 (message on stderr) when the value contains more
#          than one colon, or when either side of the colon is empty.
parse_model_flag() {
  local value="$1"
  local role="${2:-worker}"
  local invalid_msg="ERROR: Invalid --${role}-model format '${value}'. Use 'model:reasoning' (codex) or 'model-name' (claude)."

  case "$value" in
    *:*:*)
      # More than one colon is never valid.
      echo "$invalid_msg" >&2
      return 1
      ;;
    *:*)
      local model="${value%%:*}"
      local reasoning="${value#*:}"
      # "model:" and ":reasoning" would otherwise yield a malformed triple.
      if [[ -z "$model" || -z "$reasoning" ]]; then
        echo "$invalid_msg" >&2
        return 1
      fi
      if [[ "$model" == *"spark"* ]]; then
        model="spark"
      fi
      echo "codex $model $reasoning"
      ;;
    *)
      echo "claude $value"
      ;;
  esac
}
58
+
59
# get_model_string() — build the engine-appropriate model identifier.
#   claude: the bare model name (e.g. "sonnet")
#   codex:  "model:reasoning" (e.g. "gpt-5.4:high") when a reasoning level is given
# Args:   $1=engine (claude|codex)  $2=model  $3=codex reasoning (optional)
# Output: the model string on stdout
get_model_string() {
  local engine="$1" model="$2" reasoning="${3:-}"
  local identifier="$model"

  # Only codex with an explicit reasoning level gets the ":reasoning" suffix.
  if [[ "$engine" == "codex" ]] && [[ -n "$reasoning" ]]; then
    identifier="${model}:${reasoning}"
  fi

  echo "$identifier"
}
75
+
76
# get_next_model() — next step on the Worker upgrade path.
# Usage:  get_next_model <model_str>
#   claude: "haiku" | "sonnet" | "opus"
#   codex:  "<family>:<reasoning>" where family is one of spark,
#           gpt-5.3-codex-spark, gpt-5.3-codex, gpt-5.4 and reasoning is one
#           of low, medium, high, xhigh
# Output: the next model string, or an empty line when the input is already
#         at its ceiling or is not a recognized model string.
get_next_model() {
  local current="$1"

  # Claude path (Worker only — Verifier fixed): haiku → sonnet → opus.
  case "$current" in
    haiku)  echo "sonnet"; return 0 ;;
    sonnet) echo "opus";   return 0 ;;
  esac

  # Codex path: split "family:reasoning"; a string without a colon leaves
  # level equal to the whole input, which then matches no ladder step.
  local family="${current%%:*}"
  local level="${current#*:}"

  case "$family" in
    spark|gpt-5.3-codex-spark|gpt-5.3-codex|gpt-5.4) ;;
    *)
      # Unknown family (this also covers "opus") → treat as ceiling.
      echo ""
      return 0
      ;;
  esac

  # Every known codex family climbs the same reasoning ladder.
  case "$level" in
    low)    echo "${family}:medium" ;;
    medium) echo "${family}:high" ;;
    high)   echo "${family}:xhigh" ;;
    *)      echo "" ;;  # xhigh ceiling or unrecognized level
  esac
}
111
+
112
# check_model_upgrade() — evaluate and apply a Worker model upgrade after
# repeated failures on the same user story (US).
#
# Called in the fail verdict path. Counts consecutive failures for the same
# US and, once the count reaches 2, moves the Worker one step up the path
# returned by get_next_model. Never modifies VERIFIER_MODEL.
#
# Usage:   check_model_upgrade <us_id>
# Globals read:    LOCK_WORKER_MODEL, WORKER_ENGINE, ITERATION
# Globals written: _SAME_US_FAIL_COUNT, _LAST_FAILED_US, _MODEL_UPGRADED,
#                  _ORIGINAL_WORKER_MODEL, _ORIGINAL_WORKER_CODEX_REASONING,
#                  WORKER_MODEL, WORKER_CODEX_MODEL, WORKER_CODEX_REASONING
# Returns: always 0.
check_model_upgrade() {
  local current_us="$1"

  # Track consecutive failures on the same US. A plain assignment is used
  # instead of (( x++ )) because the latter returns status 1 when x was 0.
  if [[ "$current_us" == "${_LAST_FAILED_US:-}" ]]; then
    _SAME_US_FAIL_COUNT=$(( ${_SAME_US_FAIL_COUNT:-0} + 1 ))
  else
    _SAME_US_FAIL_COUNT=1
    _LAST_FAILED_US="$current_us"
  fi

  # Respect --lock-worker-model: no upgrade; CB threshold handles BLOCKED
  if (( LOCK_WORKER_MODEL )); then
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=locked"
    return 0
  fi

  # Upgrade only when the same US has failed >= 2 consecutive times.
  if (( _SAME_US_FAIL_COUNT < 2 )); then
    return 0
  fi

  # Only a codex worker is described by WORKER_CODEX_MODEL. For any other
  # engine use WORKER_MODEL, so a populated codex variable cannot mask the
  # model that is actually running and block the upgrade.
  local active_model="$WORKER_MODEL"
  if [[ "$WORKER_ENGINE" == "codex" ]]; then
    active_model="${WORKER_CODEX_MODEL:-$WORKER_MODEL}"
  fi

  local current_model_str
  current_model_str=$(get_model_string "$WORKER_ENGINE" "$active_model" "${WORKER_CODEX_REASONING:-}")

  local next_model
  next_model=$(get_next_model "$current_model_str")

  if [[ -z "$next_model" ]]; then
    # Already at ceiling — CB threshold will trigger BLOCKED with escalation message
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=already_max current=$current_model_str"
    return 0
  fi

  # Save original model on first upgrade only
  if (( _MODEL_UPGRADED == 0 )); then
    _ORIGINAL_WORKER_MODEL="$WORKER_MODEL"
    _ORIGINAL_WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-}"
  fi
  _MODEL_UPGRADED=1

  if [[ "$WORKER_ENGINE" == "codex" ]]; then
    WORKER_CODEX_MODEL="${next_model%%:*}"
    WORKER_CODEX_REASONING="${next_model##*:}"
    WORKER_MODEL="$WORKER_CODEX_MODEL"
  else
    WORKER_MODEL="$next_model"
  fi

  # Report the full model strings so a reasoning-only codex upgrade is
  # visible (the bare model name is identical before and after).
  log " Worker model upgraded: ${current_model_str} → ${next_model} (same-US consecutive fail threshold)"
  log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${_ORIGINAL_WORKER_MODEL} to=${WORKER_MODEL}"
  _SAME_US_FAIL_COUNT=0  # Reset counter after upgrade

  return 0
}
169
+
170
# --- governance.md s7: Atomic file writes (tmux pattern) ---
# All file writes by the Leader use tmp+mv to prevent corruption.
#
# atomic_write — copy stdin to <target> via a temporary sibling file.
# Usage:   <producer> | atomic_write <target>
# Returns: 0 when the target was replaced; 1 when reading stdin, writing the
#          temporary file, or the rename failed. On failure the existing
#          target is left untouched and the temporary file is removed.
atomic_write() {
  local target="$1"
  local tmp="${target}.tmp.$$"

  # Rename only after the payload was written completely; otherwise a
  # truncated temp file would replace a good target.
  if cat > "$tmp" && mv "$tmp" "$target"; then
    return 0
  fi

  rm -f "$tmp" 2>/dev/null
  return 1
}
178
+
179
+ # =============================================================================
180
+ # Scaffold Validation
181
+ # =============================================================================
182
+
183
# validate_scaffold — verify that the campaign scaffold files exist.
# Globals read: WORKER_PROMPT_BASE, VERIFIER_PROMPT_BASE, CONTEXT_FILE,
#               MEMORY_FILE, LOGS_DIR
# Logs one error per missing file; if any is missing, logs a summary and
# exits the shell with status 1. Otherwise creates $LOGS_DIR.
validate_scaffold() {
  local missing=0
  local entry label file

  # Each entry is "<label>|<file>"; the label is everything before the first '|'.
  for entry in \
    "Worker prompt|$WORKER_PROMPT_BASE" \
    "Verifier prompt|$VERIFIER_PROMPT_BASE" \
    "Context file|$CONTEXT_FILE" \
    "Memory file|$MEMORY_FILE"; do
    label="${entry%%|*}"
    file="${entry#*|}"
    if [[ ! -f "$file" ]]; then
      log_error "${label} not found: ${file}"
      missing=1
    fi
  done

  if (( missing )); then
    log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
    exit 1
  fi

  mkdir -p "$LOGS_DIR"
}
213
+
214
+ # =============================================================================
215
+ # Status Updates
216
+ # =============================================================================
217
+
218
# --- governance.md s7 step 8: Update status.json ---
#
# update_status — write the campaign status snapshot to $STATUS_FILE.
# Usage:   update_status <phase> <last_result>
# Globals read: SLUG, BASELINE_COMMIT, ITERATION, MAX_ITER, WORKER_MODEL,
#   VERIFIER_MODEL, WORKER_ENGINE, VERIFIER_ENGINE, WORKER_CODEX_MODEL,
#   WORKER_CODEX_REASONING, VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING,
#   VERIFY_MODE, VERIFY_CONSENSUS, CONSECUTIVE_FAILURES, VERIFIED_US
#   (comma-separated), and — only when VERIFY_CONSENSUS is "1" —
#   CONSENSUS_SCOPE, CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT.
# Returns: 0 on success; 1 when the JSON could not be built, in which case
#          $STATUS_FILE is left unchanged.
#
# The document is assembled by jq so that quotes, backslashes or newlines in
# any string value are escaped instead of corrupting the file. Key order
# matches the previous hand-built layout.
update_status() {
  local phase="$1"
  local last_result="$2"

  local consensus_on="false"
  if [[ "${VERIFY_CONSENSUS:-}" == "1" ]]; then
    consensus_on="true"
  fi

  local status_json
  if ! status_json=$(jq -n \
      --arg slug "${SLUG:-}" \
      --arg baseline_commit "${BASELINE_COMMIT:-none}" \
      --argjson iteration "${ITERATION:-0}" \
      --argjson max_iter "${MAX_ITER:-0}" \
      --arg phase "$phase" \
      --arg worker_model "${WORKER_MODEL:-}" \
      --arg verifier_model "${VERIFIER_MODEL:-}" \
      --arg worker_engine "${WORKER_ENGINE:-}" \
      --arg verifier_engine "${VERIFIER_ENGINE:-}" \
      --arg worker_codex_model "${WORKER_CODEX_MODEL:-}" \
      --arg worker_codex_reasoning "${WORKER_CODEX_REASONING:-}" \
      --arg verifier_codex_model "${VERIFIER_CODEX_MODEL:-}" \
      --arg verifier_codex_reasoning "${VERIFIER_CODEX_REASONING:-}" \
      --arg verify_mode "${VERIFY_MODE:-}" \
      --argjson verify_consensus "${VERIFY_CONSENSUS:-0}" \
      --arg last_result "$last_result" \
      --argjson consecutive_failures "${CONSECUTIVE_FAILURES:-0}" \
      --arg verified_us "${VERIFIED_US:-}" \
      --argjson consensus_on "$consensus_on" \
      --arg consensus_scope "${CONSENSUS_SCOPE:-}" \
      --argjson consensus_round "${CONSENSUS_ROUND:-0}" \
      --arg claude_verdict "${CLAUDE_VERDICT:-}" \
      --arg codex_verdict "${CODEX_VERDICT:-}" \
      --arg updated_at_utc "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '{
         slug: $slug,
         baseline_commit: $baseline_commit,
         iteration: $iteration,
         max_iter: $max_iter,
         phase: $phase,
         worker_model: $worker_model,
         verifier_model: $verifier_model,
         worker_engine: $worker_engine,
         verifier_engine: $verifier_engine,
         worker_codex_model: $worker_codex_model,
         worker_codex_reasoning: $worker_codex_reasoning,
         verifier_codex_model: $verifier_codex_model,
         verifier_codex_reasoning: $verifier_codex_reasoning,
         verify_mode: $verify_mode,
         verify_consensus: $verify_consensus,
         last_result: $last_result,
         consecutive_failures: $consecutive_failures,
         verified_us: (if $verified_us == "" then [] else ($verified_us | split(",")) end)
       }
       + (if $consensus_on then {
            consensus_scope: $consensus_scope,
            consensus_round: $consensus_round,
            claude_verdict: $claude_verdict,
            codex_verdict: $codex_verdict
          } else {} end)
       + { updated_at_utc: $updated_at_utc }'); then
    # Do not pipe an empty document into atomic_write: that would replace a
    # valid status file with nothing.
    log_error "update_status: could not build status JSON; $STATUS_FILE left unchanged"
    return 1
  fi

  # printf, not echo: zsh's echo would interpret the backslash escapes jq emits.
  printf '%s\n' "$status_json" | atomic_write "$STATUS_FILE"
}
261
+
262
# --- governance.md s7 step 8: Write result log ---
#
# write_result_log — write logs/iter-NNN.result.md for one iteration.
# Usage: write_result_log <iteration> <result>
# Globals read: LOGS_DIR, ROOT
# The file records the result, `git diff --stat HEAD` for $ROOT (or a
# placeholder when that is unavailable), up to 20 untracked files, and a UTC
# timestamp. Written through atomic_write.
write_result_log() {
  local iter="$1" result="$2"

  local iter_padded
  iter_padded=$(printf '%03d' $iter)
  local result_file="$LOGS_DIR/iter-${iter_padded}.result.md"

  # Diff summary against HEAD; a repository without commits cannot be diffed.
  local changes
  if ! git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    changes="(no commits in repo — cannot diff)"
  else
    changes=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(no git diff available)")
  fi

  # Include untracked new files in result log
  local new_files
  new_files=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$new_files" ]]; then
    changes="${changes}"$'\n\n'"Untracked new files:"$'\n'"${new_files}"
  fi

  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  {
    echo "# Iteration $iter Result"
    echo ""
    echo "## Status"
    echo "$result [leader-measured]"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$changes"
    echo '```'
    echo "[git-measured]"
    echo ""
    echo "## Timestamp"
    echo "$stamp"
  } | atomic_write "$result_file"
}
300
+
301
# --- step 7d: Archive iteration artifacts (done-claim + verdict) to logs/ ---
#
# archive_iter_artifacts — copy the current done-claim and verdict files into
# $LOGS_DIR under iteration-numbered names.
# Usage: archive_iter_artifacts <iteration>
# Globals read: DONE_CLAIM_FILE, VERDICT_FILE, LOGS_DIR
# A source file that does not exist is skipped; cp errors are silenced.
archive_iter_artifacts() {
  local iter="$1"
  local tag
  tag=$(printf '%03d' "$iter")

  # Each item is "<source>|<archive suffix>"; split on the last '|'.
  local item src suffix
  for item in "$DONE_CLAIM_FILE|done-claim" "$VERDICT_FILE|verify-verdict"; do
    src="${item%|*}"
    suffix="${item##*|}"
    if [[ -f "$src" ]]; then
      cp "$src" "$LOGS_DIR/iter-${tag}-${suffix}.json" 2>/dev/null
    fi
  done
}
313
+
314
# _rlp_epoch_to_iso — print an epoch-seconds value as a UTC ISO-8601 stamp.
# Prints an empty line when the value is not a plain non-negative integer or
# when neither date flavour can convert it. BSD date takes the epoch via -r;
# GNU date treats -r as a reference file, so "-d @epoch" is the fallback.
_rlp_epoch_to_iso() {
  local epoch="$1"
  case "$epoch" in
    ''|*[!0-9]*)
      echo ""
      return 0
      ;;
  esac
  date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || date -u -d "@${epoch}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || echo ""
}

# --- AC5: Write per-iteration cost estimate to cost-log.jsonl ---
#
# write_cost_log — append one JSON line with size-based token estimates and
# per-phase timing for an iteration.
# Usage: write_cost_log <iteration>
# Globals read: LOGS_DIR, DONE_CLAIM_FILE, VERDICT_FILE, COST_LOG,
#   ITER_WORKER_START/END, ITER_VERIFIER_START/END (epoch seconds, optional),
#   ITER_VERIFIER_CLAUDE_DURATION_S, ITER_VERIFIER_CODEX_DURATION_S (optional)
# estimated_tokens is (prompt + claim + verdict bytes) / 4.
write_cost_log() {
  local iter="$1"
  local iter_padded
  iter_padded=$(printf '%03d' "$iter")

  local prompt_bytes=0 claim_bytes=0 verdict_bytes=0
  local worker_prompt_file="$LOGS_DIR/iter-${iter_padded}.worker-prompt.md"
  [[ -f "$worker_prompt_file" ]] && prompt_bytes=$(wc -c < "$worker_prompt_file" 2>/dev/null || echo 0)
  [[ -f "$DONE_CLAIM_FILE" ]] && claim_bytes=$(wc -c < "$DONE_CLAIM_FILE" 2>/dev/null || echo 0)
  [[ -f "$VERDICT_FILE" ]] && verdict_bytes=$(wc -c < "$VERDICT_FILE" 2>/dev/null || echo 0)

  local estimated_tokens=$(( (prompt_bytes + claim_bytes + verdict_bytes) / 4 ))

  # AC1: per-phase timing fields. A missing END falls back to START, giving a
  # zero duration and identical start/end stamps.
  local worker_start_time="" worker_end_time="" worker_duration_s=0
  local verifier_start_time="" verifier_end_time="" verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_start_time=$(_rlp_epoch_to_iso "$ITER_WORKER_START")
    worker_end_time=$(_rlp_epoch_to_iso "${ITER_WORKER_END:-$ITER_WORKER_START}")
    worker_duration_s=$(( ${ITER_WORKER_END:-$ITER_WORKER_START} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_start_time=$(_rlp_epoch_to_iso "$ITER_VERIFIER_START")
    verifier_end_time=$(_rlp_epoch_to_iso "${ITER_VERIFIER_END:-$ITER_VERIFIER_START}")
    verifier_duration_s=$(( ${ITER_VERIFIER_END:-$ITER_VERIFIER_START} - ITER_VERIFIER_START ))
  fi

  # AC2: consensus mode per-engine timing
  local consensus_fields=""
  if [[ -n "${ITER_VERIFIER_CLAUDE_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields}"',"verifier_claude_duration_s":'"${ITER_VERIFIER_CLAUDE_DURATION_S}"
  fi
  if [[ -n "${ITER_VERIFIER_CODEX_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields}"',"verifier_codex_duration_s":'"${ITER_VERIFIER_CODEX_DURATION_S}"
  fi

  echo '{"iteration":'"$iter"',"estimated_tokens":'"$estimated_tokens"',"token_source":"estimated","prompt_bytes":'"$prompt_bytes"',"claim_bytes":'"$claim_bytes"',"verdict_bytes":'"$verdict_bytes"',"worker_start_time":"'"$worker_start_time"'","worker_end_time":"'"$worker_end_time"'","worker_duration_s":'"$worker_duration_s"',"verifier_start_time":"'"$verifier_start_time"'","verifier_end_time":"'"$verifier_end_time"'","verifier_duration_s":'"$verifier_duration_s"''"$consensus_fields"',"timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$COST_LOG"
}
353
+
354
# --- Analytics: write per-iteration structured data to campaign.jsonl (always-on) ---
#
# write_campaign_jsonl — append one compact JSON record for an iteration.
# Usage: write_campaign_jsonl <iteration> [us_id] [verdict]
#   us_id and verdict default to "unknown".
# Globals read: ITER_WORKER_START/END, ITER_VERIFIER_START/END, WORKER_MODEL,
#   WORKER_ENGINE, VERIFIER_ENGINE, CLAUDE_VERDICT, CODEX_VERDICT,
#   VERIFY_CONSENSUS, CONSECUTIVE_FAILURES, _MODEL_UPGRADED, ROOT, SLUG,
#   CAMPAIGN_JSONL
write_campaign_jsonl() {
  local iter="$1"
  local us_id="${2:-unknown}"
  local verdict="${3:-unknown}"

  # Phase durations; a phase with no recorded end is measured up to "now".
  local worker_secs=0 verifier_secs=0 phase_end
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    phase_end="${ITER_WORKER_END:-$(date +%s)}"
    worker_secs=$(( phase_end - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    phase_end="${ITER_VERIFIER_END:-$(date +%s)}"
    verifier_secs=$(( phase_end - ITER_VERIFIER_START ))
  fi

  local record_filter='{
    iter: $iter,
    us_id: $us_id,
    worker_model: $worker_model,
    worker_engine: $worker_engine,
    verifier_engine: $verifier_engine,
    claude_verdict: $claude_verdict,
    codex_verdict: $codex_verdict,
    consensus: $consensus,
    consecutive_failures: $consecutive_failures,
    model_upgraded: $model_upgraded,
    duration_worker_s: $duration_worker_s,
    duration_verifier_s: $duration_verifier_s,
    project_root: $project_root,
    slug: $slug,
    timestamp: $timestamp
  }'

  local -a jq_args
  jq_args=(
    --argjson iter "$iter"
    --arg us_id "$us_id"
    --arg worker_model "$WORKER_MODEL"
    --arg worker_engine "$WORKER_ENGINE"
    --arg verifier_engine "$VERIFIER_ENGINE"
    --arg claude_verdict "${CLAUDE_VERDICT:-$verdict}"
    --arg codex_verdict "${CODEX_VERDICT:-N/A}"
    --argjson consensus "$VERIFY_CONSENSUS"
    --argjson consecutive_failures "$CONSECUTIVE_FAILURES"
    --argjson model_upgraded "${_MODEL_UPGRADED:-0}"
    --argjson duration_worker_s "$worker_secs"
    --argjson duration_verifier_s "$verifier_secs"
    --arg project_root "$ROOT"
    --arg slug "$SLUG"
    --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  )

  jq -nc "${jq_args[@]}" "$record_filter" >> "$CAMPAIGN_JSONL"
}
388
+
389
# --- AC4: Generate campaign-report.md on all terminal states ---
#
# generate_campaign_report — write $LOGS_DIR/campaign-report.md once per run.
# Globals read: COMPLETE_SENTINEL, BLOCKED_SENTINEL, LOGS_DIR, START_TIME,
#   BASELINE_COMMIT, ROOT, WITH_SELF_VERIFICATION, SLUG, DESK, ITERATION,
#   MAX_ITER, WORKER_MODEL, WORKER_ENGINE, VERIFIER_MODEL, VERIFIER_ENGINE,
#   VERIFY_CONSENSUS, CONSENSUS_SCOPE, FINAL_CONSENSUS, VERIFIED_US,
#   CONSECUTIVE_FAILURES, COST_LOG
# Globals written: CAMPAIGN_REPORT_GENERATED
# Terminal state is COMPLETE or BLOCKED when the matching sentinel file
# exists, otherwise TIMEOUT. An existing report is renamed to the first free
# "-vN" name before the new one is written.
#
# Every local is declared once, before any loop: in zsh a bare `local name`
# repeated for a parameter that is already set prints "name=value" (unless
# TYPESET_SILENT is set), and inside the report group that text would end up
# in the report itself.
generate_campaign_report() {
  # Guard: idempotent — only generate once per campaign run
  if (( CAMPAIGN_REPORT_GENERATED )); then return 0; fi
  CAMPAIGN_REPORT_GENERATED=1

  local final_status="TIMEOUT"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
  fi

  local report_file="$LOGS_DIR/campaign-report.md"

  # AC9: Version existing report before writing new one
  if [[ -f "$report_file" ]]; then
    local v=1
    while [[ -f "${report_file%.md}-v${v}.md" ]]; do (( v++ )); done
    mv "$report_file" "${report_file%.md}-v${v}.md"
  fi

  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))

  # Diff against the campaign baseline when known, else against HEAD.
  local baseline_commit_val="${BASELINE_COMMIT:-none}"
  local files_changed=""
  if [[ "$baseline_commit_val" != "none" ]]; then
    files_changed=$(git -C "$ROOT" diff --stat "${baseline_commit_val}" 2>/dev/null || echo "(git diff unavailable)")
  elif git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    files_changed=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(git diff unavailable)")
  else
    files_changed="(no commits in repo — cannot diff)"
  fi
  # Include untracked new files
  local untracked
  untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$untracked" ]]; then
    files_changed="${files_changed}"$'\n\n'"Untracked new files:"$'\n'"${untracked}"
  fi

  local sv_summary=""
  if (( WITH_SELF_VERIFICATION )); then
    local sv_report
    # Most recently modified SV report, if any.
    sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
    if [[ -n "$sv_report" ]]; then
      sv_summary="See: $sv_report"
    else
      sv_summary="SV report generation pending — will be appended after this report."
    fi
  else
    sv_summary="N/A — --with-self-verification not enabled"
  fi

  # Locals used inside the report group (declared here, see header note).
  local prd_file="$DESK/plans/prd-$SLUG.md"
  local ri iter_dc us_id
  local fi_found=0 fi_i fix_f
  local total_tokens=0 cost_line tokens

  {
    echo "# Campaign Report: $SLUG"
    echo ""
    echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) | Status: $final_status | Iterations: $ITERATION"
    echo ""
    echo "## Objective"
    if [[ -f "$prd_file" ]]; then
      # Up to three lines following the "## Objective" heading of the PRD.
      grep '^## Objective' -A3 "$prd_file" 2>/dev/null | tail -n +2 | head -3
    else
      echo "(PRD not found)"
    fi
    echo ""
    echo "## Execution Summary"
    echo "- Terminal state: $final_status"
    echo "- Iterations run: $ITERATION / $MAX_ITER"
    echo "- Elapsed: ${elapsed}s"
    echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
    echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
    echo "- Consensus: verify_consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE final_consensus=$FINAL_CONSENSUS"
    echo ""
    echo "## US Status"
    echo "- Verified: ${VERIFIED_US:-none}"
    echo "- Consecutive failures at end: $CONSECUTIVE_FAILURES"
    echo ""
    echo "## Verification Results"
    for (( ri = 1; ri <= ITERATION; ri++ )); do
      iter_dc="$LOGS_DIR/iter-$(printf '%03d' "$ri")-done-claim.json"
      if [[ -f "$iter_dc" ]]; then
        us_id=$(jq -r '.us_id // "unknown"' "$iter_dc" 2>/dev/null)
        echo "- $(basename "$iter_dc"): us_id=$us_id"
      fi
    done
    echo ""
    echo "## Issues Encountered"
    for (( fi_i = 1; fi_i <= ITERATION; fi_i++ )); do
      fix_f="$LOGS_DIR/iter-$(printf '%03d' "$fi_i").fix-contract.md"
      if [[ -f "$fix_f" ]]; then
        echo "- $(basename "$fix_f")"
        fi_found=1
      fi
    done
    (( fi_found == 0 )) && echo "- None"
    echo ""
    echo "## Cost & Performance"
    if [[ -f "$COST_LOG" ]]; then
      while IFS= read -r cost_line; do
        tokens=$(printf '%s\n' "$cost_line" | jq -r '.estimated_tokens // 0' 2>/dev/null || echo 0)
        # jq prints nothing for a blank line; count that as zero instead of
        # feeding an empty operand to the arithmetic expansion.
        total_tokens=$(( total_tokens + ${tokens:-0} ))
      done < "$COST_LOG"
      echo "- Total estimated tokens: $total_tokens (source: estimated, tmux mode)"
      echo "- See: cost-log.jsonl for per-iteration breakdown"
    else
      echo "- No cost data available"
    fi
    echo ""
    echo "## SV Summary"
    echo "$sv_summary"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$files_changed"
    echo '```'
    echo "Note: Files Changed may include pre-existing uncommitted changes if the campaign started in a dirty worktree."
    echo ""
    echo "## Suggested Next Actions"
    if [[ "$final_status" == "COMPLETE" ]]; then
      echo "- Review verified US list and plan next feature campaign or next cycle"
      echo "- Consider re-run with --mode improve for quality refinement"
      echo "- Archive campaign artifacts and update project documentation"
    elif [[ "$final_status" == "BLOCKED" ]]; then
      echo "- Review PRD acceptance criteria for the failing US"
      echo "- Check circuit breaker history (consecutive failures: $CONSECUTIVE_FAILURES)"
      echo "- Consider relaxing verifier criteria if false-negative pattern detected"
    elif [[ "$final_status" == "TIMEOUT" ]]; then
      echo "- Increase --max-iter to allow more iterations for completion"
      echo "- Reduce scope by splitting remaining US into a follow-up campaign"
      echo "- Review last iteration done-claim for partial progress"
    fi
  } | atomic_write "$report_file"

  log "Campaign report written: $report_file"
}
536
+
537
# generate_sv_report — ask the claude CLI for a self-verification report.
# Globals read:    WITH_SELF_VERIFICATION, LOGS_DIR, _SV_TIMEOUT_SECS
#                  (seconds before the CLI is killed; 300 when unset)
# Globals written: SV_REPORT_GENERATED
# Writes $LOGS_DIR/self-verification-report-NNN.md (first unused NNN) and
# appends one status line to $LOGS_DIR/campaign-report.md: a "See:" reference
# on success, or a failure / TIMEOUT note otherwise.
# Returns: always 0 — a failed report never fails the campaign.
generate_sv_report() {
  # AC1-boundary: run at most once per campaign
  if (( SV_REPORT_GENERATED )); then return 0; fi

  # AC3-negative: nothing to do unless self-verification was requested
  if (( ! WITH_SELF_VERIFICATION )); then return 0; fi

  SV_REPORT_GENERATED=1

  local campaign_report="$LOGS_DIR/campaign-report.md"

  # AC4: check claude CLI availability — graceful degradation, not exit 1
  if ! command -v claude &>/dev/null; then
    echo "SV report generation failed: claude CLI not found" >> "$campaign_report"
    return 0
  fi

  # AC2: versioning — find next available sv_version slot (in logs dir)
  local sv_version=1
  while [[ -f "$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md" ]]; do
    (( sv_version++ ))
  done
  local sv_report_file="$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md"

  log "Generating SV report: $(basename "$sv_report_file")"

  # AC5: configurable timeout with in-process watchdog
  local _sv_timeout_secs="${_SV_TIMEOUT_SECS:-300}"
  local _sv_timeout_file="$LOGS_DIR/.sv_timeout_${$}.tmp"
  rm -f "$_sv_timeout_file"

  # Spawn claude CLI in background — write to sv_report_file
  claude --print "Analyze campaign artifacts in $LOGS_DIR and generate a self-verification report with sections: 1. Automated Validation Summary, 2. Failure Deep Dive, 3. Worker Process Quality, 4. Verifier Judgment Quality, 5. AC Lifecycle, 6. Test-Spec Adherence, 7. Patterns: Strengths & Weaknesses, 8. Recommendations for Next Cycle, 9. Cost & Performance, 10. Blind Spots." \
    > "$sv_report_file" 2>/dev/null &
  local _sv_pid=$!

  # AC5: watchdog — signals timeout file THEN kills _sv_pid after _sv_timeout_secs.
  # The sleep runs as a child that the TERM trap kills, so cancelling the
  # watchdog does not leave a sleep process behind for the rest of the timeout.
  local _sv_watchdog _sv_sleep_pid=""
  (
    trap 'kill "$_sv_sleep_pid" 2>/dev/null; exit 0' TERM
    sleep "$_sv_timeout_secs" &
    _sv_sleep_pid=$!
    wait "$_sv_sleep_pid"
    if kill -0 "$_sv_pid" 2>/dev/null; then
      touch "$_sv_timeout_file"
      kill "$_sv_pid" 2>/dev/null
    fi
  ) &
  _sv_watchdog=$!

  local _sv_exit=0
  wait "$_sv_pid"
  _sv_exit=$?
  kill "$_sv_watchdog" 2>/dev/null
  wait "$_sv_watchdog" 2>/dev/null

  # AC5: detect timeout — exit code 124 or watchdog file present
  if [[ "$_sv_exit" == 124 ]] || [[ -f "$_sv_timeout_file" ]]; then
    rm -f "$_sv_timeout_file"
    local _timeout_msg="SV report generation TIMEOUT: exceeded ${_sv_timeout_secs}s"
    echo "$_timeout_msg" >> "$sv_report_file"
    echo "$_timeout_msg" >> "$campaign_report"
    log "$_timeout_msg"
    return 0
  fi

  # A non-zero exit that is not a timeout is a failed run: say so instead of
  # pointing readers at a report that may be empty or partial.
  if (( _sv_exit != 0 )); then
    local _fail_msg="SV report generation failed: claude exited with status ${_sv_exit}"
    echo "$_fail_msg" >> "$campaign_report"
    log "$_fail_msg"
    return 0
  fi

  # On success: append reference to campaign-report (full path, cross-directory)
  echo "See: $sv_report_file" >> "$campaign_report"
  log "SV report written: $sv_report_file"
  return 0
}
604
+
605
+ # =============================================================================
606
+ # Sentinel Writers
607
+ # =============================================================================
608
+
609
+ # --- governance.md s7: Only the Leader writes sentinels ---
610
# write_complete_sentinel — write the COMPLETE sentinel file.
# Usage: write_complete_sentinel <summary>
# Globals read: ITERATION, COMPLETE_SENTINEL
# The file holds a heading, the finishing iteration, the summary text and a
# UTC timestamp; it is written through atomic_write and then logged.
write_complete_sentinel() {
  local summary="$1"
  local nl=$'\n'
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  local body="# Campaign Complete${nl}${nl}Completed at iteration $ITERATION.${nl}$summary${nl}${nl}Timestamp: ${stamp}"

  echo "$body" | atomic_write "$COMPLETE_SENTINEL"
  log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
}
620
+
621
# write_blocked_sentinel — write the BLOCKED sentinel file.
# Usage: write_blocked_sentinel <reason>
# Globals read: ITERATION, BLOCKED_SENTINEL
# The file holds a heading, the blocking iteration, the reason and a UTC
# timestamp; it is written through atomic_write and then logged.
write_blocked_sentinel() {
  local reason="$1"
  local nl=$'\n'
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  local body="# Campaign Blocked${nl}${nl}Blocked at iteration $ITERATION.${nl}Reason: $reason${nl}${nl}Timestamp: ${stamp}"

  echo "$body" | atomic_write "$BLOCKED_SENTINEL"
  log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
}
631
+
632
+ # =============================================================================
633
+ # PRD Tracking
634
+ # =============================================================================
635
+
636
+ # --- US-004: Live PRD update helpers ---
637
+ compute_prd_hash() {
638
+ local prd_file="${PRD_FILE:-}"
639
+ if [[ -z "$prd_file" && -n "${DESK:-}" && -n "${SLUG:-}" ]]; then
640
+ prd_file="$DESK/plans/prd-$SLUG.md"
641
+ fi
642
+ if [[ -f "$prd_file" ]]; then
643
+ md5 -q "$prd_file" 2>/dev/null || md5sum "$prd_file" 2>/dev/null | cut -d' ' -f1
644
+ else
645
+ echo ""
646
+ fi
647
+ }
648
+
649
# count_prd_us — list the user-story ids declared in the PRD.
# Globals read: PRD_FILE, or DESK and SLUG ($DESK/plans/prd-$SLUG.md is used
#               when PRD_FILE is empty and both are set)
# Output: sorted, de-duplicated ids taken from "### US-<digits>" headings,
#         joined by commas (e.g. "US-001,US-002"); an empty line when the
#         file does not exist.
count_prd_us() {
  local prd_file="${PRD_FILE:-}"
  if [[ -z "$prd_file" ]] && [[ -n "${DESK:-}" ]] && [[ -n "${SLUG:-}" ]]; then
    prd_file="$DESK/plans/prd-$SLUG.md"
  fi

  if [[ ! -f "$prd_file" ]]; then
    echo ""
    return
  fi

  # Match headings, drop the "### " prefix, de-duplicate, then comma-join.
  grep -oE '^### US-[0-9]+' "$prd_file" 2>/dev/null \
    | sed 's/^### //' \
    | sort -u \
    | awk 'NR > 1 { printf "," } { printf "%s", $0 }'
}
660
+
661
# split_prd_by_us — split a PRD into one file per user story.
# Usage: split_prd_by_us <prd_file> <slug>
# Every "### US-<digits>:" heading starts a new section that is written,
# heading included, to <dir of prd_file>/prd-<slug>-US-<digits>.md. Lines
# before the first such heading are not copied anywhere.
# Returns 0 without writing when the file is missing or has no "### US-" line.
split_prd_by_us() {
  local prd_file="$1" slug="$2"
  local plans_dir
  plans_dir="$(dirname "$prd_file")"

  if [[ ! -f "$prd_file" ]]; then
    return 0
  fi

  # grep -c prints the count but exits non-zero on zero matches or errors.
  local heading_total
  heading_total=$(grep -cE '^### US-' "$prd_file" 2>/dev/null) || heading_total=0
  if (( heading_total == 0 )); then
    return 0
  fi

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^### US-[0-9]+:/ {
      # New story: finish the previous output file and switch targets.
      if (target != "") close(target)
      match($0, /US-[0-9]+/)
      target = dir "/prd-" slug "-" substr($0, RSTART, RLENGTH) ".md"
    }
    target != "" { print > target }
  ' "$prd_file"
}
685
+
686
# split_test_spec_by_us — split a test spec into one file per user story,
# each prefixed with the spec's shared header.
# Usage: split_test_spec_by_us <ts_file> <slug>
# The header is every line before the first "## US-<digits>:" heading. Each
# such heading starts a section written to
# <dir of ts_file>/test-spec-<slug>-US-<digits>.md as header + section.
# Returns 0 without writing when the file is missing or has no "## US-" line.
#
# Header and sections are emitted in a single awk pass, so only files
# produced by this run are touched: per-US files left over from an earlier
# run are not given another copy of the header, no temporary file is needed,
# and no shell glob is involved (an unmatched glob aborts under zsh's
# default NOMATCH option).
split_test_spec_by_us() {
  local ts_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$ts_file")"

  [[ -f "$ts_file" ]] || return 0

  local us_count
  us_count=$(grep -cE '^## US-' "$ts_file" 2>/dev/null) || us_count=0
  if (( us_count == 0 )); then
    return 0
  fi

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^## US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      out = dir "/test-spec-" slug "-" substr($0, RSTART, RLENGTH) ".md"
      # Start every per-US file with the shared header.
      printf "%s", header > out
    }
    # Still before the first US heading: keep collecting header lines.
    out == "" { header = header $0 "\n"; next }
    { print > out }
  ' "$ts_file"
}
720
+
721
# check_prd_update — detect a changed PRD and refresh the per-US split files.
# Globals read:    PREV_PRD_HASH, PREV_PRD_US_LIST, PRD_FILE, TEST_SPEC_FILE, SLUG
# Globals written: _PRD_CHANGED (1 when the hash differs from PREV_PRD_HASH,
#                  else 0), US_LIST (only on change), PREV_PRD_HASH,
#                  PREV_PRD_US_LIST
# On change the PRD and the test spec are re-split per user story and the
# ids that were not in the previous list are reported in the debug log.
check_prd_update() {
  local current_hash current_us_list us_count_prev us_count_now new_us
  current_hash=$(compute_prd_hash)
  current_us_list=$(count_prd_us)

  # grep -c already prints "0" when nothing matches but exits non-zero; an
  # "|| echo 0" fallback would append a second "0" on its own line.
  us_count_prev=$(printf '%s\n' "$PREV_PRD_US_LIST" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
  us_count_now=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
  us_count_prev="${us_count_prev:-0}"
  us_count_now="${us_count_now:-0}"

  _PRD_CHANGED=0

  if [[ "$current_hash" != "$PREV_PRD_HASH" ]]; then
    _PRD_CHANGED=1
    # Ids present now but absent from the previous list, comma-joined.
    new_us=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | awk -v prev="$PREV_PRD_US_LIST" '
      BEGIN {
        split(prev, p, ",")
        for (i in p) {
          seen[p[i]] = 1
        }
      }
      {
        if ($0 != "" && !seen[$0]) {
          if (out == "") out = $0
          else out = out "," $0
        }
      }
      END { print out }
    ')
    log_debug "prd_changed=true prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now} new_us=${new_us:-none}"
    split_prd_by_us "$PRD_FILE" "$SLUG"
    split_test_spec_by_us "$TEST_SPEC_FILE" "$SLUG"
    US_LIST="$current_us_list"
  else
    log_debug "prd_changed=false prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now}"
  fi

  PREV_PRD_HASH="$current_hash"
  PREV_PRD_US_LIST="$current_us_list"
}
758
+
759
+ # =============================================================================
760
+ # Circuit Breakers: Stale Context Detection
761
+ # =============================================================================
762
+
763
+ # --- governance.md s7 step 8: Stale context detection ---
764
# compute_context_hash — digest of the campaign's working context.
# Globals read: CONTEXT_FILE, DESK, SLUG, STATUS_FILE
# Concatenates, for each input that exists: the MD5 of $CONTEXT_FILE, the MD5
# of $DESK/memos/$SLUG-memory.md, and the comma-joined .verified_us array of
# $STATUS_FILE; then prints the MD5 of that string.
# Including memory and verified_us prevents false stale detection when the
# Worker updates memory but not context, or when verified_us changes.
compute_context_hash() {
  local digest_src=""
  local memory_file="$DESK/memos/${SLUG}-memory.md"

  local candidate
  for candidate in "$CONTEXT_FILE" "$memory_file"; do
    [[ -f "$candidate" ]] || continue
    digest_src+=$(md5 -q "$candidate" 2>/dev/null || md5sum "$candidate" 2>/dev/null | cut -d' ' -f1)
  done

  if [[ -f "$STATUS_FILE" ]]; then
    digest_src+=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
  fi

  # md5 -q where available, md5sum otherwise.
  if ! echo -n "$digest_src" | md5 -q 2>/dev/null; then
    echo -n "$digest_src" | md5sum 2>/dev/null | cut -d' ' -f1
  fi
}
781
+
782
# check_stale_context — circuit breaker for iterations that change nothing.
# Globals read/written: PREV_CONTEXT_HASH, STALE_CONTEXT_COUNT
# Compares the current context hash with the previous one. An unchanged hash
# increments the stale counter and logs a warning; a changed hash resets it.
# Returns: 1 once the hash has been unchanged for 3 consecutive checks
#          (PREV_CONTEXT_HASH is not rewritten in that case), otherwise 0.
check_stale_context() {
  local current_hash
  current_hash=$(compute_context_hash)

  if [[ "$current_hash" != "$PREV_CONTEXT_HASH" ]]; then
    STALE_CONTEXT_COUNT=0
  else
    (( STALE_CONTEXT_COUNT++ ))
    log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
    if (( STALE_CONTEXT_COUNT >= 3 )); then
      log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
      return 1
    fi
  fi

  PREV_CONTEXT_HASH="$current_hash"
  return 0
}
800
+
801
+ # =============================================================================
802
+ # Error Detection
803
+ # =============================================================================
804
+
805
# --- US-003: API error detector using tmux pane buffer ---
#
# is_api_error — check a tmux pane's visible text for API failure signs.
# Usage:   is_api_error <pane_id>
# Returns: 0 when the captured text contains a standalone "500" or "529"
#          (not adjacent to another digit), or, case-insensitively,
#          "overloaded", "too many requests" or "service unavailable";
#          1 otherwise, including when nothing could be captured.
is_api_error() {
  local pane_id="$1"
  local pane_output
  pane_output=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)

  [[ -n "$pane_output" ]] || return 1

  # One alternation covering every signature the detector looks for.
  local signature='(^|[^[:digit:]])(500|529)([^[:digit:]]|$)|overloaded|too many requests|service unavailable'
  if echo "$pane_output" | grep -qiE "$signature"; then
    return 0
  fi
  return 1
}
823
+
824
+ # =============================================================================
825
+ # Security Warning
826
+ # =============================================================================
827
+
828
# print_security_warning — print the banner shown when the claude CLI runs
# with --dangerously-skip-permissions. Output goes to stdout and is framed by
# one blank line above and below.
print_security_warning() {
  cat <<'BANNER'

================================================================
 WARNING: Running with --dangerously-skip-permissions

 The claude CLI will execute tools (file writes, shell commands)
 without asking for confirmation. Only run this on code you
 trust in an environment you control.
================================================================

BANNER
}