@ai-dev-methodologies/rlp-desk 0.3.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,837 @@
1
+ # lib_ralph_desk.zsh — Shared business logic for RLP Desk runner
2
+ # SOURCED by run_ralph_desk.zsh. Do NOT execute directly.
3
+ #
4
+ # IMPORTANT: Must be sourced at file scope, not inside a function.
5
+ # typeset -A creates local arrays inside functions, breaking global state.
6
+ # Functions in this file read/write globals defined by the sourcing script.
7
+
8
# Load-time guard. A non-empty second funcstack entry is treated as proof that
# this file is not being sourced at file scope; in that case abort the whole
# shell, because the library relies on globals owned by the sourcing script.
[[ -z "${funcstack[2]:-}" ]] || {
  echo "FATAL: lib_ralph_desk.zsh must be sourced at file scope" >&2
  exit 1
}
12
+
13
+ # =============================================================================
14
+ # Utility Functions
15
+ # =============================================================================
16
+
17
# log — print a timestamped message to stdout.
# Arguments: all arguments are joined with spaces and used as the message.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] $*"
}
20
+
21
# log_debug — append a timestamped DEBUG line to $DEBUG_LOG.
# Does nothing unless the DEBUG global evaluates to non-zero.
# The log's parent directory is created on demand (errors ignored).
log_debug() {
  (( DEBUG )) || return 0

  local log_dir stamp
  log_dir=$(dirname "$DEBUG_LOG")
  mkdir -p "$log_dir" 2>/dev/null
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] DEBUG: $*" >> "$DEBUG_LOG"
}
27
+
28
# log_error — print a timestamped ERROR message to stderr.
# Arguments: all arguments are joined with spaces and used as the message.
log_error() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] ERROR: $*" >&2
}
31
+
32
# parse_model_flag() — parse unified --worker-model / --verifier-model value
# Colon format (model:reasoning) → codex engine; plain name → claude engine.
# Spark alias: any codex model name containing "spark" is normalized to "spark".
# Usage: parse_model_flag <value> <role>
#   <role> is only used in the error message and defaults to "worker".
# Output (stdout): "engine model [reasoning]"  e.g. "codex gpt-5.4 medium" | "claude sonnet"
# Returns: 0 on success, 1 on invalid format (error written to stderr).
#   Invalid means: more than one colon, or a colon with an empty model or
#   empty reasoning part (e.g. "gpt-5.4:" or ":high").
parse_model_flag() {
  local value="$1"
  local role="${2:-worker}"

  # Count colons without forking: delete every non-colon character.
  local colons="${value//[^:]/}"
  local colon_count=${#colons}

  if (( colon_count == 0 )); then
    echo "claude $value"
    return 0
  fi

  local model="${value%%:*}"
  local reasoning="${value##*:}"

  # Reject "a:b:c" as well as a colon form that is missing either half;
  # emitting "codex  high" or "codex gpt-5.4 " would break word-based parsing
  # of the output.
  if (( colon_count > 1 )) || [[ -z "$model" || -z "$reasoning" ]]; then
    echo "ERROR: Invalid --${role}-model format '${value}'. Use 'model:reasoning' (codex) or 'model-name' (claude)." >&2
    return 1
  fi

  if [[ "$model" == *"spark"* ]]; then
    model="spark"
  fi
  echo "codex $model $reasoning"
}
58
+
59
# get_model_string() — build the model identifier used for upgrade lookups.
#   codex engine with a reasoning level → "model:reasoning" (e.g. "gpt-5.4:high")
#   anything else                       → the bare model name (e.g. "sonnet")
# Args:   $1=engine  $2=model  $3=reasoning (optional)
# Output: identifier on stdout
get_model_string() {
  local engine="$1"
  local model="$2"
  local reasoning="${3:-}"

  local identifier="$model"
  if [[ -n "$reasoning" ]] && [[ "$engine" = "codex" ]]; then
    identifier="${model}:${reasoning}"
  fi
  echo "$identifier"
}
75
+
76
# get_next_model() — next step on the Worker upgrade path.
# Usage:  get_next_model <model_str>
#   claude: haiku → sonnet → opus (ceiling)
#   codex:  for the families spark, gpt-5.3-codex-spark, gpt-5.3-codex and
#           gpt-5.4 the reasoning level climbs low → medium → high → xhigh
#           (ceiling) while the family name stays the same.
# Output: the next model string, or an empty line when the input is already at
#         its ceiling or is not a recognised model string.
get_next_model() {
  local current="$1"
  local family level
  local successor=""

  case "$current" in
    haiku)  successor="sonnet" ;;
    sonnet) successor="opus" ;;
    spark:*|gpt-5.3-codex-spark:*|gpt-5.3-codex:*|gpt-5.4:*)
      family="${current%%:*}"
      level="${current#*:}"
      # Any other suffix (xhigh, empty, or something containing a further
      # colon) leaves successor empty, i.e. it is treated as the ceiling.
      case "$level" in
        low)    successor="${family}:medium" ;;
        medium) successor="${family}:high" ;;
        high)   successor="${family}:xhigh" ;;
      esac
      ;;
  esac

  echo "$successor"
}
111
+
112
# check_model_upgrade() — evaluate and apply Worker model upgrade on repeated same-US failure
# Called in the fail verdict path. Upgrades the Worker model when the same US
# fails >= 2 consecutive times. Respects LOCK_WORKER_MODEL. Never modifies
# VERIFIER_MODEL.
# Usage: check_model_upgrade <us_id>
# Globals read:    WORKER_ENGINE, LOCK_WORKER_MODEL, ITERATION
# Globals written: _LAST_FAILED_US, _SAME_US_FAIL_COUNT, _MODEL_UPGRADED,
#                  _ORIGINAL_WORKER_MODEL, _ORIGINAL_WORKER_CODEX_REASONING,
#                  WORKER_MODEL, WORKER_CODEX_MODEL, WORKER_CODEX_REASONING
# Returns: always 0.
check_model_upgrade() {
  local current_us="$1"

  # Track consecutive failures on the same US. Plain assignment is used rather
  # than (( n++ )), which returns status 1 when the old value is 0.
  if [[ "$current_us" = "$_LAST_FAILED_US" ]]; then
    _SAME_US_FAIL_COUNT=$(( _SAME_US_FAIL_COUNT + 1 ))
  else
    _SAME_US_FAIL_COUNT=1
    _LAST_FAILED_US="$current_us"
  fi

  # Respect --lock-worker-model: no upgrade; CB threshold handles BLOCKED
  if (( LOCK_WORKER_MODEL )); then
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=locked"
    return 0
  fi

  # Upgrade only when the same US has failed >= 2 consecutive times
  if (( _SAME_US_FAIL_COUNT < 2 )); then
    return 0
  fi

  # Pick the model name that belongs to the active engine. The codex model must
  # not shadow WORKER_MODEL for a claude worker: a populated WORKER_CODEX_MODEL
  # would otherwise be looked up as a claude model, be reported as "unknown →
  # ceiling", and silently disable every claude upgrade.
  local active_model="$WORKER_MODEL"
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    active_model="${WORKER_CODEX_MODEL:-$WORKER_MODEL}"
  fi

  local current_model_str
  current_model_str=$(get_model_string "$WORKER_ENGINE" "$active_model" "${WORKER_CODEX_REASONING:-}")

  local next_model
  next_model=$(get_next_model "$current_model_str")

  if [[ -z "$next_model" ]]; then
    # Already at ceiling — CB threshold will trigger BLOCKED with escalation message
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=already_max current=$current_model_str"
    return 0
  fi

  # Save original model on first upgrade only
  if (( _MODEL_UPGRADED == 0 )); then
    _ORIGINAL_WORKER_MODEL="$WORKER_MODEL"
    _ORIGINAL_WORKER_CODEX_REASONING="$WORKER_CODEX_REASONING"
  fi
  _MODEL_UPGRADED=1

  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    WORKER_CODEX_MODEL="${next_model%%:*}"
    WORKER_CODEX_REASONING="${next_model##*:}"
    WORKER_MODEL="$WORKER_CODEX_MODEL"
  else
    WORKER_MODEL="$next_model"
  fi

  # Log the step actually taken, as full model strings. For codex the model
  # name often stays the same and only the reasoning level changes, so logging
  # bare model names would read "gpt-5.4 → gpt-5.4".
  log " Worker model upgraded: ${current_model_str} → ${next_model} (same-US consecutive fail threshold)"
  log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${current_model_str} to=${next_model}"
  _SAME_US_FAIL_COUNT=0 # Reset counter after upgrade

  return 0
}
169
+
170
# --- governance.md s7: Atomic file writes (tmux pattern) ---
# All file writes by the Leader use tmp+mv to prevent corruption.
#
# atomic_write <target>
#   Copies stdin into "<target>.tmp.<pid>" and renames it over <target>.
#   The rename only happens when stdin was written completely; on any failure
#   the temp file is removed and <target> is left untouched.
# Returns: 0 on success, 1 if the write or the rename failed.
atomic_write() {
  local target="$1"
  local tmp="${target}.tmp.$$"

  if cat > "$tmp" && mv "$tmp" "$target"; then
    return 0
  fi

  # Do not leave a partial temp file behind next to the target.
  rm -f "$tmp" 2>/dev/null
  return 1
}
178
+
179
+ # =============================================================================
180
+ # Scaffold Validation
181
+ # =============================================================================
182
+
183
# validate_scaffold — check that the campaign scaffold files exist.
# Checks, in order: $WORKER_PROMPT_BASE, $VERIFIER_PROMPT_BASE, $CONTEXT_FILE
# and $MEMORY_FILE. Every missing file is reported through log_error; if any
# is missing the shell exits with status 1. Otherwise $LOGS_DIR is created.
validate_scaffold() {
  local missing=0
  local entry label required_file

  # Each entry is "<label>|<file>"; the label contains no "|", so splitting on
  # the first "|" is safe even if the file name contains one.
  for entry in \
    "Worker prompt|$WORKER_PROMPT_BASE" \
    "Verifier prompt|$VERIFIER_PROMPT_BASE" \
    "Context file|$CONTEXT_FILE" \
    "Memory file|$MEMORY_FILE"; do
    label="${entry%%|*}"
    required_file="${entry#*|}"
    if [[ ! -f "$required_file" ]]; then
      log_error "${label} not found: ${required_file}"
      missing=1
    fi
  done

  if (( missing )); then
    log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
    exit 1
  fi

  mkdir -p "$LOGS_DIR"
}
213
+
214
+ # =============================================================================
215
+ # Status Updates
216
+ # =============================================================================
217
+
218
# --- governance.md s7 step 8: Update status.json ---
# update_status <phase> <last_result>
#   Serialises the campaign globals into $STATUS_FILE via atomic_write.
#   The document is built with jq so every string value is escaped correctly
#   (quotes, backslashes and newlines can no longer produce invalid JSON).
#   - Numeric fields (iteration, max_iter, verify_consensus,
#     consecutive_failures, consensus_round) are emitted as JSON numbers;
#     a value that is not a number is written as 0.
#   - verified_us is the comma-separated $VERIFIED_US as an array of strings.
#   - consensus_scope / consensus_round / claude_verdict / codex_verdict are
#     only present when VERIFY_CONSENSUS is "1".
# Returns: 1 (leaving $STATUS_FILE untouched) if jq fails; otherwise the
#          status of atomic_write.
update_status() {
  local phase="$1"
  local last_result="$2"
  local status_json

  # Render first and write second, so a jq failure cannot replace the existing
  # status file with an empty one.
  status_json=$(jq -n \
    --arg slug "$SLUG" \
    --arg baseline_commit "${BASELINE_COMMIT:-none}" \
    --arg iteration "$ITERATION" \
    --arg max_iter "$MAX_ITER" \
    --arg phase "$phase" \
    --arg worker_model "$WORKER_MODEL" \
    --arg verifier_model "$VERIFIER_MODEL" \
    --arg worker_engine "$WORKER_ENGINE" \
    --arg verifier_engine "$VERIFIER_ENGINE" \
    --arg worker_codex_model "$WORKER_CODEX_MODEL" \
    --arg worker_codex_reasoning "$WORKER_CODEX_REASONING" \
    --arg verifier_codex_model "$VERIFIER_CODEX_MODEL" \
    --arg verifier_codex_reasoning "$VERIFIER_CODEX_REASONING" \
    --arg verify_mode "$VERIFY_MODE" \
    --arg verify_consensus "$VERIFY_CONSENSUS" \
    --arg last_result "$last_result" \
    --arg consecutive_failures "$CONSECUTIVE_FAILURES" \
    --arg verified_us "$VERIFIED_US" \
    --arg consensus_scope "${CONSENSUS_SCOPE:-}" \
    --arg consensus_round "${CONSENSUS_ROUND:-}" \
    --arg claude_verdict "${CLAUDE_VERDICT:-}" \
    --arg codex_verdict "${CODEX_VERDICT:-}" \
    --arg updated_at_utc "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '
    def as_number: (tonumber? // 0);
    {
      slug: $slug,
      baseline_commit: $baseline_commit,
      iteration: ($iteration | as_number),
      max_iter: ($max_iter | as_number),
      phase: $phase,
      worker_model: $worker_model,
      verifier_model: $verifier_model,
      worker_engine: $worker_engine,
      verifier_engine: $verifier_engine,
      worker_codex_model: $worker_codex_model,
      worker_codex_reasoning: $worker_codex_reasoning,
      verifier_codex_model: $verifier_codex_model,
      verifier_codex_reasoning: $verifier_codex_reasoning,
      verify_mode: $verify_mode,
      verify_consensus: ($verify_consensus | as_number),
      last_result: $last_result,
      consecutive_failures: ($consecutive_failures | as_number),
      verified_us: (if $verified_us == "" then [] else ($verified_us | split(",")) end)
    }
    + (if $verify_consensus == "1" then
        {
          consensus_scope: $consensus_scope,
          consensus_round: ($consensus_round | as_number),
          claude_verdict: $claude_verdict,
          codex_verdict: $codex_verdict
        }
      else {} end)
    + { updated_at_utc: $updated_at_utc }
    ') || {
    log_error "update_status: could not render status JSON; $STATUS_FILE left unchanged"
    return 1
  }

  # printf, not echo: zsh echo would interpret the backslash escapes in the JSON.
  printf '%s\n' "$status_json" | atomic_write "$STATUS_FILE"
}
261
+
262
# --- governance.md s7 step 8: Write result log ---
# write_result_log <iter> <result>
#   Writes $LOGS_DIR/iter-NNN.result.md (NNN = zero-padded iteration) holding
#   the result, `git diff --stat HEAD` for $ROOT, up to 20 untracked files and
#   a UTC timestamp. The file is written through atomic_write.
write_result_log() {
  local iter="$1"
  local result="$2"
  local iter_tag
  iter_tag=$(printf '%03d' $iter)
  local result_file="$LOGS_DIR/iter-${iter_tag}.result.md"

  # Change summary: diff against HEAD when the repo has at least one commit.
  local change_summary=""
  if ! git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    change_summary="(no commits in repo — cannot diff)"
  else
    change_summary=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(no git diff available)")
  fi

  # Include untracked new files in result log
  local new_files
  new_files=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$new_files" ]]; then
    change_summary+=$'\n\n'"Untracked new files:"$'\n'"${new_files}"
  fi

  {
    printf '%s\n' "# Iteration $iter Result" "" "## Status"
    echo "$result [leader-measured]"
    printf '%s\n' "" "## Files Changed" '```'
    echo "$change_summary"
    printf '%s\n' '```' "[git-measured]" "" "## Timestamp"
    date -u +%Y-%m-%dT%H:%M:%SZ
  } | atomic_write "$result_file"
}
300
+
301
# --- step 7d: Archive iteration artifacts (done-claim + verdict) to logs/ ---
# archive_iter_artifacts <iter>
#   Copies $DONE_CLAIM_FILE and $VERDICT_FILE, when they exist, to
#   $LOGS_DIR/iter-NNN-done-claim.json and $LOGS_DIR/iter-NNN-verify-verdict.json
#   (NNN = zero-padded iteration). Copy errors are not reported.
archive_iter_artifacts() {
  local iter="$1"
  local tag
  tag=$(printf '%03d' "$iter")

  # Each entry is "<source file>|<archive suffix>"; split on the last "|".
  local entry artifact suffix
  for entry in "$DONE_CLAIM_FILE|done-claim" "$VERDICT_FILE|verify-verdict"; do
    artifact="${entry%|*}"
    suffix="${entry##*|}"
    if [[ -f "$artifact" ]]; then
      cp "$artifact" "$LOGS_DIR/iter-${tag}-${suffix}.json" 2>/dev/null
    fi
  done
}
313
+
314
# _rlp_epoch_to_utc <epoch-seconds>
#   Prints the epoch as an ISO-8601 UTC timestamp. Tries the BSD form
#   (`date -r <seconds>`) first and falls back to the GNU form
#   (`date -d @<seconds>`); prints an empty line if neither works.
_rlp_epoch_to_utc() {
  local epoch="$1"
  date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || date -u -d "@${epoch}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || echo ""
}

# _rlp_file_bytes <file>
#   Prints the size of <file> in bytes, or 0 when it is missing or unreadable.
#   Whitespace is stripped because some wc implementations left-pad the count.
_rlp_file_bytes() {
  local byte_count=0
  if [[ -f "$1" ]]; then
    byte_count=$(wc -c < "$1" 2>/dev/null | tr -d '[:space:]')
  fi
  echo "${byte_count:-0}"
}

# --- AC5: Write per-iteration cost estimate to cost-log.jsonl ---
# write_cost_log <iter>
#   Appends one JSON line to $COST_LOG. The token estimate is the combined byte
#   size of the iteration's worker prompt, $DONE_CLAIM_FILE and $VERDICT_FILE
#   divided by 4. Timing fields come from the ITER_WORKER_* / ITER_VERIFIER_*
#   epoch globals when they are set.
write_cost_log() {
  local iter="$1"
  local iter_padded
  iter_padded=$(printf '%03d' "$iter")

  local prompt_bytes claim_bytes verdict_bytes
  prompt_bytes=$(_rlp_file_bytes "$LOGS_DIR/iter-${iter_padded}.worker-prompt.md")
  claim_bytes=$(_rlp_file_bytes "$DONE_CLAIM_FILE")
  verdict_bytes=$(_rlp_file_bytes "$VERDICT_FILE")

  local estimated_tokens=$(( (prompt_bytes + claim_bytes + verdict_bytes) / 4 ))

  # AC1: per-phase timing fields. A missing end time falls back to the start
  # time, which yields a duration of 0.
  local worker_start_time="" worker_end_time="" worker_duration_s=0
  local verifier_start_time="" verifier_end_time="" verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_start_time=$(_rlp_epoch_to_utc "$ITER_WORKER_START")
    worker_end_time=$(_rlp_epoch_to_utc "${ITER_WORKER_END:-$ITER_WORKER_START}")
    worker_duration_s=$(( ${ITER_WORKER_END:-$ITER_WORKER_START} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_start_time=$(_rlp_epoch_to_utc "$ITER_VERIFIER_START")
    verifier_end_time=$(_rlp_epoch_to_utc "${ITER_VERIFIER_END:-$ITER_VERIFIER_START}")
    verifier_duration_s=$(( ${ITER_VERIFIER_END:-$ITER_VERIFIER_START} - ITER_VERIFIER_START ))
  fi

  # AC2: consensus mode per-engine timing
  local consensus_fields=""
  if [[ -n "${ITER_VERIFIER_CLAUDE_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields},\"verifier_claude_duration_s\":${ITER_VERIFIER_CLAUDE_DURATION_S}"
  fi
  if [[ -n "${ITER_VERIFIER_CODEX_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields},\"verifier_codex_duration_s\":${ITER_VERIFIER_CODEX_DURATION_S}"
  fi

  printf '{"iteration":%s,"estimated_tokens":%s,"token_source":"estimated","prompt_bytes":%s,"claim_bytes":%s,"verdict_bytes":%s,"worker_start_time":"%s","worker_end_time":"%s","worker_duration_s":%s,"verifier_start_time":"%s","verifier_end_time":"%s","verifier_duration_s":%s%s,"timestamp":"%s"}\n' \
    "$iter" "$estimated_tokens" "$prompt_bytes" "$claim_bytes" "$verdict_bytes" \
    "$worker_start_time" "$worker_end_time" "$worker_duration_s" \
    "$verifier_start_time" "$verifier_end_time" "$verifier_duration_s" \
    "$consensus_fields" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$COST_LOG"
}
353
+
354
# --- Analytics: write per-iteration structured data to campaign.jsonl ---
# write_campaign_jsonl <iter> [us_id] [verdict]
#   Appends one compact JSON object to $CAMPAIGN_JSONL. Does nothing unless
#   DEBUG or WITH_SELF_VERIFICATION is non-zero. us_id and verdict default to
#   "unknown"; claude_verdict falls back to <verdict> and codex_verdict to
#   "N/A" when the corresponding global is unset or empty.
write_campaign_jsonl() {
  if ! (( DEBUG || WITH_SELF_VERIFICATION )); then
    return 0
  fi

  local iter="$1"
  local us_id="${2:-unknown}"
  local verdict="${3:-unknown}"

  # Phase durations in seconds; a phase that has not ended yet is measured
  # against the current time.
  local worker_secs=0 verifier_secs=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_secs=$(( ${ITER_WORKER_END:-$(date +%s)} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_secs=$(( ${ITER_VERIFIER_END:-$(date +%s)} - ITER_VERIFIER_START ))
  fi

  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  jq -nc \
    --argjson iter "$iter" \
    --argjson consensus "$VERIFY_CONSENSUS" \
    --argjson duration_worker_s "$worker_secs" \
    --argjson duration_verifier_s "$verifier_secs" \
    --arg us_id "$us_id" \
    --arg worker_model "$WORKER_MODEL" \
    --arg worker_engine "$WORKER_ENGINE" \
    --arg verifier_engine "$VERIFIER_ENGINE" \
    --arg claude_verdict "${CLAUDE_VERDICT:-$verdict}" \
    --arg codex_verdict "${CODEX_VERDICT:-N/A}" \
    --arg project_root "$ROOT" \
    --arg slug "$SLUG" \
    --arg timestamp "$stamp" \
    '{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus: $consensus, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
    >> "$CAMPAIGN_JSONL"
}
387
+
388
# --- AC4: Generate campaign-report.md on all terminal states ---
# generate_campaign_report
#   Writes $LOGS_DIR/campaign-report.md via atomic_write, at most once per run
#   (guarded by CAMPAIGN_REPORT_GENERATED). The terminal state is COMPLETE when
#   $COMPLETE_SENTINEL exists, BLOCKED when $BLOCKED_SENTINEL exists, and
#   TIMEOUT otherwise. A report left by an earlier run is first renamed to
#   campaign-report-vN.md using the first free N.
generate_campaign_report() {
  # Guard: idempotent — only generate once per campaign run
  if (( CAMPAIGN_REPORT_GENERATED )); then return 0; fi
  CAMPAIGN_REPORT_GENERATED=1

  local final_status="TIMEOUT"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
  fi

  local report_file="$LOGS_DIR/campaign-report.md"

  # AC9: Version existing report before writing new one
  if [[ -f "$report_file" ]]; then
    local v=1
    while [[ -f "${report_file%.md}-v${v}.md" ]]; do
      v=$(( v + 1 ))
    done
    mv "$report_file" "${report_file%.md}-v${v}.md"
  fi

  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))

  # Diff against the campaign baseline when one is recorded, else against HEAD.
  local baseline_commit_val="${BASELINE_COMMIT:-none}"
  local files_changed=""
  if [[ "$baseline_commit_val" != "none" ]]; then
    files_changed=$(git -C "$ROOT" diff --stat "${baseline_commit_val}" 2>/dev/null || echo "(git diff unavailable)")
  elif git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    files_changed=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(git diff unavailable)")
  else
    files_changed="(no commits in repo — cannot diff)"
  fi
  # Include untracked new files
  local untracked
  untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$untracked" ]]; then
    files_changed="${files_changed}

Untracked new files:
${untracked}"
  fi

  local sv_summary=""
  if (( WITH_SELF_VERIFICATION )); then
    local sv_report
    sv_report=$(ls -t "$ANALYTICS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
    if [[ -n "$sv_report" ]]; then
      sv_summary="See: $sv_report"
    else
      sv_summary="SV report generation pending — will be appended after this report."
    fi
  else
    sv_summary="N/A — --with-self-verification not enabled"
  fi

  # Every variable used by the report body is declared once, up front. In zsh,
  # re-running a bare `local name` on an already-set local prints "name=value",
  # so declaring inside the loops below would leak such lines into the report.
  local prd_file="$DESK/plans/prd-$SLUG.md"
  local ri iter_dc us_id
  local fi_i fix_f fi_found=0
  local line t total_tokens=0

  {
    echo "# Campaign Report: $SLUG"
    echo ""
    echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) | Status: $final_status | Iterations: $ITERATION"
    echo ""
    echo "## Objective"
    if [[ -f "$prd_file" ]]; then
      grep '^## Objective' -A3 "$prd_file" 2>/dev/null | tail -n +2 | head -3
    else
      echo "(PRD not found)"
    fi
    echo ""
    echo "## Execution Summary"
    echo "- Terminal state: $final_status"
    echo "- Iterations run: $ITERATION / $MAX_ITER"
    echo "- Elapsed: ${elapsed}s"
    echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
    echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
    echo "- Consensus: verify_consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE final_consensus=$FINAL_CONSENSUS"
    echo ""
    echo "## US Status"
    echo "- Verified: ${VERIFIED_US:-none}"
    echo "- Consecutive failures at end: $CONSECUTIVE_FAILURES"
    echo ""
    echo "## Verification Results"
    # One line per archived done-claim, in iteration order.
    for (( ri = 1; ri <= ITERATION; ri++ )); do
      iter_dc="$LOGS_DIR/iter-$(printf '%03d' "$ri")-done-claim.json"
      if [[ -f "$iter_dc" ]]; then
        us_id=$(jq -r '.us_id // "unknown"' "$iter_dc" 2>/dev/null)
        echo "- $(basename "$iter_dc"): us_id=$us_id"
      fi
    done
    echo ""
    echo "## Issues Encountered"
    # A fix-contract file for an iteration is listed as an issue.
    for (( fi_i = 1; fi_i <= ITERATION; fi_i++ )); do
      fix_f="$LOGS_DIR/iter-$(printf '%03d' "$fi_i").fix-contract.md"
      if [[ -f "$fix_f" ]]; then
        echo "- $(basename "$fix_f")"
        fi_found=1
      fi
    done
    if (( fi_found == 0 )); then
      echo "- None"
    fi
    echo ""
    echo "## Cost & Performance"
    if [[ -f "$COST_LOG" ]]; then
      while IFS= read -r line; do
        t=$(echo "$line" | jq -r '.estimated_tokens // 0' 2>/dev/null || echo 0)
        # jq prints nothing (and succeeds) for an empty line; count that as 0
        # instead of breaking the arithmetic below.
        total_tokens=$(( total_tokens + ${t:-0} ))
      done < "$COST_LOG"
      echo "- Total estimated tokens: $total_tokens (source: estimated, tmux mode)"
      echo "- See: cost-log.jsonl for per-iteration breakdown"
    else
      echo "- No cost data available"
    fi
    echo ""
    echo "## SV Summary"
    echo "$sv_summary"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$files_changed"
    echo '```'
    echo "Note: Files Changed may include pre-existing uncommitted changes if the campaign started in a dirty worktree."
    echo ""
    echo "## Suggested Next Actions"
    case "$final_status" in
      COMPLETE)
        echo "- Review verified US list and plan next feature campaign or next cycle"
        echo "- Consider re-run with --mode improve for quality refinement"
        echo "- Archive campaign artifacts and update project documentation"
        ;;
      BLOCKED)
        echo "- Review PRD acceptance criteria for the failing US"
        echo "- Check circuit breaker history (consecutive failures: $CONSECUTIVE_FAILURES)"
        echo "- Consider relaxing verifier criteria if false-negative pattern detected"
        ;;
      TIMEOUT)
        echo "- Increase --max-iter to allow more iterations for completion"
        echo "- Reduce scope by splitting remaining US into a follow-up campaign"
        echo "- Review last iteration done-claim for partial progress"
        ;;
    esac
  } | atomic_write "$report_file"

  log "Campaign report written: $report_file"
}
535
+
536
# generate_sv_report
#   Asks the claude CLI for a self-verification report and stores it as
#   $LOGS_DIR/self-verification-report-NNN.md (first free NNN). The outcome
#   (reference to the report, timeout, or failure) is appended to
#   $LOGS_DIR/campaign-report.md. Runs at most once per campaign
#   (SV_REPORT_GENERATED) and only when WITH_SELF_VERIFICATION is non-zero.
#   The run is limited to ${_SV_TIMEOUT_SECS:-300} seconds.
# Returns: always 0 — every failure mode degrades to a note in the report.
generate_sv_report() {
  # AC1-boundary: SV_REPORT_GENERATED guard (init + check + set = 3 occurrences)
  if (( SV_REPORT_GENERATED )); then return 0; fi

  # AC3-negative: early return when WITH_SELF_VERIFICATION is not set
  if (( ! WITH_SELF_VERIFICATION )); then return 0; fi

  SV_REPORT_GENERATED=1

  local campaign_report="$LOGS_DIR/campaign-report.md"

  # AC4: check claude CLI availability — graceful degradation, not exit 1
  if ! command -v claude &>/dev/null; then
    echo "SV report generation failed: claude CLI not found" >> "$campaign_report"
    return 0
  fi

  # AC2: versioning — find next available sv_version slot (in logs dir)
  local sv_version=1
  while [[ -f "$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md" ]]; do
    sv_version=$(( sv_version + 1 ))
  done
  local sv_report_file="$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md"

  log "Generating SV report: $(basename "$sv_report_file")"

  # AC5: configurable timeout with in-process watchdog
  local _sv_timeout_secs="${_SV_TIMEOUT_SECS:-300}"
  local _sv_timeout_file="$LOGS_DIR/.sv_timeout_${$}.tmp"
  rm -f "$_sv_timeout_file"

  # Spawn claude CLI in background — write to sv_report_file
  claude --print "Analyze campaign artifacts in $LOGS_DIR and generate a self-verification report with sections: 1. Automated Validation Summary, 2. Failure Deep Dive, 3. Worker Process Quality, 4. Verifier Judgment Quality, 5. AC Lifecycle, 6. Test-Spec Adherence, 7. Patterns: Strengths & Weaknesses, 8. Recommendations for Next Cycle, 9. Cost & Performance, 10. Blind Spots." \
    > "$sv_report_file" 2>/dev/null &
  local _sv_pid=$!

  # AC5: watchdog — signals timeout file THEN kills _sv_pid after _sv_timeout_secs.
  # The sleep runs as a child that the TERM trap kills, so cancelling the
  # watchdog does not leave a sleep process running for the rest of the timeout.
  local _sv_watchdog
  (
    _sv_sleep_pid=""
    trap 'kill "$_sv_sleep_pid" 2>/dev/null; exit 0' TERM
    sleep "$_sv_timeout_secs" &
    _sv_sleep_pid=$!
    wait "$_sv_sleep_pid"
    if kill -0 "$_sv_pid" 2>/dev/null; then
      touch "$_sv_timeout_file"
      kill "$_sv_pid" 2>/dev/null
    fi
  ) &
  _sv_watchdog=$!

  wait "$_sv_pid"
  local _sv_exit=$?
  kill "$_sv_watchdog" 2>/dev/null
  wait "$_sv_watchdog" 2>/dev/null

  # AC5: detect timeout — exit code 124 or watchdog file present
  if [[ "$_sv_exit" == 124 ]] || [[ -f "$_sv_timeout_file" ]]; then
    rm -f "$_sv_timeout_file"
    local _timeout_msg="SV report generation TIMEOUT: exceeded ${_sv_timeout_secs}s"
    echo "$_timeout_msg" >> "$sv_report_file"
    echo "$_timeout_msg" >> "$campaign_report"
    log "$_timeout_msg"
    return 0
  fi

  # Any other non-zero exit means the CLI failed; do not advertise the
  # (possibly empty) output file as a finished report.
  if [[ "$_sv_exit" != 0 ]]; then
    local _fail_msg="SV report generation failed: claude CLI exited with status ${_sv_exit}"
    echo "$_fail_msg" >> "$campaign_report"
    log "$_fail_msg"
    return 0
  fi

  # On success: append reference to campaign-report (full path, cross-directory)
  echo "See: $sv_report_file" >> "$campaign_report"
  log "SV report written: $sv_report_file"
  return 0
}
603
+
604
+ # =============================================================================
605
+ # Sentinel Writers
606
+ # =============================================================================
607
+
608
# --- governance.md s7: Only the Leader writes sentinels ---
# write_complete_sentinel <summary>
#   Writes the COMPLETE sentinel ($COMPLETE_SENTINEL) through atomic_write. The
#   file records the current $ITERATION, the given summary and a UTC timestamp.
write_complete_sentinel() {
  local summary="$1"
  local stamp sentinel_text

  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  sentinel_text=$(printf '%s\n' \
    "# Campaign Complete" \
    "" \
    "Completed at iteration $ITERATION." \
    "$summary" \
    "" \
    "Timestamp: $stamp")

  echo "$sentinel_text" | atomic_write "$COMPLETE_SENTINEL"
  log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
}
619
+
620
# write_blocked_sentinel <reason>
#   Writes the BLOCKED sentinel ($BLOCKED_SENTINEL) through atomic_write. The
#   file records the current $ITERATION, the given reason and a UTC timestamp.
write_blocked_sentinel() {
  local reason="$1"
  local stamp sentinel_text

  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  sentinel_text=$(printf '%s\n' \
    "# Campaign Blocked" \
    "" \
    "Blocked at iteration $ITERATION." \
    "Reason: $reason" \
    "" \
    "Timestamp: $stamp")

  echo "$sentinel_text" | atomic_write "$BLOCKED_SENTINEL"
  log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
}
630
+
631
+ # =============================================================================
632
+ # PRD Tracking
633
+ # =============================================================================
634
+
635
# --- US-004: Live PRD update helpers ---
# compute_prd_hash
#   Prints the MD5 digest of the PRD file, or an empty line when there is no
#   such file. The PRD is $PRD_FILE, or $DESK/plans/prd-$SLUG.md when PRD_FILE
#   is empty and both DESK and SLUG are set. `md5 -q` is tried first, with
#   `md5sum` as the fallback.
compute_prd_hash() {
  local prd_target="${PRD_FILE:-}"
  if [[ -z "$prd_target" ]] && [[ -n "${DESK:-}" ]] && [[ -n "${SLUG:-}" ]]; then
    prd_target="$DESK/plans/prd-$SLUG.md"
  fi

  if [[ ! -f "$prd_target" ]]; then
    echo ""
    return 0
  fi

  md5 -q "$prd_target" 2>/dev/null || md5sum "$prd_target" 2>/dev/null | cut -d' ' -f1
}
647
+
648
# count_prd_us
#   Prints the distinct user-story ids found in "### US-<digits>" headings of
#   the PRD as a sorted, comma-separated list (e.g. "US-001,US-002"), or an
#   empty line when there is no PRD file. The PRD is $PRD_FILE, or
#   $DESK/plans/prd-$SLUG.md when PRD_FILE is empty and DESK and SLUG are set.
count_prd_us() {
  local prd_target="${PRD_FILE:-}"
  if [[ -z "$prd_target" ]] && [[ -n "${DESK:-}" ]] && [[ -n "${SLUG:-}" ]]; then
    prd_target="$DESK/plans/prd-$SLUG.md"
  fi

  if [[ ! -f "$prd_target" ]]; then
    echo ""
    return 0
  fi

  grep -oE '^### US-[0-9]+' "$prd_target" 2>/dev/null \
    | sed 's/^### //' \
    | sort -u \
    | tr '\n' ',' \
    | sed 's/,$//'
}
659
+
660
# split_prd_by_us <prd_file> <slug>
#   Splits the PRD into one file per user story, written next to the PRD as
#   prd-<slug>-US-NNN.md. A section starts at a "### US-<digits>:" heading and
#   runs up to the next such heading; text before the first heading is not
#   copied. Returns 0 without writing anything when the PRD is missing or has
#   no line starting with "### US-".
split_prd_by_us() {
  local prd_file="$1"
  local slug="$2"

  [[ -f "$prd_file" ]] || return 0
  # Nothing to split unless at least one story heading is present.
  grep -qE '^### US-' "$prd_file" 2>/dev/null || return 0

  local target_dir
  target_dir="$(dirname "$prd_file")"

  awk -v dir="$target_dir" -v slug="$slug" '
    /^### US-[0-9]+:/ {
      if (dest != "") close(dest)
      match($0, /US-[0-9]+/)
      story = substr($0, RSTART, RLENGTH)
      dest = dir "/prd-" slug "-" story ".md"
    }
    dest != "" { print > dest }
  ' "$prd_file"
}
684
+
685
# split_test_spec_by_us <ts_file> <slug>
#   Splits the test spec into one file per user story, written next to it as
#   test-spec-<slug>-US-NNN.md. A section starts at a "## US-<digits>:" heading
#   and runs up to the next such heading. Everything before the first heading
#   (the shared header) is prepended to every file produced by this call.
#   Returns 0 without writing anything when the spec is missing or has no line
#   starting with "## US-".
#
#   Only files written by this call receive the header. Per-story files left
#   over from an earlier split (for stories no longer in the spec) are not
#   touched, so they do not collect another header copy on every re-split, and
#   no glob is expanded (an unmatched glob is an error in zsh).
split_test_spec_by_us() {
  local ts_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$ts_file")"

  [[ -f "$ts_file" ]] || return 0
  grep -qE '^## US-' "$ts_file" 2>/dev/null || return 0

  local header_tmp="${plans_dir}/test-spec-${slug}-header.tmp.$$"
  awk '/^## US-[0-9]+:/{exit} {print}' "$ts_file" > "$header_tmp"

  # Write the per-story files and collect their names, one per line. The command
  # substitution returns only after awk has exited, so every file is complete
  # before it is rewritten below.
  local generated
  generated=$(awk -v dir="$plans_dir" -v slug="$slug" '
    /^## US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      us_id = substr($0, RSTART, RLENGTH)
      out = dir "/test-spec-" slug "-" us_id ".md"
      if (!(out in listed)) { listed[out] = 1; print out }
    }
    out != "" { print > out }
  ' "$ts_file")

  local split_file merged_tmp
  while IFS= read -r split_file; do
    [[ -f "$split_file" ]] || continue
    merged_tmp="${split_file}.tmp.$$"
    cat "$header_tmp" "$split_file" > "$merged_tmp" && mv "$merged_tmp" "$split_file"
  done <<< "$generated"

  rm -f "$header_tmp"
}
719
+
720
# check_prd_update
#   Detects a PRD change by comparing the current PRD hash with PREV_PRD_HASH.
#   On a change it sets _PRD_CHANGED=1, re-splits the PRD and the test spec per
#   user story, and replaces US_LIST with the current story list; otherwise
#   _PRD_CHANGED=0. PREV_PRD_HASH and PREV_PRD_US_LIST are updated either way.
#   The debug log line reports both hashes, both story counts and, on a change,
#   the ids that were not in the previous list.
check_prd_update() {
  local current_hash current_us_list us_count_prev us_count_now new_us
  current_hash=$(compute_prd_hash)
  current_us_list=$(count_prd_us)

  # `grep -c` already prints 0 when nothing matches but exits with status 1;
  # `|| echo 0` would append a second "0" and put a newline into the log line.
  us_count_prev=$(echo "$PREV_PRD_US_LIST" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
  us_count_now=$(echo "$current_us_list" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
  us_count_prev="${us_count_prev:-0}"
  us_count_now="${us_count_now:-0}"

  _PRD_CHANGED=0

  if [[ "$current_hash" != "$PREV_PRD_HASH" ]]; then
    _PRD_CHANGED=1
    # Ids present now but absent from the previous list, in current order.
    new_us=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | awk -v prev="$PREV_PRD_US_LIST" '
      BEGIN {
        split(prev, p, ",")
        for (i in p) {
          seen[p[i]] = 1
        }
      }
      {
        if ($0 != "" && !seen[$0]) {
          if (out == "") out = $0
          else out = out "," $0
        }
      }
      END { print out }
    ')
    log_debug "prd_changed=true prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now} new_us=${new_us:-none}"
    split_prd_by_us "$PRD_FILE" "$SLUG"
    split_test_spec_by_us "$TEST_SPEC_FILE" "$SLUG"
    US_LIST="$current_us_list"
  else
    log_debug "prd_changed=false prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now}"
  fi

  PREV_PRD_HASH="$current_hash"
  PREV_PRD_US_LIST="$current_us_list"
}
757
+
758
+ # =============================================================================
759
+ # Circuit Breakers: Stale Context Detection
760
+ # =============================================================================
761
+
762
# --- governance.md s7 step 8: Stale context detection ---
# compute_context_hash
#   Prints one MD5 digest that covers, in this order:
#     1. the digest of $CONTEXT_FILE,
#     2. the digest of $DESK/memos/$SLUG-memory.md,
#     3. the comma-joined verified_us array read from $STATUS_FILE.
#   Parts whose file does not exist are skipped. Covering memory and
#   verified_us as well prevents false stale detection when the Worker updates
#   memory but not context, or when verified_us changes between iterations.
compute_context_hash() {
  local digest_input=""
  local tracked_file

  for tracked_file in "$CONTEXT_FILE" "$DESK/memos/${SLUG}-memory.md"; do
    if [[ -f "$tracked_file" ]]; then
      digest_input+=$(md5 -q "$tracked_file" 2>/dev/null || md5sum "$tracked_file" 2>/dev/null | cut -d' ' -f1)
    fi
  done

  if [[ -f "$STATUS_FILE" ]]; then
    digest_input+=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
  fi

  # `md5 -q` first, `md5sum` as the fallback.
  echo -n "$digest_input" | md5 -q 2>/dev/null || echo -n "$digest_input" | md5sum 2>/dev/null | cut -d' ' -f1
}
780
+
781
# check_stale_context
#   Circuit breaker for a context that stops changing. Compares the current
#   context hash with PREV_CONTEXT_HASH: a change resets STALE_CONTEXT_COUNT,
#   an identical hash increments it and logs a warning.
# Returns: 1 once the hash has been unchanged for 3 consecutive calls
#          (PREV_CONTEXT_HASH is not updated on that call), otherwise 0.
check_stale_context() {
  local fresh_hash
  fresh_hash=$(compute_context_hash)

  if [[ "$fresh_hash" != "$PREV_CONTEXT_HASH" ]]; then
    STALE_CONTEXT_COUNT=0
  else
    STALE_CONTEXT_COUNT=$(( STALE_CONTEXT_COUNT + 1 ))
    log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
    if (( STALE_CONTEXT_COUNT >= 3 )); then
      log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
      return 1
    fi
  fi

  PREV_CONTEXT_HASH="$fresh_hash"
  return 0
}
799
+
800
+ # =============================================================================
801
+ # Error Detection
802
+ # =============================================================================
803
+
804
# --- US-003: API error detector using tmux pane buffer ---
# is_api_error <pane_id>
#   Captures the pane with `tmux capture-pane` and scans the text
#   (case-insensitively) for API failure markers: a standalone 500 or 529 (not
#   part of a longer number), "overloaded", "too many requests" or
#   "service unavailable".
# Returns: 0 if any marker is found, 1 if none is found or the capture is empty.
is_api_error() {
  local pane_id="$1"
  local captured
  captured=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
  [[ -n "$captured" ]] || return 1

  local marker
  for marker in \
    '(^|[^[:digit:]])500([^[:digit:]]|$)' \
    '(^|[^[:digit:]])529([^[:digit:]]|$)' \
    'overloaded' \
    'too many requests' \
    'service unavailable'; do
    if echo "$captured" | grep -qiE "$marker"; then
      return 0
    fi
  done
  return 1
}
822
+
823
+ # =============================================================================
824
+ # Security Warning
825
+ # =============================================================================
826
+
827
# print_security_warning — print the --dangerously-skip-permissions banner to
# stdout, framed by a blank line above and below.
print_security_warning() {
  local rule="================================================================"

  printf '%s\n' \
    "" \
    "$rule" \
    " WARNING: Running with --dangerously-skip-permissions" \
    "" \
    " The claude CLI will execute tools (file writes, shell commands)" \
    " without asking for confirmation. Only run this on code you" \
    " trust in an environment you control." \
    "$rule" \
    ""
}