@ai-dev-methodologies/rlp-desk 0.7.4 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,890 +0,0 @@
1
- # lib_ralph_desk.zsh — Shared business logic for RLP Desk runner
2
- # SOURCED by run_ralph_desk.zsh. Do NOT execute directly.
3
- #
4
- # IMPORTANT: Must be sourced at file scope, not inside a function.
5
- # typeset -A creates local arrays inside functions, breaking global state.
6
- # Functions in this file read/write globals defined by the sourcing script.
7
-
8
# Load-time guard: a non-empty second funcstack entry is treated as
# "sourced from inside a function", which this library refuses.
[[ -z "${funcstack[2]:-}" ]] || {
  echo "FATAL: lib_ralph_desk.zsh must be sourced at file scope" >&2
  exit 1
}
12
-
13
- # =============================================================================
14
- # Utility Functions
15
- # =============================================================================
16
-
17
# log() — print all arguments on stdout, prefixed with a local timestamp.
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  echo "[${stamp}] $*"
}
20
-
21
# log_debug() — append a timestamped DEBUG line to $DEBUG_LOG.
# Does nothing unless the DEBUG global evaluates to non-zero.
# Globals read: DEBUG, DEBUG_LOG
log_debug() {
  (( DEBUG )) || return 0

  # Create the log's parent directory on demand; failures are silenced.
  local log_dir
  log_dir="$(dirname "$DEBUG_LOG")"
  mkdir -p "$log_dir" 2>/dev/null
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] DEBUG: $*" >> "$DEBUG_LOG"
}
27
-
28
# log_error() — print all arguments on stderr as a timestamped ERROR line.
log_error() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  echo "[${stamp}] ERROR: $*" >&2
}
31
-
32
# build_claude_cmd() — centralized claude CLI command builder
# Single source of truth for all claude invocation flags (--no-mcp, DISABLE_OMC, etc.)
# Inspired by codex-plugin-cc companion pattern: CLI abstraction in one place.
# Args: $1=mode (tui|print) $2=model $3=prompt_file (print mode only) $4=output_log (print mode only)
# Output: complete command string on stdout
# Returns: 0 on success; 1 on unknown mode or print mode without a prompt file
# Globals read: CLAUDE_BIN
#
# The result is a command *string*, so model and file paths are shell-quoted
# with printf %q before being spliced in. Values made only of ordinary
# characters come out unchanged; paths with spaces or metacharacters no longer
# split into several words when the string is later executed.
build_claude_cmd() {
  local mode="$1"
  local model="$2"
  local prompt_file="${3:-}"
  local output_log="${4:-}"

  local q_model
  q_model=$(printf '%q' "$model")

  local base="DISABLE_OMC=1 $CLAUDE_BIN --model $q_model --mcp-config '{\"mcpServers\":{}}' --strict-mcp-config --dangerously-skip-permissions"
  case "$mode" in
    tui)
      echo "$base"
      ;;
    print)
      # Without a prompt file the generated "cat" would have no operand.
      if [[ -z "$prompt_file" ]]; then
        echo "ERROR: build_claude_cmd print mode requires a prompt file" >&2
        return 1
      fi
      local q_prompt q_log=""
      q_prompt=$(printf '%q' "$prompt_file")
      # An empty output_log keeps the historical "tee with no file" form.
      if [[ -n "$output_log" ]]; then
        q_log=" $(printf '%q' "$output_log")"
      fi
      echo "$base -p \"\$(cat $q_prompt)\" --output-format text 2>&1 | tee${q_log}"
      ;;
    *)
      echo "ERROR: build_claude_cmd unknown mode '$mode'" >&2
      return 1
      ;;
  esac
}
58
-
59
# parse_model_flag() — parse unified --worker-model / --verifier-model value
# Colon format (model:reasoning) → codex engine; plain name → claude engine.
# Spark alias: bare "spark" is expanded to full model ID "gpt-5.3-codex-spark".
# Usage: parse_model_flag <value> <role>
# Output (stdout): "engine model [reasoning]" e.g. "codex gpt-5.4 medium" | "claude sonnet"
# Returns: 0 on success, 1 on invalid format (error written to stderr)
# Invalid formats: empty value, more than one colon, or an empty model or
# reasoning part around the colon ("model:" / ":reasoning").
parse_model_flag() {
  local value="$1"
  local role="${2:-worker}"
  local err_msg="ERROR: Invalid --${role}-model format '${value}'. Use 'model:reasoning' (codex) or 'model-name' (claude)."

  # Count colons by deleting every other character; no external processes.
  local only_colons="${value//[^:]/}"
  local colon_count=${#only_colons}

  if [[ -z "$value" ]] || (( colon_count > 1 )); then
    echo "$err_msg" >&2
    return 1
  fi

  if (( colon_count == 1 )); then
    local model="${value%%:*}"
    local reasoning="${value##*:}"
    if [[ -z "$model" || -z "$reasoning" ]]; then
      echo "$err_msg" >&2
      return 1
    fi
    if [[ "$model" == "spark" ]]; then
      model="gpt-5.3-codex-spark"
    fi
    echo "codex $model $reasoning"
  else
    echo "claude $value"
  fi
}
85
-
86
# get_model_string() — render the model identifier for a given engine
# Args: $1=engine $2=model $3=reasoning (optional)
# Output: "model:reasoning" when engine is "codex" and reasoning is non-empty;
#         otherwise just the model name.
get_model_string() {
  local engine="$1" model="$2" reasoning="${3:-}"

  # Anything other than codex-with-reasoning gets the bare model name.
  if [[ "$engine" != "codex" || -z "$reasoning" ]]; then
    echo "$model"
    return
  fi
  echo "${model}:${reasoning}"
}
102
-
103
# get_next_model() — next step on the Worker upgrade ladder
# Usage: get_next_model <model_str>
#   claude ladder: haiku → sonnet → opus
#   codex ladder (gpt-5.3-codex-spark and gpt-5.4): low → medium → high → xhigh
# Output: the next model string; an empty line when the input is already at
#         the top of its ladder or is not a recognised model string.
get_next_model() {
  local current="$1"
  local next=""

  case "$current" in
    haiku)  next="sonnet" ;;
    sonnet) next="opus" ;;
    gpt-5.3-codex-spark:*|gpt-5.4:*)
      # Codex: keep the model family, step the reasoning level.
      local family="${current%%:*}"
      case "${current#*:}" in
        low)    next="${family}:medium" ;;
        medium) next="${family}:high" ;;
        high)   next="${family}:xhigh" ;;
      esac
      ;;
  esac

  echo "$next"
}
128
-
129
# check_model_upgrade() — evaluate and apply Worker model upgrade on repeated same-US failure
# Called in the fail verdict path. Upgrades Worker model when same US fails >= 2 consecutive times.
# Respects LOCK_WORKER_MODEL flag. Never modifies VERIFIER_MODEL.
# Usage: check_model_upgrade <us_id>
# Globals read:    LOCK_WORKER_MODEL, WORKER_ENGINE, ITERATION
# Globals written: _SAME_US_FAIL_COUNT, _LAST_FAILED_US, _MODEL_UPGRADED,
#                  _ORIGINAL_WORKER_MODEL, _ORIGINAL_WORKER_CODEX_REASONING,
#                  WORKER_MODEL, WORKER_CODEX_MODEL, WORKER_CODEX_REASONING
# Returns: always 0
check_model_upgrade() {
  local current_us="$1"

  # Track consecutive failures on same US.
  # Plain assignment instead of (( x++ )): the post-increment form returns
  # status 1 whenever the counter was 0 (which it is right after an upgrade
  # resets it), and that aborts the runner when errexit is active.
  if [[ "$current_us" = "${_LAST_FAILED_US:-}" ]]; then
    _SAME_US_FAIL_COUNT=$(( ${_SAME_US_FAIL_COUNT:-0} + 1 ))
  else
    _SAME_US_FAIL_COUNT=1
    _LAST_FAILED_US="$current_us"
  fi

  # Respect --lock-worker-model: no upgrade; CB threshold handles BLOCKED
  if (( ${LOCK_WORKER_MODEL:-0} )); then
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=locked"
    return 0
  fi

  # Upgrade when same US fails >= 2 consecutive times
  if (( _SAME_US_FAIL_COUNT >= 2 )); then
    local current_model_str
    current_model_str=$(get_model_string "${WORKER_ENGINE:-}" "${WORKER_CODEX_MODEL:-${WORKER_MODEL:-}}" "${WORKER_CODEX_REASONING:-}")

    local next_model
    next_model=$(get_next_model "$current_model_str")

    if [[ -z "$next_model" ]]; then
      # Already at ceiling — CB threshold will trigger BLOCKED with escalation message
      log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=already_max current=$current_model_str"
      return 0
    fi

    # Save original model on first upgrade only
    if (( ${_MODEL_UPGRADED:-0} == 0 )); then
      _ORIGINAL_WORKER_MODEL="${WORKER_MODEL:-}"
      _ORIGINAL_WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-}"
    fi
    _MODEL_UPGRADED=1

    if [[ "${WORKER_ENGINE:-}" = "codex" ]]; then
      # Codex strings are "model:reasoning"; split and keep WORKER_MODEL in sync.
      WORKER_CODEX_MODEL="${next_model%%:*}"
      WORKER_CODEX_REASONING="${next_model##*:}"
      WORKER_MODEL="$WORKER_CODEX_MODEL"
    else
      WORKER_MODEL="$next_model"
    fi

    log " Worker model upgraded: ${_ORIGINAL_WORKER_MODEL} → ${WORKER_MODEL} (same-US consecutive fail threshold)"
    log " [WARN] Same AC failing repeatedly — consider IL-2 re-assessment of AC quality (spec quality check)"
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${_ORIGINAL_WORKER_MODEL} to=${WORKER_MODEL}"
    _SAME_US_FAIL_COUNT=0 # Reset counter after upgrade
  fi

  return 0
}
187
-
188
# record_us_failure() — bump the cumulative failure count for one US
# Usage: record_us_failure <us_id>
# Increments US_FAIL_HISTORY[<us_id>]; an empty id or "unknown" is ignored.
# When the US already had at least one recorded failure, a warning is logged.
# Globals read: ITERATION   Globals written: US_FAIL_HISTORY
# Returns: always 0
record_us_failure() {
  local us_id="$1"
  if [[ -z "$us_id" || "$us_id" = "unknown" ]]; then
    return 0
  fi

  local earlier="${US_FAIL_HISTORY[$us_id]:-0}"
  US_FAIL_HISTORY[$us_id]=$(( earlier + 1 ))

  # First recorded failure for this US: nothing to warn about yet.
  (( earlier > 0 )) || return 0

  log " [WARN] US $us_id has prior failure history (${US_FAIL_HISTORY[$us_id]} total failures) — consider IL-2 AC quality re-assessment"
  log_debug "[GOV] iter=${ITERATION:-0} us_prior_failures=$us_id count=${US_FAIL_HISTORY[$us_id]}"
  return 0
}
207
-
208
# --- governance.md s7: Atomic file writes (tmux pattern) ---
# All file writes by the Leader use tmp+mv to prevent corruption.
# atomic_write() — write stdin to <target> via a sibling temp file + rename
# Args: $1=target path
# Returns: 0 when the target was replaced; 1 when writing the temp file or the
#          rename failed. In the failure case the target is left untouched and
#          the temp file is removed instead of being left behind.
atomic_write() {
  local target="$1"
  local tmp="${target}.tmp.$$"

  # Only rename when the temp file was written completely.
  if cat > "$tmp" && mv "$tmp" "$target"; then
    return 0
  fi

  rm -f "$tmp" 2>/dev/null
  return 1
}
216
-
217
- # =============================================================================
218
- # Scaffold Validation
219
- # =============================================================================
220
-
221
# validate_scaffold() — check that the campaign scaffold files exist
# Reports every missing file through log_error, then exits the shell with
# status 1 if any was missing. On success, creates $LOGS_DIR.
# Globals read: WORKER_PROMPT_BASE, VERIFIER_PROMPT_BASE, CONTEXT_FILE,
#               MEMORY_FILE, LOGS_DIR
validate_scaffold() {
  local missing=0

  # Walk (label, path) pairs through the positional parameters; this function
  # takes no arguments of its own, so they are free to reuse.
  set -- \
    "Worker prompt"   "$WORKER_PROMPT_BASE" \
    "Verifier prompt" "$VERIFIER_PROMPT_BASE" \
    "Context file"    "$CONTEXT_FILE" \
    "Memory file"     "$MEMORY_FILE"
  while (( $# >= 2 )); do
    if [[ ! -f "$2" ]]; then
      log_error "$1 not found: $2"
      missing=1
    fi
    shift 2
  done

  if (( missing )); then
    log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
    exit 1
  fi

  mkdir -p "$LOGS_DIR"
}
251
-
252
- # =============================================================================
253
- # Status Updates
254
- # =============================================================================
255
-
256
# --- governance.md s7 step 8: Update status.json ---
# update_status() — rewrite $STATUS_FILE with the current campaign state
# Args: $1=phase $2=last_result
# Globals read: SLUG, BASELINE_COMMIT, ITERATION, MAX_ITER, WORKER_*/VERIFIER_*
#               model and engine settings, VERIFY_MODE, CONSENSUS_MODE,
#               CONSENSUS_SCOPE, CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT,
#               CONSECUTIVE_FAILURES, VERIFIED_US (comma-separated), STATUS_FILE
# Returns: 1 (status file left untouched) when the JSON cannot be built,
#          otherwise the status of atomic_write.
#
# The document is assembled by jq rather than by string concatenation, so
# values containing quotes or backslashes are escaped correctly. Numeric
# fields that are unset or empty are written as 0 instead of producing
# invalid JSON.
update_status() {
  local phase="$1"
  local last_result="$2"

  local status_json
  status_json=$(jq -n \
    --arg slug "${SLUG:-}" \
    --arg baseline_commit "${BASELINE_COMMIT:-none}" \
    --argjson iteration "${ITERATION:-0}" \
    --argjson max_iter "${MAX_ITER:-0}" \
    --arg phase "$phase" \
    --arg worker_model "${WORKER_MODEL:-}" \
    --arg verifier_model "${VERIFIER_MODEL:-}" \
    --arg worker_engine "${WORKER_ENGINE:-}" \
    --arg verifier_engine "${VERIFIER_ENGINE:-}" \
    --arg worker_codex_model "${WORKER_CODEX_MODEL:-}" \
    --arg worker_codex_reasoning "${WORKER_CODEX_REASONING:-}" \
    --arg verifier_codex_model "${VERIFIER_CODEX_MODEL:-}" \
    --arg verifier_codex_reasoning "${VERIFIER_CODEX_REASONING:-}" \
    --arg verify_mode "${VERIFY_MODE:-}" \
    --arg consensus_mode "${CONSENSUS_MODE:-}" \
    --arg last_result "$last_result" \
    --argjson consecutive_failures "${CONSECUTIVE_FAILURES:-0}" \
    --arg verified_us "${VERIFIED_US:-}" \
    --arg consensus_scope "${CONSENSUS_SCOPE:-}" \
    --argjson consensus_round "${CONSENSUS_ROUND:-0}" \
    --arg claude_verdict "${CLAUDE_VERDICT:-}" \
    --arg codex_verdict "${CODEX_VERDICT:-}" \
    --arg updated_at_utc "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{
       slug: $slug,
       baseline_commit: $baseline_commit,
       iteration: $iteration,
       max_iter: $max_iter,
       phase: $phase,
       worker_model: $worker_model,
       verifier_model: $verifier_model,
       worker_engine: $worker_engine,
       verifier_engine: $verifier_engine,
       worker_codex_model: $worker_codex_model,
       worker_codex_reasoning: $worker_codex_reasoning,
       verifier_codex_model: $verifier_codex_model,
       verifier_codex_reasoning: $verifier_codex_reasoning,
       verify_mode: $verify_mode,
       consensus_mode: $consensus_mode,
       last_result: $last_result,
       consecutive_failures: $consecutive_failures,
       verified_us: ($verified_us | if . == "" then [] else split(",") end)
     }
     + (if $consensus_mode != "off" then
          {
            consensus_scope: $consensus_scope,
            consensus_round: $consensus_round,
            claude_verdict: $claude_verdict,
            codex_verdict: $codex_verdict
          }
        else {} end)
     + {updated_at_utc: $updated_at_utc}') || {
    # Do not overwrite a good status file with an empty or partial one.
    log_error "update_status: could not build status JSON; $STATUS_FILE left unchanged"
    return 1
  }

  printf '%s\n' "$status_json" | atomic_write "$STATUS_FILE"
}
299
-
300
# --- governance.md s7 step 8: Write result log ---
# write_result_log() — write $LOGS_DIR/iter-NNN.result.md for one iteration
# Args: $1=iteration number $2=result text
# The file records the result, a "git diff --stat HEAD" of $ROOT (plus up to
# 20 untracked files) and a UTC timestamp, and is written via atomic_write.
# Globals read: LOGS_DIR, ROOT
write_result_log() {
  local iter="$1"
  local result="$2"
  local padded
  padded=$(printf '%03d' $iter)
  local result_file="$LOGS_DIR/iter-${padded}.result.md"

  # Diff summary against HEAD, or a placeholder when HEAD cannot be resolved.
  local git_diff=""
  if ! git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    git_diff="(no commits in repo — cannot diff)"
  else
    git_diff=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(no git diff available)")
  fi

  # Include untracked new files in result log
  local result_untracked
  result_untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$result_untracked" ]]; then
    git_diff+=$'\n\n'"Untracked new files:"$'\n'"${result_untracked}"
  fi

  local stamp
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  {
    echo "# Iteration $iter Result"
    echo ""
    echo "## Status"
    echo "$result [leader-measured]"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$git_diff"
    echo '```'
    echo "[git-measured]"
    echo ""
    echo "## Timestamp"
    echo "$stamp"
  } | atomic_write "$result_file"
}
338
-
339
# --- step 7d: Archive iteration artifacts (done-claim + verdict) to logs/ ---
# archive_iter_artifacts() — copy the current done-claim and verdict files
# into $LOGS_DIR under iteration-numbered names. Files that do not exist are
# skipped; cp error output is silenced.
# Args: $1=iteration number
# Globals read: DONE_CLAIM_FILE, VERDICT_FILE, LOGS_DIR
archive_iter_artifacts() {
  local iter="$1"
  local stem
  stem="$LOGS_DIR/iter-$(printf '%03d' "$iter")"

  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    cp "$DONE_CLAIM_FILE" "${stem}-done-claim.json" 2>/dev/null
  fi

  if [[ -f "$VERDICT_FILE" ]]; then
    cp "$VERDICT_FILE" "${stem}-verify-verdict.json" 2>/dev/null
  fi
}
351
-
352
# _rlp_epoch_to_iso() — format epoch seconds as a UTC ISO-8601 timestamp
# Args: $1=epoch seconds
# Output: e.g. 1970-01-01T00:00:00Z, or an empty line if neither form works.
# BSD/macOS date takes the epoch via "-r"; GNU date treats "-r" as a reference
# FILE and needs "-d @epoch" instead, so both spellings are tried in turn.
_rlp_epoch_to_iso() {
  local epoch="$1"
  date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || date -u -d "@${epoch}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || echo ""
}

# --- AC5: Write per-iteration cost estimate to cost-log.jsonl ---
# write_cost_log() — append one JSON line for the iteration to $COST_LOG
# Args: $1=iteration number
# The token estimate is (prompt + done-claim + verdict bytes) / 4.
# Globals read: LOGS_DIR, DONE_CLAIM_FILE, VERDICT_FILE, COST_LOG,
#               ITER_WORKER_START/END, ITER_VERIFIER_START/END,
#               ITER_VERIFIER_CLAUDE_DURATION_S, ITER_VERIFIER_CODEX_DURATION_S
write_cost_log() {
  local iter="$1"
  local iter_padded
  iter_padded=$(printf '%03d' "$iter")

  local prompt_bytes=0 claim_bytes=0 verdict_bytes=0
  local worker_prompt_file="$LOGS_DIR/iter-${iter_padded}.worker-prompt.md"
  [[ -f "$worker_prompt_file" ]] && prompt_bytes=$(wc -c < "$worker_prompt_file" 2>/dev/null || echo 0)
  [[ -f "$DONE_CLAIM_FILE" ]] && claim_bytes=$(wc -c < "$DONE_CLAIM_FILE" 2>/dev/null || echo 0)
  [[ -f "$VERDICT_FILE" ]] && verdict_bytes=$(wc -c < "$VERDICT_FILE" 2>/dev/null || echo 0)

  # Some wc implementations pad the count with spaces; normalise to bare integers.
  prompt_bytes=$(( prompt_bytes ))
  claim_bytes=$(( claim_bytes ))
  verdict_bytes=$(( verdict_bytes ))

  local estimated_tokens=$(( (prompt_bytes + claim_bytes + verdict_bytes) / 4 ))

  # AC1: per-phase timing fields (a missing end time falls back to the start time)
  local worker_start_time="" worker_end_time="" worker_duration_s=0
  local verifier_start_time="" verifier_end_time="" verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_start_time=$(_rlp_epoch_to_iso "$ITER_WORKER_START")
    worker_end_time=$(_rlp_epoch_to_iso "${ITER_WORKER_END:-$ITER_WORKER_START}")
    worker_duration_s=$(( ${ITER_WORKER_END:-$ITER_WORKER_START} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_start_time=$(_rlp_epoch_to_iso "$ITER_VERIFIER_START")
    verifier_end_time=$(_rlp_epoch_to_iso "${ITER_VERIFIER_END:-$ITER_VERIFIER_START}")
    verifier_duration_s=$(( ${ITER_VERIFIER_END:-$ITER_VERIFIER_START} - ITER_VERIFIER_START ))
  fi

  # AC2: consensus mode per-engine timing
  local consensus_fields=""
  if [[ -n "${ITER_VERIFIER_CLAUDE_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields}"',"verifier_claude_duration_s":'"${ITER_VERIFIER_CLAUDE_DURATION_S}"
  fi
  if [[ -n "${ITER_VERIFIER_CODEX_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields}"',"verifier_codex_duration_s":'"${ITER_VERIFIER_CODEX_DURATION_S}"
  fi

  echo '{"iteration":'"$iter"',"estimated_tokens":'"$estimated_tokens"',"token_source":"estimated","prompt_bytes":'"$prompt_bytes"',"claim_bytes":'"$claim_bytes"',"verdict_bytes":'"$verdict_bytes"',"worker_start_time":"'"$worker_start_time"'","worker_end_time":"'"$worker_end_time"'","worker_duration_s":'"$worker_duration_s"',"verifier_start_time":"'"$verifier_start_time"'","verifier_end_time":"'"$verifier_end_time"'","verifier_duration_s":'"$verifier_duration_s"''"$consensus_fields"',"timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$COST_LOG"
}
391
-
392
# --- Analytics: write per-iteration structured data to campaign.jsonl (always-on) ---
# write_campaign_jsonl() — append one compact JSON record to $CAMPAIGN_JSONL
# Args: $1=iteration $2=us_id (default "unknown") $3=verdict (default "unknown")
# Globals read: ITER_WORKER_START/END, ITER_VERIFIER_START/END, US_FAIL_HISTORY,
#               WORKER_MODEL, WORKER_ENGINE, VERIFIER_ENGINE, CLAUDE_VERDICT,
#               CODEX_VERDICT, CONSENSUS_MODE, CONSECUTIVE_FAILURES,
#               _MODEL_UPGRADED, ROOT, SLUG, CAMPAIGN_JSONL
write_campaign_jsonl() {
  local iter="$1"
  local us_id="${2:-unknown}"
  local verdict="${3:-unknown}"

  # A phase that has started but not finished is measured up to "now".
  local worker_duration_s=0
  local verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_duration_s=$(( ${ITER_WORKER_END:-$(date +%s)} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_duration_s=$(( ${ITER_VERIFIER_END:-$(date +%s)} - ITER_VERIFIER_START ))
  fi

  # Build us_fail_history JSON object from associative array
  local us_fail_history_json="{}"
  if (( ${#US_FAIL_HISTORY[@]} > 0 )); then
    us_fail_history_json="{"
    local first=1
    # Declared local so the loop variable does not leak into the globals of
    # the script that sourced this library.
    local key
    for key in "${(@k)US_FAIL_HISTORY}"; do
      (( first )) || us_fail_history_json+=","
      us_fail_history_json+="\"$key\":${US_FAIL_HISTORY[$key]}"
      first=0
    done
    us_fail_history_json+="}"
  fi

  jq -nc \
    --argjson iter "$iter" \
    --arg us_id "$us_id" \
    --arg worker_model "$WORKER_MODEL" \
    --arg worker_engine "$WORKER_ENGINE" \
    --arg verifier_engine "$VERIFIER_ENGINE" \
    --arg claude_verdict "${CLAUDE_VERDICT:-$verdict}" \
    --arg codex_verdict "${CODEX_VERDICT:-N/A}" \
    --arg consensus_mode "$CONSENSUS_MODE" \
    --argjson consecutive_failures "$CONSECUTIVE_FAILURES" \
    --argjson model_upgraded "${_MODEL_UPGRADED:-0}" \
    --argjson us_fail_history "$us_fail_history_json" \
    --argjson duration_worker_s "$worker_duration_s" \
    --argjson duration_verifier_s "$verifier_duration_s" \
    --arg project_root "$ROOT" \
    --arg slug "$SLUG" \
    --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus_mode: $consensus_mode, consecutive_failures: $consecutive_failures, model_upgraded: $model_upgraded, us_fail_history: $us_fail_history, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
    >> "$CAMPAIGN_JSONL"
}
440
-
441
# --- AC4: Generate campaign-report.md on all terminal states ---
# generate_campaign_report() — write $LOGS_DIR/campaign-report.md
# Takes no arguments. Runs at most once per process: the
# CAMPAIGN_REPORT_GENERATED global is checked on entry and then set.
# Globals read: COMPLETE_SENTINEL, BLOCKED_SENTINEL, LOGS_DIR, START_TIME,
#               BASELINE_COMMIT, ROOT, WITH_SELF_VERIFICATION, SLUG, DESK,
#               ITERATION, MAX_ITER, WORKER_MODEL, WORKER_ENGINE,
#               VERIFIER_MODEL, VERIFIER_ENGINE, CONSENSUS_MODE,
#               CONSENSUS_MODEL, FINAL_CONSENSUS_MODEL, VERIFIED_US,
#               CONSECUTIVE_FAILURES, COST_LOG
# Globals written: CAMPAIGN_REPORT_GENERATED
generate_campaign_report() {
  # Guard: idempotent — only generate once per campaign run
  if (( CAMPAIGN_REPORT_GENERATED )); then return 0; fi
  CAMPAIGN_REPORT_GENERATED=1

  # Terminal state is derived from the sentinel files; when neither sentinel
  # exists the run is reported as TIMEOUT.
  local final_status="UNKNOWN"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
  else final_status="TIMEOUT"; fi

  local report_file="$LOGS_DIR/campaign-report.md"

  # AC9: Version existing report before writing new one
  # (moved to the first unused campaign-report-vN.md name)
  if [[ -f "$report_file" ]]; then
    local v=1
    while [[ -f "${report_file%.md}-v${v}.md" ]]; do (( v++ )); done
    mv "$report_file" "${report_file%.md}-v${v}.md"
  fi

  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))

  # Diff summary: against the recorded baseline commit when there is one,
  # otherwise against HEAD, otherwise a placeholder.
  local baseline_commit_val="${BASELINE_COMMIT:-none}"
  local files_changed=""
  if [[ "$baseline_commit_val" != "none" ]]; then
    files_changed=$(git -C "$ROOT" diff --stat "${baseline_commit_val}" 2>/dev/null || echo "(git diff unavailable)")
  elif git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    files_changed=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(git diff unavailable)")
  else
    files_changed="(no commits in repo — cannot diff)"
  fi
  # Include untracked new files (first 20 only)
  local untracked
  untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$untracked" ]]; then
    files_changed="${files_changed}

Untracked new files:
${untracked}"
  fi

  # Self-verification summary line: newest existing SV report by mtime, a
  # "pending" note when none exists yet, or N/A when the feature is off.
  local sv_summary=""
  if (( WITH_SELF_VERIFICATION )); then
    local sv_report
    sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
    if [[ -n "$sv_report" ]]; then
      sv_summary="See: $sv_report"
    else
      sv_summary="SV report generation pending — will be appended after this report."
    fi
  else
    sv_summary="N/A — --with-self-verification not enabled"
  fi

  # The whole report is produced by one command group and handed to
  # atomic_write in a single piece.
  {
    echo "# Campaign Report: $SLUG"
    echo ""
    echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) | Status: $final_status | Iterations: $ITERATION"
    echo ""
    echo "## Objective"
    # Up to three lines following the PRD's "## Objective" heading.
    local prd_file="$DESK/plans/prd-$SLUG.md"
    if [[ -f "$prd_file" ]]; then
      grep '^## Objective' -A3 "$prd_file" 2>/dev/null | tail -n +2 | head -3
    else
      echo "(PRD not found)"
    fi
    echo ""
    echo "## Execution Summary"
    echo "- Terminal state: $final_status"
    echo "- Iterations run: $ITERATION / $MAX_ITER"
    echo "- Elapsed: ${elapsed}s"
    echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
    echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
    echo "- Consensus: mode=$CONSENSUS_MODE model=$CONSENSUS_MODEL final_model=$FINAL_CONSENSUS_MODEL"
    echo ""
    echo "## US Status"
    echo "- Verified: ${VERIFIED_US:-none}"
    echo "- Consecutive failures at end: $CONSECUTIVE_FAILURES"
    echo ""
    echo "## Verification Results"
    # One bullet per archived done-claim (iter-NNN-done-claim.json) that exists.
    local ri=1
    while (( ri <= ITERATION )); do
      local iter_dc="$LOGS_DIR/iter-$(printf '%03d' $ri)-done-claim.json"
      if [[ -f "$iter_dc" ]]; then
        local us_id
        us_id=$(jq -r '.us_id // "unknown"' "$iter_dc" 2>/dev/null)
        echo "- $(basename "$iter_dc"): us_id=$us_id"
      fi
      (( ri++ ))
    done
    echo ""
    echo "## Issues Encountered"
    # One bullet per iter-NNN.fix-contract.md file found, or "None".
    local fi_found=0
    local fi_i=1
    while (( fi_i <= ITERATION )); do
      local fix_f="$LOGS_DIR/iter-$(printf '%03d' $fi_i).fix-contract.md"
      if [[ -f "$fix_f" ]]; then
        echo "- $(basename "$fix_f")"
        fi_found=1
      fi
      (( fi_i++ ))
    done
    (( fi_found == 0 )) && echo "- None"
    echo ""
    echo "## Cost & Performance"
    # Sum estimated_tokens over every line of the cost log; a line jq cannot
    # parse contributes 0.
    if [[ -f "$COST_LOG" ]]; then
      local total_tokens=0
      while IFS= read -r line; do
        local t
        t=$(echo "$line" | jq -r '.estimated_tokens // 0' 2>/dev/null || echo 0)
        total_tokens=$(( total_tokens + t ))
      done < "$COST_LOG"
      echo "- Total estimated tokens: $total_tokens (source: estimated, tmux mode)"
      echo "- See: cost-log.jsonl for per-iteration breakdown"
    else
      echo "- No cost data available"
    fi
    echo ""
    echo "## SV Summary"
    echo "$sv_summary"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$files_changed"
    echo '```'
    echo "Note: Files Changed may include pre-existing uncommitted changes if the campaign started in a dirty worktree."
    echo ""
    echo "## Suggested Next Actions"
    if [[ "$final_status" == "COMPLETE" ]]; then
      echo "- Review verified US list and plan next feature campaign or next cycle"
      echo "- Consider re-run with --mode improve for quality refinement"
      echo "- Archive campaign artifacts and update project documentation"
    elif [[ "$final_status" == "BLOCKED" ]]; then
      echo "- Review PRD acceptance criteria for the failing US"
      echo "- Check circuit breaker history (consecutive failures: $CONSECUTIVE_FAILURES)"
      echo "- Consider relaxing verifier criteria if false-negative pattern detected"
    elif [[ "$final_status" == "TIMEOUT" ]]; then
      echo "- Increase --max-iter to allow more iterations for completion"
      echo "- Reduce scope by splitting remaining US into a follow-up campaign"
      echo "- Review last iteration done-claim for partial progress"
    fi
  } | atomic_write "$report_file"

  log "Campaign report written: $report_file"
}
588
-
589
# generate_sv_report() — run the claude CLI to produce a self-verification report
# Takes no arguments. Runs at most once per process and only when
# WITH_SELF_VERIFICATION is non-zero. The report goes to the first unused
# $LOGS_DIR/self-verification-report-NNN.md; the outcome (reference, timeout
# or failure note) is appended to $LOGS_DIR/campaign-report.md.
# Globals read: WITH_SELF_VERIFICATION, LOGS_DIR, _SV_TIMEOUT_SECS (default 300)
# Globals written: SV_REPORT_GENERATED
# Returns: always 0 — every failure mode degrades to a note in the campaign report.
generate_sv_report() {
  # AC1-boundary: SV_REPORT_GENERATED guard (init + check + set = 3 occurrences)
  if (( SV_REPORT_GENERATED )); then return 0; fi

  # AC3-negative: early return if ! WITH_SELF_VERIFICATION flag not set
  if (( ! WITH_SELF_VERIFICATION )); then return 0; fi

  SV_REPORT_GENERATED=1

  # AC4: check claude CLI availability — graceful degradation, not exit 1
  if ! command -v claude &>/dev/null; then
    echo "SV report generation failed: claude CLI not found" >> "$LOGS_DIR/campaign-report.md"
    return 0
  fi

  # AC2: versioning — find next available sv_version slot (in logs dir)
  local sv_version=1
  while [[ -f "$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md" ]]; do
    sv_version=$(( sv_version + 1 ))
  done
  local sv_report_file="$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md"

  log "Generating SV report: $(basename "$sv_report_file")"

  # AC5: configurable timeout with in-process watchdog
  local _sv_timeout_secs="${_SV_TIMEOUT_SECS:-300}"
  local _sv_timeout_file="$LOGS_DIR/.sv_timeout_${$}.tmp"
  rm -f "$_sv_timeout_file"

  # Spawn claude CLI in background — write to sv_report_file
  claude --print "Analyze campaign artifacts in $LOGS_DIR and generate a self-verification report with sections: 1. Automated Validation Summary, 2. Failure Deep Dive, 3. Worker Process Quality, 4. Verifier Judgment Quality, 5. AC Lifecycle, 6. Test-Spec Adherence, 7. Patterns: Strengths & Weaknesses, 8. Recommendations for Next Cycle, 9. Cost & Performance, 10. Blind Spots." \
    > "$sv_report_file" 2>/dev/null &
  local _sv_pid=$!

  # AC5: watchdog — signals timeout file THEN kills _sv_pid after _sv_timeout_secs.
  # The sleep runs as a child the watchdog waits on, and a TERM trap kills it,
  # so cancelling the watchdog does not leave a sleep process behind.
  local _sv_watchdog
  (
    _sv_sleep_pid=""
    trap 'kill "$_sv_sleep_pid" 2>/dev/null; exit 0' TERM
    sleep "$_sv_timeout_secs" &
    _sv_sleep_pid=$!
    wait "$_sv_sleep_pid"
    if kill -0 "$_sv_pid" 2>/dev/null; then
      touch "$_sv_timeout_file"
      kill "$_sv_pid" 2>/dev/null
    fi
  ) &
  _sv_watchdog=$!

  local _sv_exit=0
  wait "$_sv_pid" || _sv_exit=$?
  kill "$_sv_watchdog" 2>/dev/null
  wait "$_sv_watchdog" 2>/dev/null

  # AC5: detect timeout — exit code 124 or watchdog file present
  if [[ "$_sv_exit" == 124 ]] || [[ -f "$_sv_timeout_file" ]]; then
    rm -f "$_sv_timeout_file"
    local _timeout_msg="SV report generation TIMEOUT: exceeded ${_sv_timeout_secs}s"
    echo "$_timeout_msg" >> "$sv_report_file"
    echo "$_timeout_msg" >> "$LOGS_DIR/campaign-report.md"
    log "$_timeout_msg"
    return 0
  fi

  # A non-timeout failure must not be reported as a successfully written report.
  if (( _sv_exit != 0 )); then
    local _fail_msg="SV report generation failed: claude exited with status ${_sv_exit}"
    echo "$_fail_msg" >> "$LOGS_DIR/campaign-report.md"
    log "$_fail_msg"
    return 0
  fi

  # On success: append reference to campaign-report (full path, cross-directory)
  echo "See: $sv_report_file" >> "$LOGS_DIR/campaign-report.md"
  log "SV report written: $sv_report_file"
  return 0
}
656
-
657
- # =============================================================================
658
- # Sentinel Writers
659
- # =============================================================================
660
-
661
# --- governance.md s7: Only the Leader writes sentinels ---
# write_complete_sentinel() — write the COMPLETE sentinel file
# Args: $1=summary text placed under the completion line
# Globals read: ITERATION, COMPLETE_SENTINEL
write_complete_sentinel() {
  local summary="$1"
  local stamp
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  {
    echo "# Campaign Complete"
    echo ""
    echo "Completed at iteration $ITERATION."
    echo "$summary"
    echo ""
    echo "Timestamp: $stamp"
  } | atomic_write "$COMPLETE_SENTINEL"

  log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
}
672
-
673
# write_blocked_sentinel() — write the BLOCKED sentinel file
# Args: $1=reason text recorded on the "Reason:" line
# Globals read: ITERATION, BLOCKED_SENTINEL
write_blocked_sentinel() {
  local reason="$1"
  local stamp
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  {
    echo "# Campaign Blocked"
    echo ""
    echo "Blocked at iteration $ITERATION."
    echo "Reason: $reason"
    echo ""
    echo "Timestamp: $stamp"
  } | atomic_write "$BLOCKED_SENTINEL"

  log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
}
683
-
684
- # =============================================================================
685
- # PRD Tracking
686
- # =============================================================================
687
-
688
- # --- US-004: Live PRD update helpers ---
689
- compute_prd_hash() {
690
- local prd_file="${PRD_FILE:-}"
691
- if [[ -z "$prd_file" && -n "${DESK:-}" && -n "${SLUG:-}" ]]; then
692
- prd_file="$DESK/plans/prd-$SLUG.md"
693
- fi
694
- if [[ -f "$prd_file" ]]; then
695
- md5 -q "$prd_file" 2>/dev/null || md5sum "$prd_file" 2>/dev/null | cut -d' ' -f1
696
- else
697
- echo ""
698
- fi
699
- }
700
-
701
# count_prd_us() — list the user stories declared in the PRD
# Output: sorted, de-duplicated, comma-separated US ids (e.g. "US-001,US-002")
# taken from lines starting with "### US-<digits>"; an empty line when the
# PRD file does not exist. The PRD path is resolved as in compute_prd_hash.
count_prd_us() {
  local prd_file="${PRD_FILE:-}"
  if [[ -z "$prd_file" && -n "${DESK:-}" && -n "${SLUG:-}" ]]; then
    prd_file="$DESK/plans/prd-$SLUG.md"
  fi

  if [[ ! -f "$prd_file" ]]; then
    echo ""
    return
  fi

  # Keep only the US id from each matching heading, then join with commas.
  sed -n 's/^### \(US-[0-9][0-9]*\).*$/\1/p' "$prd_file" 2>/dev/null \
    | sort -u \
    | tr '\n' ',' \
    | sed 's/,$//'
}
712
-
713
# split_prd_by_us() — split a PRD into one file per user story
# Args: $1=PRD file $2=slug
# Each "### US-<n>:" heading starts a new file prd-<slug>-US-<n>.md next to
# the PRD; lines before the first such heading are not copied anywhere.
# Returns 0 without doing anything when the PRD is missing or has no
# "### US-" line.
split_prd_by_us() {
  local prd_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$prd_file")"

  if [[ ! -f "$prd_file" ]]; then
    return 0
  fi
  if ! grep -qE '^### US-' "$prd_file" 2>/dev/null; then
    return 0
  fi

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^### US-[0-9]+:/ {
      # New story: finish the previous file and switch the output target.
      if (target != "") close(target)
      match($0, /US-[0-9]+/)
      target = dir "/prd-" slug "-" substr($0, RSTART, RLENGTH) ".md"
    }
    target != "" { print > target }
  ' "$prd_file"
}
737
-
738
# split_test_spec_by_us() — split a test spec into one file per user story
# Args: $1=test-spec file $2=slug
# Each "## US-<n>:" heading starts a new file test-spec-<slug>-US-<n>.md next
# to the spec. Every generated file begins with the shared header, i.e. all
# lines that precede the first such heading.
# Returns 0 without doing anything when the spec is missing or has no
# "## US-" line; otherwise the status of awk.
#
# The header is written by the same awk pass that creates each file. Only
# files produced by this call are touched, so split files left over from an
# earlier run do not get another copy of the header, and no temp file or
# glob is needed.
split_test_spec_by_us() {
  local ts_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$ts_file")"

  [[ -f "$ts_file" ]] || return 0

  local us_count
  us_count=$(grep -cE '^## US-' "$ts_file" 2>/dev/null) || us_count=0
  if [[ "$us_count" -eq 0 ]]; then
    return 0
  fi

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^## US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      us_id = substr($0, RSTART, RLENGTH)
      out = dir "/test-spec-" slug "-" us_id ".md"
      # Start every per-US file with the shared header.
      printf "%s", header > out
    }
    # Still before the first US heading: collect the header.
    out == "" { header = header $0 "\n"; next }
    { print > out }
  ' "$ts_file"
}
772
-
773
# check_prd_update() — detect PRD edits between iterations and re-split on change
# Compares the current PRD hash with PREV_PRD_HASH. On a change it sets
# _PRD_CHANGED=1, re-runs the per-US splitters and refreshes US_LIST; either
# way the PREV_* globals are updated for the next call.
# Globals read:    PREV_PRD_HASH, PREV_PRD_US_LIST, PRD_FILE, TEST_SPEC_FILE, SLUG
# Globals written: _PRD_CHANGED, US_LIST (on change), PREV_PRD_HASH, PREV_PRD_US_LIST
check_prd_update() {
  local current_hash current_us_list us_count_prev us_count_now new_us
  current_hash=$(compute_prd_hash)
  current_us_list=$(count_prd_us)
  # grep -c already prints 0 when nothing matches but exits 1 in that case;
  # "|| true" only absorbs the status (an extra "echo 0" would append a second 0).
  us_count_prev=$(echo "${PREV_PRD_US_LIST:-}" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
  us_count_now=$(echo "$current_us_list" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)

  _PRD_CHANGED=0

  if [[ "$current_hash" != "${PREV_PRD_HASH:-}" ]]; then
    _PRD_CHANGED=1
    # US ids present now that were not in the previous list, comma-separated.
    new_us=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | awk -v prev="${PREV_PRD_US_LIST:-}" '
      BEGIN {
        split(prev, p, ",")
        for (i in p) {
          seen[p[i]] = 1
        }
      }
      {
        if ($0 != "" && !seen[$0]) {
          if (out == "") out = $0
          else out = out "," $0
        }
      }
      END { print out }
    ')
    log_debug "prd_changed=true prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now} new_us=${new_us:-none}"
    split_prd_by_us "$PRD_FILE" "$SLUG"
    split_test_spec_by_us "$TEST_SPEC_FILE" "$SLUG"
    US_LIST="$current_us_list"
  else
    log_debug "prd_changed=false prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now}"
  fi

  PREV_PRD_HASH="$current_hash"
  PREV_PRD_US_LIST="$current_us_list"
}
810
-
811
- # =============================================================================
812
- # Circuit Breakers: Stale Context Detection
813
- # =============================================================================
814
-
815
- # --- governance.md s7 step 8: Stale context detection ---
816
- compute_context_hash() {
817
- # Hash context-latest.md + memory.md + verified_us from status.json
818
- # This prevents false stale detection when Worker updates memory but not context,
819
- # or when verified_us changes between iterations
820
- local hash_input=""
821
- if [[ -f "$CONTEXT_FILE" ]]; then
822
- hash_input+=$(md5 -q "$CONTEXT_FILE" 2>/dev/null || md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1)
823
- fi
824
- local memory_file="$DESK/memos/${SLUG}-memory.md"
825
- if [[ -f "$memory_file" ]]; then
826
- hash_input+=$(md5 -q "$memory_file" 2>/dev/null || md5sum "$memory_file" 2>/dev/null | cut -d' ' -f1)
827
- fi
828
- if [[ -f "$STATUS_FILE" ]]; then
829
- hash_input+=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
830
- fi
831
- echo -n "$hash_input" | md5 -q 2>/dev/null || echo -n "$hash_input" | md5sum 2>/dev/null | cut -d' ' -f1
832
- }
833
-
834
# check_stale_context() — circuit breaker for iterations that change nothing
# Compares the current context hash with PREV_CONTEXT_HASH and counts
# consecutive unchanged iterations in STALE_CONTEXT_COUNT.
# Returns: 1 once the context has been unchanged for 3 consecutive checks,
#          otherwise 0.
# Globals read/written: PREV_CONTEXT_HASH, STALE_CONTEXT_COUNT
check_stale_context() {
  local current_hash
  current_hash=$(compute_context_hash)

  if [[ "$current_hash" == "${PREV_CONTEXT_HASH:-}" ]]; then
    # Plain assignment instead of (( x++ )): the post-increment form returns
    # status 1 when the counter was 0, which aborts the script under errexit.
    STALE_CONTEXT_COUNT=$(( ${STALE_CONTEXT_COUNT:-0} + 1 ))
    log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
    if (( STALE_CONTEXT_COUNT >= 3 )); then
      log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
      return 1
    fi
  else
    STALE_CONTEXT_COUNT=0
  fi

  PREV_CONTEXT_HASH="$current_hash"
  return 0
}
852
-
853
- # =============================================================================
854
- # Error Detection
855
- # =============================================================================
856
-
857
# --- US-003: API error detector using tmux pane buffer ---
# is_api_error() — scan a tmux pane's visible text for API failure markers
# Args: $1=tmux pane id
# Returns: 0 when the captured text contains a standalone 500 or 529 (not
#          part of a longer digit run) or, case-insensitively, "overloaded",
#          "too many requests" or "service unavailable"; 1 otherwise,
#          including when nothing could be captured.
is_api_error() {
  local pane_id="$1"
  local captured
  captured=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
  [[ -n "$captured" ]] || return 1

  # All markers folded into a single alternation, checked in one grep pass.
  local markers='(^|[^[:digit:]])(500|529)([^[:digit:]]|$)|overloaded|too many requests|service unavailable'
  if echo "$captured" | grep -qiE "$markers"; then
    return 0
  fi
  return 1
}
875
-
876
- # =============================================================================
877
- # Security Warning
878
- # =============================================================================
879
-
880
# print_security_warning() — print the --dangerously-skip-permissions banner
# Writes a fixed nine-line notice (blank line, ruled box, blank line) to stdout.
print_security_warning() {
  cat <<'EOF'

================================================================
 WARNING: Running with --dangerously-skip-permissions

 The claude CLI will execute tools (file writes, shell commands)
 without asking for confirmation. Only run this on code you
 trust in an environment you control.
================================================================

EOF
}