@ai-dev-methodologies/rlp-desk 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,904 +0,0 @@
1
- # lib_ralph_desk.zsh — Shared business logic for RLP Desk runner
2
- # SOURCED by run_ralph_desk.zsh. Do NOT execute directly.
3
- #
4
- # IMPORTANT: Must be sourced at file scope, not inside a function.
5
- # typeset -A creates local arrays inside functions, breaking global state.
6
- # Functions in this file read/write globals defined by the sourcing script.
7
-
8
# Abort when funcstack[2] is non-empty, which this file treats as
# "sourced from inside a function" (see the header note about typeset -A).
[[ -z "${funcstack[2]:-}" ]] || {
  echo "FATAL: lib_ralph_desk.zsh must be sourced at file scope" >&2
  exit 1
}
12
-
13
- # =============================================================================
14
- # Utility Functions
15
- # =============================================================================
16
-
17
# log <message...> — print a timestamped message on stdout.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] $*"
}
20
-
21
# log_debug <message...> — append a timestamped DEBUG line to $DEBUG_LOG.
# Does nothing unless the DEBUG global evaluates to non-zero.
# Globals read: DEBUG, DEBUG_LOG
log_debug() {
  (( DEBUG )) || return 0

  # Create the log directory on demand; errors are silenced as before.
  mkdir -p "$(dirname "$DEBUG_LOG")" 2>/dev/null

  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] DEBUG: $*" >> "$DEBUG_LOG"
}
27
-
28
# log_error <message...> — print a timestamped ERROR line on stderr.
log_error() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] ERROR: $*" >&2
}
31
-
32
# build_claude_cmd() — centralized claude CLI command builder
# Single source of truth for all claude invocation flags
# (--mcp-config, DISABLE_OMC, --effort, etc.).
# Args:
#   $1 = mode (tui|print)
#   $2 = model
#   $3 = prompt_file (required in print mode)
#   $4 = output_log  (print mode only; when empty, tee gets no file argument)
#   $5 = effort      (optional: low|medium|high|max)
# Output: complete command string on stdout
# Returns: 0 on success; 1 on unknown mode or print mode without a prompt file
# Globals read: CLAUDE_BIN
build_claude_cmd() {
  local mode="$1"
  local model="$2"
  local prompt_file="${3:-}"
  local output_log="${4:-}"
  local effort="${5:-}"

  local base="DISABLE_OMC=1 $CLAUDE_BIN --model $model --mcp-config '{\"mcpServers\":{}}' --strict-mcp-config --dangerously-skip-permissions"
  if [[ -n "$effort" ]]; then
    base="$base --effort $effort"
  fi

  case "$mode" in
    tui)
      echo "$base"
      ;;
    print)
      # Without a prompt file the generated `cat` would read stdin.
      if [[ -z "$prompt_file" ]]; then
        echo "ERROR: build_claude_cmd print mode requires a prompt file" >&2
        return 1
      fi
      # The result is a command STRING that another shell re-parses, so the
      # paths are shell-escaped; ordinary paths come out unchanged.
      local quoted_prompt quoted_log=""
      quoted_prompt=$(printf '%q' "$prompt_file")
      if [[ -n "$output_log" ]]; then
        quoted_log=$(printf '%q' "$output_log")
      fi
      echo "$base -p \"\$(cat $quoted_prompt)\" --output-format text 2>&1 | tee $quoted_log"
      ;;
    *)
      echo "ERROR: build_claude_cmd unknown mode '$mode'" >&2
      return 1
      ;;
  esac
}
62
-
63
# parse_model_flag() — parse unified --worker-model / --verifier-model value
# Colon format:
#   claude models (haiku/sonnet/opus) with effort  → claude engine + effort
#   "spark" with reasoning → codex engine, model gpt-5.3-codex-spark
#   any other model with reasoning → codex engine + reasoning
# Plain name (no colon) → claude engine (no effort override)
# Usage: parse_model_flag <value> [role]     (role defaults to "worker")
# Output (stdout): "engine model [reasoning_or_effort]"
#   e.g. "codex gpt-5.4 medium" | "claude opus max" | "claude sonnet"
# Returns: 0 on success; 1 on invalid format (error written to stderr).
#   Invalid means: empty value, more than one colon, or an empty model/level
#   on either side of the colon.
parse_model_flag() {
  local value="$1"
  local role="${2:-worker}"
  local err="ERROR: Invalid --${role}-model format '${value}'. Use 'model:effort' (claude) or 'model:reasoning' (codex)."

  if [[ -z "$value" ]]; then
    echo "$err" >&2
    return 1
  fi

  # Count colons with parameter expansion instead of a tr|wc pipeline.
  local without_colons="${value//:/}"
  local colon_count=$(( ${#value} - ${#without_colons} ))

  if (( colon_count > 1 )); then
    echo "$err" >&2
    return 1
  fi

  if (( colon_count == 0 )); then
    echo "claude $value"
    return 0
  fi

  local model="${value%%:*}"
  local level="${value##*:}"
  # "opus:" or ":high" would otherwise yield a result with an empty field.
  if [[ -z "$model" || -z "$level" ]]; then
    echo "$err" >&2
    return 1
  fi

  # Detect engine by model name
  case "$model" in
    haiku|sonnet|opus)
      echo "claude $model $level"
      ;;
    spark)
      echo "codex gpt-5.3-codex-spark $level"
      ;;
    *)
      echo "codex $model $level"
      ;;
  esac
}
99
-
100
# get_model_string() — engine-appropriate model identifier
# Args: $1=engine (claude|codex)  $2=model  $3=codex_reasoning (optional)
# Output: "model:reasoning" when the engine is codex and a reasoning level is
#   given (e.g. "gpt-5.4:high"); otherwise just the model (e.g. "sonnet").
get_model_string() {
  local engine="$1"
  local model="$2"
  local reasoning="${3:-}"

  local identifier="$model"
  if [[ -n "$reasoning" && "$engine" = "codex" ]]; then
    identifier="${model}:${reasoning}"
  fi
  echo "$identifier"
}
116
-
117
# get_next_model() — next model on the Worker upgrade path
# Usage: get_next_model <model_str>
#   claude: "haiku"|"sonnet"|"opus"
#   codex:  "<family>:<tier>" where family is gpt-5.4 or gpt-5.3-codex-spark
#           and tier is low|medium|high|xhigh
# Output: the next model string, or an empty line when the input is already
#   at its ceiling (opus, *:xhigh) or is not a recognised model string.
get_next_model() {
  local current="$1"
  case "$current" in
    # Claude ladder
    haiku)  echo "sonnet" ;;
    sonnet) echo "opus" ;;
    # Both codex families climb the same tier ladder.
    gpt-5.3-codex-spark:*|gpt-5.4:*)
      local family="${current%%:*}"
      local tier="${current#*:}"
      case "$tier" in
        low)    echo "${family}:medium" ;;
        medium) echo "${family}:high" ;;
        high)   echo "${family}:xhigh" ;;
        *)      echo "" ;;  # xhigh ceiling, or an unknown tier
      esac
      ;;
    # opus ceiling and anything unrecognised
    *) echo "" ;;
  esac
}
142
-
143
# check_model_upgrade() — evaluate and apply a Worker model upgrade
# Called in the fail verdict path. Counts consecutive failures of the same US
# and, once the count reaches 2, moves the Worker to the model returned by
# get_next_model. Never modifies VERIFIER_MODEL.
# Usage: check_model_upgrade <us_id>
# Globals read:    LOCK_WORKER_MODEL, WORKER_ENGINE, ITERATION
# Globals written: _SAME_US_FAIL_COUNT, _LAST_FAILED_US, _MODEL_UPGRADED,
#                  _ORIGINAL_WORKER_MODEL, _ORIGINAL_WORKER_CODEX_REASONING,
#                  _ORIGINAL_WORKER_MODEL_STR, WORKER_MODEL,
#                  WORKER_CODEX_MODEL, WORKER_CODEX_REASONING
# Returns: always 0
check_model_upgrade() {
  local current_us="$1"

  # Track consecutive failures on the same US
  if [[ "$current_us" = "${_LAST_FAILED_US:-}" ]]; then
    _SAME_US_FAIL_COUNT=$(( ${_SAME_US_FAIL_COUNT:-0} + 1 ))
  else
    _SAME_US_FAIL_COUNT=1
    _LAST_FAILED_US="$current_us"
  fi

  # Respect --lock-worker-model: the counter still advances, but no upgrade.
  if (( ${LOCK_WORKER_MODEL:-0} )); then
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=locked"
    return 0
  fi

  # Upgrade only when the same US has failed >= 2 consecutive times
  if (( _SAME_US_FAIL_COUNT < 2 )); then
    return 0
  fi

  local current_model_str next_model
  current_model_str=$(get_model_string "$WORKER_ENGINE" "${WORKER_CODEX_MODEL:-$WORKER_MODEL}" "${WORKER_CODEX_REASONING:-}")
  next_model=$(get_next_model "$current_model_str")

  if [[ -z "$next_model" ]]; then
    # No further model available; leave the configuration untouched.
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=already_max current=$current_model_str"
    return 0
  fi

  # Save the original model on the first upgrade only
  if (( ${_MODEL_UPGRADED:-0} == 0 )); then
    _ORIGINAL_WORKER_MODEL="$WORKER_MODEL"
    _ORIGINAL_WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-}"
    # Display form for logging only; for codex this includes the reasoning
    # tier, so the tier change shows up in the log.
    _ORIGINAL_WORKER_MODEL_STR="$current_model_str"
  fi
  _MODEL_UPGRADED=1

  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    WORKER_CODEX_MODEL="${next_model%%:*}"
    WORKER_CODEX_REASONING="${next_model##*:}"
    WORKER_MODEL="$WORKER_CODEX_MODEL"
  else
    WORKER_MODEL="$next_model"
  fi

  # For the claude engine these two strings equal _ORIGINAL_WORKER_MODEL and
  # WORKER_MODEL, so the claude log lines are unchanged.
  local from_str="${_ORIGINAL_WORKER_MODEL_STR:-$_ORIGINAL_WORKER_MODEL}"
  log " Worker model upgraded: ${from_str} → ${next_model} (same-US consecutive fail threshold)"
  log " [WARN] Same AC failing repeatedly — consider IL-2 re-assessment of AC quality (spec quality check)"
  log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${from_str} to=${next_model}"
  _SAME_US_FAIL_COUNT=0  # Reset counter after upgrade

  return 0
}
201
-
202
# record_us_failure() — bump the per-US cumulative failure counter
# The count lives in the US_FAIL_HISTORY associative array. When the US had
# already failed before this call, a warning is logged.
# Usage: record_us_failure <us_id>   (empty or "unknown" ids are ignored)
# Globals read: ITERATION    Globals written: US_FAIL_HISTORY
# Returns: always 0
record_us_failure() {
  local us_id="$1"
  if [[ -z "$us_id" || "$us_id" = "unknown" ]]; then
    return 0
  fi

  local prior="${US_FAIL_HISTORY[$us_id]:-0}"
  US_FAIL_HISTORY[$us_id]=$(( prior + 1 ))

  # A US that has failed before is showing fragility: flag it.
  if (( prior > 0 )); then
    log " [WARN] US $us_id has prior failure history (${US_FAIL_HISTORY[$us_id]} total failures) — consider IL-2 AC quality re-assessment"
    log_debug "[GOV] iter=${ITERATION:-0} us_prior_failures=$us_id count=${US_FAIL_HISTORY[$us_id]}"
  fi

  return 0
}
221
-
222
# --- governance.md s7: Atomic file writes (tmux pattern) ---
# atomic_write <target> — write stdin to <target> via a temp file plus mv.
# The temp file is created next to the target ("<target>.tmp.<pid>").
# Returns: 0 on success; 1 when writing the temp file or the rename fails.
#   On failure the temp file is removed and the target is left untouched.
atomic_write() {
  local target="$1"
  local tmp="${target}.tmp.$$"

  # Do not rename a partially written (or never created) temp file.
  if ! cat > "$tmp"; then
    rm -f "$tmp"
    return 1
  fi

  if ! mv -f "$tmp" "$target"; then
    rm -f "$tmp"
    return 1
  fi
}
230
-
231
- # =============================================================================
232
- # Scaffold Validation
233
- # =============================================================================
234
-
235
# validate_scaffold — check that the campaign scaffold files exist
# Logs one error per missing file, then exits the shell with status 1 if any
# were missing. On success, creates $LOGS_DIR.
# Globals read: WORKER_PROMPT_BASE, VERIFIER_PROMPT_BASE, CONTEXT_FILE,
#               MEMORY_FILE, LOGS_DIR
validate_scaffold() {
  local missing=0
  local entry label target

  # Each entry is "<label>|<file>"; the label never contains a '|'.
  for entry in \
    "Worker prompt|$WORKER_PROMPT_BASE" \
    "Verifier prompt|$VERIFIER_PROMPT_BASE" \
    "Context file|$CONTEXT_FILE" \
    "Memory file|$MEMORY_FILE"; do
    label="${entry%%|*}"
    target="${entry#*|}"
    if [[ -f "$target" ]]; then
      continue
    fi
    log_error "${label} not found: ${target}"
    missing=1
  done

  if (( missing )); then
    log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
    exit 1
  fi

  mkdir -p "$LOGS_DIR"
}
265
-
266
- # =============================================================================
267
- # Status Updates
268
- # =============================================================================
269
-
270
# --- governance.md s7 step 8: Update status.json ---
# update_status <phase> <last_result>
# Writes the campaign state to $STATUS_FILE through atomic_write. The JSON is
# assembled by jq, so every string value is escaped correctly. The consensus_*
# and *_verdict keys appear only when CONSENSUS_MODE is not "off".
# Globals read: SLUG, BASELINE_COMMIT, ITERATION, MAX_ITER, WORKER_*,
#   VERIFIER_*, VERIFY_MODE, CONSENSUS_MODE, CONSENSUS_SCOPE, CONSENSUS_ROUND,
#   CLAUDE_VERDICT, CODEX_VERDICT, CONSECUTIVE_FAILURES, VERIFIED_US,
#   STATUS_FILE
# Returns: 1 (leaving the existing status file untouched) when the JSON cannot
#   be built; otherwise the status of atomic_write.
update_status() {
  local phase="$1"
  local last_result="$2"

  # VERIFIED_US is a comma-separated list; turn it into a JSON array.
  local verified_us_json="[]"
  if [[ -n "${VERIFIED_US:-}" ]]; then
    verified_us_json=$(printf '%s\n' "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -sc .)
  fi

  local status_json
  status_json=$(jq -n \
    --arg slug "${SLUG:-}" \
    --arg baseline_commit "${BASELINE_COMMIT:-none}" \
    --argjson iteration "${ITERATION:-0}" \
    --argjson max_iter "${MAX_ITER:-0}" \
    --arg phase "$phase" \
    --arg worker_model "${WORKER_MODEL:-}" \
    --arg verifier_model "${VERIFIER_MODEL:-}" \
    --arg worker_engine "${WORKER_ENGINE:-}" \
    --arg verifier_engine "${VERIFIER_ENGINE:-}" \
    --arg worker_codex_model "${WORKER_CODEX_MODEL:-}" \
    --arg worker_codex_reasoning "${WORKER_CODEX_REASONING:-}" \
    --arg verifier_codex_model "${VERIFIER_CODEX_MODEL:-}" \
    --arg verifier_codex_reasoning "${VERIFIER_CODEX_REASONING:-}" \
    --arg verify_mode "${VERIFY_MODE:-}" \
    --arg consensus_mode "${CONSENSUS_MODE:-}" \
    --arg last_result "$last_result" \
    --argjson consecutive_failures "${CONSECUTIVE_FAILURES:-0}" \
    --argjson verified_us "${verified_us_json:-[]}" \
    --arg consensus_scope "${CONSENSUS_SCOPE:-}" \
    --argjson consensus_round "${CONSENSUS_ROUND:-0}" \
    --arg claude_verdict "${CLAUDE_VERDICT:-}" \
    --arg codex_verdict "${CODEX_VERDICT:-}" \
    --arg updated_at_utc "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{
      slug: $slug,
      baseline_commit: $baseline_commit,
      iteration: $iteration,
      max_iter: $max_iter,
      phase: $phase,
      worker_model: $worker_model,
      verifier_model: $verifier_model,
      worker_engine: $worker_engine,
      verifier_engine: $verifier_engine,
      worker_codex_model: $worker_codex_model,
      worker_codex_reasoning: $worker_codex_reasoning,
      verifier_codex_model: $verifier_codex_model,
      verifier_codex_reasoning: $verifier_codex_reasoning,
      verify_mode: $verify_mode,
      consensus_mode: $consensus_mode,
      last_result: $last_result,
      consecutive_failures: $consecutive_failures,
      verified_us: $verified_us
    }
    + (if $consensus_mode != "off" then {
        consensus_scope: $consensus_scope,
        consensus_round: $consensus_round,
        claude_verdict: $claude_verdict,
        codex_verdict: $codex_verdict
      } else {} end)
    + {updated_at_utc: $updated_at_utc}')

  # Never replace a good status file with empty or partial output.
  if [[ $? -ne 0 || -z "$status_json" ]]; then
    log_error "update_status: failed to build status JSON; $STATUS_FILE left unchanged"
    return 1
  fi

  printf '%s\n' "$status_json" | atomic_write "$STATUS_FILE"
}
313
-
314
# --- governance.md s7 step 8: Write result log ---
# write_result_log <iter> <result>
# Writes $LOGS_DIR/iter-NNN.result.md through atomic_write. The file holds the
# result, `git diff --stat HEAD` for $ROOT (or a placeholder when that is not
# available), up to 20 untracked files, and a UTC timestamp.
# Globals read: LOGS_DIR, ROOT
write_result_log() {
  local iter="$1"
  local result="$2"
  local result_file="$LOGS_DIR/iter-$(printf '%03d' $iter).result.md"

  local changes
  if ! git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    changes="(no commits in repo — cannot diff)"
  else
    changes=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(no git diff available)")
  fi

  # Untracked files do not show up in `git diff`, so list them separately.
  local new_files
  new_files=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$new_files" ]]; then
    changes+=$'\n\n'"Untracked new files:"$'\n'"${new_files}"
  fi

  {
    echo "# Iteration $iter Result"
    printf '\n## Status\n'
    echo "$result [leader-measured]"
    printf '\n## Files Changed\n```\n'
    echo "$changes"
    printf '```\n[git-measured]\n\n## Timestamp\n'
    date -u +%Y-%m-%dT%H:%M:%SZ
  } | atomic_write "$result_file"
}
352
-
353
# --- step 7d: Archive iteration artifacts (done-claim + verdict) to logs/ ---
# archive_iter_artifacts <iter>
# Copies $DONE_CLAIM_FILE and $VERDICT_FILE, when they exist, to
# $LOGS_DIR/iter-NNN-done-claim.json and $LOGS_DIR/iter-NNN-verify-verdict.json.
# cp error output is discarded.
archive_iter_artifacts() {
  local iter="$1"
  local tag
  tag=$(printf '%03d' "$iter")

  # Each entry is "<source file>|<archive suffix>"; the suffix has no '|'.
  local entry src suffix
  for entry in "$DONE_CLAIM_FILE|done-claim" "$VERDICT_FILE|verify-verdict"; do
    src="${entry%|*}"
    suffix="${entry##*|}"
    if [[ -f "$src" ]]; then
      cp "$src" "$LOGS_DIR/iter-${tag}-${suffix}.json" 2>/dev/null
    fi
  done
}
365
-
366
# _rlp_epoch_to_iso <epoch_seconds> — print the epoch as a UTC ISO-8601 stamp.
# Tries the BSD/macOS form (`date -r <epoch>`) first, then the GNU form
# (`date -d @<epoch>`). Prints an empty line when neither works.
_rlp_epoch_to_iso() {
  local epoch="$1"
  date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || date -u -d "@${epoch}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || echo ""
}

# --- AC5: Write per-iteration cost estimate to cost-log.jsonl ---
# write_cost_log <iter>
# Appends one JSON line to $COST_LOG. estimated_tokens is the combined byte
# size of the iteration's worker prompt, done-claim and verdict, divided by 4.
# Per-phase timing comes from the ITER_* epoch globals when they are set.
# Globals read: LOGS_DIR, DONE_CLAIM_FILE, VERDICT_FILE, COST_LOG,
#   ITER_WORKER_START/END, ITER_VERIFIER_START/END,
#   ITER_VERIFIER_CLAUDE_DURATION_S, ITER_VERIFIER_CODEX_DURATION_S
write_cost_log() {
  local iter="$1"
  local iter_padded
  iter_padded=$(printf '%03d' "$iter")

  local prompt_bytes=0 claim_bytes=0 verdict_bytes=0
  local worker_prompt_file="$LOGS_DIR/iter-${iter_padded}.worker-prompt.md"
  # $(( ... )) strips the padding some wc implementations emit.
  if [[ -f "$worker_prompt_file" ]]; then
    prompt_bytes=$(( $(wc -c < "$worker_prompt_file" 2>/dev/null || echo 0) ))
  fi
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    claim_bytes=$(( $(wc -c < "$DONE_CLAIM_FILE" 2>/dev/null || echo 0) ))
  fi
  if [[ -f "$VERDICT_FILE" ]]; then
    verdict_bytes=$(( $(wc -c < "$VERDICT_FILE" 2>/dev/null || echo 0) ))
  fi

  local estimated_tokens=$(( (prompt_bytes + claim_bytes + verdict_bytes) / 4 ))

  # AC1: per-phase timing fields. A missing end time falls back to the start.
  local worker_start_time="" worker_end_time="" worker_duration_s=0
  local verifier_start_time="" verifier_end_time="" verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    local worker_end_epoch="${ITER_WORKER_END:-$ITER_WORKER_START}"
    worker_start_time=$(_rlp_epoch_to_iso "$ITER_WORKER_START")
    worker_end_time=$(_rlp_epoch_to_iso "$worker_end_epoch")
    worker_duration_s=$(( worker_end_epoch - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    local verifier_end_epoch="${ITER_VERIFIER_END:-$ITER_VERIFIER_START}"
    verifier_start_time=$(_rlp_epoch_to_iso "$ITER_VERIFIER_START")
    verifier_end_time=$(_rlp_epoch_to_iso "$verifier_end_epoch")
    verifier_duration_s=$(( verifier_end_epoch - ITER_VERIFIER_START ))
  fi

  # AC2: consensus mode per-engine timing (only when the globals are set)
  local consensus_fields=""
  if [[ -n "${ITER_VERIFIER_CLAUDE_DURATION_S:-}" ]]; then
    consensus_fields+=',"verifier_claude_duration_s":'"${ITER_VERIFIER_CLAUDE_DURATION_S}"
  fi
  if [[ -n "${ITER_VERIFIER_CODEX_DURATION_S:-}" ]]; then
    consensus_fields+=',"verifier_codex_duration_s":'"${ITER_VERIFIER_CODEX_DURATION_S}"
  fi

  printf '%s\n' '{"iteration":'"$iter"',"estimated_tokens":'"$estimated_tokens"',"token_source":"estimated","prompt_bytes":'"$prompt_bytes"',"claim_bytes":'"$claim_bytes"',"verdict_bytes":'"$verdict_bytes"',"worker_start_time":"'"$worker_start_time"'","worker_end_time":"'"$worker_end_time"'","worker_duration_s":'"$worker_duration_s"',"verifier_start_time":"'"$verifier_start_time"'","verifier_end_time":"'"$verifier_end_time"'","verifier_duration_s":'"$verifier_duration_s"''"$consensus_fields"',"timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$COST_LOG"
}
405
-
406
# --- Analytics: write per-iteration structured data to campaign.jsonl (always-on) ---
# write_campaign_jsonl <iter> [us_id] [verdict]
# Appends one compact JSON object to $CAMPAIGN_JSONL. A phase with a start
# epoch but no end epoch is measured up to "now".
# Globals read: ITER_WORKER_START/END, ITER_VERIFIER_START/END,
#   US_FAIL_HISTORY, WORKER_MODEL, WORKER_ENGINE, VERIFIER_ENGINE,
#   CLAUDE_VERDICT, CODEX_VERDICT, CONSENSUS_MODE, CONSECUTIVE_FAILURES,
#   _MODEL_UPGRADED, ROOT, SLUG, CAMPAIGN_JSONL
write_campaign_jsonl() {
  local iter="$1"
  local us_id="${2:-unknown}"
  local verdict="${3:-unknown}"

  local worker_duration_s=0
  local verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_duration_s=$(( ${ITER_WORKER_END:-$(date +%s)} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_duration_s=$(( ${ITER_VERIFIER_END:-$(date +%s)} - ITER_VERIFIER_START ))
  fi

  # Build the us_fail_history object from the associative array. The pairs go
  # through jq as "key<TAB>count" lines so the keys are JSON-escaped. `key` is
  # declared local so it does not leak into the sourcing script's globals.
  local us_fail_history_json="{}"
  local key
  if (( ${#US_FAIL_HISTORY[@]} > 0 )); then
    us_fail_history_json=$(
      for key in "${(@k)US_FAIL_HISTORY}"; do
        printf '%s\t%s\n' "$key" "${US_FAIL_HISTORY[$key]}"
      done | jq -Rnc '[inputs | split("\t") | {(.[0]): (.[1] | tonumber)}] | add // {}'
    )
    # Fall back to an empty object rather than dropping the whole line below.
    if [[ $? -ne 0 || -z "$us_fail_history_json" ]]; then
      us_fail_history_json="{}"
    fi
  fi

  jq -nc \
    --argjson iter "$iter" \
    --arg us_id "$us_id" \
    --arg worker_model "$WORKER_MODEL" \
    --arg worker_engine "$WORKER_ENGINE" \
    --arg verifier_engine "$VERIFIER_ENGINE" \
    --arg claude_verdict "${CLAUDE_VERDICT:-$verdict}" \
    --arg codex_verdict "${CODEX_VERDICT:-N/A}" \
    --arg consensus_mode "$CONSENSUS_MODE" \
    --argjson consecutive_failures "${CONSECUTIVE_FAILURES:-0}" \
    --argjson model_upgraded "${_MODEL_UPGRADED:-0}" \
    --argjson us_fail_history "$us_fail_history_json" \
    --argjson duration_worker_s "$worker_duration_s" \
    --argjson duration_verifier_s "$verifier_duration_s" \
    --arg project_root "$ROOT" \
    --arg slug "$SLUG" \
    --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus_mode: $consensus_mode, consecutive_failures: $consecutive_failures, model_upgraded: $model_upgraded, us_fail_history: $us_fail_history, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
    >> "$CAMPAIGN_JSONL"
}
454
-
455
# --- AC4: Generate campaign-report.md on all terminal states ---
# generate_campaign_report
# Writes $LOGS_DIR/campaign-report.md through atomic_write, at most once per
# run (guarded by CAMPAIGN_REPORT_GENERATED). The terminal state is COMPLETE
# or BLOCKED when the matching sentinel file exists, TIMEOUT otherwise.
# An existing report is first renamed to campaign-report-vN.md (AC9).
# Globals read: COMPLETE_SENTINEL, BLOCKED_SENTINEL, LOGS_DIR, START_TIME,
#   BASELINE_COMMIT, ROOT, WITH_SELF_VERIFICATION, SLUG, DESK, ITERATION,
#   MAX_ITER, WORKER_*, VERIFIER_*, CONSENSUS_*, FINAL_CONSENSUS_MODEL,
#   VERIFIED_US, CONSECUTIVE_FAILURES, COST_LOG
# Globals written: CAMPAIGN_REPORT_GENERATED
generate_campaign_report() {
  # Guard: idempotent — only generate once per campaign run
  if (( CAMPAIGN_REPORT_GENERATED )); then
    return 0
  fi
  CAMPAIGN_REPORT_GENERATED=1

  local final_status="TIMEOUT"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
  fi

  local report_file="$LOGS_DIR/campaign-report.md"

  # AC9: version the existing report before writing a new one
  if [[ -f "$report_file" ]]; then
    local report_stem="${report_file%.md}"
    local slot=1
    while [[ -f "${report_stem}-v${slot}.md" ]]; do
      (( slot++ ))
    done
    mv "$report_file" "${report_stem}-v${slot}.md"
  fi

  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))

  # Diff against the campaign baseline when known, otherwise against HEAD.
  local baseline_commit_val="${BASELINE_COMMIT:-none}"
  local files_changed=""
  if [[ "$baseline_commit_val" != "none" ]]; then
    files_changed=$(git -C "$ROOT" diff --stat "${baseline_commit_val}" 2>/dev/null || echo "(git diff unavailable)")
  elif git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    files_changed=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(git diff unavailable)")
  else
    files_changed="(no commits in repo — cannot diff)"
  fi
  # Include untracked new files (first 20)
  local untracked
  untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$untracked" ]]; then
    files_changed+=$'\n\n'"Untracked new files:"$'\n'"${untracked}"
  fi

  local sv_summary=""
  if (( WITH_SELF_VERIFICATION )); then
    local sv_report
    sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
    if [[ -n "$sv_report" ]]; then
      sv_summary="See: $sv_report"
    else
      sv_summary="SV report generation pending — will be appended after this report."
    fi
  else
    sv_summary="N/A — --with-self-verification not enabled"
  fi

  # Every variable used inside the loops below is declared ONCE, here, with an
  # initial value. In zsh, re-running a bare `local name` on an already-set
  # local prints "name=value" to stdout, which would land in the report.
  local prd_file="$DESK/plans/prd-$SLUG.md"
  local ri=1 iter_dc="" claim_us=""
  local fi_i=1 fi_found=0 fix_f=""
  local total_tokens=0 cost_line="" line_tokens=0

  {
    echo "# Campaign Report: $SLUG"
    echo ""
    echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) | Status: $final_status | Iterations: $ITERATION"
    echo ""
    echo "## Objective"
    if [[ -f "$prd_file" ]]; then
      # The three lines that follow the "## Objective" heading of the PRD
      grep '^## Objective' -A3 "$prd_file" 2>/dev/null | tail -n +2 | head -3
    else
      echo "(PRD not found)"
    fi
    echo ""
    echo "## Execution Summary"
    echo "- Terminal state: $final_status"
    echo "- Iterations run: $ITERATION / $MAX_ITER"
    echo "- Elapsed: ${elapsed}s"
    echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
    echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
    echo "- Consensus: mode=$CONSENSUS_MODE model=$CONSENSUS_MODEL final_model=$FINAL_CONSENSUS_MODEL"
    echo ""
    echo "## US Status"
    echo "- Verified: ${VERIFIED_US:-none}"
    echo "- Consecutive failures at end: $CONSECUTIVE_FAILURES"
    echo ""
    echo "## Verification Results"
    # One line per archived done-claim, tagged with its us_id
    for (( ri = 1; ri <= ITERATION; ri++ )); do
      iter_dc="$LOGS_DIR/iter-$(printf '%03d' "$ri")-done-claim.json"
      if [[ -f "$iter_dc" ]]; then
        claim_us=$(jq -r '.us_id // "unknown"' "$iter_dc" 2>/dev/null)
        echo "- $(basename "$iter_dc"): us_id=$claim_us"
      fi
    done
    echo ""
    echo "## Issues Encountered"
    # List the fix-contract files found for iterations 1..ITERATION
    for (( fi_i = 1; fi_i <= ITERATION; fi_i++ )); do
      fix_f="$LOGS_DIR/iter-$(printf '%03d' "$fi_i").fix-contract.md"
      if [[ -f "$fix_f" ]]; then
        echo "- $(basename "$fix_f")"
        fi_found=1
      fi
    done
    if (( fi_found == 0 )); then
      echo "- None"
    fi
    echo ""
    echo "## Cost & Performance"
    if [[ -f "$COST_LOG" ]]; then
      # Sum estimated_tokens over the log; unparsable lines count as 0.
      while IFS= read -r cost_line; do
        line_tokens=$(echo "$cost_line" | jq -r '.estimated_tokens // 0' 2>/dev/null || echo 0)
        total_tokens=$(( total_tokens + line_tokens ))
      done < "$COST_LOG"
      echo "- Total estimated tokens: $total_tokens (source: estimated, tmux mode)"
      echo "- See: cost-log.jsonl for per-iteration breakdown"
    else
      echo "- No cost data available"
    fi
    echo ""
    echo "## SV Summary"
    echo "$sv_summary"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$files_changed"
    echo '```'
    echo "Note: Files Changed may include pre-existing uncommitted changes if the campaign started in a dirty worktree."
    echo ""
    echo "## Suggested Next Actions"
    case "$final_status" in
      COMPLETE)
        echo "- Review verified US list and plan next feature campaign or next cycle"
        echo "- Consider re-run with --mode improve for quality refinement"
        echo "- Archive campaign artifacts and update project documentation"
        ;;
      BLOCKED)
        echo "- Review PRD acceptance criteria for the failing US"
        echo "- Check circuit breaker history (consecutive failures: $CONSECUTIVE_FAILURES)"
        echo "- Consider relaxing verifier criteria if false-negative pattern detected"
        ;;
      TIMEOUT)
        echo "- Increase --max-iter to allow more iterations for completion"
        echo "- Reduce scope by splitting remaining US into a follow-up campaign"
        echo "- Review last iteration done-claim for partial progress"
        ;;
    esac
  } | atomic_write "$report_file"

  log "Campaign report written: $report_file"
}
602
-
603
# generate_sv_report
# Runs `claude --print` to produce the next numbered
# $LOGS_DIR/self-verification-report-NNN.md and appends a one-line outcome to
# $LOGS_DIR/campaign-report.md. Runs at most once per campaign
# (SV_REPORT_GENERATED) and only when WITH_SELF_VERIFICATION is non-zero.
# The run is bounded by _SV_TIMEOUT_SECS (default 300, whole seconds).
# Globals read: WITH_SELF_VERIFICATION, LOGS_DIR, _SV_TIMEOUT_SECS
# Globals written: SV_REPORT_GENERATED
# Returns: always 0 — a missing CLI, a timeout or a CLI failure is recorded in
#   the campaign report instead of failing the caller.
generate_sv_report() {
  # AC1-boundary: SV_REPORT_GENERATED guard
  if (( SV_REPORT_GENERATED )); then return 0; fi

  # AC3-negative: nothing to do unless self-verification was requested
  if (( ! WITH_SELF_VERIFICATION )); then return 0; fi

  SV_REPORT_GENERATED=1

  local campaign_report="$LOGS_DIR/campaign-report.md"

  # AC4: check claude CLI availability — graceful degradation, not exit 1
  if ! command -v claude &>/dev/null; then
    echo "SV report generation failed: claude CLI not found" >> "$campaign_report"
    return 0
  fi

  # AC2: versioning — find the next free report number in the logs dir
  local sv_version=1
  while [[ -f "$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md" ]]; do
    (( sv_version++ ))
  done
  local sv_report_file="$LOGS_DIR/self-verification-report-$(printf '%03d' "$sv_version").md"

  log "Generating SV report: $(basename "$sv_report_file")"

  # AC5: configurable timeout with in-process watchdog
  local _sv_timeout_secs="${_SV_TIMEOUT_SECS:-300}"
  local _sv_timeout_file="$LOGS_DIR/.sv_timeout_${$}.tmp"
  rm -f "$_sv_timeout_file"

  # Spawn claude CLI in background — stdout becomes the report file
  claude --print "Analyze campaign artifacts in $LOGS_DIR and generate a self-verification report with sections: 1. Automated Validation Summary, 2. Failure Deep Dive, 3. Worker Process Quality, 4. Verifier Judgment Quality, 5. AC Lifecycle, 6. Test-Spec Adherence, 7. Patterns: Strengths & Weaknesses, 8. Recommendations for Next Cycle, 9. Cost & Performance, 10. Blind Spots." \
    > "$sv_report_file" 2>/dev/null &
  local _sv_pid=$!

  # AC5: watchdog — after the timeout it creates the marker file, THEN kills
  # the CLI. It sleeps in 1-second steps so that it exits by itself soon
  # after the CLI finishes and never leaves a long-running `sleep` behind.
  local _sv_watchdog
  (
    _sv_waited=0
    while (( _sv_waited < _sv_timeout_secs )); do
      sleep 1
      kill -0 "$_sv_pid" 2>/dev/null || exit 0
      (( _sv_waited += 1 ))
    done
    if kill -0 "$_sv_pid" 2>/dev/null; then
      touch "$_sv_timeout_file"
      kill "$_sv_pid" 2>/dev/null
    fi
  ) </dev/null >/dev/null 2>&1 &
  _sv_watchdog=$!

  wait "$_sv_pid"
  local _sv_exit=$?
  kill "$_sv_watchdog" 2>/dev/null
  wait "$_sv_watchdog" 2>/dev/null

  # AC5: detect timeout — exit code 124 or watchdog marker present
  if [[ "$_sv_exit" == 124 ]] || [[ -f "$_sv_timeout_file" ]]; then
    rm -f "$_sv_timeout_file"
    local _timeout_msg="SV report generation TIMEOUT: exceeded ${_sv_timeout_secs}s"
    echo "$_timeout_msg" >> "$sv_report_file"
    echo "$_timeout_msg" >> "$campaign_report"
    log "$_timeout_msg"
    return 0
  fi

  # A failed CLI run must not be advertised as a finished report.
  if (( _sv_exit != 0 )); then
    local _fail_msg="SV report generation failed: claude CLI exited with status ${_sv_exit}"
    echo "$_fail_msg" >> "$campaign_report"
    log "$_fail_msg"
    return 0
  fi

  # On success: append reference to campaign-report (full path, cross-directory)
  echo "See: $sv_report_file" >> "$campaign_report"
  log "SV report written: $sv_report_file"
  return 0
}
670
-
671
- # =============================================================================
672
- # Sentinel Writers
673
- # =============================================================================
674
-
675
# --- governance.md s7: Only the Leader writes sentinels ---
# write_complete_sentinel <summary>
# Writes the "Campaign Complete" sentinel to $COMPLETE_SENTINEL through
# atomic_write and logs its location.
# Globals read: ITERATION, COMPLETE_SENTINEL
write_complete_sentinel() {
  local summary="$1"
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  {
    echo "# Campaign Complete"
    echo ""
    echo "Completed at iteration $ITERATION."
    echo "$summary"
    echo ""
    echo "Timestamp: $stamp"
  } | atomic_write "$COMPLETE_SENTINEL"

  log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
}
686
-
687
# write_blocked_sentinel <reason>
# Writes the "Campaign Blocked" sentinel to $BLOCKED_SENTINEL through
# atomic_write and logs its location.
# Globals read: ITERATION, BLOCKED_SENTINEL
write_blocked_sentinel() {
  local reason="$1"
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  {
    echo "# Campaign Blocked"
    echo ""
    echo "Blocked at iteration $ITERATION."
    echo "Reason: $reason"
    echo ""
    echo "Timestamp: $stamp"
  } | atomic_write "$BLOCKED_SENTINEL"

  log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
}
697
-
698
- # =============================================================================
699
- # PRD Tracking
700
- # =============================================================================
701
-
702
- # --- US-004: Live PRD update helpers ---
703
- compute_prd_hash() {
704
- local prd_file="${PRD_FILE:-}"
705
- if [[ -z "$prd_file" && -n "${DESK:-}" && -n "${SLUG:-}" ]]; then
706
- prd_file="$DESK/plans/prd-$SLUG.md"
707
- fi
708
- if [[ -f "$prd_file" ]]; then
709
- md5 -q "$prd_file" 2>/dev/null || md5sum "$prd_file" 2>/dev/null | cut -d' ' -f1
710
- else
711
- echo ""
712
- fi
713
- }
714
-
715
# count_prd_us
# Prints the distinct US ids found in "### US-<digits>" headings of the PRD as
# a sorted, comma-separated list (e.g. "US-001,US-002"); prints an empty line
# when there is no PRD file. The PRD is located as in compute_prd_hash.
count_prd_us() {
  local prd_target="${PRD_FILE:-}"
  if [[ -z "$prd_target" ]] && [[ -n "${DESK:-}" ]] && [[ -n "${SLUG:-}" ]]; then
    prd_target="$DESK/plans/prd-$SLUG.md"
  fi

  if [[ ! -f "$prd_target" ]]; then
    echo ""
    return
  fi

  # Keep only the "US-NNN" part of each matching heading, de-duplicate, then
  # join the lines with commas and drop the trailing comma.
  sed -nE 's/^### (US-[0-9]+).*$/\1/p' "$prd_target" 2>/dev/null \
    | sort -u \
    | tr '\n' ',' \
    | sed 's/,$//'
}
726
-
727
# split_prd_by_us <prd_file> <slug>
# Splits the PRD into one file per user story, written next to the PRD as
# prd-<slug>-US-NNN.md. A section starts at a "### US-<digits>:" heading and
# runs to the next such heading; text before the first heading is not copied.
# Does nothing (status 0) when the file is missing or has no "### US-" line.
split_prd_by_us() {
  local prd_file="$1"
  local slug="$2"
  local out_dir
  out_dir="$(dirname "$prd_file")"

  if [[ ! -f "$prd_file" ]]; then
    return 0
  fi

  local heading_total
  heading_total=$(grep -c '^### US-' "$prd_file" 2>/dev/null) || heading_total=0
  if (( heading_total == 0 )); then
    return 0
  fi

  awk -v dir="$out_dir" -v slug="$slug" '
    /^### US-[0-9]+:/ {
      # A new section begins: finish the previous file, switch to the next.
      if (dest != "") close(dest)
      match($0, /US-[0-9]+/)
      dest = dir "/prd-" slug "-" substr($0, RSTART, RLENGTH) ".md"
    }
    dest != "" { print > dest }
  ' "$prd_file"
}
751
-
752
# split_test_spec_by_us <ts_file> <slug>
# Splits the test spec into one file per user story, written next to the spec
# as test-spec-<slug>-US-NNN.md. Each output file is the shared header (every
# line before the first "## US-<digits>:" heading) followed by that story's
# section. Does nothing (status 0) when the file is missing or has no
# "## US-" line.
#
# A single awk pass writes the header straight into each file it creates.
# Split files left over from earlier runs are therefore not touched, and no
# temp files or filename glob are needed.
split_test_spec_by_us() {
  local ts_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$ts_file")"

  [[ -f "$ts_file" ]] || return 0

  local us_count
  us_count=$(grep -c '^## US-' "$ts_file" 2>/dev/null) || us_count=0
  if (( us_count == 0 )); then
    return 0
  fi

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^## US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      us_id = substr($0, RSTART, RLENGTH)
      out = dir "/test-spec-" slug "-" us_id ".md"
      # The first write to a newly opened file truncates it; start with the
      # shared header, then the heading line itself.
      printf "%s", header > out
      print > out
      next
    }
    # Still before the first US heading: collect the shared header.
    out == "" { header = header $0 "\n"; next }
    { print > out }
  ' "$ts_file"
}
786
-
787
# check_prd_update
# Detects a PRD change by comparing its current hash with PREV_PRD_HASH.
# On a change: sets _PRD_CHANGED=1, re-splits the PRD and the test spec per US,
# and replaces US_LIST with the current list. In every case PREV_PRD_HASH and
# PREV_PRD_US_LIST are updated to the current values.
# Globals read: PRD_FILE, DESK, SLUG, TEST_SPEC_FILE, PREV_PRD_HASH,
#   PREV_PRD_US_LIST
# Globals written: _PRD_CHANGED, US_LIST, PREV_PRD_HASH, PREV_PRD_US_LIST
check_prd_update() {
  local current_hash current_us_list us_count_prev us_count_now new_us
  current_hash=$(compute_prd_hash)
  current_us_list=$(count_prd_us)

  # grep -c prints "0" AND exits 1 when nothing matches, so its status is
  # ignored instead of being used to append a second "0".
  us_count_prev=$(printf '%s\n' "${PREV_PRD_US_LIST:-}" | tr ',' '\n' | grep -c 'US-') || true
  us_count_now=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | grep -c 'US-') || true

  _PRD_CHANGED=0

  if [[ "$current_hash" != "${PREV_PRD_HASH:-}" ]]; then
    _PRD_CHANGED=1

    # Ids present in the current list but absent from the previous one
    new_us=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | awk -v prev="${PREV_PRD_US_LIST:-}" '
      BEGIN {
        split(prev, p, ",")
        for (i in p) {
          seen[p[i]] = 1
        }
      }
      {
        if ($0 != "" && !seen[$0]) {
          if (out == "") out = $0
          else out = out "," $0
        }
      }
      END { print out }
    ')
    log_debug "prd_changed=true prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now} new_us=${new_us:-none}"

    # Split the same PRD the hash was computed from: $PRD_FILE, else the
    # DESK/SLUG default that compute_prd_hash and count_prd_us fall back to.
    local prd_file="${PRD_FILE:-}"
    if [[ -z "$prd_file" && -n "${DESK:-}" && -n "${SLUG:-}" ]]; then
      prd_file="$DESK/plans/prd-$SLUG.md"
    fi
    split_prd_by_us "$prd_file" "$SLUG"
    split_test_spec_by_us "${TEST_SPEC_FILE:-}" "$SLUG"
    US_LIST="$current_us_list"
  else
    log_debug "prd_changed=false prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now}"
  fi

  PREV_PRD_HASH="$current_hash"
  PREV_PRD_US_LIST="$current_us_list"
}
824
-
825
- # =============================================================================
826
- # Circuit Breakers: Stale Context Detection
827
- # =============================================================================
828
-
829
# --- governance.md s7 step 8: Stale context detection ---
# compute_context_hash
# Prints one MD5 digest taken over the concatenation of:
#   the MD5 of $CONTEXT_FILE, the MD5 of $DESK/memos/$SLUG-memory.md, and the
#   comma-joined verified_us array read from $STATUS_FILE.
# Inputs whose file does not exist contribute nothing. Including the memory
# file and verified_us avoids a false "stale" verdict when only those change.
compute_context_hash() {
  local digest_src=""
  local candidate

  for candidate in "$CONTEXT_FILE" "$DESK/memos/${SLUG}-memory.md"; do
    if [[ -f "$candidate" ]]; then
      digest_src+=$(md5 -q "$candidate" 2>/dev/null || md5sum "$candidate" 2>/dev/null | cut -d' ' -f1)
    fi
  done

  if [[ -f "$STATUS_FILE" ]]; then
    digest_src+=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
  fi

  echo -n "$digest_src" | md5 -q 2>/dev/null || echo -n "$digest_src" | md5sum 2>/dev/null | cut -d' ' -f1
}
847
-
848
# check_stale_context
# Compares the current context hash with PREV_CONTEXT_HASH and counts
# consecutive unchanged iterations in STALE_CONTEXT_COUNT.
# Returns: 1 once the context has been unchanged 3 times in a row (circuit
#   breaker), 0 otherwise.
# Globals written: STALE_CONTEXT_COUNT, PREV_CONTEXT_HASH
check_stale_context() {
  local current_hash
  current_hash=$(compute_context_hash)

  # Context moved: reset the streak and remember the new hash.
  if [[ "$current_hash" != "$PREV_CONTEXT_HASH" ]]; then
    STALE_CONTEXT_COUNT=0
    PREV_CONTEXT_HASH="$current_hash"
    return 0
  fi

  (( STALE_CONTEXT_COUNT++ ))
  log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
  if (( STALE_CONTEXT_COUNT >= 3 )); then
    log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
    return 1
  fi

  PREV_CONTEXT_HASH="$current_hash"
  return 0
}
866
-
867
- # =============================================================================
868
- # Error Detection
869
- # =============================================================================
870
-
871
# --- US-003: API error detector using tmux pane buffer ---
# is_api_error <pane_id>
# Captures the visible text of the tmux pane and looks (case-insensitively)
# for a standalone "500" or "529" — not part of a longer digit run — or for
# one of the phrases "overloaded", "too many requests", "service unavailable".
# Returns: 0 when a marker is found; 1 otherwise, including when the pane
#   cannot be captured or is empty.
is_api_error() {
  local pane_id="$1"
  local pane_output
  pane_output=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
  [[ -n "$pane_output" ]] || return 1

  # All markers as alternatives of one extended regex.
  local markers='(^|[^[:digit:]])(500|529)([^[:digit:]]|$)|overloaded|too many requests|service unavailable'
  if echo "$pane_output" | grep -qiE "$markers"; then
    return 0
  fi
  return 1
}
889
-
890
- # =============================================================================
891
- # Security Warning
892
- # =============================================================================
893
-
894
# print_security_warning
# Prints a fixed banner on stdout warning that the claude CLI is run with
# --dangerously-skip-permissions.
print_security_warning() {
  cat <<'EOF'

================================================================
 WARNING: Running with --dangerously-skip-permissions

 The claude CLI will execute tools (file writes, shell commands)
 without asking for confirmation. Only run this on code you
 trust in an environment you control.
================================================================

EOF
}