shipwright-cli 2.4.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/README.md +16 -11
  2. package/completions/_shipwright +248 -94
  3. package/completions/shipwright.bash +68 -19
  4. package/completions/shipwright.fish +310 -42
  5. package/config/decision-tiers.json +55 -0
  6. package/config/defaults.json +111 -0
  7. package/config/event-schema.json +218 -0
  8. package/config/policy.json +21 -18
  9. package/dashboard/coverage/coverage-summary.json +14 -0
  10. package/dashboard/public/index.html +1 -1
  11. package/dashboard/server.ts +306 -17
  12. package/dashboard/src/components/charts/bar.test.ts +79 -0
  13. package/dashboard/src/components/charts/donut.test.ts +68 -0
  14. package/dashboard/src/components/charts/pipeline-rail.test.ts +117 -0
  15. package/dashboard/src/components/charts/sparkline.test.ts +125 -0
  16. package/dashboard/src/core/api.test.ts +309 -0
  17. package/dashboard/src/core/helpers.test.ts +301 -0
  18. package/dashboard/src/core/router.test.ts +307 -0
  19. package/dashboard/src/core/router.ts +7 -0
  20. package/dashboard/src/core/sse.test.ts +144 -0
  21. package/dashboard/src/views/metrics.test.ts +186 -0
  22. package/dashboard/src/views/overview.test.ts +173 -0
  23. package/dashboard/src/views/pipelines.test.ts +183 -0
  24. package/dashboard/src/views/team.test.ts +253 -0
  25. package/dashboard/vitest.config.ts +14 -5
  26. package/docs/TIPS.md +1 -1
  27. package/docs/patterns/README.md +1 -1
  28. package/package.json +7 -9
  29. package/scripts/adapters/docker-deploy.sh +1 -1
  30. package/scripts/adapters/tmux-adapter.sh +11 -1
  31. package/scripts/adapters/wezterm-adapter.sh +1 -1
  32. package/scripts/check-version-consistency.sh +1 -1
  33. package/scripts/lib/architecture.sh +127 -0
  34. package/scripts/lib/bootstrap.sh +75 -0
  35. package/scripts/lib/compat.sh +89 -6
  36. package/scripts/lib/config.sh +91 -0
  37. package/scripts/lib/daemon-adaptive.sh +3 -3
  38. package/scripts/lib/daemon-dispatch.sh +63 -17
  39. package/scripts/lib/daemon-failure.sh +0 -0
  40. package/scripts/lib/daemon-health.sh +1 -1
  41. package/scripts/lib/daemon-patrol.sh +64 -17
  42. package/scripts/lib/daemon-poll.sh +54 -25
  43. package/scripts/lib/daemon-state.sh +125 -23
  44. package/scripts/lib/daemon-triage.sh +31 -9
  45. package/scripts/lib/decide-autonomy.sh +295 -0
  46. package/scripts/lib/decide-scoring.sh +228 -0
  47. package/scripts/lib/decide-signals.sh +462 -0
  48. package/scripts/lib/fleet-failover.sh +63 -0
  49. package/scripts/lib/helpers.sh +29 -6
  50. package/scripts/lib/pipeline-detection.sh +2 -2
  51. package/scripts/lib/pipeline-github.sh +9 -9
  52. package/scripts/lib/pipeline-intelligence.sh +105 -38
  53. package/scripts/lib/pipeline-quality-checks.sh +17 -16
  54. package/scripts/lib/pipeline-quality.sh +1 -1
  55. package/scripts/lib/pipeline-stages.sh +440 -59
  56. package/scripts/lib/pipeline-state.sh +54 -4
  57. package/scripts/lib/policy.sh +0 -0
  58. package/scripts/lib/test-helpers.sh +247 -0
  59. package/scripts/postinstall.mjs +78 -12
  60. package/scripts/signals/example-collector.sh +36 -0
  61. package/scripts/sw +17 -7
  62. package/scripts/sw-activity.sh +1 -11
  63. package/scripts/sw-adaptive.sh +109 -85
  64. package/scripts/sw-adversarial.sh +4 -14
  65. package/scripts/sw-architecture-enforcer.sh +1 -11
  66. package/scripts/sw-auth.sh +8 -17
  67. package/scripts/sw-autonomous.sh +111 -49
  68. package/scripts/sw-changelog.sh +1 -11
  69. package/scripts/sw-checkpoint.sh +144 -20
  70. package/scripts/sw-ci.sh +2 -12
  71. package/scripts/sw-cleanup.sh +13 -17
  72. package/scripts/sw-code-review.sh +16 -36
  73. package/scripts/sw-connect.sh +5 -12
  74. package/scripts/sw-context.sh +9 -26
  75. package/scripts/sw-cost.sh +17 -18
  76. package/scripts/sw-daemon.sh +76 -71
  77. package/scripts/sw-dashboard.sh +57 -17
  78. package/scripts/sw-db.sh +524 -26
  79. package/scripts/sw-decide.sh +685 -0
  80. package/scripts/sw-decompose.sh +1 -11
  81. package/scripts/sw-deps.sh +15 -25
  82. package/scripts/sw-developer-simulation.sh +1 -11
  83. package/scripts/sw-discovery.sh +138 -30
  84. package/scripts/sw-doc-fleet.sh +7 -17
  85. package/scripts/sw-docs-agent.sh +6 -16
  86. package/scripts/sw-docs.sh +4 -12
  87. package/scripts/sw-doctor.sh +134 -43
  88. package/scripts/sw-dora.sh +11 -19
  89. package/scripts/sw-durable.sh +35 -52
  90. package/scripts/sw-e2e-orchestrator.sh +11 -27
  91. package/scripts/sw-eventbus.sh +115 -115
  92. package/scripts/sw-evidence.sh +114 -30
  93. package/scripts/sw-feedback.sh +3 -13
  94. package/scripts/sw-fix.sh +2 -20
  95. package/scripts/sw-fleet-discover.sh +1 -11
  96. package/scripts/sw-fleet-viz.sh +10 -18
  97. package/scripts/sw-fleet.sh +13 -17
  98. package/scripts/sw-github-app.sh +6 -16
  99. package/scripts/sw-github-checks.sh +1 -11
  100. package/scripts/sw-github-deploy.sh +1 -11
  101. package/scripts/sw-github-graphql.sh +2 -12
  102. package/scripts/sw-guild.sh +1 -11
  103. package/scripts/sw-heartbeat.sh +49 -12
  104. package/scripts/sw-hygiene.sh +45 -43
  105. package/scripts/sw-incident.sh +48 -74
  106. package/scripts/sw-init.sh +35 -37
  107. package/scripts/sw-instrument.sh +1 -11
  108. package/scripts/sw-intelligence.sh +368 -53
  109. package/scripts/sw-jira.sh +5 -14
  110. package/scripts/sw-launchd.sh +2 -12
  111. package/scripts/sw-linear.sh +8 -17
  112. package/scripts/sw-logs.sh +4 -12
  113. package/scripts/sw-loop.sh +905 -104
  114. package/scripts/sw-memory.sh +263 -20
  115. package/scripts/sw-mission-control.sh +2 -12
  116. package/scripts/sw-model-router.sh +73 -34
  117. package/scripts/sw-otel.sh +15 -23
  118. package/scripts/sw-oversight.sh +1 -11
  119. package/scripts/sw-patrol-meta.sh +5 -11
  120. package/scripts/sw-pipeline-composer.sh +7 -17
  121. package/scripts/sw-pipeline-vitals.sh +1 -11
  122. package/scripts/sw-pipeline.sh +550 -122
  123. package/scripts/sw-pm.sh +2 -12
  124. package/scripts/sw-pr-lifecycle.sh +33 -28
  125. package/scripts/sw-predictive.sh +16 -22
  126. package/scripts/sw-prep.sh +6 -16
  127. package/scripts/sw-ps.sh +1 -11
  128. package/scripts/sw-public-dashboard.sh +2 -12
  129. package/scripts/sw-quality.sh +85 -14
  130. package/scripts/sw-reaper.sh +1 -11
  131. package/scripts/sw-recruit.sh +15 -25
  132. package/scripts/sw-regression.sh +11 -21
  133. package/scripts/sw-release-manager.sh +19 -28
  134. package/scripts/sw-release.sh +8 -16
  135. package/scripts/sw-remote.sh +1 -11
  136. package/scripts/sw-replay.sh +48 -44
  137. package/scripts/sw-retro.sh +70 -92
  138. package/scripts/sw-review-rerun.sh +1 -1
  139. package/scripts/sw-scale.sh +174 -41
  140. package/scripts/sw-security-audit.sh +12 -22
  141. package/scripts/sw-self-optimize.sh +239 -23
  142. package/scripts/sw-session.sh +5 -15
  143. package/scripts/sw-setup.sh +8 -18
  144. package/scripts/sw-standup.sh +5 -15
  145. package/scripts/sw-status.sh +32 -23
  146. package/scripts/sw-strategic.sh +129 -13
  147. package/scripts/sw-stream.sh +1 -11
  148. package/scripts/sw-swarm.sh +76 -36
  149. package/scripts/sw-team-stages.sh +10 -20
  150. package/scripts/sw-templates.sh +4 -14
  151. package/scripts/sw-testgen.sh +3 -13
  152. package/scripts/sw-tmux-pipeline.sh +1 -19
  153. package/scripts/sw-tmux-role-color.sh +0 -10
  154. package/scripts/sw-tmux-status.sh +3 -11
  155. package/scripts/sw-tmux.sh +2 -20
  156. package/scripts/sw-trace.sh +1 -19
  157. package/scripts/sw-tracker-github.sh +0 -10
  158. package/scripts/sw-tracker-jira.sh +1 -11
  159. package/scripts/sw-tracker-linear.sh +1 -11
  160. package/scripts/sw-tracker.sh +7 -24
  161. package/scripts/sw-triage.sh +29 -39
  162. package/scripts/sw-upgrade.sh +5 -23
  163. package/scripts/sw-ux.sh +1 -19
  164. package/scripts/sw-webhook.sh +18 -32
  165. package/scripts/sw-widgets.sh +3 -21
  166. package/scripts/sw-worktree.sh +11 -27
  167. package/scripts/update-homebrew-sha.sh +73 -0
  168. package/templates/pipelines/tdd.json +72 -0
  169. package/scripts/sw-pipeline.sh.mock +0 -7
@@ -23,6 +23,13 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
23
23
  # Canonical helpers (colors, output, events)
24
24
  # shellcheck source=lib/helpers.sh
25
25
  [[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
26
+ [[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
27
+ # Source DB for dual-write (emit_event → JSONL + SQLite).
28
+ # Note: do NOT call init_schema here — the pipeline (sw-pipeline.sh) owns schema
29
+ # initialization. Calling it here would create an empty DB that shadows JSON cost data.
30
+ if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
31
+ source "$SCRIPT_DIR/sw-db.sh" 2>/dev/null || true
32
+ fi
26
33
  # Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
27
34
  [[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
28
35
  [[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
@@ -40,15 +47,6 @@ if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
40
47
  echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
41
48
  }
42
49
  fi
43
- CYAN="${CYAN:-\033[38;2;0;212;255m}"
44
- PURPLE="${PURPLE:-\033[38;2;124;58;237m}"
45
- BLUE="${BLUE:-\033[38;2;0;102;255m}"
46
- GREEN="${GREEN:-\033[38;2;74;222;128m}"
47
- YELLOW="${YELLOW:-\033[38;2;250;204;21m}"
48
- RED="${RED:-\033[38;2;248;113;113m}"
49
- DIM="${DIM:-\033[2m}"
50
- BOLD="${BOLD:-\033[1m}"
51
- RESET="${RESET:-\033[0m}"
52
50
 
53
51
  # ─── Defaults ─────────────────────────────────────────────────────────────────
54
52
  GOAL=""
@@ -67,11 +65,11 @@ MAX_TURNS=""
67
65
  RESUME=false
68
66
  VERBOSE=false
69
67
  MAX_ITERATIONS_EXPLICIT=false
70
- MAX_RESTARTS=0
68
+ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
71
69
  SESSION_RESTART=false
72
70
  RESTART_COUNT=0
73
71
  REPO_OVERRIDE=""
74
- VERSION="2.4.0"
72
+ VERSION="3.1.0"
75
73
 
76
74
  # ─── Token Tracking ─────────────────────────────────────────────────────────
77
75
  LOOP_INPUT_TOKENS=0
@@ -335,13 +333,13 @@ if [[ -n "$REPO_OVERRIDE" ]]; then
335
333
  info "Using repository: $(pwd)"
336
334
  fi
337
335
 
338
- if ! command -v claude &>/dev/null; then
336
+ if ! command -v claude >/dev/null 2>&1; then
339
337
  error "Claude Code CLI not found. Install it first:"
340
338
  echo -e " ${DIM}npm install -g @anthropic-ai/claude-code${RESET}"
341
339
  exit 1
342
340
  fi
343
341
 
344
- if ! git rev-parse --is-inside-work-tree &>/dev/null 2>&1; then
342
+ if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
345
343
  error "Not inside a git repository. The loop requires git for progress tracking."
346
344
  exit 1
347
345
  fi
@@ -351,15 +349,15 @@ ORIGINAL_GOAL="$GOAL"
351
349
 
352
350
  # ─── Timeout Detection ────────────────────────────────────────────────────────
353
351
  TIMEOUT_CMD=""
354
- if command -v timeout &>/dev/null; then
352
+ if command -v timeout >/dev/null 2>&1; then
355
353
  TIMEOUT_CMD="timeout"
356
- elif command -v gtimeout &>/dev/null; then
354
+ elif command -v gtimeout >/dev/null 2>&1; then
357
355
  TIMEOUT_CMD="gtimeout"
358
356
  fi
359
- CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-1800}" # 30 min default
357
+ CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-$(_config_get_int "loop.claude_timeout" 1800 2>/dev/null || echo 1800)}" # 30 min default
360
358
 
361
359
  if [[ "$AGENTS" -gt 1 ]]; then
362
- if ! command -v tmux &>/dev/null; then
360
+ if ! command -v tmux >/dev/null 2>&1; then
363
361
  error "tmux is required for multi-agent mode."
364
362
  echo -e " ${DIM}brew install tmux${RESET} (macOS)"
365
363
  exit 1
@@ -393,7 +391,7 @@ select_adaptive_model() {
393
391
  fi
394
392
  # Read learned model routing
395
393
  local _routing_file="${HOME}/.shipwright/optimization/model-routing.json"
396
- if [[ -f "$_routing_file" ]] && command -v jq &>/dev/null; then
394
+ if [[ -f "$_routing_file" ]] && command -v jq >/dev/null 2>&1; then
397
395
  local _routed_model
398
396
  _routed_model=$(jq -r --arg r "$role" '.routes[$r].model // ""' "$_routing_file" 2>/dev/null) || true
399
397
  if [[ -n "${_routed_model:-}" && "${_routed_model:-}" != "null" ]]; then
@@ -403,7 +401,7 @@ select_adaptive_model() {
403
401
  fi
404
402
 
405
403
  # Try intelligence-based recommendation
406
- if type intelligence_recommend_model &>/dev/null 2>&1; then
404
+ if type intelligence_recommend_model >/dev/null 2>&1; then
407
405
  local rec
408
406
  rec=$(intelligence_recommend_model "$role" "${COMPLEXITY:-5}" "${BUDGET:-0}" 2>/dev/null || echo "")
409
407
  if [[ -n "$rec" ]]; then
@@ -422,7 +420,7 @@ select_adaptive_model() {
422
420
  select_audit_model() {
423
421
  local default_model="haiku"
424
422
  local opt_file="$HOME/.shipwright/optimization/audit-tuning.json"
425
- if [[ -f "$opt_file" ]] && command -v jq &>/dev/null; then
423
+ if [[ -f "$opt_file" ]] && command -v jq >/dev/null 2>&1; then
426
424
  local success_rate
427
425
  success_rate=$(jq -r '.haiku_success_rate // 100' "$opt_file" 2>/dev/null || echo "100")
428
426
  if [[ "${success_rate%%.*}" -lt 90 ]]; then
@@ -442,7 +440,7 @@ accumulate_loop_tokens() {
442
440
  [[ ! -f "$log_file" ]] && return 0
443
441
 
444
442
  # If jq is available and the file looks like JSON, parse structured output
445
- if command -v jq &>/dev/null && head -c1 "$log_file" 2>/dev/null | grep -q '\['; then
443
+ if command -v jq >/dev/null 2>&1 && head -c1 "$log_file" 2>/dev/null | grep -q '\['; then
446
444
  local input_tok output_tok cache_read cache_create cost_usd
447
445
  # The result object is the last element in the JSON array
448
446
  input_tok=$(jq -r '.[-1].usage.input_tokens // 0' "$log_file" 2>/dev/null || echo "0")
@@ -458,6 +456,20 @@ accumulate_loop_tokens() {
458
456
  local cost_millicents
459
457
  cost_millicents=$(echo "$cost_usd" | awk '{printf "%.0f", $1 * 100000}' 2>/dev/null || echo "0")
460
458
  LOOP_COST_MILLICENTS=$(( ${LOOP_COST_MILLICENTS:-0} + ${cost_millicents:-0} ))
459
+ else
460
+ # Estimate cost from tokens when Claude doesn't provide it (rates per million tokens)
461
+ local total_in total_out
462
+ total_in=$(( ${input_tok:-0} + ${cache_read:-0} + ${cache_create:-0} ))
463
+ total_out=${output_tok:-0}
464
+ local cost=0
465
+ case "${MODEL:-${CLAUDE_MODEL:-sonnet}}" in
466
+ *opus*) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 15 + o * 75) / 1000000}') ;;
467
+ *sonnet*) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 3 + o * 15) / 1000000}') ;;
468
+ *haiku*) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 0.25 + o * 1.25) / 1000000}') ;;
469
+ *) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 3 + o * 15) / 1000000}') ;;
470
+ esac
471
+ cost_millicents=$(echo "$cost" | awk '{printf "%.0f", $1 * 100000}' 2>/dev/null || echo "0")
472
+ LOOP_COST_MILLICENTS=$(( ${LOOP_COST_MILLICENTS:-0} + ${cost_millicents:-0} ))
461
473
  fi
462
474
  else
463
475
  # Fallback: regex-based parsing for non-JSON output
@@ -491,7 +503,7 @@ _extract_text_from_json() {
491
503
  first_char=$(head -c1 "$json_file" 2>/dev/null || true)
492
504
 
493
505
  # Case 2: Valid JSON array — extract .result from last element
494
- if [[ "$first_char" == "[" ]] && command -v jq &>/dev/null; then
506
+ if [[ "$first_char" == "[" ]] && command -v jq >/dev/null 2>&1; then
495
507
  local extracted
496
508
  extracted=$(jq -r '.[-1].result // empty' "$json_file" 2>/dev/null) || true
497
509
  if [[ -n "$extracted" ]]; then
@@ -542,7 +554,7 @@ TOKJSON
542
554
  # Reads tuning config for smarter iteration/circuit-breaker thresholds.
543
555
  apply_adaptive_budget() {
544
556
  local tuning_file="$HOME/.shipwright/optimization/loop-tuning.json"
545
- if [[ -f "$tuning_file" ]] && command -v jq &>/dev/null; then
557
+ if [[ -f "$tuning_file" ]] && command -v jq >/dev/null 2>&1; then
546
558
  local tuned_max tuned_ext tuned_ext_count tuned_cb
547
559
  tuned_max=$(jq -r '.max_iterations // ""' "$tuning_file" 2>/dev/null || echo "")
548
560
  tuned_ext=$(jq -r '.extension_size // ""' "$tuning_file" 2>/dev/null || echo "")
@@ -560,7 +572,7 @@ apply_adaptive_budget() {
560
572
 
561
573
  # Read learned iteration model
562
574
  local _iter_model="${HOME}/.shipwright/optimization/iteration-model.json"
563
- if [[ -f "$_iter_model" ]] && ! $MAX_ITERATIONS_EXPLICIT && command -v jq &>/dev/null; then
575
+ if [[ -f "$_iter_model" ]] && ! $MAX_ITERATIONS_EXPLICIT && command -v jq >/dev/null 2>&1; then
564
576
  local _complexity="${ISSUE_COMPLEXITY:-${COMPLEXITY:-medium}}"
565
577
  local _predicted_max
566
578
  _predicted_max=$(jq -r --arg c "$_complexity" '.predictions[$c].max_iterations // ""' "$_iter_model" 2>/dev/null) || true
@@ -571,7 +583,7 @@ apply_adaptive_budget() {
571
583
  fi
572
584
 
573
585
  # Try intelligence-based iteration estimate
574
- if type intelligence_estimate_iterations &>/dev/null 2>&1 && ! $MAX_ITERATIONS_EXPLICIT; then
586
+ if type intelligence_estimate_iterations >/dev/null 2>&1 && ! $MAX_ITERATIONS_EXPLICIT; then
575
587
  local est
576
588
  est=$(intelligence_estimate_iterations "${GOAL:-}" "${COMPLEXITY:-5}" 2>/dev/null || echo "")
577
589
  if [[ -n "$est" && "$est" =~ ^[0-9]+$ ]]; then
@@ -619,9 +631,6 @@ compute_velocity_avg() {
619
631
 
620
632
  # ─── Timing Helpers ───────────────────────────────────────────────────────────
621
633
 
622
- now_iso() { date -u +%Y-%m-%dT%H:%M:%SZ; }
623
- now_epoch() { date +%s; }
624
-
625
634
  format_duration() {
626
635
  local secs="$1"
627
636
  local mins=$(( secs / 60 ))
@@ -652,6 +661,9 @@ initialize_state() {
652
661
  STATUS="running"
653
662
  LOG_ENTRIES=""
654
663
 
664
+ # Record starting commit for cumulative diff in quality gates
665
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
666
+
655
667
  write_state
656
668
  }
657
669
 
@@ -723,6 +735,11 @@ resume_state() {
723
735
  START_EPOCH="$(now_epoch)"
724
736
  STATUS="running"
725
737
 
738
+ # Set starting commit for cumulative diff (approximate: use earliest tracked commit)
739
+ if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
740
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
741
+ fi
742
+
726
743
  # If we hit max iterations before, warn user to extend
727
744
  if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
728
745
  warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
@@ -730,6 +747,21 @@ resume_state() {
730
747
  exit 0
731
748
  fi
732
749
 
750
+ # Restore Claude context for meaningful resume (source so exports persist to this shell)
751
+ if [[ -f "$SCRIPT_DIR/sw-checkpoint.sh" ]] && [[ -d "${PROJECT_ROOT:-}" ]]; then
752
+ source "$SCRIPT_DIR/sw-checkpoint.sh"
753
+ local _orig_pwd="$PWD"
754
+ cd "$PROJECT_ROOT" 2>/dev/null || true
755
+ if checkpoint_restore_context "build" 2>/dev/null; then
756
+ RESUMED_FROM_ITERATION="${RESTORED_ITERATION:-}"
757
+ RESUMED_MODIFIED="${RESTORED_MODIFIED:-}"
758
+ RESUMED_FINDINGS="${RESTORED_FINDINGS:-}"
759
+ RESUMED_TEST_OUTPUT="${RESTORED_TEST_OUTPUT:-}"
760
+ [[ -n "${RESTORED_ITERATION:-}" && "${RESTORED_ITERATION:-0}" -gt 0 ]] && info "Restored context from iteration ${RESTORED_ITERATION}"
761
+ fi
762
+ cd "$_orig_pwd" 2>/dev/null || true
763
+ fi
764
+
733
765
  success "Resumed: iteration $ITERATION/$MAX_ITERATIONS"
734
766
  }
735
767
 
@@ -807,6 +839,86 @@ ${entry}"
807
839
  fi
808
840
  }
809
841
 
842
+ # ─── Semantic Validation for Claude Output ─────────────────────────────────────
843
+ # Validates changed files before commit to catch syntax errors and API error leakage.
844
#######################################
# Pre-commit sanity check on the files an iteration changed.
#
# Lists changed paths with git (staged first, unstaged as a fallback) and runs
# a cheap per-type check on every listed file that still exists on disk:
#   *.sh              bash -n
#   *.py              ast.parse via python3 (skipped when python3 is missing)
#   *.json            jq empty (skipped when jq is missing)
#   *.ts/js/tsx/jsx   grep for API-error text leaked into the source
# An iteration that changed no files at all counts as one issue.
#
# Arguments: $1 - work tree to inspect (default: current directory)
# Outputs:   one message per problem, reported through warn
# Returns:   number of issues found, capped at 255 (0 means clean)
#######################################
validate_claude_output() {
  local workdir="${1:-.}"
  local issues=0
  local total_changed=0
  local changed_files file

  # `git diff --cached` exits 0 even when nothing is staged, so chaining the
  # unstaged listing with `||` would never run it. Fall back on an empty list.
  # core.quotepath=off keeps non-ASCII paths literal so the -f test can match.
  changed_files=$(git -C "$workdir" -c core.quotepath=off \
    diff --cached --name-only 2>/dev/null || true)
  if [[ -z "$changed_files" ]]; then
    changed_files=$(git -C "$workdir" -c core.quotepath=off \
      diff --name-only 2>/dev/null || true)
  fi

  while IFS= read -r file; do
    [[ -z "$file" ]] && continue
    # Deleted files still count as a change; they just have nothing to check.
    total_changed=$((total_changed + 1))
    [[ ! -f "$workdir/$file" ]] && continue

    case "$file" in
      *.sh)
        if ! bash -n "$workdir/$file" 2>/dev/null; then
          warn "Syntax error in shell script: $file"
          issues=$((issues + 1))
        fi
        ;;
      *.py)
        if command -v python3 >/dev/null 2>&1; then
          # Read bytes so ast.parse applies the file's own encoding rules
          # instead of the locale's (which breaks UTF-8 sources under LC_ALL=C).
          if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1], 'rb').read())" \
            "$workdir/$file" 2>/dev/null; then
            warn "Syntax error in Python file: $file"
            issues=$((issues + 1))
          fi
        fi
        ;;
      *.json)
        if command -v jq >/dev/null 2>&1 && ! jq empty "$workdir/$file" 2>/dev/null; then
          warn "Invalid JSON: $file"
          issues=$((issues + 1))
        fi
        ;;
      *.ts|*.js|*.tsx|*.jsx)
        # Obvious corruption: API error text written into a source file.
        # NOTE(review): this scans the whole file, so sources that legitimately
        # mention e.g. "rate limit" are flagged too — confirm that is intended.
        if grep -qE '(CLAUDE_CODE_OAUTH_TOKEN|api key|rate limit|503 Service|DOCTYPE html)' \
          "$workdir/$file" 2>/dev/null; then
          warn "Claude API error leaked into source file: $file"
          issues=$((issues + 1))
        fi
        ;;
    esac
  done <<< "$changed_files"

  if [[ "$total_changed" -eq 0 ]]; then
    warn "Claude iteration produced no file changes"
    issues=$((issues + 1))
  fi

  # Exit statuses are 8-bit: an uncapped count of 256 would read as success.
  if [[ "$issues" -gt 255 ]]; then
    issues=255
  fi
  return "$issues"
}
898
+
899
+ # ─── Budget Gate (hard stop when exhausted) ───────────────────────────────────
900
#######################################
# Hard budget gate: stop when the remaining budget is exhausted.
#
# Asks "$SCRIPT_DIR/sw-cost.sh remaining-budget" for the remaining budget.
# The gate stays open (returns 0) when the cost script is not executable,
# prints nothing, prints "unlimited", or prints something that is not a plain
# decimal number.
#
# Globals:   SCRIPT_DIR (read)
# Outputs:   messages through error / warn; a pipeline.budget_exhausted event
#            through emit_event when the budget is used up
# Returns:   1 when the remaining budget is <= 0, otherwise 0
#######################################
check_budget_gate() {
  local cost_script="$SCRIPT_DIR/sw-cost.sh"
  [[ -x "$cost_script" ]] || return 0

  local remaining
  remaining=$(bash "$cost_script" remaining-budget 2>/dev/null || echo "")
  remaining="${remaining//[[:space:]]/}"
  [[ -z "$remaining" || "$remaining" == "unlimited" ]] && return 0

  # awk compares non-numeric values as strings, so something like "$4.50"
  # would sort below "0" and be treated as exhausted. Only gate on numbers.
  local numeric_re='^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)$'
  if ! [[ "$remaining" =~ $numeric_re ]]; then
    warn "Could not parse remaining budget ('${remaining}') — skipping budget gate"
    return 0
  fi

  if awk -v r="$remaining" 'BEGIN { exit !(r <= 0) }' 2>/dev/null; then
    error "Budget exhausted (remaining: \$${remaining}) — stopping pipeline"
    emit_event "pipeline.budget_exhausted" "remaining=$remaining"
    return 1
  fi

  # Fixed low-water mark: warn once less than $1.00 is left.
  if awk -v r="$remaining" 'BEGIN { exit !(r < 1.0) }' 2>/dev/null; then
    warn "Budget low: \$${remaining} remaining"
  fi

  return 0
}
921
+
810
922
  # ─── Git Helpers ──────────────────────────────────────────────────────────────
811
923
 
812
924
  git_commit_count() {
@@ -834,6 +946,14 @@ git_auto_commit() {
834
946
  fi
835
947
 
836
948
  git -C "$work_dir" add -A 2>/dev/null || true
949
+
950
+ # Semantic validation before commit — skip commit if validation fails
951
+ if ! validate_claude_output "$work_dir"; then
952
+ warn "Validation failed — skipping commit for this iteration"
953
+ git -C "$work_dir" reset --hard HEAD 2>/dev/null || true
954
+ return 1
955
+ fi
956
+
837
957
  git -C "$work_dir" commit -m "loop: iteration $ITERATION — autonomous progress" --no-verify 2>/dev/null || return 1
838
958
  return 0
839
959
  }
@@ -861,7 +981,8 @@ check_fatal_error() {
861
981
  # Non-zero exit + tiny output = likely CLI crash
862
982
  if [[ "$cli_exit_code" -ne 0 ]]; then
863
983
  local line_count
864
- line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || echo 0)
984
+ line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
985
+ line_count="${line_count:-0}"
865
986
  if [[ "$line_count" -lt 3 ]]; then
866
987
  local content
867
988
  content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
@@ -897,7 +1018,7 @@ check_completion() {
897
1018
 
898
1019
  check_circuit_breaker() {
899
1020
  # Vitals-driven circuit breaker (preferred over static threshold)
900
- if type pipeline_compute_vitals &>/dev/null 2>&1 && type pipeline_health_verdict &>/dev/null 2>&1; then
1021
+ if type pipeline_compute_vitals >/dev/null 2>&1 && type pipeline_health_verdict >/dev/null 2>&1; then
901
1022
  local _vitals_json _verdict
902
1023
  local _loop_state="${STATE_FILE:-}"
903
1024
  local _loop_artifacts="${ARTIFACTS_DIR:-}"
@@ -989,6 +1110,114 @@ check_max_iterations() {
989
1110
  return 1
990
1111
  }
991
1112
 
1113
+ # ─── Failure Diagnosis ─────────────────────────────────────────────────────────
1114
+ # Pattern-based root-cause classification for smarter retries (no Claude needed).
1115
+ # Returns markdown context to inject into the next iteration's goal.
1116
+
1117
#######################################
# Classify a failure by pattern and build retry guidance for the next iteration.
#
# The error text is matched against an ordered rule table (first match wins).
# Each diagnosis is appended to "${LOG_DIR:-/tmp}/diagnoses.txt"; once the same
# diagnosis has already been recorded twice, the strategy escalates to
# "alternative_approach". If a memory_query_fix_for_error function is defined
# and jq is installed, a known fix from it is included as well.
#
# Globals:   LOG_DIR (read)
# Arguments: $1 - error output to classify
#            $2 - changed files (accepted for compatibility; currently unused)
#            $3 - iteration number, used in the heading
# Outputs:   markdown diagnosis context on stdout
# Returns:   0
#######################################
diagnose_failure() {
  local error_output="${1:-}"
  local iteration="${3:-}"

  local diagnosis="unknown"
  local strategy="retry_with_context"

  # diagnosis|strategy|extended regex (the regex may itself contain "|").
  local -a rules=(
    'missing_import|fix_imports|import.*not found|cannot find module|no module named'
    'syntax_error|fix_syntax|syntax error|unexpected token|parse error'
    'type_error|fix_types|type.*not assignable|type error|TypeError'
    'undefined_reference|fix_references|undefined.*variable|not defined|ReferenceError'
    'timeout|optimize_performance|timeout|timed out|ETIMEDOUT'
    'test_assertion|fix_logic|assertion.*fail|expect.*to|AssertionError'
    'permission_error|fix_permissions|permission denied|EACCES|forbidden'
    'resource_error|reduce_resource_usage|out of memory|heap|OOM|ENOMEM'
  )

  local rule rest pattern
  for rule in "${rules[@]}"; do
    rest="${rule#*|}"
    pattern="${rest#*|}"
    # Here-string instead of `echo | grep -q`: with pipefail, grep -q exiting
    # early on large output kills echo with SIGPIPE and the match reads as false.
    if grep -qiE -- "$pattern" <<< "$error_output"; then
      diagnosis="${rule%%|*}"
      strategy="${rest%%|*}"
      break
    fi
  done

  # How many times has this diagnosis already been recorded?
  local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
  local repeat_count=0
  if [[ -f "$diagnosis_file" ]]; then
    repeat_count=$(grep -cxF -- "$diagnosis" "$diagnosis_file" 2>/dev/null || true)
    repeat_count="${repeat_count:-0}"
  fi
  # Best effort: an unwritable log must not abort the diagnosis.
  printf '%s\n' "$diagnosis" >> "$diagnosis_file" || true

  # Escalate once the same diagnosis keeps coming back.
  if [[ "$repeat_count" -ge 2 ]]; then
    strategy="alternative_approach"
  fi

  # Optional lookup of a known fix from memory.
  local known_fix=""
  if type memory_query_fix_for_error >/dev/null 2>&1 \
    && command -v jq >/dev/null 2>&1; then
    local fix_json
    fix_json=$(memory_query_fix_for_error "$error_output" 2>/dev/null || true)
    if [[ -n "$fix_json" && "$fix_json" != "null" ]]; then
      known_fix=$(jq -r '.fix // ""' <<< "$fix_json" 2>/dev/null | head -5) || true
    fi
  fi

  local diagnosis_context="## Failure Diagnosis (Iteration $iteration)
Classification: $diagnosis
Strategy: $strategy
Repeat count: $repeat_count"

  if [[ -n "$known_fix" ]]; then
    diagnosis_context+="
Known fix from memory: $known_fix"
  fi

  # Strategy-specific guidance; strategies not listed here add nothing extra.
  case "$strategy" in
    fix_imports)
      diagnosis_context+="
INSTRUCTION: The error is about missing imports/modules. Check that all imports are correct, packages are installed, and paths are right. Do NOT change the logic - just fix the imports."
      ;;
    fix_syntax)
      diagnosis_context+="
INSTRUCTION: This is a syntax error. Carefully check the exact line mentioned in the error. Look for missing brackets, semicolons, commas, or mismatched quotes."
      ;;
    fix_types)
      diagnosis_context+="
INSTRUCTION: Type mismatch error. Check the types at the error location. Ensure function signatures match their usage."
      ;;
    fix_logic)
      diagnosis_context+="
INSTRUCTION: Test assertion failure. The code logic is wrong, not the syntax. Re-read the test expectations and fix the implementation to match."
      ;;
    alternative_approach)
      diagnosis_context+="
INSTRUCTION: This error has occurred $repeat_count times. The previous approach is not working. Try a FUNDAMENTALLY DIFFERENT approach:
- If you were modifying existing code, try rewriting the function from scratch
- If you were using one library, try a different one
- If you were adding to a file, try creating a new file instead
- Step back and reconsider the requirements"
      ;;
  esac

  printf '%s\n' "$diagnosis_context"
}
1220
+
992
1221
  # ─── Test Gate ────────────────────────────────────────────────────────────────
993
1222
 
994
1223
  run_test_gate() {
@@ -1018,9 +1247,9 @@ run_test_gate() {
1018
1247
  # Wrap test command with timeout (5 min default) to prevent hanging
1019
1248
  local test_timeout="${SW_TEST_TIMEOUT:-300}"
1020
1249
  local test_wrapper="$active_test_cmd"
1021
- if command -v timeout &>/dev/null; then
1250
+ if command -v timeout >/dev/null 2>&1; then
1022
1251
  test_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
1023
- elif command -v gtimeout &>/dev/null; then
1252
+ elif command -v gtimeout >/dev/null 2>&1; then
1024
1253
  test_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
1025
1254
  fi
1026
1255
  if bash -c "$test_wrapper" > "$test_log" 2>&1; then
@@ -1072,7 +1301,7 @@ write_error_summary() {
1072
1301
  local tmp_json="${error_json}.tmp.$$"
1073
1302
 
1074
1303
  # Build JSON with jq (preferred) or plain-text fallback
1075
- if command -v jq &>/dev/null; then
1304
+ if command -v jq >/dev/null 2>&1; then
1076
1305
  jq -n \
1077
1306
  --argjson iteration "${ITERATION:-0}" \
1078
1307
  --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
@@ -1105,33 +1334,60 @@ run_audit_agent() {
1105
1334
  local log_file="$LOG_DIR/iteration-${ITERATION}.log"
1106
1335
  local audit_log="$LOG_DIR/audit-iter-${ITERATION}.log"
1107
1336
 
1108
- # Gather context: tail of implementer output + git diff
1337
+ # Gather context: tail of implementer output + cumulative diff
1109
1338
  local impl_tail
1110
1339
  impl_tail="$(tail -100 "$log_file" 2>/dev/null || echo "(no output)")"
1111
- local diff_stat
1112
- diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
1340
+
1341
+ # Use cumulative diff from loop start so auditor sees ALL work, not just latest commit
1342
+ local diff_stat cumulative_note=""
1343
+ if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
1344
+ diff_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no changes)")"
1345
+ cumulative_note="Note: This diff shows ALL changes since the loop started (iteration 1 through ${ITERATION}), not just the latest commit."
1346
+ else
1347
+ diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
1348
+ fi
1349
+
1350
+ # Include verified test status so auditor doesn't have to guess
1351
+ local test_context=""
1352
+ if [[ -n "$TEST_CMD" ]]; then
1353
+ if [[ "${TEST_PASSED:-}" == "true" ]]; then
1354
+ test_context="## Verified Test Status (from harness, not from agent)
1355
+ Tests: ALL PASSING (command: ${TEST_CMD})"
1356
+ else
1357
+ test_context="## Verified Test Status (from harness)
1358
+ Tests: FAILING (command: ${TEST_CMD})
1359
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1360
+ fi
1361
+ fi
1113
1362
 
1114
1363
  local audit_prompt
1115
1364
  read -r -d '' audit_prompt <<AUDIT_PROMPT || true
1116
- You are an independent code auditor reviewing an autonomous coding agent.
1365
+ You are an independent code auditor reviewing an autonomous coding agent's CUMULATIVE work.
1366
+ This is iteration ${ITERATION}. The agent may have done most of the work in earlier iterations.
1117
1367
 
1118
1368
  ## Goal the agent was working toward
1119
1369
  ${GOAL}
1120
1370
 
1121
- ## Agent Output (last 100 lines)
1371
+ ## Agent Output This Iteration (last 100 lines)
1122
1372
  ${impl_tail}
1123
1373
 
1124
- ## Changes Made (git diff --stat)
1374
+ ## Cumulative Changes Made (git diff --stat)
1375
+ ${cumulative_note}
1125
1376
  ${diff_stat}
1126
1377
 
1378
+ ${test_context}
1379
+
1127
1380
  ## Your Task
1128
- Critically review the work:
1129
- 1. Did the agent make meaningful progress toward the goal?
1130
- 2. Are there obvious bugs, logic errors, or security issues?
1381
+ Critically review the CUMULATIVE work (not just the latest iteration):
1382
+ 1. Has the agent made meaningful progress toward the goal across all iterations?
1383
+ 2. Are there obvious bugs, logic errors, or security issues in the current codebase?
1131
1384
  3. Did the agent leave incomplete work (TODOs, placeholder code)?
1132
1385
  4. Are there any regressions or broken patterns?
1133
1386
  5. Is the code quality acceptable?
1134
1387
 
1388
+ IMPORTANT: If the current iteration made small or no code changes, that may be acceptable
1389
+ if earlier iterations already completed the substantive work. Judge the whole body of work.
1390
+
1135
1391
  If the work is acceptable and moves toward the goal, output exactly: AUDIT_PASS
1136
1392
  Otherwise, list the specific issues that need fixing.
1137
1393
  AUDIT_PROMPT
@@ -1217,21 +1473,52 @@ check_definition_of_done() {
1217
1473
 
1218
1474
  local dod_content
1219
1475
  dod_content="$(cat "$DOD_FILE")"
1476
+
1477
+ # Use cumulative diff from loop start (not just HEAD~1) so the evaluator
1478
+ # can see ALL work done across every iteration, not just the latest commit.
1220
1479
  local diff_content
1221
- diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
1480
+ if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
1481
+ diff_content="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no diff)")"
1482
+ diff_content="${diff_content}
1483
+
1484
+ ## Detailed Changes (cumulative diff, truncated to 200 lines)
1485
+ $(git -C "$PROJECT_ROOT" diff "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -200 || echo "(no diff)")"
1486
+ else
1487
+ diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
1488
+ fi
1489
+
1490
+ # Inject verified runtime facts so the evaluator doesn't have to guess
1491
+ local runtime_facts=""
1492
+ if [[ -n "$TEST_CMD" ]]; then
1493
+ if [[ "${TEST_PASSED:-}" == "true" ]]; then
1494
+ runtime_facts="## Verified Runtime Facts (from the loop harness, not from the agent)
1495
+ - Tests: ALL PASSING (verified by running '${TEST_CMD}' after this iteration)
1496
+ - Test output (last 10 lines):
1497
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1498
+ else
1499
+ runtime_facts="## Verified Runtime Facts
1500
+ - Tests: FAILING (verified by running '${TEST_CMD}')
1501
+ - Test output (last 10 lines):
1502
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1503
+ fi
1504
+ fi
1222
1505
 
1223
1506
  local dod_prompt
1224
1507
  read -r -d '' dod_prompt <<DOD_PROMPT || true
1225
- You are evaluating whether code changes satisfy a Definition of Done checklist.
1508
+ You are evaluating whether a project satisfies a Definition of Done checklist.
1509
+ You are reviewing the CUMULATIVE work across all iterations, not just the latest commit.
1226
1510
 
1227
1511
  ## Definition of Done
1228
1512
  ${dod_content}
1229
1513
 
1230
- ## Changes Made (git diff)
1514
+ ${runtime_facts}
1515
+
1516
+ ## Cumulative Changes Made (git diff from start of loop to now)
1231
1517
  ${diff_content}
1232
1518
 
1233
1519
  ## Your Task
1234
- For each item in the Definition of Done, determine if the changes satisfy it.
1520
+ For each item in the Definition of Done, determine if the project satisfies it.
1521
+ The runtime facts above are verified by the harness — trust them as ground truth.
1235
1522
  If ALL items are satisfied, output exactly: DOD_PASS
1236
1523
  Otherwise, list which items are NOT satisfied and why.
1237
1524
  DOD_PROMPT
@@ -1285,6 +1572,14 @@ guard_completion() {
1285
1572
  rejection_reasons+=("tests failing")
1286
1573
  fi
1287
1574
 
1575
+ # Holistic final gate: when all other gates pass, run a project-level assessment
1576
+ # that evaluates the entire codebase against the goal (not just the latest diff)
1577
+ if [[ ${#rejection_reasons[@]} -eq 0 ]]; then
1578
+ if ! run_holistic_gate; then
1579
+ rejection_reasons+=("holistic project assessment found gaps")
1580
+ fi
1581
+ fi
1582
+
1288
1583
  if [[ ${#rejection_reasons[@]} -gt 0 ]]; then
1289
1584
  local reasons_str
1290
1585
  reasons_str="$(printf ', %s' "${rejection_reasons[@]}")"
@@ -1298,6 +1593,143 @@ guard_completion() {
1298
1593
  return 0
1299
1594
  }
1300
1595
 
1596
# Holistic gate: evaluates the full project against the original goal.
# Only runs when all other gates pass (final checkpoint before acceptance).
#
# Globals read: LOOP_START_COMMIT, LOG_DIR, ITERATION, PROJECT_ROOT, GOAL,
#               TEST_OUTPUT, TEST_PASSED, TEST_CMD, SKIP_PERMISSIONS,
#               PURPLE, GREEN, YELLOW, RESET
# Outputs:      progress/result lines on stdout; the assessor's raw output is
#               written to $LOG_DIR/holistic-iter-<ITERATION>.log
# Returns:      0 when the gate is skipped (no LOOP_START_COMMIT) or the log
#               contains HOLISTIC_PASS; 1 otherwise.
run_holistic_gate() {
    # Skip if no starting commit (can't compute cumulative diff)
    [[ -z "${LOOP_START_COMMIT:-}" ]] && return 0

    local holistic_log="$LOG_DIR/holistic-iter-${ITERATION}.log"

    # Build a project summary: file tree, test count, cumulative diff stats
    local file_count
    file_count=$(git -C "$PROJECT_ROOT" ls-files | wc -l | tr -d ' ')

    # Run the cumulative diffstat once and derive both views from it.
    local diffstat_full
    diffstat_full="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || true)"

    # The last diffstat line is the "N files changed, ..." summary. The default
    # is applied on the variable because a fallback placed after a pipeline
    # ending in tail/head never triggers (the last stage succeeds).
    local cumulative_stat
    cumulative_stat="$(printf '%s\n' "$diffstat_full" | tail -1)"
    cumulative_stat="${cumulative_stat:-(no changes)}"

    local diffstat_head
    diffstat_head="$(printf '%s\n' "$diffstat_full" | head -40)"
    diffstat_head="${diffstat_head:-(none)}"

    local test_summary=""
    if [[ -n "${TEST_OUTPUT:-}" ]]; then
        test_summary="$(printf '%s\n' "$TEST_OUTPUT" | tail -5)"
    fi

    local holistic_prompt
    # read returns non-zero at end of input when using -d '', hence `|| true`.
    read -r -d '' holistic_prompt <<HOLISTIC_PROMPT || true
You are a final quality gate evaluating whether an autonomous coding agent has FULLY achieved its goal.

## Original Goal
${GOAL}

## Project Stats
- Files in repo: ${file_count}
- Iterations completed: ${ITERATION}
- Cumulative changes: ${cumulative_stat}
- Tests: ${TEST_PASSED:-unknown} (command: ${TEST_CMD:-none})
${test_summary:+- Test output: ${test_summary}}

## Cumulative Git Changes (diff --stat from start)
${diffstat_head}

## Your Task
Based on the goal and the cumulative work done:
1. Has the goal been FULLY achieved (not partially)?
2. Is there any critical gap that would make this unacceptable for production?

If the goal is fully achieved, output exactly: HOLISTIC_PASS
Otherwise, list the specific gaps remaining.
HOLISTIC_PROMPT

    echo -e " ${PURPLE}▸${RESET} Running holistic project assessment..."

    local hol_model
    hol_model="$(select_audit_model)"
    local hol_flags=("--model" "$hol_model")
    # Compare as a string instead of executing the variable: an unset or empty
    # SKIP_PERMISSIONS would otherwise expand to an empty command, which
    # succeeds and silently enables --dangerously-skip-permissions.
    if [[ "${SKIP_PERMISSIONS:-false}" == "true" ]]; then
        hol_flags+=("--dangerously-skip-permissions")
    fi

    # Best-effort: a failed invocation leaves no HOLISTIC_PASS marker in the
    # log, which is reported as "gaps found" below.
    claude -p "$holistic_prompt" "${hol_flags[@]}" > "$holistic_log" 2>&1 || true

    if grep -q "HOLISTIC_PASS" "$holistic_log" 2>/dev/null; then
        echo -e " ${GREEN}✓${RESET} Holistic assessment: passed"
        return 0
    else
        echo -e " ${YELLOW}⚠${RESET} Holistic assessment: gaps found"
        return 1
    fi
}
1659
+
1660
# ─── Context Window Management ───────────────────────────────────────────────
# Prevents prompt from exceeding Claude's context limit (~200K tokens).
# Trims least-critical sections first when over budget.

CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-180000}"  # ~45K tokens at 4 chars/token

# manage_context_window PROMPT
#
# Prints PROMPT on stdout, trimmed to fit CONTEXT_BUDGET_CHARS characters.
# A prompt already within budget is printed unchanged. Otherwise sections are
# trimmed in order, stopping as soon as the prompt fits:
#   1. drop the "## Performance Baselines" section
#   2. keep only the first 5 bullets of the File Hotspots section
#   3. keep only the first 10 entries of the Recent Git Activity section
#   4. truncate the Memory Context section after ~20K characters
#   5. keep only the last 50 lines of the Test Results section
#   6. hard-truncate the whole prompt and append a notice
#
# Globals read: CONTEXT_BUDGET_CHARS (falls back to 180000 if not an integer)
# Calls:        warn, emit_event (when trimming occurred)
# Note:         stdout is the function's return value (callers capture it with
#               $(...)), so diagnostics are sent to stderr / discarded.
manage_context_window() {
    local prompt="$1"
    local budget="${CONTEXT_BUDGET_CHARS:-180000}"
    # A non-numeric budget would make the integer comparisons below error out.
    [[ "$budget" =~ ^[0-9]+$ ]] || budget=180000
    local current_len=${#prompt}

    if [[ "$current_len" -le "$budget" ]]; then
        printf '%s\n' "$prompt"
        return
    fi

    # Over budget — progressively trim sections (least important first)
    local trimmed="$prompt"

    # 1. Trim DORA/Performance baselines (least critical for code generation)
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '/^## Performance Baselines/{skip=1; next} skip && /^## [^#]/{skip=0} !skip{print}')
    fi

    # 2. Trim file hotspots to top 5
    #    The section ends at the next markdown heading; without that reset the
    #    bullet limit would also eat bullets in every later section.
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '
            /## File Hotspots/ { in_sec = 1; kept = 0; print; next }
            in_sec && /^#+ /   { in_sec = 0 }
            in_sec && /^- /    { if (++kept > 5) next }
            { print }
        ')
    fi

    # 3. Trim git log to 10 entries (the first 10 listed in the section)
    #    Same section-boundary reset as above, so unrelated lines that happen
    #    to start with a hex character are not dropped later in the prompt.
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '
            /## Recent Git Activity/ { in_sec = 1; kept = 0; print; next }
            in_sec && /^#+ /         { in_sec = 0 }
            in_sec && /^[a-f0-9]/    { if (++kept > 10) next }
            { print }
        ')
    fi

    # 4. Truncate memory context to first 20K chars
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk -v max=20000 '
            /## Memory Context/{mem=1; skip_rest=0; chars=0; print; next}
            mem && /^## [^#]/{mem=0; print; next}
            mem{chars+=length($0)+1; if(chars>max){print "... (memory truncated for context budget)"; skip_rest=1; mem=0; next}}
            skip_rest && /^## [^#]/{skip_rest=0; print; next}
            skip_rest{next}
            {print}
        ')
    fi

    # 5. Truncate test output to last 50 lines
    #    Lines are buffered until the section ends. The END rule flushes the
    #    buffer when Test Results is the final section, which would otherwise
    #    discard the test output entirely. Blank lines are not re-emitted.
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '
            function flush_tail(   i, start) {
                start = (cnt > 50) ? (cnt - 49) : 1
                for (i = start; i <= cnt; i++) if (lines[i] != "") print lines[i]
                cnt = 0
            }
            /## Test Results/   { found = 1; cnt = 0; print; next }
            found && /^## [^#]/ { found = 0; flush_tail(); print; next }
            found               { lines[++cnt] = $0; next }
            { print }
            END { if (found) flush_tail() }
        ')
    fi

    # 6. Last resort: hard truncate with notice
    #    Room for the notice is reserved so the result stays within budget
    #    (unless the budget is smaller than the notice itself).
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        local notice="

... [CONTEXT TRUNCATED: prompt exceeded ${budget} char budget. Focus on the goal and most recent errors.]"
        local keep=$(( budget - ${#notice} ))
        if [[ "$keep" -lt 0 ]]; then
            keep=0
        fi
        trimmed="${trimmed:0:$keep}${notice}"
    fi

    # Log the trimming (never on stdout — that would end up inside the prompt)
    local final_len=${#trimmed}
    if [[ "$final_len" -lt "$current_len" ]]; then
        warn "Context trimmed from ${current_len} to ${final_len} chars (budget: ${budget})" >&2
        emit_event "loop.context_trimmed" "original=$current_len" "trimmed=$final_len" "budget=$budget" >/dev/null 2>&1 || true
    fi

    printf '%s\n' "$trimmed"
}
1732
+
1301
1733
  # ─── Prompt Composition ──────────────────────────────────────────────────────
1302
1734
 
1303
1735
  compose_prompt() {
@@ -1348,7 +1780,7 @@ Fix these specific errors. Each line above is one distinct error from the test o
1348
1780
 
1349
1781
  # Memory context injection (failure patterns + past learnings)
1350
1782
  local memory_section=""
1351
- if type memory_inject_context &>/dev/null 2>&1; then
1783
+ if type memory_inject_context >/dev/null 2>&1; then
1352
1784
  memory_section="$(memory_inject_context "build" 2>/dev/null || true)"
1353
1785
  elif [[ -f "$SCRIPT_DIR/sw-memory.sh" ]]; then
1354
1786
  memory_section="$("$SCRIPT_DIR/sw-memory.sh" inject build 2>/dev/null || true)"
@@ -1356,7 +1788,7 @@ Fix these specific errors. Each line above is one distinct error from the test o
1356
1788
 
1357
1789
  # DORA baselines for context
1358
1790
  local dora_section=""
1359
- if type memory_get_dora_baseline &>/dev/null 2>&1; then
1791
+ if type memory_get_dora_baseline >/dev/null 2>&1; then
1360
1792
  local dora_json
1361
1793
  dora_json="$(memory_get_dora_baseline 7 2>/dev/null || echo "{}")"
1362
1794
  local dora_total
@@ -1385,7 +1817,7 @@ $(cat "$memory_refresh_file")"
1385
1817
  local intelligence_section=""
1386
1818
  if [[ "${NO_GITHUB:-}" != "true" ]]; then
1387
1819
  # File hotspots — top 5 most-changed files
1388
- if type gh_file_change_frequency &>/dev/null 2>&1; then
1820
+ if type gh_file_change_frequency >/dev/null 2>&1; then
1389
1821
  local hotspots
1390
1822
  hotspots=$(gh_file_change_frequency 2>/dev/null | head -5 || true)
1391
1823
  if [[ -n "$hotspots" ]]; then
@@ -1396,7 +1828,7 @@ ${hotspots}"
1396
1828
  fi
1397
1829
 
1398
1830
  # CODEOWNERS context
1399
- if type gh_codeowners &>/dev/null 2>&1; then
1831
+ if type gh_codeowners >/dev/null 2>&1; then
1400
1832
  local owners
1401
1833
  owners=$(gh_codeowners 2>/dev/null | head -10 || true)
1402
1834
  if [[ -n "$owners" ]]; then
@@ -1407,7 +1839,7 @@ ${owners}"
1407
1839
  fi
1408
1840
 
1409
1841
  # Active security alerts
1410
- if type gh_security_alerts &>/dev/null 2>&1; then
1842
+ if type gh_security_alerts >/dev/null 2>&1; then
1411
1843
  local alerts
1412
1844
  alerts=$(gh_security_alerts 2>/dev/null | head -5 || true)
1413
1845
  if [[ -n "$alerts" ]]; then
@@ -1459,6 +1891,34 @@ ${last_error}"
1459
1891
  # Stuckness detection — compare last 3 iteration outputs
1460
1892
  local stuckness_section=""
1461
1893
  stuckness_section="$(detect_stuckness)"
1894
+ local _stuck_ret=$?
1895
+ local stuckness_detected=false
1896
+ [[ "$_stuck_ret" -eq 0 ]] && stuckness_detected=true
1897
+
1898
+ # Strategy exploration when stuck — append alternative strategy to GOAL
1899
+ if [[ "$stuckness_detected" == "true" ]]; then
1900
+ local last_error diagnosis
1901
+ last_error=$(tail -1 "${ARTIFACTS_DIR:-${PROJECT_ROOT:-.}/.claude/pipeline-artifacts}/error-log.jsonl" 2>/dev/null | jq -r '"Type: \(.type), Exit: \(.exit_code), Error: \(.error | split("\n") | first)"' 2>/dev/null || true)
1902
+ [[ -z "$last_error" || "$last_error" == "null" ]] && last_error="unknown"
1903
+ diagnosis="${STUCKNESS_DIAGNOSIS:-}"
1904
+ local alt_strategy
1905
+ alt_strategy=$(explore_alternative_strategy "$last_error" "${ITERATION:-0}" "$diagnosis")
1906
+ GOAL="${GOAL}
1907
+
1908
+ ${alt_strategy}"
1909
+
1910
+ # Handle model escalation
1911
+ if [[ "${ESCALATE_MODEL:-}" == "true" ]]; then
1912
+ if [[ -f "$SCRIPT_DIR/sw-model-router.sh" ]]; then
1913
+ source "$SCRIPT_DIR/sw-model-router.sh" 2>/dev/null || true
1914
+ fi
1915
+ if type escalate_model &>/dev/null; then
1916
+ MODEL=$(escalate_model "${MODEL:-sonnet}")
1917
+ info "Escalated to model: $MODEL"
1918
+ fi
1919
+ unset ESCALATE_MODEL
1920
+ fi
1921
+ fi
1462
1922
 
1463
1923
  # Session restart context — inject previous session progress
1464
1924
  local restart_section=""
@@ -1470,12 +1930,52 @@ You are starting a FRESH session after the previous one exhausted its iterations
1470
1930
  Read the progress above and continue from where it left off. Do NOT repeat work already done."
1471
1931
  fi
1472
1932
 
1933
+ # Resume-from-checkpoint context — reconstruct Claude context for meaningful resume
1934
+ local resume_section=""
1935
+ if [[ -n "${RESUMED_FROM_ITERATION:-}" && "${RESUMED_FROM_ITERATION:-0}" -gt 0 ]]; then
1936
+ local _test_tail=" (none recorded)"
1937
+ [[ -n "${RESUMED_TEST_OUTPUT:-}" ]] && _test_tail="$(echo "$RESUMED_TEST_OUTPUT" | tail -20)"
1938
+ resume_section="## RESUMING FROM ITERATION ${RESUMED_FROM_ITERATION}
1939
+
1940
+ Continue from where you left off. Do NOT repeat work already done.
1941
+
1942
+ Previous work modified these files:
1943
+ ${RESUMED_MODIFIED:- (none recorded)}
1944
+
1945
+ Previous findings/errors from earlier iterations:
1946
+ ${RESUMED_FINDINGS:- (none recorded)}
1947
+
1948
+ Last test output (fix any failures, tail):
1949
+ ${_test_tail}
1950
+
1951
+ ---
1952
+ "
1953
+ # Clear after first use so we don't keep injecting on every iteration
1954
+ RESUMED_FROM_ITERATION=""
1955
+ RESUMED_MODIFIED=""
1956
+ RESUMED_FINDINGS=""
1957
+ RESUMED_TEST_OUTPUT=""
1958
+ fi
1959
+
1960
+ # Build cumulative progress summary showing all iterations' work
1961
+ local cumulative_section=""
1962
+ if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
1963
+ local cum_stat
1964
+ cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
1965
+ if [[ -n "$cum_stat" ]]; then
1966
+ cumulative_section="## Cumulative Progress (all iterations combined)
1967
+ ${cum_stat}
1968
+ "
1969
+ fi
1970
+ fi
1971
+
1473
1972
  cat <<PROMPT
1474
1973
  You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
1475
-
1974
+ ${resume_section}
1476
1975
  ## Your Goal
1477
1976
  ${GOAL}
1478
1977
 
1978
+ ${cumulative_section}
1479
1979
  ## Current Progress
1480
1980
  ${recent_log}
1481
1981
 
@@ -1521,56 +2021,227 @@ ${stuckness_section}
1521
2021
  PROMPT
1522
2022
  }
1523
2023
 
2024
# ─── Alternative Strategy Exploration ─────────────────────────────────────────
# When stuckness is detected, generate a context-aware alternative strategy.
# Uses pattern matching on the last error / diagnosis plus gate state to suggest
# a different approach, and remembers which strategies were already suggested.

# _strategy_claim NAME FILE
# Returns 0 and records NAME in FILE if NAME has not been recorded yet;
# returns 1 (recording nothing) if it was already attempted.
_strategy_claim() {
    local name="$1" file="$2"
    # Exact whole-line, fixed-string match so one name can never match inside another.
    if grep -Fxq -- "$name" "$file" 2>/dev/null; then
        return 1
    fi
    printf '%s\n' "$name" >> "$file"
}

# explore_alternative_strategy LAST_ERROR [ITERATION] [DIAGNOSIS]
#
# Arguments:
#   $1 - summary of the last error (default "unknown")
#   $2 - iteration number; accepted for interface compatibility, not used
#   $3 - stuckness diagnosis text (default empty)
# Globals read: LOG_DIR (falls back to /tmp), TEST_PASSED, QUALITY_GATE_PASSED,
#               AUDIT_RESULT
# Side effects: appends the chosen strategy name to
#               ${LOG_DIR:-/tmp}/strategy-attempts.txt
# Outputs:      the strategy text on stdout, or an empty line when every
#               applicable strategy has already been attempted.
#
# Candidates are tried in order and only the first untried, applicable one is
# claimed. A strategy is therefore never marked as attempted without actually
# being returned to the caller.
explore_alternative_strategy() {
    local last_error="${1:-unknown}"
    local diagnosis="${3:-}"

    # Track attempted strategies to avoid repeating them
    local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"

    local strategy=""

    # If quality gates are passing but evaluators disagree, suggest focusing on evaluator alignment
    if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
        if _strategy_claim "evaluator_alignment" "$strategy_file"; then
            strategy="## Alternative Strategy: Evaluator Alignment
The code appears functionally complete (tests pass). Focus on satisfying the remaining
quality gate evaluators. Check the DoD log and audit log for specific complaints, then
address those exact points rather than adding new features."
        fi
    fi

    # If no code changes in last iteration, suggest verifying existing work
    if [[ -z "$strategy" ]] && { grep -qi "no code changes" <<< "$last_error" || [[ "$diagnosis" == *"no code"* ]]; }; then
        if _strategy_claim "verify_existing" "$strategy_file"; then
            strategy="## Alternative Strategy: Verify Existing Work
Recent iterations made no code changes. The work may already be complete.
Run the full test suite, verify all features work, and if everything passes,
commit a verification message and declare LOOP_COMPLETE with evidence."
        fi
    fi

    # Generic fallback: break the problem down
    if [[ -z "$strategy" ]]; then
        if _strategy_claim "decompose" "$strategy_file"; then
            strategy="## Alternative Strategy: Decompose
Break the remaining work into smaller, independent steps. Focus on one specific
file or function at a time. Read error messages literally — the root cause may
differ from your assumption."
        fi
    fi

    printf '%s\n' "$strategy"
}
2075
+
1524
2076
  # ─── Stuckness Detection ─────────────────────────────────────────────────────
1525
- # Compares last 3 iteration log outputs for high overlap (>90% similar lines).
2077
+ # Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
2078
+ # Returns 0 when stuck, 1 when not. Outputs stuckness section and sets STUCKNESS_HINT when stuck.
2079
+ # When stuck: increments STUCKNESS_COUNT, emits event; if STUCKNESS_COUNT >= 3, caller triggers session restart.
2080
+ STUCKNESS_COUNT=0
2081
+ STUCKNESS_TRACKING_FILE=""
2082
+
2083
# _stuckness_md5
# Reads stdin and prints its MD5 hex digest, using `md5 -q` when an md5 command
# is available and `md5sum` otherwise. Prints "none" when no digest could be
# produced, so the tracking record never contains an empty field.
_stuckness_md5() {
    local digest=""
    if command -v md5 >/dev/null 2>&1; then
        digest=$(md5 -q 2>/dev/null) || digest=""
    elif command -v md5sum >/dev/null 2>&1; then
        digest=$(md5sum 2>/dev/null | cut -d' ' -f1) || digest=""
    else
        cat >/dev/null  # drain stdin so the upstream writer is not left blocked
    fi
    printf '%s\n' "${digest:-none}"
}

# record_iteration_stuckness_data [EXIT_CODE]
#
# Appends one "diff_hash|error_hash|exit_code" record for the current iteration
# to the stuckness tracking file.
#
# Arguments:
#   $1 - exit code of the iteration (default 0)
# Globals read: LOG_DIR, STUCKNESS_TRACKING_FILE, PROJECT_ROOT, ARTIFACTS_DIR,
#               STATE_DIR
# Returns:      0 without writing anything when LOG_DIR is unset or empty.
record_iteration_stuckness_data() {
    local exit_code="${1:-0}"
    # ${LOG_DIR:-} keeps this guard safe if the caller runs with `set -u`.
    [[ -z "${LOG_DIR:-}" ]] && return 0
    local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
    local diff_hash error_hash

    # Hash of the uncommitted diff against HEAD; git failures yield the hash of
    # empty input.
    diff_hash=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | _stuckness_md5)

    local error_log="${ARTIFACTS_DIR:-${STATE_DIR:-${PROJECT_ROOT:-.}/.claude}/pipeline-artifacts}/error-log.jsonl"
    if [[ -f "$error_log" ]]; then
        # De-duplicate the last five entries before hashing.
        error_hash=$(tail -5 "$error_log" 2>/dev/null | sort -u | _stuckness_md5)
    else
        error_hash="none"
    fi

    printf '%s\n' "${diff_hash}|${error_hash}|${exit_code}" >> "$tracking_file"
}
2097
+
1526
2098
  detect_stuckness() {
1527
- if [[ "$ITERATION" -lt 3 ]]; then
1528
- return 0
2099
+ STUCKNESS_HINT=""
2100
+ local iteration="${ITERATION:-0}"
2101
+ local stuckness_signals=0
2102
+ local stuckness_reasons=()
2103
+ local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
2104
+ local tracking_lines
2105
+ tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || echo "0")
2106
+
2107
+ # Signal 1: Text overlap (existing logic) — compare last 2 iteration logs
2108
+ if [[ "$iteration" -ge 3 ]]; then
2109
+ local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
2110
+ local log2="$LOG_DIR/iteration-$(( iteration - 2 )).log"
2111
+ local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
2112
+
2113
+ if [[ -f "$log1" && -f "$log2" ]]; then
2114
+ local lines1 lines2 common total overlap_pct
2115
+ lines1=$(tail -50 "$log1" 2>/dev/null | grep -v '^$' | sort || true)
2116
+ lines2=$(tail -50 "$log2" 2>/dev/null | grep -v '^$' | sort || true)
2117
+
2118
+ if [[ -n "$lines1" && -n "$lines2" ]]; then
2119
+ total=$(echo "$lines1" | wc -l | tr -d ' ')
2120
+ common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || echo "0")
2121
+ if [[ "$total" -gt 0 ]]; then
2122
+ overlap_pct=$(( common * 100 / total ))
2123
+ else
2124
+ overlap_pct=0
2125
+ fi
2126
+ if [[ "${overlap_pct:-0}" -ge 90 ]]; then
2127
+ stuckness_signals=$((stuckness_signals + 1))
2128
+ stuckness_reasons+=("high text overlap (${overlap_pct}%) between iterations")
2129
+ fi
2130
+ fi
2131
+ fi
1529
2132
  fi
1530
2133
 
1531
- local log1="$LOG_DIR/iteration-$(( ITERATION - 1 )).log"
1532
- local log2="$LOG_DIR/iteration-$(( ITERATION - 2 )).log"
1533
- local log3="$LOG_DIR/iteration-$(( ITERATION - 3 )).log"
2134
+ # Signal 2: Git diff hash — last 3 iterations produced zero or identical diffs
2135
+ if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
2136
+ local last_three
2137
+ last_three=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f1 || true)
2138
+ local unique_hashes
2139
+ unique_hashes=$(echo "$last_three" | sort -u | grep -v '^$' | wc -l | tr -d ' ')
2140
+ if [[ "$unique_hashes" -le 1 ]] && [[ -n "$last_three" ]]; then
2141
+ stuckness_signals=$((stuckness_signals + 1))
2142
+ stuckness_reasons+=("identical or zero git diffs in last 3 iterations")
2143
+ fi
2144
+ fi
1534
2145
 
1535
- # Need at least 2 previous logs
1536
- if [[ ! -f "$log1" || ! -f "$log2" ]]; then
1537
- return 0
2146
+ # Signal 3: Error repetition — same error hash in last 3 iterations
2147
+ if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
2148
+ local last_three_errors
2149
+ last_three_errors=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f2 || true)
2150
+ local unique_error_hashes
2151
+ unique_error_hashes=$(echo "$last_three_errors" | sort -u | grep -v '^none$' | grep -v '^$' | wc -l | tr -d ' ')
2152
+ if [[ "$unique_error_hashes" -eq 1 ]] && [[ -n "$(echo "$last_three_errors" | grep -v '^none$')" ]]; then
2153
+ stuckness_signals=$((stuckness_signals + 1))
2154
+ stuckness_reasons+=("same error in last 3 iterations")
2155
+ fi
1538
2156
  fi
1539
2157
 
1540
- # Compare last 50 lines of each (ignoring timestamps and blank lines)
1541
- local lines1 lines2 common total overlap_pct
1542
- lines1=$(tail -50 "$log1" 2>/dev/null | grep -v '^$' | sort || true)
1543
- lines2=$(tail -50 "$log2" 2>/dev/null | grep -v '^$' | sort || true)
2158
+ # Signal 4: Same error repeating 3+ times (legacy check on error-log content)
2159
+ local error_log
2160
+ error_log="${ARTIFACTS_DIR:-$PROJECT_ROOT/.claude/pipeline-artifacts}/error-log.jsonl"
2161
+ if [[ -f "$error_log" ]]; then
2162
+ local last_errors
2163
+ last_errors=$(tail -5 "$error_log" 2>/dev/null | jq -r '.error // .message // .error_hash // empty' 2>/dev/null | sort | uniq -c | sort -rn | head -1 || true)
2164
+ local repeat_count
2165
+ repeat_count=$(echo "$last_errors" | awk '{print $1}' 2>/dev/null || echo "0")
2166
+ if [[ "${repeat_count:-0}" -ge 3 ]]; then
2167
+ stuckness_signals=$((stuckness_signals + 1))
2168
+ stuckness_reasons+=("same error repeated ${repeat_count} times")
2169
+ fi
2170
+ fi
1544
2171
 
1545
- if [[ -z "$lines1" || -z "$lines2" ]]; then
1546
- return 0
2172
+ # Signal 5: Exit code pattern — last 3 iterations had same non-zero exit code
2173
+ if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
2174
+ local last_three_exits
2175
+ last_three_exits=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f3 || true)
2176
+ local first_exit
2177
+ first_exit=$(echo "$last_three_exits" | head -1)
2178
+ if [[ "$first_exit" =~ ^[0-9]+$ ]] && [[ "$first_exit" -ne 0 ]]; then
2179
+ local all_same=true
2180
+ while IFS= read -r ex; do
2181
+ [[ "$ex" != "$first_exit" ]] && all_same=false
2182
+ done <<< "$last_three_exits"
2183
+ if [[ "$all_same" == true ]]; then
2184
+ stuckness_signals=$((stuckness_signals + 1))
2185
+ stuckness_reasons+=("same non-zero exit code (${first_exit}) in last 3 iterations")
2186
+ fi
2187
+ fi
1547
2188
  fi
1548
2189
 
1549
- total=$(echo "$lines1" | wc -l | tr -d ' ')
1550
- common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || echo "0")
2190
+ # Signal 6: Git diff size — no or minimal code changes (existing)
2191
+ local diff_lines
2192
+ diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || echo "0")
2193
+ if [[ "${diff_lines:-0}" -lt 5 ]] && [[ "$iteration" -gt 2 ]]; then
2194
+ stuckness_signals=$((stuckness_signals + 1))
2195
+ stuckness_reasons+=("no code changes in last iteration")
2196
+ fi
1551
2197
 
1552
- if [[ "$total" -gt 0 ]]; then
1553
- overlap_pct=$(( common * 100 / total ))
1554
- else
1555
- overlap_pct=0
2198
+ # Signal 7: Iteration budget used >70% without passing tests
2199
+ local max_iter="${MAX_ITERATIONS:-20}"
2200
+ local progress_pct=0
2201
+ if [[ "$max_iter" -gt 0 ]]; then
2202
+ progress_pct=$(( iteration * 100 / max_iter ))
1556
2203
  fi
2204
+ if [[ "$progress_pct" -gt 70 ]] && [[ "${TEST_PASSED:-false}" != "true" ]]; then
2205
+ stuckness_signals=$((stuckness_signals + 1))
2206
+ stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
2207
+ fi
2208
+
2209
+ # Gate-aware dampening: if tests pass and the agent has made progress overall,
2210
+ # reduce stuckness signal count. The "no code changes" and "identical diffs" signals
2211
+ # fire when code is already complete and the agent is fighting evaluator quirks —
2212
+ # that's not genuine stuckness, it's "done but gates disagree."
2213
+ if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
2214
+ # If at least one quality signal is positive, dampen by 1
2215
+ if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
2216
+ stuckness_signals=$((stuckness_signals - 1))
2217
+ fi
2218
+ fi
2219
+
2220
+ # Decision: 2+ signals = stuck
2221
+ if [[ "$stuckness_signals" -ge 2 ]]; then
2222
+ STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
2223
+ STUCKNESS_DIAGNOSIS="${stuckness_reasons[*]}"
2224
+ if type emit_event >/dev/null 2>&1; then
2225
+ emit_event "loop.stuckness_detected" "signals=$stuckness_signals" "count=$STUCKNESS_COUNT" "iteration=$iteration" "reasons=${stuckness_reasons[*]}"
2226
+ fi
2227
+ STUCKNESS_HINT="IMPORTANT: The loop appears stuck. Previous approaches have not worked. You MUST try a fundamentally different strategy. Reasons: ${stuckness_reasons[*]}"
2228
+ warn "Stuckness detected (${stuckness_signals} signals, count ${STUCKNESS_COUNT}): ${stuckness_reasons[*]}"
1557
2229
 
1558
- if [[ "$overlap_pct" -ge 90 ]]; then
1559
2230
  local diff_summary=""
1560
- if [[ -f "$log3" ]]; then
2231
+ local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
2232
+ local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
2233
+ if [[ -f "$log3" && -f "$log1" ]]; then
1561
2234
  diff_summary=$(diff <(tail -30 "$log3" 2>/dev/null) <(tail -30 "$log1" 2>/dev/null) 2>/dev/null | head -10 || true)
1562
2235
  fi
1563
2236
 
1564
- # Gather memory-based alternative approaches
1565
2237
  local alternatives=""
1566
- if type memory_inject_context &>/dev/null 2>&1; then
2238
+ if type memory_inject_context >/dev/null 2>&1; then
1567
2239
  alternatives=$(memory_inject_context "build" 2>/dev/null | grep -i "fix:" | head -3 || true)
1568
2240
  fi
1569
2241
 
1570
2242
  cat <<STUCK_SECTION
1571
2243
  ## Stuckness Detected
1572
- Your last ${CONSECUTIVE_FAILURES:-2}+ iterations produced very similar output (${overlap_pct}% overlap).
1573
- You appear to be stuck on the same approach.
2244
+ ${STUCKNESS_HINT}
1574
2245
 
1575
2246
  ${diff_summary:+Changes between recent iterations:
1576
2247
  $diff_summary
@@ -1584,7 +2255,10 @@ Try a fundamentally different approach:
1584
2255
  - Check if there's a dependency or configuration issue blocking progress
1585
2256
  - Read error messages more carefully — the root cause may differ from your assumption
1586
2257
  STUCK_SECTION
2258
+ return 0
1587
2259
  fi
2260
+
2261
+ return 1
1588
2262
  }
1589
2263
 
1590
2264
  compose_audit_section() {
@@ -1675,7 +2349,7 @@ compose_worker_prompt() {
1675
2349
  local role_desc=""
1676
2350
  # Try to pull description from recruit's roles DB first
1677
2351
  local recruit_roles_db="${HOME}/.shipwright/recruitment/roles.json"
1678
- if [[ -f "$recruit_roles_db" ]] && command -v jq &>/dev/null; then
2352
+ if [[ -f "$recruit_roles_db" ]] && command -v jq >/dev/null 2>&1; then
1679
2353
  local recruit_desc
1680
2354
  recruit_desc=$(jq -r --arg r "$role" '.[$r].description // ""' "$recruit_roles_db" 2>/dev/null) || true
1681
2355
  if [[ -n "$recruit_desc" && "$recruit_desc" != "null" ]]; then
@@ -1735,6 +2409,12 @@ run_claude_iteration() {
1735
2409
  local json_file="$LOG_DIR/iteration-${ITERATION}.json"
1736
2410
  local prompt
1737
2411
  prompt="$(compose_prompt)"
2412
+ local final_prompt
2413
+ final_prompt=$(manage_context_window "$prompt")
2414
+
2415
+ local prompt_chars=${#final_prompt}
2416
+ local approx_tokens=$((prompt_chars / 4))
2417
+ info "Prompt: ~${approx_tokens} tokens (${prompt_chars} chars)"
1738
2418
 
1739
2419
  local flags
1740
2420
  flags="$(build_claude_flags)"
@@ -1750,9 +2430,9 @@ run_claude_iteration() {
1750
2430
  # shellcheck disable=SC2086
1751
2431
  local err_file="${json_file%.json}.stderr"
1752
2432
  if [[ -n "$TIMEOUT_CMD" ]]; then
1753
- $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
2433
+ $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
1754
2434
  else
1755
- claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
2435
+ claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
1756
2436
  fi
1757
2437
  CHILD_PID=$!
1758
2438
  wait "$CHILD_PID" 2>/dev/null || exit_code=$?
@@ -1835,12 +2515,13 @@ show_summary() {
1835
2515
 
1836
2516
  local status_display
1837
2517
  case "$STATUS" in
1838
- complete) status_display="${GREEN}✓ Complete (LOOP_COMPLETE detected)${RESET}" ;;
1839
- circuit_breaker) status_display="${RED}✗ Circuit breaker tripped${RESET}" ;;
1840
- max_iterations) status_display="${YELLOW}⚠ Max iterations reached${RESET}" ;;
1841
- interrupted) status_display="${YELLOW} Interrupted by user${RESET}" ;;
1842
- error) status_display="${RED} Error${RESET}" ;;
1843
- *) status_display="${DIM}$STATUS${RESET}" ;;
2518
+ complete) status_display="${GREEN}✓ Complete (LOOP_COMPLETE detected)${RESET}" ;;
2519
+ circuit_breaker) status_display="${RED}✗ Circuit breaker tripped${RESET}" ;;
2520
+ max_iterations) status_display="${YELLOW}⚠ Max iterations reached${RESET}" ;;
2521
+ budget_exhausted) status_display="${RED} Budget exhausted${RESET}" ;;
2522
+ interrupted) status_display="${YELLOW} Interrupted by user${RESET}" ;;
2523
+ error) status_display="${RED}✗ Error${RESET}" ;;
2524
+ *) status_display="${DIM}$STATUS${RESET}" ;;
1844
2525
  esac
1845
2526
 
1846
2527
  local test_display
@@ -1909,6 +2590,15 @@ cleanup() {
1909
2590
  --iteration "$ITERATION" \
1910
2591
  --git-sha "$(git rev-parse HEAD 2>/dev/null || echo unknown)" 2>/dev/null || true
1911
2592
 
2593
+ # Save Claude context for meaningful resume (goal, findings, test output)
2594
+ export SW_LOOP_GOAL="$GOAL"
2595
+ export SW_LOOP_ITERATION="$ITERATION"
2596
+ export SW_LOOP_STATUS="$STATUS"
2597
+ export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
2598
+ export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
2599
+ export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
2600
+ "$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
2601
+
1912
2602
  # Clear heartbeat
1913
2603
  "$SCRIPT_DIR/sw-heartbeat.sh" clear "${PIPELINE_JOB_ID:-loop-$$}" 2>/dev/null || true
1914
2604
 
@@ -1934,7 +2624,7 @@ setup_worktrees() {
1934
2624
  fi
1935
2625
 
1936
2626
  # Create branch if it doesn't exist
1937
- if ! git -C "$PROJECT_ROOT" rev-parse --verify "$branch_name" &>/dev/null; then
2627
+ if ! git -C "$PROJECT_ROOT" rev-parse --verify "$branch_name" >/dev/null 2>&1; then
1938
2628
  git -C "$PROJECT_ROOT" branch "$branch_name" HEAD 2>/dev/null || true
1939
2629
  fi
1940
2630
 
@@ -1996,6 +2686,17 @@ CONSECUTIVE_FAILURES=0
1996
2686
  echo -e "${CYAN}${BOLD}▸${RESET} Agent ${AGENT_NUM}/${TOTAL_AGENTS} starting in ${WORK_DIR}"
1997
2687
 
1998
2688
  while [[ "$ITERATION" -lt "$MAX_ITERATIONS" ]]; do
2689
+ # Budget gate: stop if daily budget exhausted
2690
+ if [[ -x "$SCRIPT_DIR/sw-cost.sh" ]]; then
2691
+ budget_remaining=$("$SCRIPT_DIR/sw-cost.sh" remaining-budget 2>/dev/null || echo "")
2692
+ if [[ -n "$budget_remaining" && "$budget_remaining" != "unlimited" ]]; then
2693
+ if awk -v r="$budget_remaining" 'BEGIN { exit !(r <= 0) }' 2>/dev/null; then
2694
+ echo -e " ${RED}✗${RESET} Budget exhausted (\$${budget_remaining}) — stopping agent ${AGENT_NUM}"
2695
+ break
2696
+ fi
2697
+ fi
2698
+ fi
2699
+
1999
2700
  ITERATION=$(( ITERATION + 1 ))
2000
2701
  echo -e "\n${CYAN}${BOLD}▸${RESET} Agent ${AGENT_NUM} — Iteration ${ITERATION}/${MAX_ITERATIONS}"
2001
2702
 
@@ -2064,8 +2765,12 @@ PROMPT
2064
2765
  # Auto-commit
2065
2766
  git add -A 2>/dev/null || true
2066
2767
  if git commit -m "agent-${AGENT_NUM}: iteration ${ITERATION}" --no-verify 2>/dev/null; then
2067
- git push origin "loop/agent-${AGENT_NUM}" 2>/dev/null || true
2068
- echo -e " ${GREEN}✓${RESET} Committed and pushed"
2768
+ if ! git push origin "loop/agent-${AGENT_NUM}" 2>/dev/null; then
2769
+ echo -e " ${YELLOW}⚠${RESET} git push failed for loop/agent-${AGENT_NUM} — remote may be out of sync"
2770
+ type emit_event >/dev/null 2>&1 && emit_event "loop.push_failed" "branch=loop/agent-${AGENT_NUM}"
2771
+ else
2772
+ echo -e " ${GREEN}✓${RESET} Committed and pushed"
2773
+ fi
2069
2774
  fi
2070
2775
 
2071
2776
  # Circuit breaker: check for progress
@@ -2083,7 +2788,7 @@ PROMPT
2083
2788
  break
2084
2789
  fi
2085
2790
 
2086
- sleep 2
2791
+ sleep __SLEEP_BETWEEN_ITERATIONS__
2087
2792
  done
2088
2793
 
2089
2794
  echo -e "\n${DIM}Agent ${AGENT_NUM} finished after ${ITERATION} iterations${RESET}"
@@ -2094,11 +2799,14 @@ WORKEREOF
2094
2799
  sed_i "s|__AGENT_NUM__|${agent_num}|g" "$worker_script"
2095
2800
  sed_i "s|__TOTAL_AGENTS__|${total_agents}|g" "$worker_script"
2096
2801
  sed_i "s|__MAX_ITERATIONS__|${MAX_ITERATIONS}|g" "$worker_script"
2802
+ sed_i "s|__SLEEP_BETWEEN_ITERATIONS__|$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)|g" "$worker_script"
2097
2803
  # Paths and commands may contain sed-special chars — use awk
2098
2804
  awk -v val="$wt_path" '{gsub(/__WORK_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
2099
2805
  && mv "${worker_script}.tmp" "$worker_script"
2100
2806
  awk -v val="$LOG_DIR" '{gsub(/__LOG_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
2101
2807
  && mv "${worker_script}.tmp" "$worker_script"
2808
+ awk -v val="$SCRIPT_DIR" '{gsub(/__SCRIPT_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
2809
+ && mv "${worker_script}.tmp" "$worker_script"
2102
2810
  awk -v val="$TEST_CMD" '{gsub(/__TEST_CMD__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
2103
2811
  && mv "${worker_script}.tmp" "$worker_script"
2104
2812
  awk -v val="$claude_flags" '{gsub(/__CLAUDE_FLAGS__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
@@ -2137,11 +2845,12 @@ launch_multi_agent() {
2137
2845
  local worker_script
2138
2846
  worker_script="$(generate_worker_script "$i" "$AGENTS")"
2139
2847
 
2140
- tmux split-window -t "$MULTI_WINDOW_NAME" -c "$PROJECT_ROOT"
2848
+ local worker_pane_id
2849
+ worker_pane_id="$(tmux split-window -t "$MULTI_WINDOW_NAME" -c "$PROJECT_ROOT" -P -F '#{pane_id}')"
2141
2850
  sleep 0.1
2142
- tmux send-keys -t "$MULTI_WINDOW_NAME" "printf '\\033]2;agent-${i}\\033\\\\'" Enter
2851
+ tmux send-keys -t "$worker_pane_id" "printf '\\033]2;agent-${i}\\033\\\\'" Enter
2143
2852
  sleep 0.1
2144
- tmux send-keys -t "$MULTI_WINDOW_NAME" "bash '$worker_script'" Enter
2853
+ tmux send-keys -t "$worker_pane_id" "bash '$worker_script'" Enter
2145
2854
  done
2146
2855
 
2147
2856
  # Layout: monitor pane on top (35%), worker agents tile below
@@ -2181,7 +2890,7 @@ wait_for_multi_completion() {
2181
2890
  latest_log="$(ls -t "$LOG_DIR"/agent-"${i}"-iter-*.log 2>/dev/null | head -1)"
2182
2891
  if [[ -n "$latest_log" ]]; then
2183
2892
  local age
2184
- age=$(( $(now_epoch) - $(stat -f %m "$latest_log" 2>/dev/null || echo 0) ))
2893
+ age=$(( $(now_epoch) - $(file_mtime "$latest_log") ))
2185
2894
  if [[ $age -lt 300 ]]; then # Active within 5 minutes
2186
2895
  running=$(( running + 1 ))
2187
2896
  fi
@@ -2200,7 +2909,7 @@ wait_for_multi_completion() {
2200
2909
  fi
2201
2910
  fi
2202
2911
 
2203
- sleep 5
2912
+ sleep "$(_config_get_int "loop.multi_agent_sleep" 5 2>/dev/null || echo 5)"
2204
2913
  done
2205
2914
  }
2206
2915
 
@@ -2233,12 +2942,21 @@ run_single_agent_loop() {
2233
2942
  initialize_state
2234
2943
  fi
2235
2944
 
2945
+ # Ensure LOOP_START_COMMIT is set (may not be on resume/restart)
2946
+ if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
2947
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
2948
+ fi
2949
+
2236
2950
  # Apply adaptive budget/model before showing banner
2237
2951
  apply_adaptive_budget
2238
2952
  MODEL="$(select_adaptive_model "build" "$MODEL")"
2239
2953
 
2240
2954
  # Track applied memory fix patterns for outcome recording
2241
2955
  _applied_fix_pattern=""
2956
+ STUCKNESS_COUNT=0
2957
+ STUCKNESS_TRACKING_FILE="$LOG_DIR/stuckness-tracking.txt"
2958
+ : > "$STUCKNESS_TRACKING_FILE" 2>/dev/null || true
2959
+ : > "${LOG_DIR:-/tmp}/strategy-attempts.txt" 2>/dev/null || true
2242
2960
 
2243
2961
  show_banner
2244
2962
 
@@ -2246,17 +2964,58 @@ run_single_agent_loop() {
2246
2964
  # Pre-checks (before incrementing — ITERATION tracks completed count)
2247
2965
  check_circuit_breaker || break
2248
2966
  check_max_iterations || break
2967
+ check_budget_gate || {
2968
+ STATUS="budget_exhausted"
2969
+ write_state
2970
+ write_progress
2971
+ error "Budget exhausted — stopping pipeline"
2972
+ show_summary
2973
+ return 1
2974
+ }
2249
2975
  ITERATION=$(( ITERATION + 1 ))
2250
2976
 
2251
- # Try memory-based fix suggestion on retry after test failure
2977
+ # Emit iteration start event for pipeline visibility
2978
+ if type emit_event >/dev/null 2>&1; then
2979
+ emit_event "loop.iteration_start" \
2980
+ "iteration=$ITERATION" \
2981
+ "max=$MAX_ITERATIONS" \
2982
+ "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
2983
+ "agent=${AGENT_NUM:-1}" \
2984
+ "test_passed=${TEST_PASSED:-unknown}"
2985
+ fi
2986
+
2987
+ # Root-cause diagnosis and memory-based fix on retry after test failure
2252
2988
  if [[ "${TEST_PASSED:-}" == "false" ]]; then
2989
+ # Source memory module for diagnosis and fix lookup
2990
+ [[ -f "$SCRIPT_DIR/sw-memory.sh" ]] && source "$SCRIPT_DIR/sw-memory.sh" 2>/dev/null || true
2991
+
2992
+ # Capture failure for memory (enables memory_analyze_failure and future fix lookup)
2993
+ if type memory_capture_failure &>/dev/null && [[ -n "${TEST_OUTPUT:-}" ]]; then
2994
+ memory_capture_failure "test" "$TEST_OUTPUT" 2>/dev/null || true
2995
+ fi
2996
+
2997
+ # Pattern-based diagnosis (no Claude needed) — inject into goal for smarter retry
2998
+ local _changed_files=""
2999
+ _changed_files=$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')
3000
+ local _diagnosis
3001
+ _diagnosis=$(diagnose_failure "${TEST_OUTPUT:-}" "$_changed_files" "$ITERATION" 2>/dev/null || true)
3002
+
3003
+ if [[ -n "$_diagnosis" ]]; then
3004
+ GOAL="${GOAL}
3005
+
3006
+ ${_diagnosis}"
3007
+ info "Failure diagnosis injected (classification from error pattern)"
3008
+ fi
3009
+
3010
+ # Memory-based fix suggestion (from past successful fixes)
2253
3011
  local _last_error=""
2254
3012
  local _prev_log="$LOG_DIR/iteration-$(( ITERATION - 1 )).log"
2255
3013
  if [[ -f "$_prev_log" ]]; then
2256
3014
  _last_error=$(tail -20 "$_prev_log" 2>/dev/null | grep -iE '(error|fail|exception)' | head -1 || true)
2257
3015
  fi
3016
+ [[ -z "$_last_error" ]] && _last_error=$(echo "${TEST_OUTPUT:-}" | head -3 | tr '\n' ' ')
2258
3017
  local _fix_suggestion=""
2259
- if type memory_closed_loop_inject &>/dev/null 2>&1 && [[ -n "${_last_error:-}" ]]; then
3018
+ if type memory_closed_loop_inject >/dev/null 2>&1 && [[ -n "${_last_error:-}" ]]; then
2260
3019
  _fix_suggestion=$(memory_closed_loop_inject "$_last_error" 2>/dev/null) || true
2261
3020
  fi
2262
3021
  if [[ -n "${_fix_suggestion:-}" ]]; then
@@ -2266,6 +3025,14 @@ run_single_agent_loop() {
2266
3025
  ${GOAL}"
2267
3026
  info "Memory fix injected: ${_fix_suggestion:0:80}"
2268
3027
  fi
3028
+
3029
+ # Analyze failure via Claude (background, non-blocking) for richer root_cause/fix in memory
3030
+ if type memory_analyze_failure &>/dev/null && [[ "${INTELLIGENCE_ENABLED:-auto}" != "false" ]]; then
3031
+ local _test_log="${TEST_LOG_FILE:-$LOG_DIR/tests-iter-$(( ITERATION - 1 )).log}"
3032
+ if [[ -f "$_test_log" ]]; then
3033
+ memory_analyze_failure "$_test_log" "test" 2>/dev/null &
3034
+ fi
3035
+ fi
2269
3036
  fi
2270
3037
 
2271
3038
  # Run Claude
@@ -2274,6 +3041,9 @@ ${GOAL}"
2274
3041
 
2275
3042
  local log_file="$LOG_DIR/iteration-${ITERATION}.log"
2276
3043
 
3044
+ # Record iteration data for stuckness detection (diff hash, error hash, exit code)
3045
+ record_iteration_stuckness_data "$exit_code"
3046
+
2277
3047
  # Detect fatal CLI errors (API key, auth, network) — abort immediately
2278
3048
  if check_fatal_error "$log_file" "$exit_code"; then
2279
3049
  STATUS="error"
@@ -2285,7 +3055,7 @@ ${GOAL}"
2285
3055
  fi
2286
3056
 
2287
3057
  # Mid-loop memory refresh — re-query with current error context after iteration 3
2288
- if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context &>/dev/null 2>&1; then
3058
+ if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context >/dev/null 2>&1; then
2289
3059
  local refresh_ctx
2290
3060
  refresh_ctx=$(tail -20 "$log_file" 2>/dev/null || true)
2291
3061
  if [[ -n "$refresh_ctx" ]]; then
@@ -2331,7 +3101,7 @@ ${GOAL}"
2331
3101
 
2332
3102
  # Track fix outcome for memory effectiveness
2333
3103
  if [[ -n "${_applied_fix_pattern:-}" ]]; then
2334
- if type memory_record_fix_outcome &>/dev/null 2>&1; then
3104
+ if type memory_record_fix_outcome >/dev/null 2>&1; then
2335
3105
  if [[ "${TEST_PASSED:-}" == "true" ]]; then
2336
3106
  memory_record_fix_outcome "$_applied_fix_pattern" "true" "true" 2>/dev/null || true
2337
3107
  else
@@ -2341,6 +3111,15 @@ ${GOAL}"
2341
3111
  _applied_fix_pattern=""
2342
3112
  fi
2343
3113
 
3114
+ # Save Claude context for checkpoint resume (goal, findings, test output)
3115
+ export SW_LOOP_GOAL="$GOAL"
3116
+ export SW_LOOP_ITERATION="$ITERATION"
3117
+ export SW_LOOP_STATUS="${STATUS:-running}"
3118
+ export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
3119
+ export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
3120
+ export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
3121
+ "$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
3122
+
2344
3123
  # Audit agent (reviews implementer's work)
2345
3124
  run_audit_agent
2346
3125
 
@@ -2374,6 +3153,18 @@ $summary
2374
3153
  write_state
2375
3154
  write_progress
2376
3155
 
3156
+ # Emit iteration complete event for pipeline visibility
3157
+ if type emit_event >/dev/null 2>&1; then
3158
+ emit_event "loop.iteration_complete" \
3159
+ "iteration=$ITERATION" \
3160
+ "max=$MAX_ITERATIONS" \
3161
+ "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
3162
+ "agent=${AGENT_NUM:-1}" \
3163
+ "test_passed=${TEST_PASSED:-unknown}" \
3164
+ "commits=$TOTAL_COMMITS" \
3165
+ "status=${STATUS:-running}"
3166
+ fi
3167
+
2377
3168
  # Update heartbeat
2378
3169
  "$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
2379
3170
  --pid $$ \
@@ -2396,7 +3187,16 @@ HUMAN FEEDBACK (received after iteration $ITERATION): $human_msg"
2396
3187
  fi
2397
3188
  fi
2398
3189
 
2399
- sleep 2
3190
+ # Stuckness-triggered restart: if detected 3+ times, break to allow session restart
3191
+ if [[ "${STUCKNESS_COUNT:-0}" -ge 3 ]]; then
3192
+ STATUS="stuck_restart"
3193
+ write_state
3194
+ write_progress
3195
+ warn "Stuckness detected 3+ times — triggering session restart"
3196
+ break
3197
+ fi
3198
+
3199
+ sleep "$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)"
2400
3200
  done
2401
3201
 
2402
3202
  # Write final state after loop exits
@@ -2437,7 +3237,7 @@ run_loop_with_restarts() {
2437
3237
  fi
2438
3238
 
2439
3239
  RESTART_COUNT=$(( RESTART_COUNT + 1 ))
2440
- if type emit_event &>/dev/null 2>&1; then
3240
+ if type emit_event >/dev/null 2>&1; then
2441
3241
  emit_event "loop.restart" "restart=$RESTART_COUNT" "max=$MAX_RESTARTS" "iteration=$ITERATION"
2442
3242
  fi
2443
3243
  info "Session restart ${RESTART_COUNT}/${MAX_RESTARTS} — resetting iteration counter"
@@ -2448,6 +3248,7 @@ run_loop_with_restarts() {
2448
3248
  ITERATION=0
2449
3249
  CONSECUTIVE_FAILURES=0
2450
3250
  EXTENSION_COUNT=0
3251
+ STUCKNESS_COUNT=0
2451
3252
  STATUS="running"
2452
3253
  LOG_ENTRIES=""
2453
3254
  TEST_PASSED=""
@@ -2469,7 +3270,7 @@ run_loop_with_restarts() {
2469
3270
 
2470
3271
  write_state
2471
3272
 
2472
- sleep 2
3273
+ sleep "$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)"
2473
3274
  done
2474
3275
  }
2475
3276