shipwright-cli 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279)
  1. package/.claude/agents/code-reviewer.md +2 -0
  2. package/.claude/agents/devops-engineer.md +2 -0
  3. package/.claude/agents/doc-fleet-agent.md +2 -0
  4. package/.claude/agents/pipeline-agent.md +2 -0
  5. package/.claude/agents/shell-script-specialist.md +2 -0
  6. package/.claude/agents/test-specialist.md +2 -0
  7. package/.claude/hooks/agent-crash-capture.sh +32 -0
  8. package/.claude/hooks/post-tool-use.sh +3 -2
  9. package/.claude/hooks/pre-tool-use.sh +35 -3
  10. package/README.md +4 -4
  11. package/claude-code/hooks/config-change.sh +18 -0
  12. package/claude-code/hooks/instructions-reloaded.sh +7 -0
  13. package/claude-code/hooks/worktree-create.sh +25 -0
  14. package/claude-code/hooks/worktree-remove.sh +20 -0
  15. package/config/code-constitution.json +130 -0
  16. package/dashboard/middleware/auth.ts +134 -0
  17. package/dashboard/middleware/constants.ts +21 -0
  18. package/dashboard/public/index.html +2 -6
  19. package/dashboard/public/styles.css +100 -97
  20. package/dashboard/routes/auth.ts +38 -0
  21. package/dashboard/server.ts +66 -25
  22. package/dashboard/services/config.ts +26 -0
  23. package/dashboard/services/db.ts +118 -0
  24. package/dashboard/src/canvas/pixel-agent.ts +298 -0
  25. package/dashboard/src/canvas/pixel-sprites.ts +440 -0
  26. package/dashboard/src/canvas/shipyard-effects.ts +367 -0
  27. package/dashboard/src/canvas/shipyard-scene.ts +616 -0
  28. package/dashboard/src/canvas/submarine-layout.ts +267 -0
  29. package/dashboard/src/components/header.ts +8 -7
  30. package/dashboard/src/core/router.ts +1 -0
  31. package/dashboard/src/design/submarine-theme.ts +253 -0
  32. package/dashboard/src/main.ts +2 -0
  33. package/dashboard/src/types/api.ts +2 -1
  34. package/dashboard/src/views/activity.ts +2 -1
  35. package/dashboard/src/views/shipyard.ts +39 -0
  36. package/dashboard/types/index.ts +166 -0
  37. package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
  38. package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
  39. package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
  40. package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
  41. package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
  42. package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
  43. package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
  44. package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
  45. package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
  46. package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
  47. package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
  48. package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
  49. package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
  50. package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
  51. package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
  52. package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
  53. package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
  54. package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
  55. package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
  56. package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
  57. package/docs/research/RESEARCH_INDEX.md +439 -0
  58. package/docs/research/RESEARCH_SOURCES.md +440 -0
  59. package/docs/research/RESEARCH_SUMMARY.txt +275 -0
  60. package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
  61. package/package.json +2 -2
  62. package/scripts/lib/adaptive-model.sh +427 -0
  63. package/scripts/lib/adaptive-timeout.sh +316 -0
  64. package/scripts/lib/audit-trail.sh +309 -0
  65. package/scripts/lib/auto-recovery.sh +471 -0
  66. package/scripts/lib/bandit-selector.sh +431 -0
  67. package/scripts/lib/bootstrap.sh +104 -2
  68. package/scripts/lib/causal-graph.sh +455 -0
  69. package/scripts/lib/compat.sh +126 -0
  70. package/scripts/lib/compound-audit.sh +337 -0
  71. package/scripts/lib/constitutional.sh +454 -0
  72. package/scripts/lib/context-budget.sh +359 -0
  73. package/scripts/lib/convergence.sh +594 -0
  74. package/scripts/lib/cost-optimizer.sh +634 -0
  75. package/scripts/lib/daemon-adaptive.sh +10 -0
  76. package/scripts/lib/daemon-dispatch.sh +106 -17
  77. package/scripts/lib/daemon-failure.sh +34 -4
  78. package/scripts/lib/daemon-patrol.sh +23 -2
  79. package/scripts/lib/daemon-poll-github.sh +361 -0
  80. package/scripts/lib/daemon-poll-health.sh +299 -0
  81. package/scripts/lib/daemon-poll.sh +27 -611
  82. package/scripts/lib/daemon-state.sh +112 -66
  83. package/scripts/lib/daemon-triage.sh +10 -0
  84. package/scripts/lib/dod-scorecard.sh +442 -0
  85. package/scripts/lib/error-actionability.sh +300 -0
  86. package/scripts/lib/formal-spec.sh +461 -0
  87. package/scripts/lib/helpers.sh +177 -4
  88. package/scripts/lib/intent-analysis.sh +409 -0
  89. package/scripts/lib/loop-convergence.sh +350 -0
  90. package/scripts/lib/loop-iteration.sh +682 -0
  91. package/scripts/lib/loop-progress.sh +48 -0
  92. package/scripts/lib/loop-restart.sh +185 -0
  93. package/scripts/lib/memory-effectiveness.sh +506 -0
  94. package/scripts/lib/mutation-executor.sh +352 -0
  95. package/scripts/lib/outcome-feedback.sh +521 -0
  96. package/scripts/lib/pipeline-cli.sh +336 -0
  97. package/scripts/lib/pipeline-commands.sh +1216 -0
  98. package/scripts/lib/pipeline-detection.sh +100 -2
  99. package/scripts/lib/pipeline-execution.sh +897 -0
  100. package/scripts/lib/pipeline-github.sh +28 -3
  101. package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
  102. package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
  103. package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
  104. package/scripts/lib/pipeline-intelligence.sh +100 -1136
  105. package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
  106. package/scripts/lib/pipeline-quality-checks.sh +17 -715
  107. package/scripts/lib/pipeline-quality-gates.sh +563 -0
  108. package/scripts/lib/pipeline-stages-build.sh +730 -0
  109. package/scripts/lib/pipeline-stages-delivery.sh +965 -0
  110. package/scripts/lib/pipeline-stages-intake.sh +1133 -0
  111. package/scripts/lib/pipeline-stages-monitor.sh +407 -0
  112. package/scripts/lib/pipeline-stages-review.sh +1022 -0
  113. package/scripts/lib/pipeline-stages.sh +59 -2929
  114. package/scripts/lib/pipeline-state.sh +36 -5
  115. package/scripts/lib/pipeline-util.sh +487 -0
  116. package/scripts/lib/policy-learner.sh +438 -0
  117. package/scripts/lib/process-reward.sh +493 -0
  118. package/scripts/lib/project-detect.sh +649 -0
  119. package/scripts/lib/quality-profile.sh +334 -0
  120. package/scripts/lib/recruit-commands.sh +885 -0
  121. package/scripts/lib/recruit-learning.sh +739 -0
  122. package/scripts/lib/recruit-roles.sh +648 -0
  123. package/scripts/lib/reward-aggregator.sh +458 -0
  124. package/scripts/lib/rl-optimizer.sh +362 -0
  125. package/scripts/lib/root-cause.sh +427 -0
  126. package/scripts/lib/scope-enforcement.sh +445 -0
  127. package/scripts/lib/session-restart.sh +493 -0
  128. package/scripts/lib/skill-memory.sh +300 -0
  129. package/scripts/lib/skill-registry.sh +775 -0
  130. package/scripts/lib/spec-driven.sh +476 -0
  131. package/scripts/lib/test-helpers.sh +18 -7
  132. package/scripts/lib/test-holdout.sh +429 -0
  133. package/scripts/lib/test-optimizer.sh +511 -0
  134. package/scripts/shipwright-file-suggest.sh +45 -0
  135. package/scripts/skills/adversarial-quality.md +61 -0
  136. package/scripts/skills/api-design.md +44 -0
  137. package/scripts/skills/architecture-design.md +50 -0
  138. package/scripts/skills/brainstorming.md +43 -0
  139. package/scripts/skills/data-pipeline.md +44 -0
  140. package/scripts/skills/deploy-safety.md +64 -0
  141. package/scripts/skills/documentation.md +38 -0
  142. package/scripts/skills/frontend-design.md +45 -0
  143. package/scripts/skills/generated/.gitkeep +0 -0
  144. package/scripts/skills/generated/_refinements/.gitkeep +0 -0
  145. package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
  146. package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
  147. package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
  148. package/scripts/skills/generated/cli-version-management.md +29 -0
  149. package/scripts/skills/generated/collection-system-validation.md +99 -0
  150. package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
  151. package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
  152. package/scripts/skills/generated/test-parallelization-detection.md +65 -0
  153. package/scripts/skills/observability.md +79 -0
  154. package/scripts/skills/performance.md +48 -0
  155. package/scripts/skills/pr-quality.md +49 -0
  156. package/scripts/skills/product-thinking.md +43 -0
  157. package/scripts/skills/security-audit.md +49 -0
  158. package/scripts/skills/systematic-debugging.md +40 -0
  159. package/scripts/skills/testing-strategy.md +47 -0
  160. package/scripts/skills/two-stage-review.md +52 -0
  161. package/scripts/skills/validation-thoroughness.md +55 -0
  162. package/scripts/sw +9 -3
  163. package/scripts/sw-activity.sh +9 -2
  164. package/scripts/sw-adaptive.sh +2 -1
  165. package/scripts/sw-adversarial.sh +2 -1
  166. package/scripts/sw-architecture-enforcer.sh +3 -1
  167. package/scripts/sw-auth.sh +12 -2
  168. package/scripts/sw-autonomous.sh +5 -1
  169. package/scripts/sw-changelog.sh +4 -1
  170. package/scripts/sw-checkpoint.sh +2 -1
  171. package/scripts/sw-ci.sh +5 -1
  172. package/scripts/sw-cleanup.sh +4 -26
  173. package/scripts/sw-code-review.sh +10 -4
  174. package/scripts/sw-connect.sh +2 -1
  175. package/scripts/sw-context.sh +2 -1
  176. package/scripts/sw-cost.sh +48 -3
  177. package/scripts/sw-daemon.sh +66 -9
  178. package/scripts/sw-dashboard.sh +3 -1
  179. package/scripts/sw-db.sh +59 -16
  180. package/scripts/sw-decide.sh +8 -2
  181. package/scripts/sw-decompose.sh +360 -17
  182. package/scripts/sw-deps.sh +4 -1
  183. package/scripts/sw-developer-simulation.sh +4 -1
  184. package/scripts/sw-discovery.sh +325 -2
  185. package/scripts/sw-doc-fleet.sh +4 -1
  186. package/scripts/sw-docs-agent.sh +3 -1
  187. package/scripts/sw-docs.sh +2 -1
  188. package/scripts/sw-doctor.sh +453 -2
  189. package/scripts/sw-dora.sh +4 -1
  190. package/scripts/sw-durable.sh +4 -3
  191. package/scripts/sw-e2e-orchestrator.sh +17 -16
  192. package/scripts/sw-eventbus.sh +7 -1
  193. package/scripts/sw-evidence.sh +364 -12
  194. package/scripts/sw-feedback.sh +550 -9
  195. package/scripts/sw-fix.sh +20 -1
  196. package/scripts/sw-fleet-discover.sh +6 -2
  197. package/scripts/sw-fleet-viz.sh +4 -1
  198. package/scripts/sw-fleet.sh +5 -1
  199. package/scripts/sw-github-app.sh +16 -3
  200. package/scripts/sw-github-checks.sh +3 -2
  201. package/scripts/sw-github-deploy.sh +3 -2
  202. package/scripts/sw-github-graphql.sh +18 -7
  203. package/scripts/sw-guild.sh +5 -1
  204. package/scripts/sw-heartbeat.sh +5 -30
  205. package/scripts/sw-hello.sh +67 -0
  206. package/scripts/sw-hygiene.sh +6 -1
  207. package/scripts/sw-incident.sh +265 -1
  208. package/scripts/sw-init.sh +18 -2
  209. package/scripts/sw-instrument.sh +10 -2
  210. package/scripts/sw-intelligence.sh +42 -6
  211. package/scripts/sw-jira.sh +5 -1
  212. package/scripts/sw-launchd.sh +2 -1
  213. package/scripts/sw-linear.sh +4 -1
  214. package/scripts/sw-logs.sh +4 -1
  215. package/scripts/sw-loop.sh +432 -1128
  216. package/scripts/sw-memory.sh +356 -2
  217. package/scripts/sw-mission-control.sh +6 -1
  218. package/scripts/sw-model-router.sh +481 -26
  219. package/scripts/sw-otel.sh +13 -4
  220. package/scripts/sw-oversight.sh +14 -5
  221. package/scripts/sw-patrol-meta.sh +334 -0
  222. package/scripts/sw-pipeline-composer.sh +5 -1
  223. package/scripts/sw-pipeline-vitals.sh +2 -1
  224. package/scripts/sw-pipeline.sh +53 -2664
  225. package/scripts/sw-pm.sh +12 -5
  226. package/scripts/sw-pr-lifecycle.sh +2 -1
  227. package/scripts/sw-predictive.sh +7 -1
  228. package/scripts/sw-prep.sh +185 -2
  229. package/scripts/sw-ps.sh +5 -25
  230. package/scripts/sw-public-dashboard.sh +15 -3
  231. package/scripts/sw-quality.sh +2 -1
  232. package/scripts/sw-reaper.sh +8 -25
  233. package/scripts/sw-recruit.sh +156 -2303
  234. package/scripts/sw-regression.sh +19 -12
  235. package/scripts/sw-release-manager.sh +3 -1
  236. package/scripts/sw-release.sh +4 -1
  237. package/scripts/sw-remote.sh +3 -1
  238. package/scripts/sw-replay.sh +7 -1
  239. package/scripts/sw-retro.sh +158 -1
  240. package/scripts/sw-review-rerun.sh +3 -1
  241. package/scripts/sw-scale.sh +10 -3
  242. package/scripts/sw-security-audit.sh +6 -1
  243. package/scripts/sw-self-optimize.sh +6 -3
  244. package/scripts/sw-session.sh +9 -3
  245. package/scripts/sw-setup.sh +3 -1
  246. package/scripts/sw-stall-detector.sh +406 -0
  247. package/scripts/sw-standup.sh +15 -7
  248. package/scripts/sw-status.sh +3 -1
  249. package/scripts/sw-strategic.sh +4 -1
  250. package/scripts/sw-stream.sh +7 -1
  251. package/scripts/sw-swarm.sh +18 -6
  252. package/scripts/sw-team-stages.sh +13 -6
  253. package/scripts/sw-templates.sh +5 -29
  254. package/scripts/sw-testgen.sh +7 -1
  255. package/scripts/sw-tmux-pipeline.sh +4 -1
  256. package/scripts/sw-tmux-role-color.sh +2 -0
  257. package/scripts/sw-tmux-status.sh +1 -1
  258. package/scripts/sw-tmux.sh +3 -1
  259. package/scripts/sw-trace.sh +3 -1
  260. package/scripts/sw-tracker-github.sh +3 -0
  261. package/scripts/sw-tracker-jira.sh +3 -0
  262. package/scripts/sw-tracker-linear.sh +3 -0
  263. package/scripts/sw-tracker.sh +3 -1
  264. package/scripts/sw-triage.sh +2 -1
  265. package/scripts/sw-upgrade.sh +3 -1
  266. package/scripts/sw-ux.sh +5 -2
  267. package/scripts/sw-webhook.sh +3 -1
  268. package/scripts/sw-widgets.sh +3 -1
  269. package/scripts/sw-worktree.sh +15 -3
  270. package/scripts/test-skill-injection.sh +1233 -0
  271. package/templates/pipelines/autonomous.json +27 -3
  272. package/templates/pipelines/cost-aware.json +34 -8
  273. package/templates/pipelines/deployed.json +12 -0
  274. package/templates/pipelines/enterprise.json +12 -0
  275. package/templates/pipelines/fast.json +6 -0
  276. package/templates/pipelines/full.json +27 -3
  277. package/templates/pipelines/hotfix.json +6 -0
  278. package/templates/pipelines/standard.json +12 -0
  279. package/templates/pipelines/tdd.json +12 -0
@@ -33,6 +33,40 @@ if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
33
33
  fi
34
34
  # Cross-pipeline discovery (learnings from other pipeline runs)
35
35
  [[ -f "$SCRIPT_DIR/sw-discovery.sh" ]] && source "$SCRIPT_DIR/sw-discovery.sh" 2>/dev/null || true
36
+ # Source loop sub-modules for modular iteration management
37
+ [[ -f "$SCRIPT_DIR/lib/loop-iteration.sh" ]] && source "$SCRIPT_DIR/lib/loop-iteration.sh"
38
+ [[ -f "$SCRIPT_DIR/lib/loop-convergence.sh" ]] && source "$SCRIPT_DIR/lib/loop-convergence.sh"
39
+ [[ -f "$SCRIPT_DIR/lib/loop-restart.sh" ]] && source "$SCRIPT_DIR/lib/loop-restart.sh"
40
+ [[ -f "$SCRIPT_DIR/lib/loop-progress.sh" ]] && source "$SCRIPT_DIR/lib/loop-progress.sh"
41
+ # Intelligent session restart with enhanced briefings and cross-session tracking
42
+ [[ -f "$SCRIPT_DIR/lib/session-restart.sh" ]] && source "$SCRIPT_DIR/lib/session-restart.sh"
43
+ # Context window budget monitoring (issue #209)
44
+ # shellcheck source=lib/context-budget.sh
45
+ [[ -f "$SCRIPT_DIR/lib/context-budget.sh" ]] && source "$SCRIPT_DIR/lib/context-budget.sh" 2>/dev/null || true
46
+ # Convergence detection and scoring (issue #203)
47
+ [[ -f "$SCRIPT_DIR/lib/convergence.sh" ]] && source "$SCRIPT_DIR/lib/convergence.sh" 2>/dev/null || true
48
+ # Error actionability scoring and enhancement for better error context
49
+ # shellcheck source=lib/error-actionability.sh
50
+ [[ -f "$SCRIPT_DIR/lib/error-actionability.sh" ]] && source "$SCRIPT_DIR/lib/error-actionability.sh" 2>/dev/null || true
51
+ # Autonomous error recovery with model escalation
52
+ # shellcheck source=lib/auto-recovery.sh
53
+ [[ -f "$SCRIPT_DIR/lib/auto-recovery.sh" ]] && source "$SCRIPT_DIR/lib/auto-recovery.sh" 2>/dev/null || true
54
+ # Test execution optimization (issue #200)
55
+ # shellcheck source=lib/test-optimizer.sh
56
+ [[ -f "$SCRIPT_DIR/lib/test-optimizer.sh" ]] && source "$SCRIPT_DIR/lib/test-optimizer.sh" 2>/dev/null || true
57
+ # Audit trail for compliance-grade pipeline traceability
58
+ # shellcheck source=lib/audit-trail.sh
59
+ [[ -f "$SCRIPT_DIR/lib/audit-trail.sh" ]] && source "$SCRIPT_DIR/lib/audit-trail.sh" 2>/dev/null || true
60
+ # Process reward model for per-step iteration scoring (Phase 3)
61
+ # shellcheck source=lib/process-reward.sh
62
+ [[ -f "$SCRIPT_DIR/lib/process-reward.sh" ]] && source "$SCRIPT_DIR/lib/process-reward.sh" 2>/dev/null || true
63
+ # Cross-session reinforcement learning optimizer (Phase 7)
64
+ # shellcheck source=lib/rl-optimizer.sh
65
+ [[ -f "$SCRIPT_DIR/lib/rl-optimizer.sh" ]] && source "$SCRIPT_DIR/lib/rl-optimizer.sh" 2>/dev/null || true
66
+ # Autoresearch RL modules (Phase 8): reward aggregation, bandit selection, policy learning
67
+ [[ -f "$SCRIPT_DIR/lib/reward-aggregator.sh" ]] && source "$SCRIPT_DIR/lib/reward-aggregator.sh" 2>/dev/null || true
68
+ [[ -f "$SCRIPT_DIR/lib/bandit-selector.sh" ]] && source "$SCRIPT_DIR/lib/bandit-selector.sh" 2>/dev/null || true
69
+ [[ -f "$SCRIPT_DIR/lib/policy-learner.sh" ]] && source "$SCRIPT_DIR/lib/policy-learner.sh" 2>/dev/null || true
36
70
  # Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
37
71
  [[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
38
72
  [[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
@@ -45,6 +79,7 @@ fi
45
79
  if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
46
80
  emit_event() {
47
81
  local event_type="$1"; shift; mkdir -p "${HOME}/.shipwright"
82
+ # shellcheck disable=SC2155
48
83
  local payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
49
84
  while [[ $# -gt 0 ]]; do local key="${1%%=*}" val="${1#*=}"; payload="${payload},\"${key}\":\"${val}\""; shift; done
50
85
  echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
@@ -72,22 +107,27 @@ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
72
107
  SESSION_RESTART=false
73
108
  RESTART_COUNT=0
74
109
  REPO_OVERRIDE=""
75
- VERSION="3.2.0"
110
+ VERSION="3.3.0"
76
111
 
77
112
  # ─── Token Tracking ─────────────────────────────────────────────────────────
78
113
  LOOP_INPUT_TOKENS=0
79
114
  LOOP_OUTPUT_TOKENS=0
80
115
  LOOP_COST_MILLICENTS=0
81
116
 
82
- # ─── Flexible Iteration Defaults ────────────────────────────────────────────
83
- AUTO_EXTEND=true # Auto-extend iterations when work is incomplete
84
- EXTENSION_SIZE=5 # Additional iterations per extension
85
- MAX_EXTENSIONS=3 # Max number of extensions (hard cap safety net)
86
- EXTENSION_COUNT=0 # Current number of extensions applied
117
+ # ─── Flexible Iteration Defaults (all config-driven) ───────────────────────
118
+ AUTO_EXTEND=true
119
+ EXTENSION_SIZE=$(_smart_int "loop.extension_size" 5)
120
+ MAX_EXTENSIONS=$(_smart_int "loop.max_extensions" 3)
121
+ EXTENSION_COUNT=0
87
122
 
88
- # ─── Circuit Breaker Defaults ──────────────────────────────────────────────
89
- CIRCUIT_BREAKER_THRESHOLD=3 # Consecutive low-progress iterations before stopping
90
- MIN_PROGRESS_LINES=5 # Minimum insertions to count as progress
123
+ # ─── Circuit Breaker Defaults (config-driven) ─────────────────────────────
124
+ CIRCUIT_BREAKER_THRESHOLD=$(_smart_int "loop.circuit_breaker_threshold" 3)
125
+ MIN_PROGRESS_LINES=$(_smart_int "loop.min_progress_lines" 5)
126
+
127
+ # ─── Context Exhaustion Recovery ────────────────────────────────────────────────
128
+ CONTEXT_EXHAUSTION_PATTERNS="context.length.exceeded|maximum context length|context_length_exceeded|prompt is too long"
129
+ CONTEXT_RESTART_COUNT=0
130
+ CONTEXT_RESTART_LIMIT=$(_smart_int "loop.context_restart_limit" 2)
91
131
 
92
132
  # ─── Audit & Quality Gate Defaults ───────────────────────────────────────────
93
133
  AUDIT_ENABLED=false
@@ -98,6 +138,16 @@ AUDIT_RESULT=""
98
138
  COMPLETION_REJECTED=false
99
139
  QUALITY_GATE_PASSED=true
100
140
 
141
+ # ─── Multi-Test Defaults ──────────────────────────────────────────────────
142
+ ADDITIONAL_TEST_CMDS=() # Array of extra test commands (from --additional-test-cmds)
143
+
144
+ # ─── Context Budget ──────────────────────────────────────────────────────────
145
+ CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-200000}" # Max prompt chars before trimming
146
+
147
+ # ─── Claude CLI Flags ─────────────────────────────────────────────────────────
148
+ EFFORT_LEVEL="${SW_EFFORT_LEVEL:-}"
149
+ FALLBACK_MODEL="${SW_FALLBACK_MODEL:-}" # Empty = no fallback flag (intelligent default)
150
+
101
151
  # ─── Parse Arguments ──────────────────────────────────────────────────────────
102
152
  show_help() {
103
153
  echo -e "${CYAN}${BOLD}shipwright${RESET} ${DIM}v${VERSION}${RESET} — ${BOLD}Continuous Loop${RESET}"
@@ -112,7 +162,10 @@ show_help() {
112
162
  echo -e " ${CYAN}--test-cmd${RESET} \"cmd\" Test command to run between iterations"
113
163
  echo -e " ${CYAN}--fast-test-cmd${RESET} \"cmd\" Fast/subset test command (alternates with full)"
114
164
  echo -e " ${CYAN}--fast-test-interval${RESET} N Run full tests every N iterations (default: 5)"
165
+ echo -e " ${CYAN}--additional-test-cmds${RESET} \"cmd\" Extra test command (repeatable)"
115
166
  echo -e " ${CYAN}--model${RESET} MODEL Claude model to use (default: opus)"
167
+ echo -e " ${CYAN}--effort${RESET} low|medium|high Effort level for Claude reasoning (default: auto per stage)"
168
+ echo -e " ${CYAN}--fallback-model${RESET} MODEL Fallback model on rate limits (default: sonnet)"
116
169
  echo -e " ${CYAN}--agents${RESET} N Number of parallel agents (default: 1)"
117
170
  echo -e " ${CYAN}--roles${RESET} \"r1,r2,...\" Role per agent: builder,reviewer,tester,optimizer,docs,security"
118
171
  echo -e " ${CYAN}--worktree${RESET} Use git worktrees for isolation (auto if agents > 1)"
@@ -186,6 +239,18 @@ while [[ $# -gt 0 ]]; do
186
239
  shift 2
187
240
  ;;
188
241
  --model=*) MODEL="${1#--model=}"; shift ;;
242
+ --effort)
243
+ EFFORT_LEVEL="${2:-}"
244
+ [[ -z "$EFFORT_LEVEL" ]] && { error "Missing value for --effort"; exit 1; }
245
+ shift 2
246
+ ;;
247
+ --effort=*) EFFORT_LEVEL="${1#--effort=}"; shift ;;
248
+ --fallback-model)
249
+ FALLBACK_MODEL="${2:-}"
250
+ [[ -z "$FALLBACK_MODEL" ]] && { error "Missing value for --fallback-model"; exit 1; }
251
+ shift 2
252
+ ;;
253
+ --fallback-model=*) FALLBACK_MODEL="${1#--fallback-model=}"; shift ;;
189
254
  --agents)
190
255
  AGENTS="${2:-}"
191
256
  [[ -z "$AGENTS" ]] && { error "Missing value for --agents"; exit 1; }
@@ -236,6 +301,12 @@ while [[ $# -gt 0 ]]; do
236
301
  shift 2
237
302
  ;;
238
303
  --fast-test-interval=*) FAST_TEST_INTERVAL="${1#--fast-test-interval=}"; shift ;;
304
+ --additional-test-cmds)
305
+ ADDITIONAL_TEST_CMDS+=("${2:-}")
306
+ [[ -z "${2:-}" ]] && { error "Missing value for --additional-test-cmds"; exit 1; }
307
+ shift 2
308
+ ;;
309
+ --additional-test-cmds=*) ADDITIONAL_TEST_CMDS+=("${1#--additional-test-cmds=}"); shift ;;
239
310
  --max-restarts)
240
311
  MAX_RESTARTS="${2:-}"
241
312
  [[ -z "$MAX_RESTARTS" ]] && { error "Missing value for --max-restarts"; exit 1; }
@@ -273,6 +344,7 @@ done
273
344
 
274
345
  # Auto-enable worktree for multi-agent
275
346
  if [[ "$AGENTS" -gt 1 ]]; then
347
+ # shellcheck disable=SC2034
276
348
  USE_WORKTREE=true
277
349
  fi
278
350
 
@@ -309,6 +381,12 @@ if ! [[ "$MAX_RESTARTS" =~ ^[0-9]+$ ]]; then
309
381
  exit 1
310
382
  fi
311
383
 
384
+ # Validate effort level
385
+ if [[ -n "$EFFORT_LEVEL" ]] && [[ "$EFFORT_LEVEL" != "low" && "$EFFORT_LEVEL" != "medium" && "$EFFORT_LEVEL" != "high" ]]; then
386
+ error "--effort must be low, medium, or high (got: $EFFORT_LEVEL)"
387
+ exit 1
388
+ fi
389
+
312
390
  # ─── Validate Inputs ─────────────────────────────────────────────────────────
313
391
 
314
392
  if ! $RESUME && [[ -z "$GOAL" ]]; then
@@ -382,6 +460,16 @@ WORKTREE_DIR="$PROJECT_ROOT/.worktrees"
382
460
 
383
461
  mkdir -p "$STATE_DIR" "$LOG_DIR"
384
462
 
463
+ # ─── Context Budget Initialization ────────────────────────────────────────────
464
+ # Initialize context window budget tracker (issue #209)
465
+ ARTIFACTS_DIR="${STATE_DIR}/pipeline-artifacts"
466
+ mkdir -p "$ARTIFACTS_DIR"
467
+ if type context_budget_init >/dev/null 2>&1; then
468
+ # Set total budget (default 800K, configurable via env/config)
469
+ CONTEXT_BUDGET="${CONTEXT_BUDGET_TOKENS:-800000}"
470
+ context_budget_init "$CONTEXT_BUDGET" "$ARTIFACTS_DIR" 2>/dev/null || true
471
+ fi
472
+
385
473
  # ─── Adaptive Model Selection ────────────────────────────────────────────────
386
474
  # Uses intelligence engine when available, falls back to defaults.
387
475
  select_adaptive_model() {
@@ -505,16 +593,28 @@ _extract_text_from_json() {
505
593
  local first_char
506
594
  first_char=$(head -c1 "$json_file" 2>/dev/null || true)
507
595
 
508
- # Case 2: Valid JSON array — extract .result from last element
509
- if [[ "$first_char" == "[" ]] && command -v jq >/dev/null 2>&1; then
596
+ # Case 2: Valid JSON (array or object) — extract text with jq
597
+ if [[ ("$first_char" == "[" || "$first_char" == "{") ]] && command -v jq >/dev/null 2>&1; then
510
598
  local extracted
511
- extracted=$(jq -r '.[-1].result // empty' "$json_file" 2>/dev/null) || true
512
- if [[ -n "$extracted" ]]; then
513
- echo "$extracted" > "$log_file"
514
- return 0
599
+ if [[ "$first_char" == "[" ]]; then
600
+ # Array: extract .result from last element
601
+ extracted=$(jq -r '.[-1].result // empty' "$json_file" 2>/dev/null) || true
602
+ if [[ -n "$extracted" ]]; then
603
+ echo "$extracted" > "$log_file"
604
+ return 0
605
+ fi
606
+ # Try .content fields
607
+ extracted=$(jq -r '.[].content // empty' "$json_file" 2>/dev/null | head -500) || true
608
+ else
609
+ # Object: extract .result directly
610
+ extracted=$(jq -r '.result // empty' "$json_file" 2>/dev/null) || true
611
+ if [[ -n "$extracted" ]]; then
612
+ echo "$extracted" > "$log_file"
613
+ return 0
614
+ fi
615
+ # Try .content field
616
+ extracted=$(jq -r '.content // empty' "$json_file" 2>/dev/null) || true
515
617
  fi
516
- # jq succeeded but result was null/empty — try .content or raw text
517
- extracted=$(jq -r '.[].content // empty' "$json_file" 2>/dev/null | head -500) || true
518
618
  if [[ -n "$extracted" ]]; then
519
619
  echo "$extracted" > "$log_file"
520
620
  return 0
@@ -525,7 +625,7 @@ _extract_text_from_json() {
525
625
  return 0
526
626
  fi
527
627
 
528
- # Case 3: Looks like JSON but no jq — can't parse, use raw
628
+ # Case 3: Looks like JSON but jq is not available — can't parse, use raw
529
629
  if [[ "$first_char" == "[" || "$first_char" == "{" ]]; then
530
630
  warn "JSON output but jq not available — using raw output"
531
631
  cp "$json_file" "$log_file"
@@ -546,6 +646,7 @@ write_loop_tokens() {
546
646
  fi
547
647
  local tmp_file
548
648
  tmp_file=$(mktemp "${token_file}.XXXXXX" 2>/dev/null || mktemp)
649
+ # shellcheck disable=SC2064
549
650
  trap "rm -f '$tmp_file'" RETURN
550
651
  cat > "$tmp_file" <<TOKJSON
551
652
  {"input_tokens":${LOOP_INPUT_TOKENS},"output_tokens":${LOOP_OUTPUT_TOKENS},"cost_usd":${cost_usd},"iterations":${ITERATION:-0}}
@@ -599,38 +700,8 @@ apply_adaptive_budget() {
599
700
  ITERATION_LINES_CHANGED=""
600
701
  VELOCITY_HISTORY=""
601
702
 
602
- track_iteration_velocity() {
603
- local changes
604
- changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null | tail -1 || echo "")"
605
- local insertions
606
- insertions="$(echo "$changes" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)"
607
- ITERATION_LINES_CHANGED="${insertions:-0}"
608
- if [[ -n "$VELOCITY_HISTORY" ]]; then
609
- VELOCITY_HISTORY="${VELOCITY_HISTORY},${ITERATION_LINES_CHANGED}"
610
- else
611
- VELOCITY_HISTORY="${ITERATION_LINES_CHANGED}"
612
- fi
613
- }
614
703
 
615
704
  # Compute average lines/iteration from recent history
616
- compute_velocity_avg() {
617
- if [[ -z "$VELOCITY_HISTORY" ]]; then
618
- echo "0"
619
- return 0
620
- fi
621
- local total=0 count=0
622
- local IFS=','
623
- local val
624
- for val in $VELOCITY_HISTORY; do
625
- total=$((total + val))
626
- count=$((count + 1))
627
- done
628
- if [[ "$count" -gt 0 ]]; then
629
- echo $((total / count))
630
- else
631
- echo "0"
632
- fi
633
- }
634
705
 
635
706
  # ─── Timing Helpers ───────────────────────────────────────────────────────────
636
707
 
@@ -656,191 +727,10 @@ TEST_PASSED=""
656
727
  TEST_OUTPUT=""
657
728
  LOG_ENTRIES=""
658
729
 
659
- initialize_state() {
660
- ITERATION=0
661
- CONSECUTIVE_FAILURES=0
662
- TOTAL_COMMITS=0
663
- START_EPOCH="$(now_epoch)"
664
- STATUS="running"
665
- LOG_ENTRIES=""
666
-
667
- # Record starting commit for cumulative diff in quality gates
668
- LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
669
-
670
- write_state
671
- }
672
-
673
- resume_state() {
674
- if [[ ! -f "$STATE_FILE" ]]; then
675
- error "No state file found at $STATE_FILE"
676
- echo -e " Start a new loop instead: ${DIM}shipwright loop \"<goal>\"${RESET}"
677
- exit 1
678
- fi
679
-
680
- info "Resuming from $STATE_FILE"
681
730
 
682
- # Save CLI values before parsing state (CLI takes precedence)
683
- local cli_max_iterations="$MAX_ITERATIONS"
684
731
 
685
- # Parse YAML front matter
686
- local in_frontmatter=false
687
- while IFS= read -r line; do
688
- if [[ "$line" == "---" ]]; then
689
- if $in_frontmatter; then
690
- break
691
- else
692
- in_frontmatter=true
693
- continue
694
- fi
695
- fi
696
- if $in_frontmatter; then
697
- case "$line" in
698
- goal:*) [[ -z "$GOAL" ]] && GOAL="$(echo "${line#goal:}" | sed 's/^ *"//;s/" *$//')" ;;
699
- iteration:*) ITERATION="$(echo "${line#iteration:}" | tr -d ' ')" ;;
700
- max_iterations:*) MAX_ITERATIONS="$(echo "${line#max_iterations:}" | tr -d ' ')" ;;
701
- status:*) STATUS="$(echo "${line#status:}" | tr -d ' ')" ;;
702
- test_cmd:*) [[ -z "$TEST_CMD" ]] && TEST_CMD="$(echo "${line#test_cmd:}" | sed 's/^ *"//;s/" *$//')" ;;
703
- model:*) MODEL="$(echo "${line#model:}" | tr -d ' ')" ;;
704
- agents:*) AGENTS="$(echo "${line#agents:}" | tr -d ' ')" ;;
705
- consecutive_failures:*) CONSECUTIVE_FAILURES="$(echo "${line#consecutive_failures:}" | tr -d ' ')" ;;
706
- total_commits:*) TOTAL_COMMITS="$(echo "${line#total_commits:}" | tr -d ' ')" ;;
707
- audit_enabled:*) AUDIT_ENABLED="$(echo "${line#audit_enabled:}" | tr -d ' ')" ;;
708
- audit_agent_enabled:*) AUDIT_AGENT_ENABLED="$(echo "${line#audit_agent_enabled:}" | tr -d ' ')" ;;
709
- quality_gates_enabled:*) QUALITY_GATES_ENABLED="$(echo "${line#quality_gates_enabled:}" | tr -d ' ')" ;;
710
- dod_file:*) DOD_FILE="$(echo "${line#dod_file:}" | sed 's/^ *"//;s/" *$//')" ;;
711
- auto_extend:*) AUTO_EXTEND="$(echo "${line#auto_extend:}" | tr -d ' ')" ;;
712
- extension_count:*) EXTENSION_COUNT="$(echo "${line#extension_count:}" | tr -d ' ')" ;;
713
- max_extensions:*) MAX_EXTENSIONS="$(echo "${line#max_extensions:}" | tr -d ' ')" ;;
714
- esac
715
- fi
716
- done < "$STATE_FILE"
717
732
 
718
- # CLI --max-iterations overrides state file
719
- if $MAX_ITERATIONS_EXPLICIT; then
720
- MAX_ITERATIONS="$cli_max_iterations"
721
- fi
722
733
 
723
- # Extract the log section (everything after ## Log)
724
- LOG_ENTRIES="$(sed -n '/^## Log$/,$ { /^## Log$/d; p; }' "$STATE_FILE" 2>/dev/null || true)"
725
-
726
- if [[ -z "$GOAL" ]]; then
727
- error "Could not parse goal from state file."
728
- exit 1
729
- fi
730
-
731
- if [[ "$STATUS" == "complete" ]]; then
732
- warn "Previous loop completed. Start a new one or edit the state file."
733
- exit 0
734
- fi
735
-
736
- # Reset circuit breaker on resume
737
- CONSECUTIVE_FAILURES=0
738
- START_EPOCH="$(now_epoch)"
739
- STATUS="running"
740
-
741
- # Set starting commit for cumulative diff (approximate: use earliest tracked commit)
742
- if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
743
- LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
744
- fi
745
-
746
- # If we hit max iterations before, warn user to extend
747
- if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
748
- warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
749
- echo -e " Extend with: ${DIM}shipwright loop --resume --max-iterations $(( MAX_ITERATIONS + 10 ))${RESET}"
750
- exit 0
751
- fi
752
-
753
- # Restore Claude context for meaningful resume (source so exports persist to this shell)
754
- if [[ -f "$SCRIPT_DIR/sw-checkpoint.sh" ]] && [[ -d "${PROJECT_ROOT:-}" ]]; then
755
- source "$SCRIPT_DIR/sw-checkpoint.sh"
756
- local _orig_pwd="$PWD"
757
- cd "$PROJECT_ROOT" 2>/dev/null || true
758
- if checkpoint_restore_context "build" 2>/dev/null; then
759
- RESUMED_FROM_ITERATION="${RESTORED_ITERATION:-}"
760
- RESUMED_MODIFIED="${RESTORED_MODIFIED:-}"
761
- RESUMED_FINDINGS="${RESTORED_FINDINGS:-}"
762
- RESUMED_TEST_OUTPUT="${RESTORED_TEST_OUTPUT:-}"
763
- [[ -n "${RESTORED_ITERATION:-}" && "${RESTORED_ITERATION:-0}" -gt 0 ]] && info "Restored context from iteration ${RESTORED_ITERATION}"
764
- fi
765
- cd "$_orig_pwd" 2>/dev/null || true
766
- fi
767
-
768
- success "Resumed: iteration $ITERATION/$MAX_ITERATIONS"
769
- }
770
-
771
- write_state() {
772
- local tmp_state="${STATE_FILE}.tmp.$$"
773
- # Use printf instead of heredoc to avoid delimiter injection from GOAL
774
- {
775
- printf -- '---\n'
776
- printf 'goal: "%s"\n' "$GOAL"
777
- printf 'iteration: %s\n' "$ITERATION"
778
- printf 'max_iterations: %s\n' "$MAX_ITERATIONS"
779
- printf 'status: %s\n' "$STATUS"
780
- printf 'test_cmd: "%s"\n' "$TEST_CMD"
781
- printf 'model: %s\n' "$MODEL"
782
- printf 'agents: %s\n' "$AGENTS"
783
- printf 'started_at: %s\n' "$(now_iso)"
784
- printf 'last_iteration_at: %s\n' "$(now_iso)"
785
- printf 'consecutive_failures: %s\n' "$CONSECUTIVE_FAILURES"
786
- printf 'total_commits: %s\n' "$TOTAL_COMMITS"
787
- printf 'audit_enabled: %s\n' "$AUDIT_ENABLED"
788
- printf 'audit_agent_enabled: %s\n' "$AUDIT_AGENT_ENABLED"
789
- printf 'quality_gates_enabled: %s\n' "$QUALITY_GATES_ENABLED"
790
- printf 'dod_file: "%s"\n' "$DOD_FILE"
791
- printf 'auto_extend: %s\n' "$AUTO_EXTEND"
792
- printf 'extension_count: %s\n' "$EXTENSION_COUNT"
793
- printf 'max_extensions: %s\n' "$MAX_EXTENSIONS"
794
- printf -- '---\n\n'
795
- printf '## Log\n'
796
- printf '%s\n' "$LOG_ENTRIES"
797
- } > "$tmp_state"
798
- if ! mv "$tmp_state" "$STATE_FILE" 2>/dev/null; then
799
- warn "Failed to write state file: $STATE_FILE"
800
- fi
801
- }
802
-
803
- write_progress() {
804
- local progress_file="$LOG_DIR/progress.md"
805
- local recent_commits
806
- recent_commits=$(git -C "$PROJECT_ROOT" log --oneline -5 2>/dev/null || echo "(no commits)")
807
- local changed_files
808
- changed_files=$(git -C "$PROJECT_ROOT" diff --name-only HEAD~3 2>/dev/null | head -20 || echo "(none)")
809
- local last_error=""
810
- local prev_test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
811
- if [[ -f "$prev_test_log" ]] && [[ "${TEST_PASSED:-}" == "false" ]]; then
812
- last_error=$(tail -10 "$prev_test_log" 2>/dev/null || true)
813
- fi
814
-
815
- # Use printf to avoid heredoc delimiter injection from GOAL content
816
- local tmp_progress="${progress_file}.tmp.$$"
817
- {
818
- printf '# Session Progress (Auto-Generated)\n\n'
819
- printf '## Goal\n%s\n\n' "${GOAL}"
820
- printf '## Status\n'
821
- printf -- '- Iteration: %s/%s\n' "${ITERATION}" "${MAX_ITERATIONS}"
822
- printf -- '- Session restart: %s/%s\n' "${RESTART_COUNT:-0}" "${MAX_RESTARTS:-0}"
823
- printf -- '- Tests passing: %s\n' "${TEST_PASSED:-unknown}"
824
- printf -- '- Status: %s\n\n' "${STATUS:-running}"
825
- printf '## Recent Commits\n%s\n\n' "${recent_commits}"
826
- printf '## Changed Files\n%s\n\n' "${changed_files}"
827
- if [[ -n "$last_error" ]]; then
828
- printf '## Last Error\n%s\n\n' "$last_error"
829
- fi
830
- printf '## Timestamp\n%s\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
831
- } > "$tmp_progress" 2>/dev/null
832
- mv "$tmp_progress" "$progress_file" 2>/dev/null || rm -f "$tmp_progress" 2>/dev/null
833
- }
834
-
835
- append_log_entry() {
836
- local entry="$1"
837
- if [[ -n "$LOG_ENTRIES" ]]; then
838
- LOG_ENTRIES="${LOG_ENTRIES}
839
- ${entry}"
840
- else
841
- LOG_ENTRIES="$entry"
842
- fi
843
- }
844
734
 
845
735
  # ─── Semantic Validation for Claude Output ─────────────────────────────────────
846
736
  # Validates changed files before commit to catch syntax errors and API error leakage.
@@ -963,155 +853,12 @@ git_auto_commit() {
963
853
 
964
854
  # ─── Fatal Error Detection ────────────────────────────────────────────────────
965
855
 
966
- check_fatal_error() {
967
- local log_file="$1"
968
- local cli_exit_code="${2:-0}"
969
- [[ -f "$log_file" ]] || return 1
970
-
971
- # Known fatal error patterns from Claude CLI / Anthropic API
972
- local fatal_patterns="Invalid API key|invalid_api_key|authentication_error|API key expired"
973
- fatal_patterns="${fatal_patterns}|rate_limit_error|overloaded_error|billing"
974
- fatal_patterns="${fatal_patterns}|Could not resolve host|connection refused|ECONNREFUSED"
975
- fatal_patterns="${fatal_patterns}|ANTHROPIC_API_KEY.*not set|No API key"
976
-
977
- if grep -qiE "$fatal_patterns" "$log_file" 2>/dev/null; then
978
- local match
979
- match=$(grep -iE "$fatal_patterns" "$log_file" 2>/dev/null | head -1 | cut -c1-120)
980
- error "Fatal CLI error: $match"
981
- return 1 # fatal error detected
982
- fi
983
-
984
- # Non-zero exit + tiny output = likely CLI crash
985
- if [[ "$cli_exit_code" -ne 0 ]]; then
986
- local line_count
987
- line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
988
- line_count="${line_count:-0}"
989
- if [[ "$line_count" -lt 3 ]]; then
990
- local content
991
- content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
992
- error "CLI exited $cli_exit_code with minimal output: $content"
993
- return 0
994
- fi
995
- fi
996
-
997
- return 1 # no fatal error
998
- }
999
856
 
1000
857
  # ─── Progress & Circuit Breaker ───────────────────────────────────────────────
1001
858
 
1002
- check_progress() {
1003
- local changes
1004
- # Exclude loop bookkeeping files — only count real code changes as progress
1005
- changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 \
1006
- -- . ':!.claude/loop-state.md' ':!.claude/pipeline-state.md' \
1007
- ':!**/progress.md' ':!**/error-summary.json' \
1008
- 2>/dev/null | tail -1 || echo "")"
1009
- local insertions
1010
- insertions="$(echo "$changes" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)"
1011
- if [[ "${insertions:-0}" -lt "$MIN_PROGRESS_LINES" ]]; then
1012
- return 1 # No meaningful progress
1013
- fi
1014
- return 0
1015
- }
1016
-
1017
- check_completion() {
1018
- local log_file="$1"
1019
- grep -q "LOOP_COMPLETE" "$log_file" 2>/dev/null
1020
- }
1021
-
1022
- check_circuit_breaker() {
1023
- # Vitals-driven circuit breaker (preferred over static threshold)
1024
- if type pipeline_compute_vitals >/dev/null 2>&1 && type pipeline_health_verdict >/dev/null 2>&1; then
1025
- local _vitals_json _verdict
1026
- local _loop_state="${STATE_FILE:-}"
1027
- local _loop_artifacts="${ARTIFACTS_DIR:-}"
1028
- local _loop_issue="${ISSUE_NUMBER:-}"
1029
- _vitals_json=$(pipeline_compute_vitals "$_loop_state" "$_loop_artifacts" "$_loop_issue" 2>/dev/null) || true
1030
- if [[ -n "$_vitals_json" && "$_vitals_json" != "{}" ]]; then
1031
- _verdict=$(echo "$_vitals_json" | jq -r '.verdict // "continue"' 2>/dev/null || echo "continue")
1032
- if [[ "$_verdict" == "abort" ]]; then
1033
- local _health_score
1034
- _health_score=$(echo "$_vitals_json" | jq -r '.health_score // 0' 2>/dev/null || echo "0")
1035
- error "Vitals circuit breaker: health score ${_health_score}/100 — aborting (${CONSECUTIVE_FAILURES} stagnant iterations)"
1036
- STATUS="circuit_breaker"
1037
- return 1
1038
- fi
1039
- # Vitals say continue/warn/intervene — don't trip circuit breaker yet
1040
- if [[ "$_verdict" == "continue" || "$_verdict" == "warn" ]]; then
1041
- return 0
1042
- fi
1043
- fi
1044
- fi
1045
-
1046
- # Fallback: static threshold circuit breaker
1047
- if [[ "$CONSECUTIVE_FAILURES" -ge "$CIRCUIT_BREAKER_THRESHOLD" ]]; then
1048
- error "Circuit breaker tripped: ${CIRCUIT_BREAKER_THRESHOLD} consecutive iterations with no meaningful progress."
1049
- STATUS="circuit_breaker"
1050
- return 1
1051
- fi
1052
- return 0
1053
- }
1054
-
1055
- check_max_iterations() {
1056
- if [[ "$ITERATION" -le "$MAX_ITERATIONS" ]]; then
1057
- return 0
1058
- fi
1059
-
1060
- # Hit the cap — check if we should auto-extend
1061
- if ! $AUTO_EXTEND || [[ "$EXTENSION_COUNT" -ge "$MAX_EXTENSIONS" ]]; then
1062
- if [[ "$EXTENSION_COUNT" -ge "$MAX_EXTENSIONS" ]]; then
1063
- warn "Hard cap reached: ${EXTENSION_COUNT} extensions applied (max ${MAX_EXTENSIONS})."
1064
- fi
1065
- warn "Max iterations ($MAX_ITERATIONS) reached."
1066
- STATUS="max_iterations"
1067
- return 1
1068
- fi
1069
859
 
1070
- # Checkpoint audit: is there meaningful progress worth extending for?
1071
- echo -e "\n ${CYAN}${BOLD}▸ Checkpoint${RESET} — max iterations ($MAX_ITERATIONS) reached, evaluating progress..."
1072
860
 
1073
- local should_extend=false
1074
- local extension_reason=""
1075
861
 
1076
- # Check 1: recent meaningful progress (not stuck)
1077
- if [[ "${CONSECUTIVE_FAILURES:-0}" -lt 2 ]]; then
1078
- # Check 2: agent hasn't signaled completion (if it did, guard_completion handles it)
1079
- local last_log="$LOG_DIR/iteration-$(( ITERATION - 1 )).log"
1080
- if [[ -f "$last_log" ]] && ! grep -q "LOOP_COMPLETE" "$last_log" 2>/dev/null; then
1081
- should_extend=true
1082
- extension_reason="work in progress with recent progress"
1083
- fi
1084
- fi
1085
-
1086
- # Check 3: if quality gates or tests are failing, extend to let agent fix them
1087
- if [[ "$TEST_PASSED" == "false" ]] || ! $QUALITY_GATE_PASSED; then
1088
- should_extend=true
1089
- extension_reason="quality gates or tests not yet passing"
1090
- fi
1091
-
1092
- if $should_extend; then
1093
- # Scale extension size by velocity — good progress earns more iterations
1094
- local velocity_avg
1095
- velocity_avg="$(compute_velocity_avg)"
1096
- local effective_extension="$EXTENSION_SIZE"
1097
- if [[ "$velocity_avg" -gt 20 ]]; then
1098
- # High velocity: grant more iterations
1099
- effective_extension=$(( EXTENSION_SIZE + 3 ))
1100
- elif [[ "$velocity_avg" -lt 5 ]]; then
1101
- # Low velocity: grant fewer iterations
1102
- effective_extension=$(( EXTENSION_SIZE > 2 ? EXTENSION_SIZE - 2 : 1 ))
1103
- fi
1104
- EXTENSION_COUNT=$(( EXTENSION_COUNT + 1 ))
1105
- MAX_ITERATIONS=$(( MAX_ITERATIONS + effective_extension ))
1106
- echo -e " ${GREEN}✓${RESET} Auto-extending: +${effective_extension} iterations (now ${MAX_ITERATIONS} max, extension ${EXTENSION_COUNT}/${MAX_EXTENSIONS})"
1107
- echo -e " ${DIM}Reason: ${extension_reason} | velocity: ~${velocity_avg} lines/iter${RESET}"
1108
- return 0
1109
- fi
1110
-
1111
- warn "Max iterations reached — no recent progress detected."
1112
- STATUS="max_iterations"
1113
- return 1
1114
- }
1115
862
 
1116
863
  # ─── Failure Diagnosis ─────────────────────────────────────────────────────────
1117
864
  # Pattern-based root-cause classification for smarter retries (no Claude needed).
@@ -1156,7 +903,7 @@ diagnose_failure() {
1156
903
  fi
1157
904
 
1158
905
  # Check if we've seen this diagnosis before in this session
1159
- local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
906
+ local diagnosis_file="${LOG_DIR}/diagnoses.txt"
1160
907
  local repeat_count=0
1161
908
  if [[ -f "$diagnosis_file" ]]; then
1162
909
  repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || true)
@@ -1224,7 +971,7 @@ INSTRUCTION: This error has occurred $repeat_count times. The previous approach
1224
971
  # ─── Test Gate ────────────────────────────────────────────────────────────────
1225
972
 
1226
973
  run_test_gate() {
1227
- if [[ -z "$TEST_CMD" ]]; then
974
+ if [[ -z "$TEST_CMD" ]] && [[ ${#ADDITIONAL_TEST_CMDS[@]} -eq 0 ]]; then
1228
975
  TEST_PASSED=""
1229
976
  TEST_OUTPUT=""
1230
977
  return
@@ -1244,24 +991,91 @@ run_test_gate() {
1244
991
  fi
1245
992
  fi
1246
993
 
1247
- local test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
1248
- TEST_LOG_FILE="$test_log"
1249
- echo -e " ${DIM}Running ${test_mode} tests...${RESET}"
1250
- # Wrap test command with timeout (5 min default) to prevent hanging
1251
- local test_timeout="${SW_TEST_TIMEOUT:-300}"
1252
- local test_wrapper="$active_test_cmd"
1253
- if command -v timeout >/dev/null 2>&1; then
1254
- test_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
1255
- elif command -v gtimeout >/dev/null 2>&1; then
1256
- test_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
1257
- fi
1258
- if bash -c "$test_wrapper" > "$test_log" 2>&1; then
1259
- TEST_PASSED=true
1260
- TEST_OUTPUT="All tests passed (${test_mode} mode)."
1261
- else
1262
- TEST_PASSED=false
1263
- TEST_OUTPUT="$(tail -50 "$test_log")"
994
+ local all_passed=true
995
+ local test_results="[]"
996
+ local combined_output=""
997
+ local test_timeout="${SW_TEST_TIMEOUT:-900}"
998
+
999
+ # Run primary test command
1000
+ if [[ -n "$active_test_cmd" ]]; then
1001
+ local test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
1002
+ TEST_LOG_FILE="$test_log"
1003
+ echo -e " ${DIM}Running ${test_mode} tests...${RESET}"
1004
+
1005
+ local test_wrapper="$active_test_cmd"
1006
+ if command -v timeout >/dev/null 2>&1; then
1007
+ test_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
1008
+ elif command -v gtimeout >/dev/null 2>&1; then
1009
+ test_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
1010
+ fi
1011
+
1012
+ local start_ts exit_code=0
1013
+ start_ts=$(date +%s)
1014
+ bash -c "$test_wrapper" > "$test_log" 2>&1 || exit_code=$?
1015
+ local duration=$(( $(date +%s) - start_ts ))
1016
+
1017
+ if command -v jq >/dev/null 2>&1; then
1018
+ test_results=$(echo "$test_results" | jq --arg cmd "$active_test_cmd" \
1019
+ --argjson exit "$exit_code" --argjson dur "$duration" \
1020
+ '. + [{"command": $cmd, "exit_code": $exit, "duration_s": $dur}]')
1021
+ fi
1022
+
1023
+ [[ "$exit_code" -ne 0 ]] && all_passed=false
1024
+ combined_output+="$(cat "$test_log" 2>/dev/null)"$'\n'
1025
+ fi
1026
+
1027
+ # Run additional test commands (discovered or explicit)
1028
+ # Mid-build discovery: find test files created since loop start
1029
+ local mid_build_cmds=()
1030
+ if [[ -n "${LOOP_START_COMMIT:-}" ]] && type detect_created_test_files >/dev/null 2>&1; then
1031
+ while IFS= read -r _cmd; do
1032
+ [[ -n "$_cmd" ]] && mid_build_cmds+=("$_cmd")
1033
+ done < <(detect_created_test_files "$LOOP_START_COMMIT" 2>/dev/null || true)
1264
1034
  fi
1035
+ local all_extra=("${ADDITIONAL_TEST_CMDS[@]+"${ADDITIONAL_TEST_CMDS[@]}"}" "${mid_build_cmds[@]+"${mid_build_cmds[@]}"}")
1036
+
1037
+ for extra_cmd in "${all_extra[@]+"${all_extra[@]}"}"; do
1038
+ [[ -z "$extra_cmd" ]] && continue
1039
+ local extra_log="${LOG_DIR}/tests-extra-iter-${ITERATION}.log"
1040
+ echo -e " ${DIM}Running additional: ${extra_cmd}${RESET}"
1041
+
1042
+ local extra_wrapper="$extra_cmd"
1043
+ if command -v timeout >/dev/null 2>&1; then
1044
+ extra_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$extra_cmd")"
1045
+ elif command -v gtimeout >/dev/null 2>&1; then
1046
+ extra_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$extra_cmd")"
1047
+ fi
1048
+
1049
+ local start_ts exit_code=0
1050
+ start_ts=$(date +%s)
1051
+ bash -c "$extra_wrapper" >> "$extra_log" 2>&1 || exit_code=$?
1052
+ local duration=$(( $(date +%s) - start_ts ))
1053
+
1054
+ if command -v jq >/dev/null 2>&1; then
1055
+ test_results=$(echo "$test_results" | jq --arg cmd "$extra_cmd" \
1056
+ --argjson exit "$exit_code" --argjson dur "$duration" \
1057
+ '. + [{"command": $cmd, "exit_code": $exit, "duration_s": $dur}]')
1058
+ fi
1059
+
1060
+ [[ "$exit_code" -ne 0 ]] && all_passed=false
1061
+ combined_output+="$(cat "$extra_log" 2>/dev/null)"$'\n'
1062
+ done
1063
+
1064
+ # Write structured test evidence
1065
+ if command -v jq >/dev/null 2>&1; then
1066
+ echo "$test_results" > "${LOG_DIR}/test-evidence-iter-${ITERATION}.json"
1067
+ fi
1068
+
1069
+ # Audit: emit test gate event
1070
+ if type audit_emit >/dev/null 2>&1; then
1071
+ local cmd_count=0
1072
+ command -v jq >/dev/null 2>&1 && cmd_count=$(echo "$test_results" | jq 'length' 2>/dev/null || echo 0)
1073
+ audit_emit "loop.test_gate" "iteration=$ITERATION" "commands=$cmd_count" \
1074
+ "all_passed=$all_passed" "evidence_path=test-evidence-iter-${ITERATION}.json" || true
1075
+ fi
1076
+
1077
+ TEST_PASSED=$all_passed
1078
+ TEST_OUTPUT="$(echo "$combined_output" | tail -50)"
1265
1079
  }
1266
1080
 
1267
1081
  write_error_summary() {
@@ -1352,7 +1166,18 @@ run_audit_agent() {
1352
1166
 
1353
1167
  # Include verified test status so auditor doesn't have to guess
1354
1168
  local test_context=""
1355
- if [[ -n "$TEST_CMD" ]]; then
1169
+ local evidence_file="${LOG_DIR}/test-evidence-iter-${ITERATION}.json"
1170
+ if [[ -f "$evidence_file" ]] && command -v jq >/dev/null 2>&1; then
1171
+ local cmd_count total_cmds evidence_detail
1172
+ cmd_count=$(jq 'length' "$evidence_file" 2>/dev/null || echo 0)
1173
+ total_cmds=$(jq -r '[.[].command] | join(", ")' "$evidence_file" 2>/dev/null || echo "unknown")
1174
+ evidence_detail=$(jq -r '.[] | "- \(.command): exit \(.exit_code) (\(.duration_s)s)"' "$evidence_file" 2>/dev/null || echo "")
1175
+ test_context="## Verified Test Status (from harness, not from agent)
1176
+ Test commands run: ${cmd_count} (${total_cmds})
1177
+ ${evidence_detail}
1178
+ Overall: $(if [[ "${TEST_PASSED:-}" == "true" ]]; then echo "ALL PASSING"; else echo "FAILING"; fi)"
1179
+ elif [[ -n "$TEST_CMD" ]]; then
1180
+ # Fallback to existing boolean
1356
1181
  if [[ "${TEST_PASSED:-}" == "true" ]]; then
1357
1182
  test_context="## Verified Test Status (from harness, not from agent)
1358
1183
  Tests: ALL PASSING (command: ${TEST_CMD})"
@@ -1406,6 +1231,12 @@ AUDIT_PROMPT
1406
1231
  audit_flags+=("--dangerously-skip-permissions")
1407
1232
  fi
1408
1233
 
1234
+ # Use structured output for machine-parseable audit results
1235
+ local schema_file="${SCRIPT_DIR}/../schemas/audit-result.json"
1236
+ if [[ -f "$schema_file" ]]; then
1237
+ audit_flags+=("--json-schema" "$(cat "$schema_file")")
1238
+ fi
1239
+
1409
1240
  local exit_code=0
1410
1241
  claude -p "$audit_prompt" "${audit_flags[@]}" > "$audit_log" 2>&1 || exit_code=$?
1411
1242
 
@@ -1442,9 +1273,11 @@ run_quality_gates() {
1442
1273
  gate_failures+=("uncommitted changes present")
1443
1274
  fi
1444
1275
 
1445
- # Gate 3: No TODO/FIXME/HACK/XXX in new code
1276
+ # Gate 3: No TODO/FIXME/HACK/XXX in new source code
1277
+ # Exclude .claude/, docs/plans/, and markdown files (which legitimately contain task markers)
1446
1278
  local todo_count
1447
- todo_count="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null | grep -cE '^\+.*(TODO|FIXME|HACK|XXX)' || true)"
1279
+ todo_count="$(git -C "$PROJECT_ROOT" diff HEAD~1 -- ':!.claude/' ':!docs/plans/' ':!*.md' 2>/dev/null \
1280
+ | grep -cE '^\+.*(TODO|FIXME|HACK|XXX)' || true)"
1448
1281
  todo_count="${todo_count:-0}"
1449
1282
  if [[ "${todo_count:-0}" -gt 0 ]]; then
1450
1283
  gate_failures+=("${todo_count} TODO/FIXME/HACK/XXX markers in new code")
@@ -1661,447 +1494,14 @@ HOLISTIC_PROMPT
1661
1494
  }
1662
1495
 
1663
1496
  # ─── Context Window Management ───────────────────────────────────────────────
1664
- # Prevents prompt from exceeding Claude's context limit (~200K tokens).
1665
- # Trims least-critical sections first when over budget.
1666
-
1667
- CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-$(_config_get_int "loop.context_budget_chars" 180000 2>/dev/null || echo 180000)}" # ~45K tokens at 4 chars/token
1668
-
1669
- manage_context_window() {
1670
- local prompt="$1"
1671
- local budget="${CONTEXT_BUDGET_CHARS}"
1672
- local current_len=${#prompt}
1673
-
1674
- # Read trimming tunables from config (env > daemon-config > policy > defaults.json)
1675
- local trim_memory_chars trim_git_entries trim_hotspot_files trim_test_lines
1676
- trim_memory_chars=$(_config_get_int "loop.context_trim_memory_chars" 20000 2>/dev/null || echo 20000)
1677
- trim_git_entries=$(_config_get_int "loop.context_trim_git_entries" 10 2>/dev/null || echo 10)
1678
- trim_hotspot_files=$(_config_get_int "loop.context_trim_hotspot_files" 5 2>/dev/null || echo 5)
1679
- trim_test_lines=$(_config_get_int "loop.context_trim_test_lines" 50 2>/dev/null || echo 50)
1680
-
1681
- if [[ "$current_len" -le "$budget" ]]; then
1682
- echo "$prompt"
1683
- return
1684
- fi
1685
-
1686
- # Over budget — progressively trim sections (least important first)
1687
- local trimmed="$prompt"
1688
-
1689
- # 1. Trim DORA/Performance baselines (least critical for code generation)
1690
- if [[ "${#trimmed}" -gt "$budget" ]]; then
1691
- trimmed=$(echo "$trimmed" | awk '/^## Performance Baselines/{skip=1; next} skip && /^## [^#]/{skip=0} !skip{print}')
1692
- fi
1693
-
1694
- # 2. Trim file hotspots to top N
1695
- if [[ "${#trimmed}" -gt "$budget" ]]; then
1696
- trimmed=$(echo "$trimmed" | awk -v max="$trim_hotspot_files" '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>max) next} {print}')
1697
- fi
1698
-
1699
- # 3. Trim git log to last N entries
1700
- if [[ "${#trimmed}" -gt "$budget" ]]; then
1701
- trimmed=$(echo "$trimmed" | awk -v max="$trim_git_entries" '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>max) next} {print}')
1702
- fi
1703
-
1704
- # 4. Truncate memory context to first N chars
1705
- if [[ "${#trimmed}" -gt "$budget" ]]; then
1706
- trimmed=$(echo "$trimmed" | awk -v max="$trim_memory_chars" '
1707
- /## Memory Context/{mem=1; skip_rest=0; chars=0; print; next}
1708
- mem && /^## [^#]/{mem=0; print; next}
1709
- mem{chars+=length($0)+1; if(chars>max){print "... (memory truncated for context budget)"; skip_rest=1; mem=0; next}}
1710
- skip_rest && /^## [^#]/{skip_rest=0; print; next}
1711
- skip_rest{next}
1712
- {print}
1713
- ')
1714
- fi
1715
-
1716
- # 5. Truncate test output to last N lines
1717
- if [[ "${#trimmed}" -gt "$budget" ]]; then
1718
- trimmed=$(echo "$trimmed" | awk -v max="$trim_test_lines" '
1719
- /## Test Results/{found=1; buf=""; print; next}
1720
- found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>max)?(n-max+1):1; for(i=start;i<=n;i++) if(arr[i]!="") print arr[i]; print; next}
1721
- found{buf=buf $0 "\n"; next}
1722
- {print}
1723
- ')
1724
- fi
1725
-
1726
- # 6. Last resort: hard truncate with notice
1727
- if [[ "${#trimmed}" -gt "$budget" ]]; then
1728
- trimmed="${trimmed:0:$budget}
1729
-
1730
- ... [CONTEXT TRUNCATED: prompt exceeded ${budget} char budget. Focus on the goal and most recent errors.]"
1731
- fi
1732
-
1733
- # Log the trimming
1734
- local final_len=${#trimmed}
1735
- if [[ "$final_len" -lt "$current_len" ]]; then
1736
- warn "Context trimmed from ${current_len} to ${final_len} chars (budget: ${budget})"
1737
- emit_event "loop.context_trimmed" "original=$current_len" "trimmed=$final_len" "budget=$budget" 2>/dev/null || true
1738
- fi
1739
-
1740
- echo "$trimmed"
1741
- }
1742
1497
 
1743
1498
  # ─── Prompt Composition ──────────────────────────────────────────────────────
1744
-
1745
- compose_prompt() {
1746
- local recent_log
1747
- # Get last 3 iteration summaries from log entries
1748
- recent_log="$(echo "$LOG_ENTRIES" | tail -15)"
1749
- if [[ -z "$recent_log" ]]; then
1750
- recent_log="(first iteration — no previous progress)"
1751
- fi
1752
-
1753
- local git_log
1754
- git_log="$(git_recent_log)"
1755
-
1756
- local test_section
1757
- if [[ -z "$TEST_CMD" ]]; then
1758
- test_section="No test command configured."
1759
- elif [[ -z "$TEST_PASSED" ]]; then
1760
- test_section="No test results yet (first iteration). Test command: $TEST_CMD"
1761
- elif $TEST_PASSED; then
1762
- test_section="$TEST_OUTPUT"
1763
- else
1764
- test_section="TESTS FAILED — fix these before proceeding:
1765
- $TEST_OUTPUT"
1766
- fi
1767
-
1768
- # Structured error context (machine-readable)
1769
- local error_summary_section=""
1770
- local error_json="$LOG_DIR/error-summary.json"
1771
- if [[ -f "$error_json" ]]; then
1772
- local err_count err_lines
1773
- err_count=$(jq -r '.error_count // 0' "$error_json" 2>/dev/null || echo "0")
1774
- err_lines=$(jq -r '.error_lines[]? // empty' "$error_json" 2>/dev/null | head -10 || true)
1775
- if [[ "$err_count" -gt 0 ]] && [[ -n "$err_lines" ]]; then
1776
- error_summary_section="## Structured Error Summary (${err_count} errors detected)
1777
- ${err_lines}
1778
-
1779
- Fix these specific errors. Each line above is one distinct error from the test output."
1780
- fi
1781
- fi
1782
-
1783
- # Build audit sections (captured before heredoc to avoid nested heredoc issues)
1784
- local audit_section
1785
- audit_section="$(compose_audit_section)"
1786
- local audit_feedback_section
1787
- audit_feedback_section="$(compose_audit_feedback_section)"
1788
- local rejection_notice_section
1789
- rejection_notice_section="$(compose_rejection_notice_section)"
1790
-
1791
- # Memory context injection (failure patterns + past learnings)
1792
- local memory_section=""
1793
- if type memory_inject_context >/dev/null 2>&1; then
1794
- memory_section="$(memory_inject_context "build" 2>/dev/null || true)"
1795
- elif [[ -f "$SCRIPT_DIR/sw-memory.sh" ]]; then
1796
- memory_section="$("$SCRIPT_DIR/sw-memory.sh" inject build 2>/dev/null || true)"
1797
- fi
1798
-
1799
- # Cross-pipeline discovery injection (learnings from other pipeline runs)
1800
- local discovery_section=""
1801
- if type inject_discoveries >/dev/null 2>&1; then
1802
- local disc_output
1803
- disc_output="$(inject_discoveries "${GOAL:-}" 2>/dev/null || true)"
1804
- if [[ -n "$disc_output" ]]; then
1805
- discovery_section="$disc_output"
1806
- fi
1807
- fi
1808
-
1809
- # DORA baselines for context
1810
- local dora_section=""
1811
- if type memory_get_dora_baseline >/dev/null 2>&1; then
1812
- local dora_json
1813
- dora_json="$(memory_get_dora_baseline 7 2>/dev/null || echo "{}")"
1814
- local dora_total
1815
- dora_total=$(echo "$dora_json" | jq -r '.total // 0' 2>/dev/null || echo "0")
1816
- if [[ "$dora_total" -gt 0 ]]; then
1817
- local dora_df dora_cfr
1818
- dora_df=$(echo "$dora_json" | jq -r '.deploy_freq // 0' 2>/dev/null || echo "0")
1819
- dora_cfr=$(echo "$dora_json" | jq -r '.cfr // 0' 2>/dev/null || echo "0")
1820
- dora_section="## Performance Baselines (Last 7 Days)
1821
- - Deploy frequency: ${dora_df}/week
1822
- - Change failure rate: ${dora_cfr}%
1823
- - Total pipeline runs: ${dora_total}"
1824
- fi
1825
- fi
1826
-
1827
- # Append mid-loop memory refresh if available
1828
- local memory_refresh_file="$LOG_DIR/memory-refresh-$(( ITERATION - 1 )).txt"
1829
- if [[ -f "$memory_refresh_file" ]]; then
1830
- memory_section="${memory_section}
1831
-
1832
- ## Fresh Context (from iteration $(( ITERATION - 1 )) analysis)
1833
- $(cat "$memory_refresh_file")"
1834
- fi
1835
-
1836
- # GitHub intelligence context (gated by availability)
1837
- local intelligence_section=""
1838
- if [[ "${NO_GITHUB:-}" != "true" ]]; then
1839
- # File hotspots — top 5 most-changed files
1840
- if type gh_file_change_frequency >/dev/null 2>&1; then
1841
- local hotspots
1842
- hotspots=$(gh_file_change_frequency 2>/dev/null | head -5 || true)
1843
- if [[ -n "$hotspots" ]]; then
1844
- intelligence_section="${intelligence_section}
1845
- ## File Hotspots (most frequently changed)
1846
- ${hotspots}"
1847
- fi
1848
- fi
1849
-
1850
- # CODEOWNERS context
1851
- if type gh_codeowners >/dev/null 2>&1; then
1852
- local owners
1853
- owners=$(gh_codeowners 2>/dev/null | head -10 || true)
1854
- if [[ -n "$owners" ]]; then
1855
- intelligence_section="${intelligence_section}
1856
- ## Code Owners
1857
- ${owners}"
1858
- fi
1859
- fi
1860
-
1861
- # Active security alerts
1862
- if type gh_security_alerts >/dev/null 2>&1; then
1863
- local alerts
1864
- alerts=$(gh_security_alerts 2>/dev/null | head -5 || true)
1865
- if [[ -n "$alerts" ]]; then
1866
- intelligence_section="${intelligence_section}
1867
- ## Active Security Alerts
1868
- ${alerts}"
1869
- fi
1870
- fi
1871
- fi
1872
-
1873
- # Architecture rules (from intelligence layer)
1874
- local repo_hash
1875
- repo_hash=$(echo -n "$(pwd)" | shasum -a 256 2>/dev/null | cut -c1-12 || echo "unknown")
1876
- local arch_file="${HOME}/.shipwright/memory/${repo_hash}/architecture.json"
1877
- if [[ -f "$arch_file" ]]; then
1878
- local arch_rules
1879
- arch_rules=$(jq -r '.rules[]? // empty' "$arch_file" 2>/dev/null | head -10 || true)
1880
- if [[ -n "$arch_rules" ]]; then
1881
- intelligence_section="${intelligence_section}
1882
- ## Architecture Rules
1883
- ${arch_rules}"
1884
- fi
1885
- fi
1886
-
1887
- # Coverage baseline
1888
- local coverage_file="${HOME}/.shipwright/baselines/${repo_hash}/coverage.json"
1889
- if [[ -f "$coverage_file" ]]; then
1890
- local coverage_pct
1891
- coverage_pct=$(jq -r '.coverage_percent // empty' "$coverage_file" 2>/dev/null || true)
1892
- if [[ -n "$coverage_pct" ]]; then
1893
- intelligence_section="${intelligence_section}
1894
- ## Coverage Baseline
1895
- Current coverage: ${coverage_pct}% — do not decrease this."
1896
- fi
1897
- fi
1898
-
1899
- # Error classification from last failure
1900
- local error_log=".claude/pipeline-artifacts/error-log.jsonl"
1901
- if [[ -f "$error_log" ]]; then
1902
- local last_error
1903
- last_error=$(tail -1 "$error_log" 2>/dev/null | jq -r '"Type: \(.type), Exit: \(.exit_code), Error: \(.error | split("\n") | first)"' 2>/dev/null || true)
1904
- if [[ -n "$last_error" ]]; then
1905
- intelligence_section="${intelligence_section}
1906
- ## Last Error Context
1907
- ${last_error}"
1908
- fi
1909
- fi
1910
-
1911
- # Stuckness detection — compare last 3 iteration outputs
1912
- local stuckness_section=""
1913
- stuckness_section="$(detect_stuckness)"
1914
- local _stuck_ret=$?
1915
- local stuckness_detected=false
1916
- [[ "$_stuck_ret" -eq 0 ]] && stuckness_detected=true
1917
-
1918
- # Strategy exploration when stuck — append alternative strategy to GOAL
1919
- if [[ "$stuckness_detected" == "true" ]]; then
1920
- local last_error diagnosis
1921
- last_error=$(tail -1 "${ARTIFACTS_DIR:-${PROJECT_ROOT:-.}/.claude/pipeline-artifacts}/error-log.jsonl" 2>/dev/null | jq -r '"Type: \(.type), Exit: \(.exit_code), Error: \(.error | split("\n") | first)"' 2>/dev/null || true)
1922
- [[ -z "$last_error" || "$last_error" == "null" ]] && last_error="unknown"
1923
- diagnosis="${STUCKNESS_DIAGNOSIS:-}"
1924
- local alt_strategy
1925
- alt_strategy=$(explore_alternative_strategy "$last_error" "${ITERATION:-0}" "$diagnosis")
1926
- GOAL="${GOAL}
1927
-
1928
- ${alt_strategy}"
1929
-
1930
- # Handle model escalation
1931
- if [[ "${ESCALATE_MODEL:-}" == "true" ]]; then
1932
- if [[ -f "$SCRIPT_DIR/sw-model-router.sh" ]]; then
1933
- source "$SCRIPT_DIR/sw-model-router.sh" 2>/dev/null || true
1934
- fi
1935
- if type escalate_model &>/dev/null; then
1936
- MODEL=$(escalate_model "${MODEL:-sonnet}")
1937
- info "Escalated to model: $MODEL"
1938
- fi
1939
- unset ESCALATE_MODEL
1940
- fi
1941
- fi
1942
-
1943
- # Session restart context — inject previous session progress
1944
- local restart_section=""
1945
- if [[ "$SESSION_RESTART" == "true" ]] && [[ -f "$LOG_DIR/progress.md" ]]; then
1946
- restart_section="## Previous Session Progress
1947
- $(cat "$LOG_DIR/progress.md")
1948
-
1949
- You are starting a FRESH session after the previous one exhausted its iterations.
1950
- Read the progress above and continue from where it left off. Do NOT repeat work already done."
1951
- fi
1952
-
1953
- # Resume-from-checkpoint context — reconstruct Claude context for meaningful resume
1954
- local resume_section=""
1955
- if [[ -n "${RESUMED_FROM_ITERATION:-}" && "${RESUMED_FROM_ITERATION:-0}" -gt 0 ]]; then
1956
- local _test_tail=" (none recorded)"
1957
- [[ -n "${RESUMED_TEST_OUTPUT:-}" ]] && _test_tail="$(echo "$RESUMED_TEST_OUTPUT" | tail -20)"
1958
- resume_section="## RESUMING FROM ITERATION ${RESUMED_FROM_ITERATION}
1959
-
1960
- Continue from where you left off. Do NOT repeat work already done.
1961
-
1962
- Previous work modified these files:
1963
- ${RESUMED_MODIFIED:- (none recorded)}
1964
-
1965
- Previous findings/errors from earlier iterations:
1966
- ${RESUMED_FINDINGS:- (none recorded)}
1967
-
1968
- Last test output (fix any failures, tail):
1969
- ${_test_tail}
1970
-
1971
- ---
1972
- "
1973
- # Clear after first use so we don't keep injecting on every iteration
1974
- RESUMED_FROM_ITERATION=""
1975
- RESUMED_MODIFIED=""
1976
- RESUMED_FINDINGS=""
1977
- RESUMED_TEST_OUTPUT=""
1978
- fi
1979
-
1980
- # Build cumulative progress summary showing all iterations' work
1981
- local cumulative_section=""
1982
- if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
1983
- local cum_stat
1984
- cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
1985
- if [[ -n "$cum_stat" ]]; then
1986
- cumulative_section="## Cumulative Progress (all iterations combined)
1987
- ${cum_stat}
1988
- "
1989
- fi
1990
- fi
1991
-
1992
- cat <<PROMPT
1993
- You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
1994
- ${resume_section}
1995
- ## Your Goal
1996
- ${GOAL}
1997
-
1998
- ${cumulative_section}
1999
- ## Current Progress
2000
- ${recent_log}
2001
-
2002
- ## Recent Git Activity
2003
- ${git_log}
2004
-
2005
- ## Test Results (Previous Iteration)
2006
- ${test_section}
2007
-
2008
- ${error_summary_section:+$error_summary_section
2009
- }
2010
- ${memory_section:+## Memory Context
2011
- $memory_section
2012
- }
2013
- ${discovery_section:+## Cross-Pipeline Learnings
2014
- $discovery_section
2015
- }
2016
- ${dora_section:+$dora_section
2017
- }
2018
- ${intelligence_section:+$intelligence_section
2019
- }
2020
- ${restart_section:+$restart_section
2021
- }
2022
- ## Instructions
2023
- 1. Read the codebase and understand the current state
2024
- 2. Identify the highest-priority remaining work toward the goal
2025
- 3. Implement ONE meaningful chunk of progress
2026
- 4. Run tests if a test command exists: ${TEST_CMD:-"(none)"}
2027
- 5. Commit your work with a descriptive message
2028
- 6. When the goal is FULLY achieved, output exactly: LOOP_COMPLETE
2029
-
2030
- ## Context Efficiency
2031
- - Batch independent tool calls in parallel — avoid sequential round-trips
2032
- - Use targeted file reads (offset/limit) instead of reading entire large files
2033
- - Delegate large searches to subagents — only import the summary
2034
- - Filter tool results with grep/jq before reasoning over them
2035
- - Keep working memory lean — summarize completed steps, don't preserve full outputs
2036
-
2037
- ${audit_section}
2038
-
2039
- ${audit_feedback_section}
2040
-
2041
- ${rejection_notice_section}
2042
-
2043
- ${stuckness_section}
2044
-
2045
- ## Rules
2046
- - Focus on ONE task per iteration — do it well
2047
- - Always commit with descriptive messages
2048
- - If tests fail, fix them before ending
2049
- - If stuck on the same issue for 2+ iterations, try a different approach
2050
- - Do NOT output LOOP_COMPLETE unless the goal is genuinely achieved
2051
- PROMPT
2052
- }
1499
+ # NOTE: compose_prompt() is now in lib/loop-iteration.sh (extracted upstream)
2053
1500
 
2054
1501
  # ─── Alternative Strategy Exploration ─────────────────────────────────────────
2055
1502
  # When stuckness is detected, generate a context-aware alternative strategy.
2056
1503
  # Uses pattern matching on error type + iteration count to suggest different approaches.
2057
1504
 
2058
- explore_alternative_strategy() {
2059
- local last_error="${1:-unknown}"
2060
- local iteration="${2:-0}"
2061
- local diagnosis="${3:-}"
2062
-
2063
- # Track attempted strategies to avoid repeating them
2064
- local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
2065
- local attempted
2066
- attempted=$(cat "$strategy_file" 2>/dev/null || true)
2067
-
2068
- local strategy=""
2069
-
2070
- # If quality gates are passing but evaluators disagree, suggest focusing on evaluator alignment
2071
- if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
2072
- if ! echo "$attempted" | grep -q "evaluator_alignment"; then
2073
- echo "evaluator_alignment" >> "$strategy_file"
2074
- strategy="## Alternative Strategy: Evaluator Alignment
2075
- The code appears functionally complete (tests pass). Focus on satisfying the remaining
2076
- quality gate evaluators. Check the DoD log and audit log for specific complaints, then
2077
- address those exact points rather than adding new features."
2078
- fi
2079
- fi
2080
-
2081
- # If no code changes in last iteration, suggest verifying existing work
2082
- if echo "$last_error" | grep -qi "no code changes" || [[ "$diagnosis" == *"no code"* ]]; then
2083
- if ! echo "$attempted" | grep -q "verify_existing"; then
2084
- echo "verify_existing" >> "$strategy_file"
2085
- strategy="## Alternative Strategy: Verify Existing Work
2086
- Recent iterations made no code changes. The work may already be complete.
2087
- Run the full test suite, verify all features work, and if everything passes,
2088
- commit a verification message and declare LOOP_COMPLETE with evidence."
2089
- fi
2090
- fi
2091
-
2092
- # Generic fallback: break the problem down
2093
- if [[ -z "$strategy" ]]; then
2094
- if ! echo "$attempted" | grep -q "decompose"; then
2095
- echo "decompose" >> "$strategy_file"
2096
- strategy="## Alternative Strategy: Decompose
2097
- Break the remaining work into smaller, independent steps. Focus on one specific
2098
- file or function at a time. Read error messages literally — the root cause may
2099
- differ from your assumption."
2100
- fi
2101
- fi
2102
-
2103
- echo "$strategy"
2104
- }
2105
1505
 
2106
1506
  # ─── Stuckness Detection ─────────────────────────────────────────────────────
2107
1507
  # Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
@@ -2110,189 +1510,7 @@ differ from your assumption."
2110
1510
  STUCKNESS_COUNT=0
2111
1511
  STUCKNESS_TRACKING_FILE=""
2112
1512
 
2113
- record_iteration_stuckness_data() {
2114
- local exit_code="${1:-0}"
2115
- [[ -z "$LOG_DIR" ]] && return 0
2116
- local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
2117
- local diff_hash error_hash
2118
- diff_hash=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | (md5 -q 2>/dev/null || md5sum 2>/dev/null | cut -d' ' -f1) || echo "none")
2119
- local error_log="${ARTIFACTS_DIR:-${STATE_DIR:-${PROJECT_ROOT:-.}/.claude}/pipeline-artifacts}/error-log.jsonl"
2120
- if [[ -f "$error_log" ]]; then
2121
- error_hash=$(tail -5 "$error_log" 2>/dev/null | sort -u | (md5 -q 2>/dev/null || md5sum 2>/dev/null | cut -d' ' -f1) || echo "none")
2122
- else
2123
- error_hash="none"
2124
- fi
2125
- echo "${diff_hash}|${error_hash}|${exit_code}" >> "$tracking_file"
2126
- }
2127
-
2128
- detect_stuckness() {
2129
- STUCKNESS_HINT=""
2130
- local iteration="${ITERATION:-0}"
2131
- local stuckness_signals=0
2132
- local stuckness_reasons=()
2133
- local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
2134
- local tracking_lines
2135
- tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || true)
2136
- tracking_lines="${tracking_lines:-0}"
2137
-
2138
- # Signal 1: Text overlap (existing logic) — compare last 2 iteration logs
2139
- if [[ "$iteration" -ge 3 ]]; then
2140
- local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
2141
- local log2="$LOG_DIR/iteration-$(( iteration - 2 )).log"
2142
- local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
2143
-
2144
- if [[ -f "$log1" && -f "$log2" ]]; then
2145
- local lines1 lines2 common total overlap_pct
2146
- lines1=$(tail -50 "$log1" 2>/dev/null | grep -v '^$' | sort || true)
2147
- lines2=$(tail -50 "$log2" 2>/dev/null | grep -v '^$' | sort || true)
2148
-
2149
- if [[ -n "$lines1" && -n "$lines2" ]]; then
2150
- total=$(echo "$lines1" | wc -l | tr -d ' ')
2151
- common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || true)
2152
- common="${common:-0}"
2153
- if [[ "$total" -gt 0 ]]; then
2154
- overlap_pct=$(( common * 100 / total ))
2155
- else
2156
- overlap_pct=0
2157
- fi
2158
- if [[ "${overlap_pct:-0}" -ge 90 ]]; then
2159
- stuckness_signals=$((stuckness_signals + 1))
2160
- stuckness_reasons+=("high text overlap (${overlap_pct}%) between iterations")
2161
- fi
2162
- fi
2163
- fi
2164
- fi
2165
-
2166
- # Signal 2: Git diff hash — last 3 iterations produced zero or identical diffs
2167
- if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
2168
- local last_three
2169
- last_three=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f1 || true)
2170
- local unique_hashes
2171
- unique_hashes=$(echo "$last_three" | sort -u | grep -v '^$' | wc -l | tr -d ' ')
2172
- if [[ "$unique_hashes" -le 1 ]] && [[ -n "$last_three" ]]; then
2173
- stuckness_signals=$((stuckness_signals + 1))
2174
- stuckness_reasons+=("identical or zero git diffs in last 3 iterations")
2175
- fi
2176
- fi
2177
-
2178
- # Signal 3: Error repetition — same error hash in last 3 iterations
2179
- if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
2180
- local last_three_errors
2181
- last_three_errors=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f2 || true)
2182
- local unique_error_hashes
2183
- unique_error_hashes=$(echo "$last_three_errors" | sort -u | grep -v '^none$' | grep -v '^$' | wc -l | tr -d ' ')
2184
- if [[ "$unique_error_hashes" -eq 1 ]] && [[ -n "$(echo "$last_three_errors" | grep -v '^none$')" ]]; then
2185
- stuckness_signals=$((stuckness_signals + 1))
2186
- stuckness_reasons+=("same error in last 3 iterations")
2187
- fi
2188
- fi
2189
-
2190
- # Signal 4: Same error repeating 3+ times (legacy check on error-log content)
2191
- local error_log
2192
- error_log="${ARTIFACTS_DIR:-$PROJECT_ROOT/.claude/pipeline-artifacts}/error-log.jsonl"
2193
- if [[ -f "$error_log" ]]; then
2194
- local last_errors
2195
- last_errors=$(tail -5 "$error_log" 2>/dev/null | jq -r '.error // .message // .error_hash // empty' 2>/dev/null | sort | uniq -c | sort -rn | head -1 || true)
2196
- local repeat_count
2197
- repeat_count=$(echo "$last_errors" | awk '{print $1}' 2>/dev/null || echo "0")
2198
- if [[ "${repeat_count:-0}" -ge 3 ]]; then
2199
- stuckness_signals=$((stuckness_signals + 1))
2200
- stuckness_reasons+=("same error repeated ${repeat_count} times")
2201
- fi
2202
- fi
2203
-
2204
- # Signal 5: Exit code pattern — last 3 iterations had same non-zero exit code
2205
- if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
2206
- local last_three_exits
2207
- last_three_exits=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f3 || true)
2208
- local first_exit
2209
- first_exit=$(echo "$last_three_exits" | head -1)
2210
- if [[ "$first_exit" =~ ^[0-9]+$ ]] && [[ "$first_exit" -ne 0 ]]; then
2211
- local all_same=true
2212
- while IFS= read -r ex; do
2213
- [[ "$ex" != "$first_exit" ]] && all_same=false
2214
- done <<< "$last_three_exits"
2215
- if [[ "$all_same" == true ]]; then
2216
- stuckness_signals=$((stuckness_signals + 1))
2217
- stuckness_reasons+=("same non-zero exit code (${first_exit}) in last 3 iterations")
2218
- fi
2219
- fi
2220
- fi
2221
-
2222
- # Signal 6: Git diff size — no or minimal code changes (existing)
2223
- local diff_lines
2224
- diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || true)
2225
- diff_lines="${diff_lines:-0}"
2226
- if [[ "${diff_lines:-0}" -lt 5 ]] && [[ "$iteration" -gt 2 ]]; then
2227
- stuckness_signals=$((stuckness_signals + 1))
2228
- stuckness_reasons+=("no code changes in last iteration")
2229
- fi
2230
-
2231
- # Signal 7: Iteration budget — used >70% without passing tests
2232
- local max_iter="${MAX_ITERATIONS:-20}"
2233
- local progress_pct=0
2234
- if [[ "$max_iter" -gt 0 ]]; then
2235
- progress_pct=$(( iteration * 100 / max_iter ))
2236
- fi
2237
- if [[ "$progress_pct" -gt 70 ]] && [[ "${TEST_PASSED:-false}" != "true" ]]; then
2238
- stuckness_signals=$((stuckness_signals + 1))
2239
- stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
2240
- fi
2241
-
2242
- # Gate-aware dampening: if tests pass and the agent has made progress overall,
2243
- # reduce stuckness signal count. The "no code changes" and "identical diffs" signals
2244
- # fire when code is already complete and the agent is fighting evaluator quirks —
2245
- # that's not genuine stuckness, it's "done but gates disagree."
2246
- if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
2247
- # If at least one quality signal is positive, dampen by 1
2248
- if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
2249
- stuckness_signals=$((stuckness_signals - 1))
2250
- fi
2251
- fi
2252
1513
 
2253
- # Decision: 2+ signals = stuck
2254
- if [[ "$stuckness_signals" -ge 2 ]]; then
2255
- STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
2256
- STUCKNESS_DIAGNOSIS="${stuckness_reasons[*]}"
2257
- if type emit_event >/dev/null 2>&1; then
2258
- emit_event "loop.stuckness_detected" "signals=$stuckness_signals" "count=$STUCKNESS_COUNT" "iteration=$iteration" "reasons=${stuckness_reasons[*]}"
2259
- fi
2260
- STUCKNESS_HINT="IMPORTANT: The loop appears stuck. Previous approaches have not worked. You MUST try a fundamentally different strategy. Reasons: ${stuckness_reasons[*]}"
2261
- warn "Stuckness detected (${stuckness_signals} signals, count ${STUCKNESS_COUNT}): ${stuckness_reasons[*]}"
2262
-
2263
- local diff_summary=""
2264
- local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
2265
- local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
2266
- if [[ -f "$log3" && -f "$log1" ]]; then
2267
- diff_summary=$(diff <(tail -30 "$log3" 2>/dev/null) <(tail -30 "$log1" 2>/dev/null) 2>/dev/null | head -10 || true)
2268
- fi
2269
-
2270
- local alternatives=""
2271
- if type memory_inject_context >/dev/null 2>&1; then
2272
- alternatives=$(memory_inject_context "build" 2>/dev/null | grep -i "fix:" | head -3 || true)
2273
- fi
2274
-
2275
- cat <<STUCK_SECTION
2276
- ## Stuckness Detected
2277
- ${STUCKNESS_HINT}
2278
-
2279
- ${diff_summary:+Changes between recent iterations:
2280
- $diff_summary
2281
- }
2282
- ${alternatives:+Consider these alternative approaches from past fixes:
2283
- $alternatives
2284
- }
2285
- Try a fundamentally different approach:
2286
- - Break the problem into smaller steps
2287
- - Look for an entirely different implementation strategy
2288
- - Check if there's a dependency or configuration issue blocking progress
2289
- - Read error messages more carefully — the root cause may differ from your assumption
2290
- STUCK_SECTION
2291
- return 0
2292
- fi
2293
-
2294
- return 1
2295
- }
2296
1514
 
2297
1515
  compose_audit_section() {
2298
1516
  if ! $AUDIT_ENABLED; then
@@ -2421,122 +1639,10 @@ PROMPT
2421
1639
 
2422
1640
  # ─── Claude Execution ────────────────────────────────────────────────────────
2423
1641
 
2424
- build_claude_flags() {
2425
- local flags=()
2426
- flags+=("--model" "$MODEL")
2427
- flags+=("--output-format" "json")
2428
-
2429
- if $SKIP_PERMISSIONS; then
2430
- flags+=("--dangerously-skip-permissions")
2431
- fi
2432
-
2433
- if [[ -n "$MAX_TURNS" ]]; then
2434
- flags+=("--max-turns" "$MAX_TURNS")
2435
- fi
2436
1642
 
2437
- echo "${flags[*]}"
2438
- }
2439
-
2440
- run_claude_iteration() {
2441
- local log_file="$LOG_DIR/iteration-${ITERATION}.log"
2442
- local json_file="$LOG_DIR/iteration-${ITERATION}.json"
2443
- local prompt
2444
- prompt="$(compose_prompt)"
2445
- local final_prompt
2446
- final_prompt=$(manage_context_window "$prompt")
2447
-
2448
- local raw_prompt_chars=${#prompt}
2449
- local prompt_chars=${#final_prompt}
2450
- local approx_tokens=$((prompt_chars / 4))
2451
- info "Prompt: ~${approx_tokens} tokens (${prompt_chars} chars)"
2452
-
2453
- # Emit context efficiency metrics
2454
- if type emit_event >/dev/null 2>&1; then
2455
- local trim_ratio=0
2456
- local budget_utilization=0
2457
- if [[ "$raw_prompt_chars" -gt 0 ]]; then
2458
- trim_ratio=$(awk -v raw="$raw_prompt_chars" -v trimmed="$prompt_chars" \
2459
- 'BEGIN { printf "%.1f", ((raw - trimmed) / raw) * 100 }')
2460
- fi
2461
- if [[ "${CONTEXT_BUDGET_CHARS:-0}" -gt 0 ]]; then
2462
- budget_utilization=$(awk -v used="$prompt_chars" -v budget="${CONTEXT_BUDGET_CHARS}" \
2463
- 'BEGIN { printf "%.1f", (used / budget) * 100 }')
2464
- fi
2465
- emit_event "loop.context_efficiency" \
2466
- "iteration=$ITERATION" \
2467
- "raw_prompt_chars=$raw_prompt_chars" \
2468
- "trimmed_prompt_chars=$prompt_chars" \
2469
- "trim_ratio=$trim_ratio" \
2470
- "budget_utilization=$budget_utilization" \
2471
- "budget_chars=${CONTEXT_BUDGET_CHARS:-0}" \
2472
- "job_id=${PIPELINE_JOB_ID:-loop-$$}" 2>/dev/null || true
2473
- fi
2474
-
2475
- local flags
2476
- flags="$(build_claude_flags)"
2477
-
2478
- local iter_start
2479
- iter_start="$(now_epoch)"
2480
-
2481
- echo -e "\n${CYAN}${BOLD}▸${RESET} ${BOLD}Iteration ${ITERATION}/${MAX_ITERATIONS}${RESET} — Starting..."
2482
-
2483
- # Run Claude headless (with timeout + PID capture for signal handling)
2484
- # Output goes to .json first, then we extract text into .log for compat
2485
- local exit_code=0
2486
- # shellcheck disable=SC2086
2487
- local err_file="${json_file%.json}.stderr"
2488
- if [[ -n "$TIMEOUT_CMD" ]]; then
2489
- $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
2490
- else
2491
- claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
2492
- fi
2493
- CHILD_PID=$!
2494
- wait "$CHILD_PID" 2>/dev/null || exit_code=$?
2495
- CHILD_PID=""
2496
- if [[ "$exit_code" -eq 124 ]]; then
2497
- warn "Claude CLI timed out after ${CLAUDE_TIMEOUT}s"
2498
- fi
2499
-
2500
- # Extract text result from JSON into .log for backwards compatibility
2501
- # With --output-format json, stdout is a JSON array; .[-1].result has the text
2502
- _extract_text_from_json "$json_file" "$log_file" "$err_file"
2503
-
2504
- local iter_end
2505
- iter_end="$(now_epoch)"
2506
- local iter_duration=$(( iter_end - iter_start ))
2507
-
2508
- echo -e " ${GREEN}✓${RESET} Claude session completed ($(format_duration "$iter_duration"), exit $exit_code)"
2509
-
2510
- # Accumulate token usage from this iteration's JSON output
2511
- accumulate_loop_tokens "$json_file"
2512
-
2513
- # Show verbose output if requested
2514
- if $VERBOSE; then
2515
- echo -e " ${DIM}─── Claude Output ───${RESET}"
2516
- sed 's/^/ /' "$log_file" | head -100
2517
- echo -e " ${DIM}─────────────────────${RESET}"
2518
- fi
2519
-
2520
- return $exit_code
2521
- }
2522
1643
 
2523
1644
  # ─── Iteration Summary Extraction ────────────────────────────────────────────
2524
1645
 
2525
- extract_summary() {
2526
- local log_file="$1"
2527
- # Grab last meaningful lines from Claude output, skipping empty lines
2528
- local summary
2529
- summary="$(grep -v '^$' "$log_file" | tail -5 | head -3 2>/dev/null || echo "(no output)")"
2530
- # Truncate long lines
2531
- summary="$(echo "$summary" | cut -c1-120)"
2532
-
2533
- # Sanitize: if summary is just a CLI/API error, replace with generic text
2534
- if echo "$summary" | grep -qiE 'Invalid API key|authentication_error|rate_limit|API key expired|ANTHROPIC_API_KEY'; then
2535
- summary="(CLI error — no useful output this iteration)"
2536
- fi
2537
-
2538
- echo "$summary"
2539
- }
2540
1646
 
2541
1647
  # ─── Display Helpers ─────────────────────────────────────────────────────────
2542
1648
 
@@ -2652,6 +1758,7 @@ cleanup() {
2652
1758
  export SW_LOOP_STATUS="$STATUS"
2653
1759
  export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
2654
1760
  export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
1761
+ # shellcheck disable=SC2155
2655
1762
  export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
2656
1763
  "$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
2657
1764
 
@@ -2735,7 +1842,7 @@ DIM='\033[2m'
2735
1842
  BOLD='\033[1m'
2736
1843
  RESET='\033[0m'
2737
1844
 
2738
- cd "$WORK_DIR"
1845
+ cd "$WORK_DIR" || { echo "ERROR: Cannot cd to WORK_DIR: $WORK_DIR" >&2; exit 1; }
2739
1846
  ITERATION=0
2740
1847
  CONSECUTIVE_FAILURES=0
2741
1848
 
@@ -2818,8 +1925,11 @@ PROMPT
2818
1925
  break
2819
1926
  fi
2820
1927
 
2821
- # Auto-commit
1928
+ # Auto-commit — stage only source files, exclude build artifacts
2822
1929
  git add -A 2>/dev/null || true
1930
+ git reset -- .claude/loop-logs/ .claude/loop-state.md .claude/intelligence-cache.json \
1931
+ .claude/platform-hygiene.json .claude/pipeline-artifacts/ .claude/code-review.json \
1932
+ .claude/hygiene-report.json .claude/pr-draft.md 2>/dev/null || true
2823
1933
  if git commit -m "agent-${AGENT_NUM}: iteration ${ITERATION}" --no-verify 2>/dev/null; then
2824
1934
  if ! git push origin "loop/agent-${AGENT_NUM}" 2>/dev/null; then
2825
1935
  echo -e " ${YELLOW}⚠${RESET} git push failed for loop/agent-${AGENT_NUM} — remote may be out of sync"
@@ -2989,8 +2099,16 @@ cleanup_multi_agent() {
2989
2099
  # ─── Main: Single-Agent Loop ─────────────────────────────────────────────────
2990
2100
 
2991
2101
  run_single_agent_loop() {
2102
+ # Save original environment variables before loop starts
2103
+ local SAVED_CLAUDE_MODEL="${CLAUDE_MODEL:-}"
2104
+ local SAVED_ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
2105
+
2992
2106
  if [[ "$SESSION_RESTART" == "true" ]]; then
2993
2107
  # Restart: state already reset by run_loop_with_restarts, skip init
2108
+ # Restore environment variables for clean iteration state
2109
+ [[ -n "$SAVED_CLAUDE_MODEL" ]] && export CLAUDE_MODEL="$SAVED_CLAUDE_MODEL"
2110
+ # Reset context exhaustion counter for this session (it tracks restarts WITHIN a single session)
2111
+ CONTEXT_RESTART_COUNT=0
2994
2112
  info "Session restart ${RESTART_COUNT}/${MAX_RESTARTS} — fresh context, reading progress"
2995
2113
  elif $RESUME; then
2996
2114
  resume_state
@@ -3012,11 +2130,16 @@ run_single_agent_loop() {
3012
2130
  STUCKNESS_COUNT=0
3013
2131
  STUCKNESS_TRACKING_FILE="$LOG_DIR/stuckness-tracking.txt"
3014
2132
  : > "$STUCKNESS_TRACKING_FILE" 2>/dev/null || true
3015
- : > "${LOG_DIR:-/tmp}/strategy-attempts.txt" 2>/dev/null || true
2133
+ : > "${LOG_DIR}/strategy-attempts.txt" 2>/dev/null || true
3016
2134
 
3017
2135
  show_banner
3018
2136
 
3019
2137
  while true; do
2138
+ # Reset environment variables at start of each iteration
2139
+ # Prevents previous iterations from affecting model selection or API keys
2140
+ [[ -n "$SAVED_CLAUDE_MODEL" ]] && export CLAUDE_MODEL="$SAVED_CLAUDE_MODEL"
2141
+ [[ -n "$SAVED_ANTHROPIC_API_KEY" ]] && export ANTHROPIC_API_KEY="$SAVED_ANTHROPIC_API_KEY"
2142
+
3020
2143
  # Pre-checks (before incrementing — ITERATION tracks completed count)
3021
2144
  check_circuit_breaker || break
3022
2145
  check_max_iterations || break
@@ -3100,6 +2223,11 @@ ${GOAL}"
3100
2223
  # Record iteration data for stuckness detection (diff hash, error hash, exit code)
3101
2224
  record_iteration_stuckness_data "$exit_code"
3102
2225
 
2226
+ # Dark factory: score this iteration with process reward model
2227
+ if type process_reward_score_iteration >/dev/null 2>&1; then
2228
+ process_reward_score_iteration "$PROJECT_ROOT" "${TEST_OUTPUT:-}" "$ITERATION" 2>/dev/null || true
2229
+ fi
2230
+
3103
2231
  # Detect fatal CLI errors (API key, auth, network) — abort immediately
3104
2232
  if check_fatal_error "$log_file" "$exit_code"; then
3105
2233
  STATUS="error"
@@ -3110,6 +2238,32 @@ ${GOAL}"
3110
2238
  return 1
3111
2239
  fi
3112
2240
 
2241
+ # Detect context exhaustion and trigger intelligent restart
2242
+ local log_content=""
2243
+ [[ -f "$log_file" ]] && log_content=$(cat "$log_file" 2>/dev/null || true)
2244
+ local stderr_file="${LOG_DIR}/iteration-${ITERATION}.stderr"
2245
+ local stderr_content=""
2246
+ [[ -f "$stderr_file" ]] && stderr_content=$(cat "$stderr_file" 2>/dev/null || true)
2247
+
2248
+ if echo "${log_content}${stderr_content}" | grep -qiE "$CONTEXT_EXHAUSTION_PATTERNS" 2>/dev/null; then
2249
+ if [[ "${CONTEXT_RESTART_COUNT:-0}" -lt "${CONTEXT_RESTART_LIMIT:-2}" ]]; then
2250
+ CONTEXT_RESTART_COUNT=$(( CONTEXT_RESTART_COUNT + 1 ))
2251
+ STATUS="context_exhaustion_restart"
2252
+ write_state
2253
+ write_progress
2254
+ warn "Context exhaustion detected (iteration $ITERATION) — triggering intelligent restart ($CONTEXT_RESTART_COUNT/$CONTEXT_RESTART_LIMIT)"
2255
+ if type emit_event >/dev/null 2>&1; then
2256
+ emit_event "loop.context_exhaustion" "iteration=$ITERATION" "restart_count=$CONTEXT_RESTART_COUNT" "max_restarts=$MAX_RESTARTS"
2257
+ fi
2258
+ break
2259
+ else
2260
+ warn "Context exhaustion detected but restart limit ($CONTEXT_RESTART_LIMIT) reached"
2261
+ STATUS="context_exhaustion_fatal"
2262
+ write_state
2263
+ write_progress
2264
+ fi
2265
+ fi
2266
+
3113
2267
  # Mid-loop memory refresh — re-query with current error context after iteration 3
3114
2268
  if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context >/dev/null 2>&1; then
3115
2269
  local refresh_ctx
@@ -3155,6 +2309,15 @@ ${GOAL}"
3155
2309
  fi
3156
2310
  fi
3157
2311
 
2312
+ # Dark factory: update RL weights based on test outcome
2313
+ if type rl_update_weights >/dev/null 2>&1; then
2314
+ if [[ "${TEST_PASSED:-}" == "true" ]]; then
2315
+ rl_update_weights "success" 2>/dev/null || true
2316
+ elif [[ "${TEST_PASSED:-}" == "false" ]]; then
2317
+ rl_update_weights "failure" 2>/dev/null || true
2318
+ fi
2319
+ fi
2320
+
3158
2321
  # Track fix outcome for memory effectiveness
3159
2322
  if [[ -n "${_applied_fix_pattern:-}" ]]; then
3160
2323
  if type memory_record_fix_outcome >/dev/null 2>&1; then
@@ -3173,15 +2336,98 @@ ${GOAL}"
3173
2336
  export SW_LOOP_STATUS="${STATUS:-running}"
3174
2337
  export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
3175
2338
  export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
2339
+ # shellcheck disable=SC2155
3176
2340
  export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
3177
2341
  "$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
3178
2342
 
3179
2343
  # Audit agent (reviews implementer's work)
3180
2344
  run_audit_agent
3181
2345
 
2346
+ # Verification gap detection: audit failed but tests passed
2347
+ # Instead of a full retry (which causes context bloat/timeout), run targeted verification
2348
+ if [[ "${AUDIT_RESULT:-}" != "pass" ]] && [[ "${TEST_PASSED:-}" == "true" ]]; then
2349
+ echo -e " ${YELLOW}▸${RESET} Verification gap detected (tests pass, audit disagrees)"
2350
+
2351
+ local verification_passed=true
2352
+
2353
+ # 1. Re-run ALL test commands to double-check
2354
+ local recheck_log="${LOG_DIR}/verification-iter-${ITERATION}.log"
2355
+ if [[ -n "$TEST_CMD" ]]; then
2356
+ eval "$TEST_CMD" > "$recheck_log" 2>&1 || verification_passed=false
2357
+ fi
2358
+ for _vg_cmd in "${ADDITIONAL_TEST_CMDS[@]+"${ADDITIONAL_TEST_CMDS[@]}"}"; do
2359
+ [[ -z "$_vg_cmd" ]] && continue
2360
+ eval "$_vg_cmd" >> "$recheck_log" 2>&1 || verification_passed=false
2361
+ done
2362
+
2363
+ # 2. Check for unstaged changes to tracked files (quality gate)
2364
+ if ! git -C "$PROJECT_ROOT" diff --quiet 2>/dev/null; then
2365
+ echo -e " ${YELLOW}⚠${RESET} Uncommitted changes detected"
2366
+ verification_passed=false
2367
+ fi
2368
+
2369
+ if [[ "$verification_passed" == "true" ]]; then
2370
+ echo -e " ${GREEN}✓${RESET} Verification passed — overriding audit"
2371
+ AUDIT_RESULT="pass"
2372
+ emit_event "loop.verification_gap_resolved" \
2373
+ "iteration=$ITERATION" "action=override_audit"
2374
+ if type audit_emit >/dev/null 2>&1; then
2375
+ audit_emit "loop.verification_gap" "iteration=$ITERATION" \
2376
+ "resolution=override" "tests_recheck=pass" || true
2377
+ fi
2378
+ else
2379
+ echo -e " ${RED}✗${RESET} Verification failed — audit stands"
2380
+ emit_event "loop.verification_gap_confirmed" \
2381
+ "iteration=$ITERATION" "action=retry"
2382
+ if type audit_emit >/dev/null 2>&1; then
2383
+ audit_emit "loop.verification_gap" "iteration=$ITERATION" \
2384
+ "resolution=retry" "tests_recheck=fail" || true
2385
+ fi
2386
+ fi
2387
+ fi
2388
+
2389
+ # Auto-commit any remaining changes before quality gates
2390
+ # (audit agent, verification handler, or test evidence may create files)
2391
+ if ! git -C "$PROJECT_ROOT" diff --quiet 2>/dev/null || \
2392
+ ! git -C "$PROJECT_ROOT" diff --cached --quiet 2>/dev/null || \
2393
+ [[ -n "$(git -C "$PROJECT_ROOT" ls-files --others --exclude-standard 2>/dev/null | head -1)" ]]; then
2394
+ git -C "$PROJECT_ROOT" add -A 2>/dev/null || true
2395
+ git -C "$PROJECT_ROOT" commit -m "loop: iteration $ITERATION — post-audit cleanup" --no-verify 2>/dev/null || true
2396
+ fi
2397
+
3182
2398
  # Quality gates (automated checks)
3183
2399
  run_quality_gates
3184
2400
 
2401
+ # Convergence detection (issue #203) — score iteration progress and detect convergence
2402
+ if type convergence_integrate >/dev/null 2>&1; then
2403
+ local conv_exit=0
2404
+ convergence_integrate || conv_exit=$?
2405
+ case "$conv_exit" in
2406
+ 1)
2407
+ # Converged — stop successfully
2408
+ info "Build loop converged — stopping"
2409
+ STATUS="complete"
2410
+ write_state
2411
+ write_progress
2412
+ show_summary
2413
+ return 0
2414
+ ;;
2415
+ 2)
2416
+ # Diverging — stop with failure
2417
+ warn "Build loop diverging — stopping (scores declining consistently)"
2418
+ STATUS="diverging"
2419
+ write_state
2420
+ write_progress
2421
+ show_summary
2422
+ return 1
2423
+ ;;
2424
+ 3)
2425
+ # Oscillating — warn only and keep looping; manual review or model escalation is left to the operator
2426
+ warn "Build loop oscillating — consider manual review or model escalation"
2427
+ ;;
2428
+ esac
2429
+ fi
2430
+
3185
2431
  # Guarded completion (replaces naive grep check)
3186
2432
  if guard_completion; then
3187
2433
  STATUS="complete"
@@ -3194,6 +2440,10 @@ ${GOAL}"
3194
2440
  # Check progress (circuit breaker)
3195
2441
  if check_progress; then
3196
2442
  CONSECUTIVE_FAILURES=0
2443
+ # Reset auto-recovery state on progress (tests passing, code advancing)
2444
+ if type recovery_reset >/dev/null 2>&1; then
2445
+ recovery_reset
2446
+ fi
3197
2447
  echo -e " ${GREEN}✓${RESET} Progress detected — continuing"
3198
2448
  else
3199
2449
  CONSECUTIVE_FAILURES=$(( CONSECUTIVE_FAILURES + 1 ))
@@ -3272,6 +2522,52 @@ run_loop_with_restarts() {
3272
2522
  if [[ "$STATUS" == "complete" ]]; then
3273
2523
  return 0
3274
2524
  fi
2525
+
2526
+ # Context exhaustion: treat as restart, not failure (unless restart limit hit)
2527
+ if [[ "$STATUS" == "context_exhaustion_restart" ]]; then
2528
+ if [[ "$CONTEXT_RESTART_COUNT" -lt "$CONTEXT_RESTART_LIMIT" ]]; then
2529
+ RESTART_COUNT=$(( RESTART_COUNT + 1 ))
2530
+ if type emit_event >/dev/null 2>&1; then
2531
+ emit_event "loop.restart" "restart=$RESTART_COUNT" "reason=context_exhaustion" "context_restart=$CONTEXT_RESTART_COUNT" "iteration=$ITERATION"
2532
+ fi
2533
+ info "Context exhaustion auto-recovery: restart $RESTART_COUNT/$MAX_RESTARTS (context restart $CONTEXT_RESTART_COUNT/$CONTEXT_RESTART_LIMIT)"
2534
+
2535
+ # Capture comprehensive state and generate briefing before restart
2536
+ if type restart_before_restart >/dev/null 2>&1; then
2537
+ restart_before_restart || warn "Failed to prepare restart briefing (continuing anyway)"
2538
+ fi
2539
+
2540
+ # Reset iteration-level state for fresh session
2541
+ SESSION_RESTART=true
2542
+ ITERATION=0
2543
+ CONSECUTIVE_FAILURES=0
2544
+ EXTENSION_COUNT=0
2545
+ STUCKNESS_COUNT=0
2546
+ STATUS="running"
2547
+ LOG_ENTRIES=""
2548
+ TEST_PASSED=""
2549
+ TEST_OUTPUT=""
2550
+ TEST_LOG_FILE=""
2551
+ GOAL="$ORIGINAL_GOAL"
2552
+
2553
+ # Archive old artifacts
2554
+ local restart_archive="$LOG_DIR/restart-${RESTART_COUNT}"
2555
+ mkdir -p "$restart_archive"
2556
+ for old_log in "$LOG_DIR"/iteration-*.log "$LOG_DIR"/tests-iter-*.log; do
2557
+ [[ -f "$old_log" ]] && mv "$old_log" "$restart_archive/" 2>/dev/null || true
2558
+ done
2559
+ [[ -f "$LOG_DIR/progress.md" ]] && cp "$LOG_DIR/progress.md" "$restart_archive/progress.md" 2>/dev/null || true
2560
+ [[ -f "$LOG_DIR/error-summary.json" ]] && cp "$LOG_DIR/error-summary.json" "$restart_archive/" 2>/dev/null || true
2561
+
2562
+ write_state
2563
+ sleep "$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)"
2564
+ continue
2565
+ else
2566
+ warn "Context exhaustion limit reached — failing build"
2567
+ return "$loop_exit"
2568
+ fi
2569
+ fi
2570
+
3275
2571
  if [[ "$MAX_RESTARTS" -le 0 ]]; then
3276
2572
  return "$loop_exit"
3277
2573
  fi
@@ -3279,9 +2575,11 @@ run_loop_with_restarts() {
3279
2575
  warn "Max restarts ($MAX_RESTARTS) reached — stopping"
3280
2576
  return "$loop_exit"
3281
2577
  fi
3282
- # Hard cap safety net
3283
- if [[ "$RESTART_COUNT" -ge 5 ]]; then
3284
- warn "Hard restart cap (5) reached — stopping"
2578
+ # Hard cap safety net (configurable)
2579
+ local _hard_cap
2580
+ _hard_cap=$(_smart_int "loop.hard_restart_cap" 5)
2581
+ if [[ "$RESTART_COUNT" -ge "$_hard_cap" ]]; then
2582
+ warn "Hard restart cap ($_hard_cap) reached — stopping"
3285
2583
  return "$loop_exit"
3286
2584
  fi
3287
2585
 
@@ -3293,6 +2591,12 @@ run_loop_with_restarts() {
3293
2591
  fi
3294
2592
 
3295
2593
  RESTART_COUNT=$(( RESTART_COUNT + 1 ))
2594
+
2595
+ # Capture comprehensive state and generate briefing before restart
2596
+ if type restart_before_restart >/dev/null 2>&1; then
2597
+ restart_before_restart || warn "Failed to prepare restart briefing (continuing anyway)"
2598
+ fi
2599
+
3296
2600
  if type emit_event >/dev/null 2>&1; then
3297
2601
  emit_event "loop.restart" "restart=$RESTART_COUNT" "max=$MAX_RESTARTS" "iteration=$ITERATION"
3298
2602
  fi