shipwright-cli 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283)
  1. package/.claude/agents/code-reviewer.md +2 -0
  2. package/.claude/agents/devops-engineer.md +2 -0
  3. package/.claude/agents/doc-fleet-agent.md +2 -0
  4. package/.claude/agents/pipeline-agent.md +2 -0
  5. package/.claude/agents/shell-script-specialist.md +2 -0
  6. package/.claude/agents/test-specialist.md +2 -0
  7. package/.claude/hooks/agent-crash-capture.sh +32 -0
  8. package/.claude/hooks/post-tool-use.sh +3 -2
  9. package/.claude/hooks/pre-tool-use.sh +35 -3
  10. package/README.md +22 -8
  11. package/claude-code/hooks/config-change.sh +18 -0
  12. package/claude-code/hooks/instructions-reloaded.sh +7 -0
  13. package/claude-code/hooks/worktree-create.sh +25 -0
  14. package/claude-code/hooks/worktree-remove.sh +20 -0
  15. package/config/code-constitution.json +130 -0
  16. package/config/defaults.json +25 -2
  17. package/config/policy.json +1 -1
  18. package/dashboard/middleware/auth.ts +134 -0
  19. package/dashboard/middleware/constants.ts +21 -0
  20. package/dashboard/public/index.html +8 -6
  21. package/dashboard/public/styles.css +176 -97
  22. package/dashboard/routes/auth.ts +38 -0
  23. package/dashboard/server.ts +117 -25
  24. package/dashboard/services/config.ts +26 -0
  25. package/dashboard/services/db.ts +118 -0
  26. package/dashboard/src/canvas/pixel-agent.ts +298 -0
  27. package/dashboard/src/canvas/pixel-sprites.ts +440 -0
  28. package/dashboard/src/canvas/shipyard-effects.ts +367 -0
  29. package/dashboard/src/canvas/shipyard-scene.ts +616 -0
  30. package/dashboard/src/canvas/submarine-layout.ts +267 -0
  31. package/dashboard/src/components/header.ts +8 -7
  32. package/dashboard/src/core/api.ts +5 -0
  33. package/dashboard/src/core/router.ts +1 -0
  34. package/dashboard/src/design/submarine-theme.ts +253 -0
  35. package/dashboard/src/main.ts +2 -0
  36. package/dashboard/src/types/api.ts +12 -1
  37. package/dashboard/src/views/activity.ts +2 -1
  38. package/dashboard/src/views/metrics.ts +69 -1
  39. package/dashboard/src/views/shipyard.ts +39 -0
  40. package/dashboard/types/index.ts +166 -0
  41. package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
  42. package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
  43. package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
  44. package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
  45. package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
  46. package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
  47. package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
  48. package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
  49. package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
  50. package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
  51. package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
  52. package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
  53. package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
  54. package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
  55. package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
  56. package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
  57. package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
  58. package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
  59. package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
  60. package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
  61. package/docs/research/RESEARCH_INDEX.md +439 -0
  62. package/docs/research/RESEARCH_SOURCES.md +440 -0
  63. package/docs/research/RESEARCH_SUMMARY.txt +275 -0
  64. package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
  65. package/package.json +2 -2
  66. package/scripts/lib/adaptive-model.sh +427 -0
  67. package/scripts/lib/adaptive-timeout.sh +316 -0
  68. package/scripts/lib/audit-trail.sh +309 -0
  69. package/scripts/lib/auto-recovery.sh +471 -0
  70. package/scripts/lib/bandit-selector.sh +431 -0
  71. package/scripts/lib/bootstrap.sh +104 -2
  72. package/scripts/lib/causal-graph.sh +455 -0
  73. package/scripts/lib/compat.sh +126 -0
  74. package/scripts/lib/compound-audit.sh +337 -0
  75. package/scripts/lib/constitutional.sh +454 -0
  76. package/scripts/lib/context-budget.sh +359 -0
  77. package/scripts/lib/convergence.sh +594 -0
  78. package/scripts/lib/cost-optimizer.sh +634 -0
  79. package/scripts/lib/daemon-adaptive.sh +14 -2
  80. package/scripts/lib/daemon-dispatch.sh +106 -17
  81. package/scripts/lib/daemon-failure.sh +34 -4
  82. package/scripts/lib/daemon-patrol.sh +25 -4
  83. package/scripts/lib/daemon-poll-github.sh +361 -0
  84. package/scripts/lib/daemon-poll-health.sh +299 -0
  85. package/scripts/lib/daemon-poll.sh +27 -611
  86. package/scripts/lib/daemon-state.sh +119 -66
  87. package/scripts/lib/daemon-triage.sh +10 -0
  88. package/scripts/lib/dod-scorecard.sh +442 -0
  89. package/scripts/lib/error-actionability.sh +300 -0
  90. package/scripts/lib/formal-spec.sh +461 -0
  91. package/scripts/lib/helpers.sh +180 -5
  92. package/scripts/lib/intent-analysis.sh +409 -0
  93. package/scripts/lib/loop-convergence.sh +350 -0
  94. package/scripts/lib/loop-iteration.sh +682 -0
  95. package/scripts/lib/loop-progress.sh +48 -0
  96. package/scripts/lib/loop-restart.sh +185 -0
  97. package/scripts/lib/memory-effectiveness.sh +506 -0
  98. package/scripts/lib/mutation-executor.sh +352 -0
  99. package/scripts/lib/outcome-feedback.sh +521 -0
  100. package/scripts/lib/pipeline-cli.sh +336 -0
  101. package/scripts/lib/pipeline-commands.sh +1216 -0
  102. package/scripts/lib/pipeline-detection.sh +101 -3
  103. package/scripts/lib/pipeline-execution.sh +897 -0
  104. package/scripts/lib/pipeline-github.sh +28 -3
  105. package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
  106. package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
  107. package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
  108. package/scripts/lib/pipeline-intelligence.sh +104 -1138
  109. package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
  110. package/scripts/lib/pipeline-quality-checks.sh +17 -711
  111. package/scripts/lib/pipeline-quality-gates.sh +563 -0
  112. package/scripts/lib/pipeline-stages-build.sh +730 -0
  113. package/scripts/lib/pipeline-stages-delivery.sh +965 -0
  114. package/scripts/lib/pipeline-stages-intake.sh +1133 -0
  115. package/scripts/lib/pipeline-stages-monitor.sh +407 -0
  116. package/scripts/lib/pipeline-stages-review.sh +1022 -0
  117. package/scripts/lib/pipeline-stages.sh +161 -2901
  118. package/scripts/lib/pipeline-state.sh +36 -5
  119. package/scripts/lib/pipeline-util.sh +487 -0
  120. package/scripts/lib/policy-learner.sh +438 -0
  121. package/scripts/lib/process-reward.sh +493 -0
  122. package/scripts/lib/project-detect.sh +649 -0
  123. package/scripts/lib/quality-profile.sh +334 -0
  124. package/scripts/lib/recruit-commands.sh +885 -0
  125. package/scripts/lib/recruit-learning.sh +739 -0
  126. package/scripts/lib/recruit-roles.sh +648 -0
  127. package/scripts/lib/reward-aggregator.sh +458 -0
  128. package/scripts/lib/rl-optimizer.sh +362 -0
  129. package/scripts/lib/root-cause.sh +427 -0
  130. package/scripts/lib/scope-enforcement.sh +445 -0
  131. package/scripts/lib/session-restart.sh +493 -0
  132. package/scripts/lib/skill-memory.sh +300 -0
  133. package/scripts/lib/skill-registry.sh +775 -0
  134. package/scripts/lib/spec-driven.sh +476 -0
  135. package/scripts/lib/test-helpers.sh +18 -7
  136. package/scripts/lib/test-holdout.sh +429 -0
  137. package/scripts/lib/test-optimizer.sh +511 -0
  138. package/scripts/shipwright-file-suggest.sh +45 -0
  139. package/scripts/skills/adversarial-quality.md +61 -0
  140. package/scripts/skills/api-design.md +44 -0
  141. package/scripts/skills/architecture-design.md +50 -0
  142. package/scripts/skills/brainstorming.md +43 -0
  143. package/scripts/skills/data-pipeline.md +44 -0
  144. package/scripts/skills/deploy-safety.md +64 -0
  145. package/scripts/skills/documentation.md +38 -0
  146. package/scripts/skills/frontend-design.md +45 -0
  147. package/scripts/skills/generated/.gitkeep +0 -0
  148. package/scripts/skills/generated/_refinements/.gitkeep +0 -0
  149. package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
  150. package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
  151. package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
  152. package/scripts/skills/generated/cli-version-management.md +29 -0
  153. package/scripts/skills/generated/collection-system-validation.md +99 -0
  154. package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
  155. package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
  156. package/scripts/skills/generated/test-parallelization-detection.md +65 -0
  157. package/scripts/skills/observability.md +79 -0
  158. package/scripts/skills/performance.md +48 -0
  159. package/scripts/skills/pr-quality.md +49 -0
  160. package/scripts/skills/product-thinking.md +43 -0
  161. package/scripts/skills/security-audit.md +49 -0
  162. package/scripts/skills/systematic-debugging.md +40 -0
  163. package/scripts/skills/testing-strategy.md +47 -0
  164. package/scripts/skills/two-stage-review.md +52 -0
  165. package/scripts/skills/validation-thoroughness.md +55 -0
  166. package/scripts/sw +9 -3
  167. package/scripts/sw-activity.sh +9 -8
  168. package/scripts/sw-adaptive.sh +8 -7
  169. package/scripts/sw-adversarial.sh +2 -1
  170. package/scripts/sw-architecture-enforcer.sh +3 -1
  171. package/scripts/sw-auth.sh +12 -2
  172. package/scripts/sw-autonomous.sh +5 -1
  173. package/scripts/sw-changelog.sh +4 -1
  174. package/scripts/sw-checkpoint.sh +2 -1
  175. package/scripts/sw-ci.sh +15 -6
  176. package/scripts/sw-cleanup.sh +4 -26
  177. package/scripts/sw-code-review.sh +45 -20
  178. package/scripts/sw-connect.sh +2 -1
  179. package/scripts/sw-context.sh +2 -1
  180. package/scripts/sw-cost.sh +107 -5
  181. package/scripts/sw-daemon.sh +71 -11
  182. package/scripts/sw-dashboard.sh +3 -1
  183. package/scripts/sw-db.sh +71 -20
  184. package/scripts/sw-decide.sh +8 -2
  185. package/scripts/sw-decompose.sh +360 -17
  186. package/scripts/sw-deps.sh +4 -1
  187. package/scripts/sw-developer-simulation.sh +4 -1
  188. package/scripts/sw-discovery.sh +378 -5
  189. package/scripts/sw-doc-fleet.sh +4 -1
  190. package/scripts/sw-docs-agent.sh +3 -1
  191. package/scripts/sw-docs.sh +2 -1
  192. package/scripts/sw-doctor.sh +453 -2
  193. package/scripts/sw-dora.sh +4 -1
  194. package/scripts/sw-durable.sh +12 -7
  195. package/scripts/sw-e2e-orchestrator.sh +17 -16
  196. package/scripts/sw-eventbus.sh +13 -4
  197. package/scripts/sw-evidence.sh +364 -12
  198. package/scripts/sw-feedback.sh +550 -9
  199. package/scripts/sw-fix.sh +20 -1
  200. package/scripts/sw-fleet-discover.sh +6 -2
  201. package/scripts/sw-fleet-viz.sh +9 -4
  202. package/scripts/sw-fleet.sh +5 -1
  203. package/scripts/sw-github-app.sh +18 -4
  204. package/scripts/sw-github-checks.sh +3 -2
  205. package/scripts/sw-github-deploy.sh +3 -2
  206. package/scripts/sw-github-graphql.sh +18 -7
  207. package/scripts/sw-guild.sh +5 -1
  208. package/scripts/sw-heartbeat.sh +5 -30
  209. package/scripts/sw-hello.sh +67 -0
  210. package/scripts/sw-hygiene.sh +10 -3
  211. package/scripts/sw-incident.sh +273 -5
  212. package/scripts/sw-init.sh +18 -2
  213. package/scripts/sw-instrument.sh +10 -2
  214. package/scripts/sw-intelligence.sh +44 -7
  215. package/scripts/sw-jira.sh +5 -1
  216. package/scripts/sw-launchd.sh +2 -1
  217. package/scripts/sw-linear.sh +4 -1
  218. package/scripts/sw-logs.sh +4 -1
  219. package/scripts/sw-loop.sh +436 -1076
  220. package/scripts/sw-memory.sh +357 -3
  221. package/scripts/sw-mission-control.sh +6 -1
  222. package/scripts/sw-model-router.sh +483 -27
  223. package/scripts/sw-otel.sh +15 -4
  224. package/scripts/sw-oversight.sh +14 -5
  225. package/scripts/sw-patrol-meta.sh +334 -0
  226. package/scripts/sw-pipeline-composer.sh +7 -1
  227. package/scripts/sw-pipeline-vitals.sh +12 -6
  228. package/scripts/sw-pipeline.sh +54 -2653
  229. package/scripts/sw-pm.sh +16 -8
  230. package/scripts/sw-pr-lifecycle.sh +2 -1
  231. package/scripts/sw-predictive.sh +17 -5
  232. package/scripts/sw-prep.sh +185 -2
  233. package/scripts/sw-ps.sh +5 -25
  234. package/scripts/sw-public-dashboard.sh +17 -4
  235. package/scripts/sw-quality.sh +14 -6
  236. package/scripts/sw-reaper.sh +8 -25
  237. package/scripts/sw-recruit.sh +156 -2303
  238. package/scripts/sw-regression.sh +19 -12
  239. package/scripts/sw-release-manager.sh +3 -1
  240. package/scripts/sw-release.sh +4 -1
  241. package/scripts/sw-remote.sh +3 -1
  242. package/scripts/sw-replay.sh +7 -1
  243. package/scripts/sw-retro.sh +158 -1
  244. package/scripts/sw-review-rerun.sh +3 -1
  245. package/scripts/sw-scale.sh +14 -5
  246. package/scripts/sw-security-audit.sh +6 -1
  247. package/scripts/sw-self-optimize.sh +173 -6
  248. package/scripts/sw-session.sh +9 -3
  249. package/scripts/sw-setup.sh +3 -1
  250. package/scripts/sw-stall-detector.sh +406 -0
  251. package/scripts/sw-standup.sh +15 -7
  252. package/scripts/sw-status.sh +3 -1
  253. package/scripts/sw-strategic.sh +14 -6
  254. package/scripts/sw-stream.sh +13 -4
  255. package/scripts/sw-swarm.sh +20 -7
  256. package/scripts/sw-team-stages.sh +13 -6
  257. package/scripts/sw-templates.sh +7 -31
  258. package/scripts/sw-testgen.sh +17 -6
  259. package/scripts/sw-tmux-pipeline.sh +4 -1
  260. package/scripts/sw-tmux-role-color.sh +2 -0
  261. package/scripts/sw-tmux-status.sh +1 -1
  262. package/scripts/sw-tmux.sh +37 -1
  263. package/scripts/sw-trace.sh +3 -1
  264. package/scripts/sw-tracker-github.sh +3 -0
  265. package/scripts/sw-tracker-jira.sh +3 -0
  266. package/scripts/sw-tracker-linear.sh +3 -0
  267. package/scripts/sw-tracker.sh +3 -1
  268. package/scripts/sw-triage.sh +3 -2
  269. package/scripts/sw-upgrade.sh +3 -1
  270. package/scripts/sw-ux.sh +5 -2
  271. package/scripts/sw-webhook.sh +5 -2
  272. package/scripts/sw-widgets.sh +9 -4
  273. package/scripts/sw-worktree.sh +15 -3
  274. package/scripts/test-skill-injection.sh +1233 -0
  275. package/templates/pipelines/autonomous.json +27 -3
  276. package/templates/pipelines/cost-aware.json +34 -8
  277. package/templates/pipelines/deployed.json +12 -0
  278. package/templates/pipelines/enterprise.json +12 -0
  279. package/templates/pipelines/fast.json +6 -0
  280. package/templates/pipelines/full.json +27 -3
  281. package/templates/pipelines/hotfix.json +6 -0
  282. package/templates/pipelines/standard.json +12 -0
  283. package/templates/pipelines/tdd.json +12 -0
@@ -0,0 +1,730 @@
1
# pipeline-stages-build.sh — test_first, build, test stages
# Source from pipeline-stages.sh. Requires all pipeline globals and dependencies.
#
# Include guard: this file is meant to be sourced, so a second `source` of it
# returns immediately instead of redefining every stage function.
if [[ -n "${_PIPELINE_STAGES_BUILD_LOADED:-}" ]]; then
  return 0
fi
_PIPELINE_STAGES_BUILD_LOADED=1
5
+
6
#######################################
# Map a pipeline stage to an effort level (used when no explicit --effort
# override was given).
# Arguments: $1 - stage id (intake, plan, build, review, ...); may be empty
# Outputs:   the effort level on stdout
# Returns:   0
#######################################
_stage_effort_level() {
  local stage="${1:-}"
  local level=""

  # Use _smart_effort if available (reads from daemon-config.json → defaults).
  # Its answer is only trusted when it actually printed something; an empty
  # result would otherwise reach callers as a bare "--effort" flag.
  if type _smart_effort >/dev/null 2>&1; then
    level=$(_smart_effort "$stage") || true
    if [[ -n "$level" ]]; then
      printf '%s\n' "$level"
      return 0
    fi
  fi

  # Built-in table: fallback if compat.sh is not loaded or gave no answer.
  case "$stage" in
    intake | pr | merge)
      level="low"
      ;;
    plan | design | review | compound_quality)
      level="high"
      ;;
    *)
      # build, test, deploy, validate, monitor and any unknown stage
      level="medium"
      ;;
  esac
  printf '%s\n' "$level"
}
26
+
27
+ # Build common claude flags for pipeline stages
28
+ _pipeline_claude_flags() {
29
+ local stage="$1"
30
+ local model="$2"
31
+ local flags=("--model" "$model")
32
+
33
+ # Effort level: explicit override > per-stage default
34
+ local effort="${EFFORT_LEVEL_OVERRIDE:-$(_stage_effort_level "$stage")}"
35
+ flags+=("--effort" "$effort")
36
+
37
+ # Fallback model — only add if explicitly configured and different from primary
38
+ local _fallback="${FALLBACK_MODEL_OVERRIDE:-${PIPELINE_FALLBACK_MODEL:-}}"
39
+ if [[ -n "$_fallback" ]] && [[ "$_fallback" != "$model" ]]; then
40
+ flags+=("--fallback-model" "$_fallback")
41
+ fi
42
+
43
+ echo "${flags[*]}"
44
+ }
45
+
46
#######################################
# Write one code block extracted from the TDD model output to a file under
# PROJECT_ROOT.
# Globals:   PROJECT_ROOT (read)
# Arguments: $1 - file path relative to PROJECT_ROOT
#            $2 - file content
# Returns:   0 if the file was written; 1 if there was nothing to write, the
#            path was rejected, or the write failed
#######################################
_tdd_write_block() {
  local rel_path="${1:-}"
  local content="${2:-}"
  [[ -n "$rel_path" && -n "$content" ]] || return 1

  # The path comes from model output: never follow ".." out of the project.
  case "/${rel_path}/" in
    */../*)
      warn "  Skipped (path escapes project root): $rel_path"
      return 1
      ;;
  esac

  local out_file="${PROJECT_ROOT}/${rel_path}"
  local out_dir
  out_dir=$(dirname "$out_file")
  mkdir -p "$out_dir" 2>/dev/null || true
  # printf, not echo: content such as "-n" must be written verbatim. The
  # braces make 2>/dev/null cover a failing redirection as well.
  if { printf '%s\n' "$content" > "$out_file"; } 2>/dev/null; then
    info "  Wrote: $rel_path"
    return 0
  fi
  return 1
}

#######################################
# TDD stage: ask the model for test files derived from the plan/requirements,
# write them into the project, and commit them before implementation starts.
# Globals:   CURRENT_STAGE_ID (written)
#            ARTIFACTS_DIR, PROJECT_ROOT, GOAL, ISSUE_BODY, CLAUDE_MODEL,
#            MODEL (read)
# Returns:   1 if test generation failed (caller falls back to the standard
#            build); 0 otherwise, including when no test file was extracted
#######################################
stage_test_first() {
  CURRENT_STAGE_ID="test_first"
  info "Generating tests from requirements (TDD mode)"

  # Requirements source: plan.md, then goal.md, then goal + issue body.
  # -s rather than -f so that an empty file does not block the fallbacks.
  local plan_file="${ARTIFACTS_DIR}/plan.md"
  local goal_file="${PROJECT_ROOT}/.claude/goal.md"
  local requirements=""
  if [[ -s "$plan_file" ]]; then
    requirements=$(cat "$plan_file" 2>/dev/null || true)
  elif [[ -s "$goal_file" ]]; then
    requirements=$(cat "$goal_file" 2>/dev/null || true)
  else
    requirements="${GOAL:-}: ${ISSUE_BODY:-}"
  fi

  local tdd_prompt="You are writing tests BEFORE implementation (TDD).

Based on the following plan/requirements, generate test files that define the expected behavior. These tests should FAIL initially (since the implementation doesn't exist yet) but define the correct interface and behavior.

Requirements:
${requirements}

Instructions:
1. Create test files for each component mentioned in the plan
2. Tests should verify the PUBLIC interface and expected behavior
3. Include edge cases and error handling tests
4. Tests should be runnable with the project's test framework
5. Mark tests that need implementation with clear TODO comments
6. Do NOT write implementation code — only tests

Output format: For each test file, use a fenced code block with the file path as the language identifier (e.g. \`\`\`tests/auth.test.ts):
\`\`\`path/to/test.test.ts
// file content
\`\`\`

Create files in the appropriate project directories (e.g. tests/, __tests__/, src/**/*.test.ts) per project convention."

  local model="${CLAUDE_MODEL:-${MODEL:-sonnet}}"
  if [[ -z "$model" || "$model" == "null" ]]; then
    model="sonnet"
  fi

  local output=""
  output=$(printf '%s\n' "$tdd_prompt" | timeout 120 claude --print --model "$model" 2>/dev/null) || {
    warn "TDD test generation failed, falling back to standard build"
    return 1
  }

  # Parse output: extract fenced code blocks and write them to files.
  # A fence tag only counts as a file path when it contains "/" or "."; a bare
  # language tag such as "bash" opens a block whose content is ignored.
  local fence_re='^```([a-zA-Z0-9_/.-]+)$'
  local -a written_files=()
  local block_path="" block_content="" in_block=false
  local line="" tag="" boundary=false
  while IFS= read -r line; do
    boundary=false
    tag=""
    if [[ "$line" =~ $fence_re ]]; then
      boundary=true
      tag="${BASH_REMATCH[1]}"
    elif [[ "$line" == '```' && "$in_block" == "true" ]]; then
      boundary=true
    fi

    if [[ "$boundary" == "true" ]]; then
      # Any fence line (opening or closing) ends the block collected so far.
      if _tdd_write_block "$block_path" "$block_content"; then
        written_files+=("$block_path")
      fi
      block_path=""
      block_content=""
      in_block=false
      if [[ -n "$tag" ]]; then
        in_block=true
        if [[ "$tag" == */* || "$tag" == *.* ]]; then
          # Leading slashes are dropped: paths are always project-relative.
          while [[ "$tag" == /* ]]; do
            tag="${tag#/}"
          done
          block_path="$tag"
        fi
      fi
      continue
    fi

    if [[ "$in_block" == "true" && -n "$block_path" ]]; then
      if [[ -n "$block_content" ]]; then
        block_content+=$'\n'
      fi
      block_content+="$line"
    fi
  done <<< "$output"

  # Flush last block if unclosed
  if _tdd_write_block "$block_path" "$block_content"; then
    written_files+=("$block_path")
  fi

  if [[ "${#written_files[@]}" -gt 0 ]]; then
    # Freshly generated test files are untracked, so `git diff` cannot see
    # them; `git status` with the written paths can. All git calls run in
    # PROJECT_ROOT, and only the files written by this stage are staged.
    local changed=""
    changed=$(git -C "$PROJECT_ROOT" status --porcelain --untracked-files=all \
      -- "${written_files[@]}" 2>/dev/null) || changed=""
    if grep -qE 'test|spec' <<< "$changed"; then
      git -C "$PROJECT_ROOT" add -- "${written_files[@]}" 2>/dev/null || true
      git -C "$PROJECT_ROOT" commit -m "test: TDD - define expected behavior before implementation" 2>/dev/null || true
      emit_event "tdd.tests_generated" "{\"stage\":\"test_first\"}"
    fi
    success "TDD tests generated"
  else
    warn "No test files extracted from TDD output — check format"
  fi

  return 0
}
155
+
156
+ stage_build() {
157
+ CURRENT_STAGE_ID="build"
158
+ # Consume retry context if this is a retry attempt
159
+ local _retry_ctx="${ARTIFACTS_DIR}/.retry-context-build.md"
160
+ if [[ -s "$_retry_ctx" ]]; then
161
+ local _build_retry_hints
162
+ _build_retry_hints=$(cat "$_retry_ctx" 2>/dev/null || true)
163
+ rm -f "$_retry_ctx"
164
+ fi
165
+ local plan_file="$ARTIFACTS_DIR/plan.md"
166
+ local design_file="$ARTIFACTS_DIR/design.md"
167
+ local dod_file="$ARTIFACTS_DIR/dod.md"
168
+ local loop_args=()
169
+
170
+ # Memory integration — inject context if memory system available
171
+ local memory_context=""
172
+ if type intelligence_search_memory >/dev/null 2>&1; then
173
+ local mem_dir="${HOME}/.shipwright/memory"
174
+ memory_context=$(intelligence_search_memory "build stage for: ${GOAL:-}" "$mem_dir" 5 2>/dev/null) || true
175
+ fi
176
+ if [[ -z "$memory_context" ]] && [[ -x "$SCRIPT_DIR/sw-memory.sh" ]]; then
177
+ memory_context=$(bash "$SCRIPT_DIR/sw-memory.sh" inject "build" 2>/dev/null) || true
178
+ fi
179
+
180
+ # Build enriched goal with compact context (avoids prompt bloat)
181
+ local enriched_goal
182
+ enriched_goal=$(_pipeline_compact_goal "$GOAL" "$plan_file" "$design_file")
183
+
184
+ # Dark factory: inject spec context into build goal
185
+ local spec_file="${ARTIFACTS_DIR}/spec.json"
186
+ if [[ -f "$spec_file" ]] && type spec_to_prompt >/dev/null 2>&1; then
187
+ local spec_prompt
188
+ spec_prompt=$(spec_to_prompt "$spec_file" 2>/dev/null || true)
189
+ if [[ -n "$spec_prompt" ]]; then
190
+ enriched_goal="${enriched_goal}
191
+
192
+ ${spec_prompt}"
193
+ fi
194
+ fi
195
+
196
+ # Dark factory: inject formal spec constraints into build goal
197
+ if type formal_spec_inject >/dev/null 2>&1; then
198
+ local _formal_context
199
+ _formal_context=$(formal_spec_inject "${PROJECT_ROOT:-.}" 2>/dev/null || true)
200
+ if [[ -n "$_formal_context" ]]; then
201
+ enriched_goal="${enriched_goal}
202
+
203
+ ${_formal_context}"
204
+ fi
205
+ fi
206
+
207
+ # TDD: when test_first ran, tell build to make existing tests pass
208
+ if [[ "${TDD_ENABLED:-false}" == "true" || "${PIPELINE_TDD:-}" == "true" ]]; then
209
+ enriched_goal="${enriched_goal}
210
+
211
+ IMPORTANT (TDD mode): Test files already exist and define the expected behavior. Write implementation code to make ALL tests pass. Do not delete or modify the test files."
212
+ fi
213
+
214
+ # Inject memory context
215
+ if [[ -n "$memory_context" ]]; then
216
+ enriched_goal="${enriched_goal}
217
+
218
+ Historical context (lessons from previous pipelines):
219
+ ${memory_context}"
220
+ fi
221
+
222
+ # Inject cross-pipeline discoveries for build stage
223
+ if [[ -x "$SCRIPT_DIR/sw-discovery.sh" ]]; then
224
+ local build_discoveries
225
+ build_discoveries=$("$SCRIPT_DIR/sw-discovery.sh" inject "src/*,*.ts,*.tsx,*.js" 2>/dev/null | head -20 || true)
226
+ if [[ -n "$build_discoveries" ]]; then
227
+ enriched_goal="${enriched_goal}
228
+
229
+ Discoveries from other pipelines:
230
+ ${build_discoveries}"
231
+ fi
232
+ fi
233
+
234
+ # Add task list context
235
+ if [[ -s "$TASKS_FILE" ]]; then
236
+ enriched_goal="${enriched_goal}
237
+
238
+ Task tracking (check off items as you complete them):
239
+ $(cat "$TASKS_FILE")"
240
+ fi
241
+
242
+ # Inject file hotspots from GitHub intelligence
243
+ if [[ "${NO_GITHUB:-}" != "true" ]] && type gh_file_change_frequency >/dev/null 2>&1; then
244
+ local build_hotspots
245
+ build_hotspots=$(gh_file_change_frequency 2>/dev/null | head -5 || true)
246
+ if [[ -n "$build_hotspots" ]]; then
247
+ enriched_goal="${enriched_goal}
248
+
249
+ File hotspots (most frequently changed — review these carefully):
250
+ ${build_hotspots}"
251
+ fi
252
+ fi
253
+
254
+ # Inject security alerts context
255
+ if [[ "${NO_GITHUB:-}" != "true" ]] && type gh_security_alerts >/dev/null 2>&1; then
256
+ local build_alerts
257
+ build_alerts=$(gh_security_alerts 2>/dev/null | head -3 || true)
258
+ if [[ -n "$build_alerts" ]]; then
259
+ enriched_goal="${enriched_goal}
260
+
261
+ Active security alerts (do not introduce new vulnerabilities):
262
+ ${build_alerts}"
263
+ fi
264
+ fi
265
+
266
+ # Inject coverage baseline
267
+ local repo_hash_build
268
+ repo_hash_build=$(echo -n "$PROJECT_ROOT" | shasum -a 256 2>/dev/null | cut -c1-12 || echo "unknown")
269
+ local coverage_file_build="${HOME}/.shipwright/baselines/${repo_hash_build}/coverage.json"
270
+ if [[ -f "$coverage_file_build" ]]; then
271
+ local coverage_baseline
272
+ coverage_baseline=$(jq -r '.coverage_percent // empty' "$coverage_file_build" 2>/dev/null || true)
273
+ if [[ -n "$coverage_baseline" ]]; then
274
+ enriched_goal="${enriched_goal}
275
+
276
+ Coverage baseline: ${coverage_baseline}% — do not decrease coverage."
277
+ fi
278
+ fi
279
+
280
+ # Predictive: inject prevention hints when risk/memory patterns suggest build-stage failures
281
+ if [[ -x "$SCRIPT_DIR/sw-predictive.sh" ]]; then
282
+ local issue_json_build="{}"
283
+ [[ -n "${ISSUE_NUMBER:-}" ]] && issue_json_build=$(jq -n --arg title "${GOAL:-}" --arg num "${ISSUE_NUMBER:-}" '{title: $title, number: $num}')
284
+ local prevention_text
285
+ prevention_text=$(bash "$SCRIPT_DIR/sw-predictive.sh" inject-prevention "build" "$issue_json_build" 2>/dev/null || true)
286
+ if [[ -n "$prevention_text" ]]; then
287
+ enriched_goal="${enriched_goal}
288
+
289
+ ${prevention_text}"
290
+ fi
291
+ fi
292
+
293
+ # Inject skill prompts for build stage
294
+ local _skill_prompts=""
295
+ if type skill_load_from_plan >/dev/null 2>&1; then
296
+ _skill_prompts=$(skill_load_from_plan "build" 2>/dev/null || true)
297
+ elif type skill_select_adaptive >/dev/null 2>&1; then
298
+ local _skill_files
299
+ _skill_files=$(skill_select_adaptive "${INTELLIGENCE_ISSUE_TYPE:-backend}" "build" "${ISSUE_BODY:-}" "${INTELLIGENCE_COMPLEXITY:-5}" 2>/dev/null || true)
300
+ if [[ -n "$_skill_files" ]]; then
301
+ _skill_prompts=$(while IFS= read -r _path; do
302
+ [[ -z "$_path" || ! -f "$_path" ]] && continue
303
+ cat "$_path" 2>/dev/null
304
+ done <<< "$_skill_files")
305
+ fi
306
+ elif type skill_load_prompts >/dev/null 2>&1; then
307
+ _skill_prompts=$(skill_load_prompts "${INTELLIGENCE_ISSUE_TYPE:-backend}" "build" 2>/dev/null || true)
308
+ fi
309
+ if [[ -n "$_skill_prompts" ]]; then
310
+ _skill_prompts=$(prune_context_section "skills" "$_skill_prompts" 8000)
311
+ enriched_goal="${enriched_goal}
312
+
313
+ ## Skill Guidance (${INTELLIGENCE_ISSUE_TYPE:-backend} issue, AI-selected)
314
+ ${_skill_prompts}
315
+ "
316
+ fi
317
+
318
+ loop_args+=("$enriched_goal")
319
+
320
+ # Build loop args from pipeline config + CLI overrides
321
+ CURRENT_STAGE_ID="build"
322
+
323
+ local test_cmd="${TEST_CMD}"
324
+ if [[ -z "$test_cmd" ]]; then
325
+ test_cmd=$(jq -r --arg id "build" '(.stages[] | select(.id == $id) | .config.test_cmd) // .defaults.test_cmd // ""' "$PIPELINE_CONFIG" 2>/dev/null) || true
326
+ [[ "$test_cmd" == "null" ]] && test_cmd=""
327
+ fi
328
+ # Auto-detect if still empty — prefer fast variants for build iterations
329
+ if [[ -z "$test_cmd" ]]; then
330
+ # Check for fast test scripts first (build loop runs tests every iteration)
331
+ if [[ -f "$PROJECT_ROOT/package.json" ]]; then
332
+ local _fast_test=""
333
+ _fast_test=$(jq -r '.scripts["test:fast"] // .scripts["test:smoke"] // ""' "$PROJECT_ROOT/package.json" 2>/dev/null) || true
334
+ if [[ -n "$_fast_test" && "$_fast_test" != "null" ]]; then
335
+ local _pm
336
+ _pm=$(detect_package_manager 2>/dev/null || echo "npm")
337
+ test_cmd="$_pm run test:$(jq -r 'if .scripts["test:fast"] then "fast" else "smoke" end' "$PROJECT_ROOT/package.json" 2>/dev/null)"
338
+ info "Using fast test command for build iterations: ${DIM}$test_cmd${RESET}"
339
+ fi
340
+ fi
341
+ # Fall back to full test command
342
+ if [[ -z "$test_cmd" ]]; then
343
+ test_cmd=$(detect_test_cmd)
344
+ fi
345
+ fi
346
+
347
+ # Discover additional test commands (subdirectories, extra scripts)
348
+ local additional_cmds=()
349
+ if type detect_test_commands >/dev/null 2>&1; then
350
+ while IFS= read -r _cmd; do
351
+ [[ -n "$_cmd" ]] && additional_cmds+=("$_cmd")
352
+ done < <(detect_test_commands 2>/dev/null | tail -n +2)
353
+ fi
354
+
355
+ local max_iter
356
+ max_iter=$(jq -r --arg id "build" '(.stages[] | select(.id == $id) | .config.max_iterations) // 20' "$PIPELINE_CONFIG" 2>/dev/null) || true
357
+ [[ -z "$max_iter" || "$max_iter" == "null" ]] && max_iter=20
358
+ # CLI --max-iterations override (from CI strategy engine)
359
+ [[ -n "${MAX_ITERATIONS_OVERRIDE:-}" ]] && max_iter="$MAX_ITERATIONS_OVERRIDE"
360
+
361
+ local agents="${AGENTS}"
362
+ if [[ -z "$agents" ]]; then
363
+ agents=$(jq -r --arg id "build" '(.stages[] | select(.id == $id) | .config.agents) // .defaults.agents // 1' "$PIPELINE_CONFIG" 2>/dev/null) || true
364
+ [[ -z "$agents" || "$agents" == "null" ]] && agents=1
365
+ fi
366
+
367
+ # Intelligence: suggest parallelism if design indicates independent work
368
+ if [[ "${agents:-1}" -le 1 ]] && [[ -s "$ARTIFACTS_DIR/design.md" ]]; then
369
+ local design_lower
370
+ design_lower=$(tr '[:upper:]' '[:lower:]' < "$ARTIFACTS_DIR/design.md" 2>/dev/null || true)
371
+ if echo "$design_lower" | grep -qE 'independent (files|modules|components|services)|separate (modules|packages|directories)|parallel|no shared state'; then
372
+ info "Design mentions independent modules — consider --agents 2 for parallelism"
373
+ emit_event "build.parallelism_suggested" "issue=${ISSUE_NUMBER:-0}" "current_agents=$agents"
374
+ fi
375
+ fi
376
+
377
+ local audit
378
+ audit=$(jq -r --arg id "build" '(.stages[] | select(.id == $id) | .config.audit) // false' "$PIPELINE_CONFIG" 2>/dev/null) || true
379
+ local quality
380
+ quality=$(jq -r --arg id "build" '(.stages[] | select(.id == $id) | .config.quality_gates) // false' "$PIPELINE_CONFIG" 2>/dev/null) || true
381
+
382
+ local build_model="${MODEL}"
383
+ if [[ -z "$build_model" ]]; then
384
+ build_model=$(jq -r '.defaults.model // "opus"' "$PIPELINE_CONFIG" 2>/dev/null) || true
385
+ [[ -z "$build_model" || "$build_model" == "null" ]] && build_model="opus"
386
+ fi
387
+ # Intelligence model routing (when no explicit CLI --model override)
388
+ if [[ -z "$MODEL" && -n "${CLAUDE_MODEL:-}" ]]; then
389
+ build_model="$CLAUDE_MODEL"
390
+ fi
391
+
392
+ # Recruit-powered model selection (when no explicit override)
393
+ if [[ -z "$MODEL" ]] && [[ -x "$SCRIPT_DIR/sw-recruit.sh" ]]; then
394
+ local _recruit_goal="${GOAL:-}"
395
+ if [[ -n "$_recruit_goal" ]]; then
396
+ local _recruit_match
397
+ _recruit_match=$(bash "$SCRIPT_DIR/sw-recruit.sh" match --json "$_recruit_goal" 2>/dev/null) || true
398
+ if [[ -n "$_recruit_match" ]]; then
399
+ local _recruit_model
400
+ _recruit_model=$(echo "$_recruit_match" | jq -r '.model // ""' 2>/dev/null) || true
401
+ if [[ -n "$_recruit_model" && "$_recruit_model" != "null" && "$_recruit_model" != "" ]]; then
402
+ info "Recruit recommends model: ${CYAN}${_recruit_model}${RESET} for this task"
403
+ build_model="$_recruit_model"
404
+ fi
405
+ fi
406
+ fi
407
+ fi
408
+
409
+ [[ -n "$test_cmd" && "$test_cmd" != "null" ]] && loop_args+=(--test-cmd "$test_cmd")
410
+ for _extra_tc in "${additional_cmds[@]+"${additional_cmds[@]}"}"; do
411
+ [[ -n "$_extra_tc" ]] && loop_args+=(--additional-test-cmds "$_extra_tc")
412
+ done
413
+ loop_args+=(--max-iterations "$max_iter")
414
+ loop_args+=(--model "$build_model")
415
+ [[ "$agents" -gt 1 ]] 2>/dev/null && loop_args+=(--agents "$agents")
416
+
417
+ # Quality gates: always enabled in CI, otherwise from template config
418
+ if [[ "${CI_MODE:-false}" == "true" ]]; then
419
+ loop_args+=(--audit --audit-agent --quality-gates)
420
+ else
421
+ [[ "$audit" == "true" ]] && loop_args+=(--audit --audit-agent)
422
+ [[ "$quality" == "true" ]] && loop_args+=(--quality-gates)
423
+ fi
424
+
425
+ # Session restart capability
426
+ [[ -n "${MAX_RESTARTS_OVERRIDE:-}" ]] && loop_args+=(--max-restarts "$MAX_RESTARTS_OVERRIDE")
427
+ # Fast test mode
428
+ [[ -n "${FAST_TEST_CMD_OVERRIDE:-}" ]] && loop_args+=(--fast-test-cmd "$FAST_TEST_CMD_OVERRIDE")
429
+
430
+ # Effort level and fallback model
431
+ [[ -n "${EFFORT_LEVEL_OVERRIDE:-}" ]] && loop_args+=(--effort "$EFFORT_LEVEL_OVERRIDE")
432
+ [[ -n "${FALLBACK_MODEL_OVERRIDE:-}" ]] && loop_args+=(--fallback-model "$FALLBACK_MODEL_OVERRIDE")
433
+ [[ -z "${FALLBACK_MODEL_OVERRIDE:-}" && -n "${PIPELINE_FALLBACK_MODEL:-}" ]] && loop_args+=(--fallback-model "$PIPELINE_FALLBACK_MODEL")
434
+
435
+ # Definition of Done: use plan-extracted DoD if available
436
+ [[ -s "$dod_file" ]] && loop_args+=(--definition-of-done "$dod_file")
437
+
438
+ # Checkpoint resume: when pipeline resumed from build-stage checkpoint, pass --resume to loop
439
+ if [[ "${RESUME_FROM_CHECKPOINT:-false}" == "true" && "${checkpoint_stage:-}" == "build" ]]; then
440
+ loop_args+=(--resume)
441
+ fi
442
+
443
+ # Skip permissions — pipeline runs headlessly (claude -p) and has no terminal
444
+ # for interactive permission prompts. Without this flag, agents can't write files.
445
+ loop_args+=(--skip-permissions)
446
+
447
+ info "Starting build loop: ${DIM}shipwright loop${RESET} (max ${max_iter} iterations, ${agents} agent(s))"
448
+
449
+ # Post build start to GitHub
450
+ if [[ -n "$ISSUE_NUMBER" ]]; then
451
+ gh_comment_issue "$ISSUE_NUMBER" "🔨 **Build started** — \`shipwright loop\` with ${max_iter} max iterations, ${agents} agent(s), model: ${build_model}"
452
+ fi
453
+
454
+ local _token_log="${ARTIFACTS_DIR}/.claude-tokens-build.log"
455
+ export PIPELINE_JOB_ID="${PIPELINE_NAME:-pipeline-$$}"
456
+ sw loop "${loop_args[@]}" < /dev/null 2>"$_token_log" || {
457
+ local _loop_exit=$?
458
+ parse_claude_tokens "$_token_log"
459
+
460
+ # Detect context exhaustion from progress file
461
+ local _progress_file="${PWD}/.claude/loop-logs/progress.md"
462
+ if [[ -f "$_progress_file" ]]; then
463
+ local _prog_tests
464
+ _prog_tests=$(grep -oE 'Tests passing: (true|false)' "$_progress_file" 2>/dev/null | awk '{print $NF}' || echo "unknown")
465
+ if [[ "$_prog_tests" != "true" ]]; then
466
+ warn "Build loop exhausted with failing tests (context exhaustion)"
467
+ emit_event "pipeline.context_exhaustion" "issue=${ISSUE_NUMBER:-0}" "stage=build"
468
+ # Write flag for daemon retry logic
469
+ mkdir -p "$ARTIFACTS_DIR" 2>/dev/null || true
470
+ echo "context_exhaustion" > "$ARTIFACTS_DIR/failure-reason.txt" 2>/dev/null || true
471
+ fi
472
+ fi
473
+
474
+ error "Build loop failed"
475
+ return 1
476
+ }
477
+ parse_claude_tokens "$_token_log"
478
+
479
+ # Read accumulated token counts from build loop (written by sw-loop.sh)
480
+ local _loop_token_file="${PROJECT_ROOT}/.claude/loop-logs/loop-tokens.json"
481
+ if [[ -f "$_loop_token_file" ]] && command -v jq >/dev/null 2>&1; then
482
+ local _loop_in _loop_out _loop_cost
483
+ _loop_in=$(jq -r '.input_tokens // 0' "$_loop_token_file" 2>/dev/null || echo "0")
484
+ _loop_out=$(jq -r '.output_tokens // 0' "$_loop_token_file" 2>/dev/null || echo "0")
485
+ _loop_cost=$(jq -r '.cost_usd // 0' "$_loop_token_file" 2>/dev/null || echo "0")
486
+ TOTAL_INPUT_TOKENS=$(( TOTAL_INPUT_TOKENS + ${_loop_in:-0} ))
487
+ TOTAL_OUTPUT_TOKENS=$(( TOTAL_OUTPUT_TOKENS + ${_loop_out:-0} ))
488
+ if [[ -n "$_loop_cost" && "$_loop_cost" != "0" && "$_loop_cost" != "null" ]]; then
489
+ TOTAL_COST_USD="${_loop_cost}"
490
+ fi
491
+ if [[ "${_loop_in:-0}" -gt 0 || "${_loop_out:-0}" -gt 0 ]]; then
492
+ info "Build loop tokens: in=${_loop_in} out=${_loop_out} cost=\$${_loop_cost:-0}"
493
+ fi
494
+ fi
495
+
496
+ # Count commits made during build
497
+ local commit_count
498
+ commit_count=$(_safe_base_log --oneline | wc -l | xargs)
499
+ info "Build produced ${BOLD}$commit_count${RESET} commit(s)"
500
+
501
+ # Commit quality evaluation when intelligence is enabled
502
+ if type intelligence_search_memory >/dev/null 2>&1 && command -v claude >/dev/null 2>&1 && [[ "${commit_count:-0}" -gt 0 ]]; then
503
+ local commit_msgs
504
+ commit_msgs=$(_safe_base_log --format="%s" | head -20)
505
+ local quality_score
506
+ quality_score=$(claude --print --output-format text -p "Rate the quality of these git commit messages on a scale of 0-100. Consider: focus (one thing per commit), clarity (describes the why), atomicity (small logical units). Reply with ONLY a number 0-100.
507
+
508
+ Commit messages:
509
+ ${commit_msgs}" --model "$(_smart_model commit_quality haiku)" < /dev/null 2>/dev/null || true)
510
+ quality_score=$(echo "$quality_score" | grep -oE '^[0-9]+' | head -1 || true)
511
+ if [[ -n "$quality_score" ]]; then
512
+ emit_event "build.commit_quality" \
513
+ "issue=${ISSUE_NUMBER:-0}" \
514
+ "score=$quality_score" \
515
+ "commit_count=$commit_count"
516
+ if [[ "$quality_score" -lt 40 ]] 2>/dev/null; then
517
+ warn "Commit message quality low (score: ${quality_score}/100)"
518
+ else
519
+ info "Commit quality score: ${quality_score}/100"
520
+ fi
521
+ fi
522
+ fi
523
+
524
+ # ── Scope Enforcement: Compare planned vs actual files (best-effort) ──
525
+ if type generate_scope_report >/dev/null 2>&1; then
526
+ local plan_file="$ARTIFACTS_DIR/plan.md"
527
+ if [[ -f "$plan_file" ]]; then
528
+ info "Analyzing scope: comparing planned vs actual files..."
529
+ # Run in subshell to prevent set -e propagation
530
+ (generate_scope_report "$plan_file" "origin/${BASE_BRANCH:-main}" "$ARTIFACTS_DIR" 2>/dev/null) || true
531
+ if [[ -f "$ARTIFACTS_DIR/scope-report.json" ]]; then
532
+ local unplanned_count
533
+ unplanned_count=$(jq '.unplanned_files | length' "$ARTIFACTS_DIR/scope-report.json" 2>/dev/null || echo "0")
534
+ if [[ "$unplanned_count" -gt 0 ]]; then
535
+ warn "Scope analysis: $unplanned_count unplanned file(s) changed (see scope-report.json)"
536
+ else
537
+ info "Scope analysis: all changes are planned"
538
+ fi
539
+ fi
540
+ fi
541
+ fi
542
+
543
+ log_stage "build" "Build loop completed ($commit_count commits)"
544
+ }
545
+
546
#######################################
# Pipeline "test" stage: resolve a test command, run it, and report results.
#
# Resolution order for the command: $TEST_CMD, then the "test" stage config /
# defaults in $PIPELINE_CONFIG, then detect_test_cmd. With no command the
# stage is skipped successfully.
#
# Globals (read):    TEST_CMD, PIPELINE_CONFIG, ARTIFACTS_DIR, ISSUE_NUMBER,
#                    PROJECT_ROOT, DIM, RESET
# Globals (written): CURRENT_STAGE_ID, HOLDOUT_DIR, CAUSAL_GRAPH_FILE
# Files written:     $ARTIFACTS_DIR/test-results.log (combined stdout+stderr),
#                    $ARTIFACTS_DIR/test-coverage.json ({"coverage_pct":N}),
#                    $ARTIFACTS_DIR/mutations/ (when mutation helpers exist)
# Optional helpers:  holdout_*, mutation_*, causal_* are used only when
#                    defined; their failures never fail the stage.
# Returns: 0 when skipped or when tests pass and coverage meets the minimum;
#          1 when tests fail or measured coverage is below the minimum.
#######################################
stage_test() {
    CURRENT_STAGE_ID="test"
    # ":-" keeps this safe when the caller runs with `set -u`.
    local test_cmd="${TEST_CMD:-}"
    if [[ -z "$test_cmd" ]]; then
        test_cmd=$(jq -r --arg id "test" '(.stages[] | select(.id == $id) | .config.test_cmd) // .defaults.test_cmd // ""' "$PIPELINE_CONFIG" 2>/dev/null) || true
        [[ -z "$test_cmd" || "$test_cmd" == "null" ]] && test_cmd=""
    fi
    # Auto-detect
    if [[ -z "$test_cmd" ]]; then
        test_cmd=$(detect_test_cmd)
    fi
    if [[ -z "$test_cmd" ]]; then
        warn "No test command found — skipping test stage"
        return 0
    fi

    local coverage_min
    coverage_min=$(jq -r --arg id "test" '(.stages[] | select(.id == $id) | .config.coverage_min) // 0' "$PIPELINE_CONFIG" 2>/dev/null) || true
    [[ -z "$coverage_min" || "$coverage_min" == "null" ]] && coverage_min=0

    local test_log="$ARTIFACTS_DIR/test-results.log"

    info "Running tests: ${DIM}$test_cmd${RESET}"
    local test_exit=0
    # Capture the exit status without tripping errexit in the caller.
    bash -c "$test_cmd" > "$test_log" 2>&1 || test_exit=$?

    if [[ "$test_exit" -eq 0 ]]; then
        success "Tests passed"

        # Dark factory: holdout validation — run sealed tests the agent never saw
        if type holdout_validate >/dev/null 2>&1; then
            # Intentionally global (not local); presumably read by the
            # holdout_* helpers — TODO confirm.
            HOLDOUT_DIR="${ARTIFACTS_DIR}/test-holdout"
            if [[ -f "${HOLDOUT_DIR}/manifest.json" ]]; then
                if holdout_validate "." "$test_cmd" 2>/dev/null; then
                    success "Holdout validation passed (agent code works on unseen tests)"
                else
                    warn "Holdout validation failed — agent may have overfit to visible tests"
                    holdout_reveal 2>/dev/null || true
                    # Don't fail the stage — holdout is advisory for now
                    emit_event "test.holdout_failed" \
                        "issue=${ISSUE_NUMBER:-0}" \
                        "stage=test" 2>/dev/null || true
                fi
            fi
        fi

        # Dark factory: mutation testing — verify test effectiveness
        if type mutation_generate >/dev/null 2>&1; then
            local _mut_dir="${ARTIFACTS_DIR}/mutations"
            mkdir -p "$_mut_dir" 2>/dev/null || true
            local _changed_files
            # Only files touched by the latest commit, capped at 10.
            _changed_files=$(git diff --name-only HEAD~1 2>/dev/null | head -10 || true)
            local _mut_total=0 _mut_file _mc
            while IFS= read -r _mut_file; do
                [[ -z "$_mut_file" || ! -f "$_mut_file" ]] && continue
                _mc=$(mutation_generate "$_mut_file" "$_mut_dir" 2>/dev/null || echo "0")
                # Anything other than a bare non-negative integer would break
                # the arithmetic below, so count it as zero mutants.
                [[ "$_mc" =~ ^[0-9]+$ ]] || _mc=0
                # 10# forces base 10 so a value like "08" is not read as octal.
                _mut_total=$((_mut_total + 10#$_mc))
            done <<< "$_changed_files"
            if [[ "$_mut_total" -gt 0 ]]; then
                info "Mutation testing: $_mut_total mutants generated, executing..."
                local _mut_result
                _mut_result=$(mutation_execute "$_mut_dir" "$test_cmd" "${PROJECT_ROOT:-.}" 2>/dev/null || echo '{}')
                local _mut_killed _mut_survived
                _mut_killed=$(echo "$_mut_result" | jq -r '.killed // 0' 2>/dev/null || echo "0")
                _mut_survived=$(echo "$_mut_result" | jq -r '.survived // 0' 2>/dev/null || echo "0")
                # Guard the numeric comparison against unexpected jq output.
                [[ "$_mut_killed" =~ ^[0-9]+$ ]] || _mut_killed=0
                [[ "$_mut_survived" =~ ^[0-9]+$ ]] || _mut_survived=0
                mutation_report "$_mut_dir" "${ARTIFACTS_DIR}/mutation-report.json" >/dev/null 2>&1 || true
                if [[ "$_mut_survived" -gt 0 ]]; then
                    warn "Mutation testing: $_mut_killed killed, $_mut_survived survived (weak tests detected)"
                else
                    success "Mutation testing: $_mut_killed/$_mut_total mutants killed"
                fi
                emit_event "test.mutation_complete" \
                    "issue=${ISSUE_NUMBER:-0}" \
                    "killed=$_mut_killed" \
                    "survived=$_mut_survived" \
                    "total=$_mut_total" 2>/dev/null || true
            fi
        fi
    else
        error "Tests failed (exit code: $test_exit)"

        # Dark factory: build causal graph and trace failure chain
        if type causal_build_graph >/dev/null 2>&1; then
            # Intentionally global; presumably read by the causal_* helpers —
            # TODO confirm.
            CAUSAL_GRAPH_FILE="${ARTIFACTS_DIR}/causal-graph.json"
            causal_build_graph "." 2>/dev/null || true
            # Trace the failure to root cause
            if type causal_trace_failure >/dev/null 2>&1; then
                local _failing_tests
                # NOTE(review): `grep -l` prints the matching FILE name, so this
                # yields the log path itself (or nothing), not test names —
                # confirm that is what causal_trace_failure expects.
                _failing_tests=$(grep -l 'FAIL\|Error\|assert' "$test_log" 2>/dev/null | head -1 || true)
                if [[ -n "$_failing_tests" ]]; then
                    causal_trace_failure "$_failing_tests" "." 2>/dev/null || true
                fi
            fi
            # Suggest fix based on causal trace
            if type causal_suggest_fix >/dev/null 2>&1; then
                local _trace_file="${CAUSAL_GRAPH_FILE%.json}-trace.json"
                if [[ -f "$_trace_file" ]]; then
                    causal_suggest_fix "$_trace_file" 2>/dev/null || true
                fi
            fi
        fi

        # Extract most relevant error section (assertion failures, stack traces)
        local relevant_output=""
        relevant_output=$(grep -A5 -E 'FAIL|AssertionError|Expected.*but.*got|Error:|panic:|assert' "$test_log" 2>/dev/null | tail -40 || true)
        if [[ -z "$relevant_output" ]]; then
            relevant_output=$(tail -40 "$test_log")
        fi
        # printf rather than echo: log text may start with "-n" or hold backslashes.
        printf '%s\n' "$relevant_output"

        # Post failure to GitHub with more context
        if [[ -n "${ISSUE_NUMBER:-}" ]]; then
            local log_lines
            log_lines=$(wc -l < "$test_log" 2>/dev/null || true)
            # BSD wc pads its output with spaces; strip them so the comment
            # text and the numeric comparison both see a clean number.
            log_lines="${log_lines//[[:space:]]/}"
            log_lines="${log_lines:-0}"
            local log_excerpt
            if [[ "$log_lines" -lt 60 ]]; then
                log_excerpt="$(cat "$test_log" 2>/dev/null || true)"
            else
                log_excerpt="$(head -20 "$test_log" 2>/dev/null || true)
... (${log_lines} lines total, showing head + tail) ...
$(tail -30 "$test_log" 2>/dev/null || true)"
            fi
            gh_comment_issue "$ISSUE_NUMBER" "❌ **Tests failed** (exit code: $test_exit, ${log_lines} lines)
\`\`\`
${log_excerpt}
\`\`\`"
        fi
        return 1
    fi

    # Coverage check — only enforce when coverage data is actually detected
    local coverage=""
    if [[ "$coverage_min" -gt 0 ]] 2>/dev/null; then
        coverage=$(parse_coverage_from_output "$test_log")
        if [[ -z "$coverage" ]]; then
            # No coverage data found — skip enforcement (project may not have coverage tooling)
            info "No coverage data detected — skipping coverage check (min: ${coverage_min}%)"
        elif awk -v cov="$coverage" -v min="$coverage_min" 'BEGIN{exit !(cov < min)}' 2>/dev/null; then
            warn "Coverage ${coverage}% below minimum ${coverage_min}%"
            return 1
        else
            info "Coverage: ${coverage}% (min: ${coverage_min}%)"
        fi
    fi

    # Emit test.completed with coverage for adaptive learning
    if [[ -n "$coverage" ]]; then
        emit_event "test.completed" \
            "issue=${ISSUE_NUMBER:-0}" \
            "stage=test" \
            "coverage=$coverage"
    fi

    # Post test results to GitHub
    if [[ -n "${ISSUE_NUMBER:-}" ]]; then
        local test_summary
        # Strip ANSI colour codes. The ESC byte is produced by bash ($'...')
        # because "\x1b" inside a sed pattern is a GNU-only extension.
        test_summary=$(tail -10 "$test_log" | sed $'s/\x1b\\[[0-9;]*m//g')
        local cov_line=""
        [[ -n "$coverage" ]] && cov_line="
**Coverage:** ${coverage}%"
        gh_comment_issue "$ISSUE_NUMBER" "✅ **Tests passed**${cov_line}
<details>
<summary>Test output</summary>

\`\`\`
${test_summary}
\`\`\`
</details>"
    fi

    # Write coverage summary for pre-deploy gate
    local _cov_pct=0
    local _cov_num_re='^([0-9]+)(\.[0-9]+)?$'
    if [[ "$coverage" =~ $_cov_num_re ]]; then
        # Prefer the figure that was just enforced (integer part only) so the
        # summary cannot disagree with the check above.
        _cov_pct="${BASH_REMATCH[1]}"
    elif [[ -f "$test_log" ]]; then
        # Fallback heuristic: first "NN%" token anywhere in the log.
        _cov_pct=$(grep -oE '[0-9]+%' "$test_log" 2>/dev/null | head -1 | tr -d '%' || true)
    fi
    [[ "$_cov_pct" =~ ^[0-9]+$ ]] || _cov_pct=0
    # Force base 10: printf '%d' rejects "08"/"09" as invalid octal, which
    # previously caused the summary file to be silently dropped.
    _cov_pct=$((10#$_cov_pct))

    # Write via a temp file + mv so readers never observe a partial file.
    local _cov_tmp=""
    if _cov_tmp=$(mktemp "${ARTIFACTS_DIR}/test-coverage.json.tmp.XXXXXX" 2>/dev/null); then
        if ! { printf '{"coverage_pct":%d}' "$_cov_pct" > "$_cov_tmp" \
            && mv -- "$_cov_tmp" "$ARTIFACTS_DIR/test-coverage.json"; }; then
            rm -f -- "$_cov_tmp"
        fi
    else
        warn "Could not create temp file for coverage summary in ${ARTIFACTS_DIR}"
    fi

    log_stage "test" "Tests passed${coverage:+ (coverage: ${coverage}%)}"
}
730
+