shipwright-cli 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. package/.claude/agents/code-reviewer.md +2 -0
  2. package/.claude/agents/devops-engineer.md +2 -0
  3. package/.claude/agents/doc-fleet-agent.md +2 -0
  4. package/.claude/agents/pipeline-agent.md +2 -0
  5. package/.claude/agents/shell-script-specialist.md +2 -0
  6. package/.claude/agents/test-specialist.md +2 -0
  7. package/.claude/hooks/agent-crash-capture.sh +32 -0
  8. package/.claude/hooks/post-tool-use.sh +3 -2
  9. package/.claude/hooks/pre-tool-use.sh +35 -3
  10. package/README.md +22 -8
  11. package/claude-code/hooks/config-change.sh +18 -0
  12. package/claude-code/hooks/instructions-reloaded.sh +7 -0
  13. package/claude-code/hooks/worktree-create.sh +25 -0
  14. package/claude-code/hooks/worktree-remove.sh +20 -0
  15. package/config/code-constitution.json +130 -0
  16. package/config/defaults.json +25 -2
  17. package/config/policy.json +1 -1
  18. package/dashboard/middleware/auth.ts +134 -0
  19. package/dashboard/middleware/constants.ts +21 -0
  20. package/dashboard/public/index.html +8 -6
  21. package/dashboard/public/styles.css +176 -97
  22. package/dashboard/routes/auth.ts +38 -0
  23. package/dashboard/server.ts +117 -25
  24. package/dashboard/services/config.ts +26 -0
  25. package/dashboard/services/db.ts +118 -0
  26. package/dashboard/src/canvas/pixel-agent.ts +298 -0
  27. package/dashboard/src/canvas/pixel-sprites.ts +440 -0
  28. package/dashboard/src/canvas/shipyard-effects.ts +367 -0
  29. package/dashboard/src/canvas/shipyard-scene.ts +616 -0
  30. package/dashboard/src/canvas/submarine-layout.ts +267 -0
  31. package/dashboard/src/components/header.ts +8 -7
  32. package/dashboard/src/core/api.ts +5 -0
  33. package/dashboard/src/core/router.ts +1 -0
  34. package/dashboard/src/design/submarine-theme.ts +253 -0
  35. package/dashboard/src/main.ts +2 -0
  36. package/dashboard/src/types/api.ts +12 -1
  37. package/dashboard/src/views/activity.ts +2 -1
  38. package/dashboard/src/views/metrics.ts +69 -1
  39. package/dashboard/src/views/shipyard.ts +39 -0
  40. package/dashboard/types/index.ts +166 -0
  41. package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
  42. package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
  43. package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
  44. package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
  45. package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
  46. package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
  47. package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
  48. package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
  49. package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
  50. package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
  51. package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
  52. package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
  53. package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
  54. package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
  55. package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
  56. package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
  57. package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
  58. package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
  59. package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
  60. package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
  61. package/docs/research/RESEARCH_INDEX.md +439 -0
  62. package/docs/research/RESEARCH_SOURCES.md +440 -0
  63. package/docs/research/RESEARCH_SUMMARY.txt +275 -0
  64. package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
  65. package/package.json +2 -2
  66. package/scripts/lib/adaptive-model.sh +427 -0
  67. package/scripts/lib/adaptive-timeout.sh +316 -0
  68. package/scripts/lib/audit-trail.sh +309 -0
  69. package/scripts/lib/auto-recovery.sh +471 -0
  70. package/scripts/lib/bandit-selector.sh +431 -0
  71. package/scripts/lib/bootstrap.sh +104 -2
  72. package/scripts/lib/causal-graph.sh +455 -0
  73. package/scripts/lib/compat.sh +126 -0
  74. package/scripts/lib/compound-audit.sh +337 -0
  75. package/scripts/lib/constitutional.sh +454 -0
  76. package/scripts/lib/context-budget.sh +359 -0
  77. package/scripts/lib/convergence.sh +594 -0
  78. package/scripts/lib/cost-optimizer.sh +634 -0
  79. package/scripts/lib/daemon-adaptive.sh +14 -2
  80. package/scripts/lib/daemon-dispatch.sh +106 -17
  81. package/scripts/lib/daemon-failure.sh +34 -4
  82. package/scripts/lib/daemon-patrol.sh +25 -4
  83. package/scripts/lib/daemon-poll-github.sh +361 -0
  84. package/scripts/lib/daemon-poll-health.sh +299 -0
  85. package/scripts/lib/daemon-poll.sh +27 -611
  86. package/scripts/lib/daemon-state.sh +119 -66
  87. package/scripts/lib/daemon-triage.sh +10 -0
  88. package/scripts/lib/dod-scorecard.sh +442 -0
  89. package/scripts/lib/error-actionability.sh +300 -0
  90. package/scripts/lib/formal-spec.sh +461 -0
  91. package/scripts/lib/helpers.sh +180 -5
  92. package/scripts/lib/intent-analysis.sh +409 -0
  93. package/scripts/lib/loop-convergence.sh +350 -0
  94. package/scripts/lib/loop-iteration.sh +682 -0
  95. package/scripts/lib/loop-progress.sh +48 -0
  96. package/scripts/lib/loop-restart.sh +185 -0
  97. package/scripts/lib/memory-effectiveness.sh +506 -0
  98. package/scripts/lib/mutation-executor.sh +352 -0
  99. package/scripts/lib/outcome-feedback.sh +521 -0
  100. package/scripts/lib/pipeline-cli.sh +336 -0
  101. package/scripts/lib/pipeline-commands.sh +1216 -0
  102. package/scripts/lib/pipeline-detection.sh +101 -3
  103. package/scripts/lib/pipeline-execution.sh +897 -0
  104. package/scripts/lib/pipeline-github.sh +28 -3
  105. package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
  106. package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
  107. package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
  108. package/scripts/lib/pipeline-intelligence.sh +104 -1138
  109. package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
  110. package/scripts/lib/pipeline-quality-checks.sh +17 -711
  111. package/scripts/lib/pipeline-quality-gates.sh +563 -0
  112. package/scripts/lib/pipeline-stages-build.sh +730 -0
  113. package/scripts/lib/pipeline-stages-delivery.sh +965 -0
  114. package/scripts/lib/pipeline-stages-intake.sh +1133 -0
  115. package/scripts/lib/pipeline-stages-monitor.sh +407 -0
  116. package/scripts/lib/pipeline-stages-review.sh +1022 -0
  117. package/scripts/lib/pipeline-stages.sh +161 -2901
  118. package/scripts/lib/pipeline-state.sh +36 -5
  119. package/scripts/lib/pipeline-util.sh +487 -0
  120. package/scripts/lib/policy-learner.sh +438 -0
  121. package/scripts/lib/process-reward.sh +493 -0
  122. package/scripts/lib/project-detect.sh +649 -0
  123. package/scripts/lib/quality-profile.sh +334 -0
  124. package/scripts/lib/recruit-commands.sh +885 -0
  125. package/scripts/lib/recruit-learning.sh +739 -0
  126. package/scripts/lib/recruit-roles.sh +648 -0
  127. package/scripts/lib/reward-aggregator.sh +458 -0
  128. package/scripts/lib/rl-optimizer.sh +362 -0
  129. package/scripts/lib/root-cause.sh +427 -0
  130. package/scripts/lib/scope-enforcement.sh +445 -0
  131. package/scripts/lib/session-restart.sh +493 -0
  132. package/scripts/lib/skill-memory.sh +300 -0
  133. package/scripts/lib/skill-registry.sh +775 -0
  134. package/scripts/lib/spec-driven.sh +476 -0
  135. package/scripts/lib/test-helpers.sh +18 -7
  136. package/scripts/lib/test-holdout.sh +429 -0
  137. package/scripts/lib/test-optimizer.sh +511 -0
  138. package/scripts/shipwright-file-suggest.sh +45 -0
  139. package/scripts/skills/adversarial-quality.md +61 -0
  140. package/scripts/skills/api-design.md +44 -0
  141. package/scripts/skills/architecture-design.md +50 -0
  142. package/scripts/skills/brainstorming.md +43 -0
  143. package/scripts/skills/data-pipeline.md +44 -0
  144. package/scripts/skills/deploy-safety.md +64 -0
  145. package/scripts/skills/documentation.md +38 -0
  146. package/scripts/skills/frontend-design.md +45 -0
  147. package/scripts/skills/generated/.gitkeep +0 -0
  148. package/scripts/skills/generated/_refinements/.gitkeep +0 -0
  149. package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
  150. package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
  151. package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
  152. package/scripts/skills/generated/cli-version-management.md +29 -0
  153. package/scripts/skills/generated/collection-system-validation.md +99 -0
  154. package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
  155. package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
  156. package/scripts/skills/generated/test-parallelization-detection.md +65 -0
  157. package/scripts/skills/observability.md +79 -0
  158. package/scripts/skills/performance.md +48 -0
  159. package/scripts/skills/pr-quality.md +49 -0
  160. package/scripts/skills/product-thinking.md +43 -0
  161. package/scripts/skills/security-audit.md +49 -0
  162. package/scripts/skills/systematic-debugging.md +40 -0
  163. package/scripts/skills/testing-strategy.md +47 -0
  164. package/scripts/skills/two-stage-review.md +52 -0
  165. package/scripts/skills/validation-thoroughness.md +55 -0
  166. package/scripts/sw +9 -3
  167. package/scripts/sw-activity.sh +9 -8
  168. package/scripts/sw-adaptive.sh +8 -7
  169. package/scripts/sw-adversarial.sh +2 -1
  170. package/scripts/sw-architecture-enforcer.sh +3 -1
  171. package/scripts/sw-auth.sh +12 -2
  172. package/scripts/sw-autonomous.sh +5 -1
  173. package/scripts/sw-changelog.sh +4 -1
  174. package/scripts/sw-checkpoint.sh +2 -1
  175. package/scripts/sw-ci.sh +15 -6
  176. package/scripts/sw-cleanup.sh +4 -26
  177. package/scripts/sw-code-review.sh +45 -20
  178. package/scripts/sw-connect.sh +2 -1
  179. package/scripts/sw-context.sh +2 -1
  180. package/scripts/sw-cost.sh +107 -5
  181. package/scripts/sw-daemon.sh +71 -11
  182. package/scripts/sw-dashboard.sh +3 -1
  183. package/scripts/sw-db.sh +71 -20
  184. package/scripts/sw-decide.sh +8 -2
  185. package/scripts/sw-decompose.sh +360 -17
  186. package/scripts/sw-deps.sh +4 -1
  187. package/scripts/sw-developer-simulation.sh +4 -1
  188. package/scripts/sw-discovery.sh +378 -5
  189. package/scripts/sw-doc-fleet.sh +4 -1
  190. package/scripts/sw-docs-agent.sh +3 -1
  191. package/scripts/sw-docs.sh +2 -1
  192. package/scripts/sw-doctor.sh +453 -2
  193. package/scripts/sw-dora.sh +4 -1
  194. package/scripts/sw-durable.sh +12 -7
  195. package/scripts/sw-e2e-orchestrator.sh +17 -16
  196. package/scripts/sw-eventbus.sh +13 -4
  197. package/scripts/sw-evidence.sh +364 -12
  198. package/scripts/sw-feedback.sh +550 -9
  199. package/scripts/sw-fix.sh +20 -1
  200. package/scripts/sw-fleet-discover.sh +6 -2
  201. package/scripts/sw-fleet-viz.sh +9 -4
  202. package/scripts/sw-fleet.sh +5 -1
  203. package/scripts/sw-github-app.sh +18 -4
  204. package/scripts/sw-github-checks.sh +3 -2
  205. package/scripts/sw-github-deploy.sh +3 -2
  206. package/scripts/sw-github-graphql.sh +18 -7
  207. package/scripts/sw-guild.sh +5 -1
  208. package/scripts/sw-heartbeat.sh +5 -30
  209. package/scripts/sw-hello.sh +67 -0
  210. package/scripts/sw-hygiene.sh +10 -3
  211. package/scripts/sw-incident.sh +273 -5
  212. package/scripts/sw-init.sh +18 -2
  213. package/scripts/sw-instrument.sh +10 -2
  214. package/scripts/sw-intelligence.sh +44 -7
  215. package/scripts/sw-jira.sh +5 -1
  216. package/scripts/sw-launchd.sh +2 -1
  217. package/scripts/sw-linear.sh +4 -1
  218. package/scripts/sw-logs.sh +4 -1
  219. package/scripts/sw-loop.sh +436 -1076
  220. package/scripts/sw-memory.sh +357 -3
  221. package/scripts/sw-mission-control.sh +6 -1
  222. package/scripts/sw-model-router.sh +483 -27
  223. package/scripts/sw-otel.sh +15 -4
  224. package/scripts/sw-oversight.sh +14 -5
  225. package/scripts/sw-patrol-meta.sh +334 -0
  226. package/scripts/sw-pipeline-composer.sh +7 -1
  227. package/scripts/sw-pipeline-vitals.sh +12 -6
  228. package/scripts/sw-pipeline.sh +54 -2653
  229. package/scripts/sw-pm.sh +16 -8
  230. package/scripts/sw-pr-lifecycle.sh +2 -1
  231. package/scripts/sw-predictive.sh +17 -5
  232. package/scripts/sw-prep.sh +185 -2
  233. package/scripts/sw-ps.sh +5 -25
  234. package/scripts/sw-public-dashboard.sh +17 -4
  235. package/scripts/sw-quality.sh +14 -6
  236. package/scripts/sw-reaper.sh +8 -25
  237. package/scripts/sw-recruit.sh +156 -2303
  238. package/scripts/sw-regression.sh +19 -12
  239. package/scripts/sw-release-manager.sh +3 -1
  240. package/scripts/sw-release.sh +4 -1
  241. package/scripts/sw-remote.sh +3 -1
  242. package/scripts/sw-replay.sh +7 -1
  243. package/scripts/sw-retro.sh +158 -1
  244. package/scripts/sw-review-rerun.sh +3 -1
  245. package/scripts/sw-scale.sh +14 -5
  246. package/scripts/sw-security-audit.sh +6 -1
  247. package/scripts/sw-self-optimize.sh +173 -6
  248. package/scripts/sw-session.sh +9 -3
  249. package/scripts/sw-setup.sh +3 -1
  250. package/scripts/sw-stall-detector.sh +406 -0
  251. package/scripts/sw-standup.sh +15 -7
  252. package/scripts/sw-status.sh +3 -1
  253. package/scripts/sw-strategic.sh +14 -6
  254. package/scripts/sw-stream.sh +13 -4
  255. package/scripts/sw-swarm.sh +20 -7
  256. package/scripts/sw-team-stages.sh +13 -6
  257. package/scripts/sw-templates.sh +7 -31
  258. package/scripts/sw-testgen.sh +17 -6
  259. package/scripts/sw-tmux-pipeline.sh +4 -1
  260. package/scripts/sw-tmux-role-color.sh +2 -0
  261. package/scripts/sw-tmux-status.sh +1 -1
  262. package/scripts/sw-tmux.sh +37 -1
  263. package/scripts/sw-trace.sh +3 -1
  264. package/scripts/sw-tracker-github.sh +3 -0
  265. package/scripts/sw-tracker-jira.sh +3 -0
  266. package/scripts/sw-tracker-linear.sh +3 -0
  267. package/scripts/sw-tracker.sh +3 -1
  268. package/scripts/sw-triage.sh +3 -2
  269. package/scripts/sw-upgrade.sh +3 -1
  270. package/scripts/sw-ux.sh +5 -2
  271. package/scripts/sw-webhook.sh +5 -2
  272. package/scripts/sw-widgets.sh +9 -4
  273. package/scripts/sw-worktree.sh +15 -3
  274. package/scripts/test-skill-injection.sh +1233 -0
  275. package/templates/pipelines/autonomous.json +27 -3
  276. package/templates/pipelines/cost-aware.json +34 -8
  277. package/templates/pipelines/deployed.json +12 -0
  278. package/templates/pipelines/enterprise.json +12 -0
  279. package/templates/pipelines/fast.json +6 -0
  280. package/templates/pipelines/full.json +27 -3
  281. package/templates/pipelines/hotfix.json +6 -0
  282. package/templates/pipelines/standard.json +12 -0
  283. package/templates/pipelines/tdd.json +12 -0
@@ -0,0 +1,429 @@
1
#!/usr/bin/env bash
# Module guard - prevent double-sourcing
[[ -n "${_TEST_HOLDOUT_LOADED:-}" ]] && return 0
_TEST_HOLDOUT_LOADED=1

# ╔═══════════════════════════════════════════════════════════════════════════╗
# ║ shipwright test-holdout — Test-as-Holdout Validation System ║
# ║ Prevents agent overfitting by partitioning tests into visible/sealed ║
# ║ Based on StrongDM pattern: agents can't game what they can't see ║
# ╚═══════════════════════════════════════════════════════════════════════════╝

# shellcheck disable=SC2034
VERSION="3.3.0"

# ─── Output Helpers (fallback if not already loaded) ─────────────────────────
# Each helper is only defined when the sourcing script has not provided a
# function of the same name.
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
[[ "$(type -t warn 2>/dev/null)" == "function" ]] || warn() { echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"; }
[[ "$(type -t error 2>/dev/null)" == "function" ]] || error() { echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2; }
# The two time helpers are checked independently so that a caller providing
# only now_iso still ends up with a working now_epoch.
[[ "$(type -t now_iso 2>/dev/null)" == "function" ]] || now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
[[ "$(type -t now_epoch 2>/dev/null)" == "function" ]] || now_epoch() { date +%s; }

# ─── Configuration ───────────────────────────────────────────────────────────

# Print a usable holdout percentage for the candidate value in $1.
# Accepts an integer 0-100 (leading zeros allowed); anything else produces a
# warning on stderr and the default of 30.
_holdout_normalize_ratio() {
  local raw="${1:-}"
  if [[ "$raw" =~ ^[0-9]{1,3}$ ]] && (( 10#$raw <= 100 )); then
    printf '%d\n' "$(( 10#$raw ))"
    return 0
  fi
  # The fallback warn() prints to stdout; force stderr so the message is not
  # captured by the command substitution below.
  warn "Invalid HOLDOUT_RATIO '${raw}' — expected an integer 0-100; using 30" >&2
  printf '30\n'
}

# % of tests to hold out (default 30%)
HOLDOUT_RATIO="$(_holdout_normalize_ratio "${HOLDOUT_RATIO:-30}")"
HOLDOUT_DIR="${HOLDOUT_DIR:-.claude/test-holdout}"
HOLDOUT_SEALED_DIR="${HOLDOUT_DIR}/.sealed"
HOLDOUT_MANIFEST="${HOLDOUT_DIR}/manifest.json"
HOLDOUT_RESULTS="${HOLDOUT_DIR}/results.json"
32
+
33
+ # ─── Test Discovery ─────────────────────────────────────────────────────────
34
+ # Find test files in a project. Returns newline-separated list of test file paths.
35
+
36
# Print the test files found under a project directory, one path per line,
# sorted.
# Arguments:
#   $1 - directory to search (default ".")
#   $2 - language: typescript|javascript|python|go|rust. Any other value
#        (including empty) selects the generic name-based search. When empty
#        and a detect_primary_language function exists, it is consulted first.
# Outputs: newline-separated paths on stdout; nothing when no file matches.
holdout_discover_tests() {
  local project_dir="${1:-.}"
  local language="${2:-}"

  # Auto-detect language if not provided
  if [[ -z "$language" ]] && type detect_primary_language >/dev/null 2>&1; then
    language=$(detect_primary_language "$project_dir")
  fi

  case "$language" in
    typescript|javascript)
      # .tsx/.jsx are included so component test suites are discovered too.
      find "$project_dir" \
        -type f \( -name "*.test.ts" -o -name "*.test.tsx" \
          -o -name "*.test.js" -o -name "*.test.jsx" \
          -o -name "*.spec.ts" -o -name "*.spec.tsx" \
          -o -name "*.spec.js" -o -name "*.spec.jsx" \) \
        ! -path "*/node_modules/*" ! -path "*/.claude/*" \
        2>/dev/null | sort
      ;;
    python)
      # Virtualenv / installed-package trees hold third-party tests that are
      # not part of the project under test.
      find "$project_dir" \
        -type f \( -name "test_*.py" -o -name "*_test.py" \) \
        ! -path "*/__pycache__/*" ! -path "*/.claude/*" \
        ! -path "*/.venv/*" ! -path "*/venv/*" ! -path "*/site-packages/*" \
        2>/dev/null | sort
      ;;
    go)
      find "$project_dir" \
        -type f -name "*_test.go" \
        ! -path "*/.claude/*" ! -path "*/vendor/*" \
        2>/dev/null | sort
      ;;
    rust)
      # Only files under a tests/ directory are considered; tests embedded in
      # regular source files are not discoverable by file name.
      find "$project_dir" \
        -type f -name "*.rs" -path "*/tests/*" \
        ! -path "*/.claude/*" ! -path "*/target/*" \
        2>/dev/null | sort
      ;;
    *)
      # Generic: find files with "test" or "spec" in the name, skipping
      # common documentation/config extensions.
      find "$project_dir" \
        -type f \( -name "*test*" -o -name "*spec*" \) \
        ! -path "*/node_modules/*" ! -path "*/.claude/*" ! -path "*/.git/*" \
        ! -name "*.md" ! -name "*.json" ! -name "*.yml" \
        2>/dev/null | sort
      ;;
  esac
}
81
+
82
+ # ─── Partition ───────────────────────────────────────────────────────────────
83
+ # Split discovered tests into visible (agent can see) and holdout (sealed).
84
+ # Uses deterministic hashing so same files always get same partition.
85
+
86
# Print a number in 0-255 derived from a hash of the string in $1.
# Uses the last two hex digits of the MD5 digest when an md5/md5sum tool
# works, otherwise falls back to the cksum CRC modulo 256.
_holdout_hash_byte() {
  local text="$1"
  local digest=""
  if command -v md5 >/dev/null 2>&1; then
    digest=$(printf '%s' "$text" | md5 -q 2>/dev/null)
  elif command -v md5sum >/dev/null 2>&1; then
    digest=$(printf '%s' "$text" | md5sum 2>/dev/null | cut -d' ' -f1)
  fi
  if [[ "$digest" =~ ^[0-9a-fA-F]{32}$ ]]; then
    printf '%d\n' "$(( 16#${digest:30:2} ))"
    return 0
  fi

  # Without a usable digest every file would hash to the same value and land
  # in the same partition, so fall back to another deterministic checksum.
  local crc
  crc=$(printf '%s' "$text" | cksum 2>/dev/null | cut -d' ' -f1)
  if [[ "$crc" =~ ^[0-9]+$ ]]; then
    printf '%d\n' "$(( crc % 256 ))"
  else
    printf '0\n'
  fi
}

# Split discovered tests into a visible set and a holdout set.
# Arguments:
#   $1 - project directory (default ".")
#   $2 - language passed through to holdout_discover_tests (default empty)
#   $3 - holdout percentage, integer 0-100 (default $HOLDOUT_RATIO)
# Globals written: HOLDOUT_VISIBLE_TESTS, HOLDOUT_SEALED_TESTS (newline
#   separated lists), HOLDOUT_TOTAL, HOLDOUT_VISIBLE_COUNT,
#   HOLDOUT_SEALED_COUNT.
# Files written: $HOLDOUT_DIR/visible-tests.txt
# Returns: 0 on success; 1 when the ratio is invalid, no tests are found, or
#   $HOLDOUT_DIR cannot be created.
holdout_partition() {
  local project_dir="${1:-.}"
  local language="${2:-}"
  local ratio="${3:-$HOLDOUT_RATIO}"

  # Reject values that would break the arithmetic below.
  if ! [[ "$ratio" =~ ^[0-9]{1,3}$ ]] || (( 10#$ratio > 100 )); then
    error "Invalid holdout ratio '${ratio}' — expected an integer between 0 and 100"
    return 1
  fi
  ratio=$(( 10#$ratio ))

  local all_tests
  all_tests=$(holdout_discover_tests "$project_dir" "$language")

  if [[ -z "$all_tests" ]]; then
    warn "No test files discovered in $project_dir"
    return 1
  fi

  local total_tests
  total_tests=$(printf '%s\n' "$all_tests" | wc -l | tr -d ' ')

  # A file is held out when its hash byte (0-255) falls below this cut-off.
  local threshold=$(( 256 * ratio / 100 ))
  local nl=$'\n'
  local visible_tests=""
  local holdout_tests=""
  local idx=0
  local test_file hash_num

  while IFS= read -r test_file; do
    hash_num=$(_holdout_hash_byte "$test_file")

    # The first file in the list is always kept visible.
    if (( idx > 0 && hash_num < threshold )); then
      holdout_tests="${holdout_tests:+${holdout_tests}${nl}}${test_file}"
    else
      visible_tests="${visible_tests:+${visible_tests}${nl}}${test_file}"
    fi
    idx=$((idx + 1))
  done <<< "$all_tests"

  # Ensure we have at least one holdout if possible
  if [[ -z "$holdout_tests" ]] && (( total_tests >= 2 )); then
    # Move last visible test to holdout
    holdout_tests=$(printf '%s\n' "$visible_tests" | tail -n 1)
    # BSD head doesn't support -n -1; use sed to remove last line
    visible_tests=$(printf '%s\n' "$visible_tests" | sed '$ d')
  fi

  # Ensure we have at least one visible test
  if [[ -z "$visible_tests" ]] && [[ -n "$holdout_tests" ]]; then
    visible_tests=$(printf '%s\n' "$holdout_tests" | head -n 1)
    holdout_tests=$(printf '%s\n' "$holdout_tests" | tail -n +2)
  fi

  # grep -c exits non-zero when it counts zero lines; normalise that to 0.
  local actual_holdout actual_visible
  actual_holdout=$(printf '%s\n' "$holdout_tests" | grep -c '.' 2>/dev/null) || actual_holdout=0
  actual_visible=$(printf '%s\n' "$visible_tests" | grep -c '.' 2>/dev/null) || actual_visible=0

  info "Test partition: ${actual_visible} visible, ${actual_holdout} holdout (${ratio}% target)"

  # Store partition info
  if ! mkdir -p "$HOLDOUT_DIR"; then
    error "Cannot create holdout directory: ${HOLDOUT_DIR}"
    return 1
  fi
  printf '%s\n' "$visible_tests" > "$HOLDOUT_DIR/visible-tests.txt"

  # Export for callers
  HOLDOUT_VISIBLE_TESTS="$visible_tests"
  HOLDOUT_SEALED_TESTS="$holdout_tests"
  HOLDOUT_TOTAL="$total_tests"
  HOLDOUT_VISIBLE_COUNT="$actual_visible"
  HOLDOUT_SEALED_COUNT="$actual_holdout"

  return 0
}
176
+
177
+ # ─── Seal ────────────────────────────────────────────────────────────────────
178
+ # Copy holdout tests into the sealed directory; the originals stay in place.
179
+ # Creates a manifest tracking original locations for restoration.
180
+
181
# Print $1 escaped for use inside a double-quoted JSON string
# (backslash, double quote and tab are escaped).
_holdout_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Copy the holdout tests into the sealed directory and write a manifest.
# Arguments:
#   $1 - project directory the test paths are relative to (default ".")
# Globals read: HOLDOUT_SEALED_TESTS (newline-separated paths, normally set by
#   holdout_partition), HOLDOUT_SEALED_DIR, HOLDOUT_MANIFEST, HOLDOUT_RATIO,
#   HOLDOUT_TOTAL, HOLDOUT_VISIBLE_COUNT.
# Files written: copies under $HOLDOUT_SEALED_DIR, $HOLDOUT_MANIFEST, and an
#   ignore entry appended to an existing <project>/.gitignore.
# Returns: 0 on success; 1 when there is nothing to seal or the sealed
#   directory cannot be created.
holdout_seal() {
  local project_dir="${1:-.}"

  if [[ -z "${HOLDOUT_SEALED_TESTS:-}" ]]; then
    error "No holdout tests to seal. Run holdout_partition first."
    return 1
  fi

  if ! mkdir -p "$HOLDOUT_SEALED_DIR"; then
    error "Cannot create sealed directory: ${HOLDOUT_SEALED_DIR}"
    return 1
  fi

  # Literal prefix to strip from each path. Parameter expansion with a quoted
  # pattern is used so characters such as '.', '[' or '|' in the project
  # path are not interpreted as a pattern.
  local prefix="${project_dir%/}/"
  local manifest_entries=""
  local sealed_count=0
  local test_file rel_path sealed_path file_hash entry

  while IFS= read -r test_file; do
    [[ -z "$test_file" ]] && continue
    [[ ! -f "$test_file" ]] && continue

    # Create relative path for storage
    rel_path="${test_file#"$prefix"}"
    sealed_path="${HOLDOUT_SEALED_DIR}/${rel_path}"

    # Copy test to sealed location (don't move — agent might notice missing
    # files). A test that cannot be copied is skipped, not counted as sealed.
    if ! mkdir -p "$(dirname "$sealed_path")" || ! cp -- "$test_file" "$sealed_path"; then
      warn "Could not seal holdout test: ${test_file}"
      continue
    fi

    file_hash=$(md5 -q "$test_file" 2>/dev/null || md5sum "$test_file" 2>/dev/null | cut -d' ' -f1)

    # Build manifest entry
    entry=$(printf '{"original":"%s","sealed":"%s","hash":"%s"}' \
      "$(_holdout_json_escape "$rel_path")" \
      "$(_holdout_json_escape "$sealed_path")" \
      "$file_hash")

    manifest_entries="${manifest_entries:+${manifest_entries},}${entry}"
    sealed_count=$((sealed_count + 1))
  done <<< "$HOLDOUT_SEALED_TESTS"

  # Write manifest
  cat > "$HOLDOUT_MANIFEST" <<EOF
{
  "created": "$(now_iso)",
  "ratio": ${HOLDOUT_RATIO},
  "total_tests": ${HOLDOUT_TOTAL:-0},
  "visible_count": ${HOLDOUT_VISIBLE_COUNT:-0},
  "sealed_count": ${sealed_count},
  "tests": [${manifest_entries}]
}
EOF

  # Add sealed directory to .gitignore if not already there
  # (only when the project already has a .gitignore).
  local gitignore="${project_dir}/.gitignore"
  if [[ -f "$gitignore" ]]; then
    if ! grep -q "test-holdout/.sealed" "$gitignore" 2>/dev/null; then
      {
        echo ""
        echo "# Shipwright test holdout (sealed tests hidden from agents)"
        echo ".claude/test-holdout/.sealed/"
      } >> "$gitignore"
    fi
  fi

  success "Sealed ${sealed_count} holdout tests"

  # Event emission is optional: only when the host script provides it.
  if type emit_event >/dev/null 2>&1; then
    emit_event "test_holdout_sealed" \
      "total=${HOLDOUT_TOTAL:-0}" \
      "visible=${HOLDOUT_VISIBLE_COUNT:-0}" \
      "sealed=${sealed_count}" \
      "ratio=${HOLDOUT_RATIO}"
  fi

  return 0
}
258
+
259
+ # ─── Validate ────────────────────────────────────────────────────────────────
260
+ # Run holdout tests AFTER agent claims completion. This is the critical gate.
261
+ # Returns 0 if all holdout tests pass, 1 if any fail.
262
+
263
# Print $1 escaped for use inside a double-quoted JSON string
# (backslash, double quote and tab are escaped).
_holdout_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Run every holdout test listed in the manifest and record the outcome.
# Arguments:
#   $1 - project directory the manifest's "original" paths are relative to
#        (default ".")
#   $2 - test command; each test file path is appended as one argument.
#        When empty and a detect_test_framework function exists, the command
#        is derived from the framework it reports.
# Globals read: HOLDOUT_MANIFEST, HOLDOUT_RESULTS.
# Files written: $HOLDOUT_RESULTS (JSON summary).
# Returns: 1 when at least one holdout test fails or is missing; 0 otherwise,
#   including the cases where validation is skipped (no manifest, no test
#   command, or no tests listed).
holdout_validate() {
  local project_dir="${1:-.}"
  local test_cmd="${2:-}"

  if [[ ! -f "$HOLDOUT_MANIFEST" ]]; then
    warn "No holdout manifest found — skipping holdout validation"
    return 0
  fi

  # Auto-detect test command
  if [[ -z "$test_cmd" ]] && type detect_test_framework >/dev/null 2>&1; then
    local framework
    framework=$(detect_test_framework "$project_dir")
    case "$framework" in
      vitest) test_cmd="npx vitest run" ;;
      jest) test_cmd="npx jest" ;;
      pytest) test_cmd="pytest" ;;
      "go test") test_cmd="go test" ;;
      "cargo test") test_cmd="cargo test" ;;
      *) test_cmd="" ;;
    esac
  fi

  if [[ -z "$test_cmd" ]]; then
    warn "No test command available — cannot validate holdout tests"
    return 0
  fi

  info "Running holdout validation (sealed tests the agent never saw)..."

  local sealed_tests
  sealed_tests=$(jq -r '.tests[].original' "$HOLDOUT_MANIFEST" 2>/dev/null)

  if [[ -z "$sealed_tests" ]]; then
    warn "No sealed tests in manifest"
    return 0
  fi

  local pass_count=0
  local fail_count=0
  local total_count=0
  local failed_tests=""
  local test_file full_path

  while IFS= read -r test_file; do
    [[ -z "$test_file" ]] && continue
    total_count=$((total_count + 1))
    full_path="${project_dir}/${test_file}"

    if [[ ! -f "$full_path" ]]; then
      warn "Holdout test missing: ${test_file} (may have been deleted by agent)"
    elif eval "${test_cmd} \"\${full_path}\"" </dev/null >/dev/null 2>&1; then
      # \${full_path} is expanded by eval itself, inside double quotes, so the
      # file name is passed as a single argument and never parsed as shell
      # code. stdin is detached so the test command cannot consume the
      # remaining entries of the list this loop reads from.
      pass_count=$((pass_count + 1))
      continue
    fi

    # Reached for both missing and failing tests.
    fail_count=$((fail_count + 1))
    failed_tests="${failed_tests:+${failed_tests},}\"$(_holdout_json_escape "$test_file")\""
  done <<< "$sealed_tests"

  local pass_rate=$(( total_count > 0 ? pass_count * 100 / total_count : 0 ))

  # Write results
  cat > "$HOLDOUT_RESULTS" <<EOF
{
  "validated_at": "$(now_iso)",
  "total": ${total_count},
  "passed": ${pass_count},
  "failed": ${fail_count},
  "pass_rate": ${pass_rate},
  "failed_tests": [${failed_tests}]
}
EOF

  # Event emission is optional: only when the host script provides it.
  if type emit_event >/dev/null 2>&1; then
    emit_event "test_holdout_validated" \
      "total=${total_count}" \
      "passed=${pass_count}" \
      "failed=${fail_count}" \
      "pass_rate=${pass_rate}"
  fi

  if [[ "$fail_count" -gt 0 ]]; then
    error "Holdout validation FAILED: ${fail_count}/${total_count} sealed tests failed"
    error "Failed tests: ${failed_tests}"
    return 1
  fi

  success "Holdout validation PASSED: ${pass_count}/${total_count} sealed tests passed"
  return 0
}
368
+
369
+ # ─── Reveal ──────────────────────────────────────────────────────────────────
370
+ # Show holdout results (sealed artifacts are removed separately by holdout_cleanup).
371
+
372
# Print a summary of the last holdout validation run.
# Reads the counters from $HOLDOUT_RESULTS with jq; when any test failed the
# summary goes through error() followed by the list of failed tests,
# otherwise it goes through success(). Warns when no results file exists.
holdout_reveal() {
  if [[ ! -f "$HOLDOUT_RESULTS" ]]; then
    warn "No holdout results available"
    return
  fi

  # Each counter falls back to 0 when jq fails.
  local pass_rate passed failed total
  pass_rate=$(jq -r '.pass_rate // 0' "$HOLDOUT_RESULTS" 2>/dev/null || echo "0")
  passed=$(jq -r '.passed // 0' "$HOLDOUT_RESULTS" 2>/dev/null || echo "0")
  failed=$(jq -r '.failed // 0' "$HOLDOUT_RESULTS" 2>/dev/null || echo "0")
  total=$(jq -r '.total // 0' "$HOLDOUT_RESULTS" 2>/dev/null || echo "0")

  local summary="Holdout Results: ${passed}/${total} passed (${pass_rate}%)"

  if [[ "$failed" -gt 0 ]]; then
    error "$summary"
    local failed_list name
    failed_list=$(jq -r '.failed_tests[]' "$HOLDOUT_RESULTS" 2>/dev/null || true)
    if [[ -n "$failed_list" ]]; then
      echo " Failed tests:"
      while IFS= read -r name; do
        echo " - $name"
      done <<< "$failed_list"
    fi
  else
    success "$summary"
  fi
}
398
+
399
+ # ─── Cleanup ─────────────────────────────────────────────────────────────────
400
+ # Remove sealed tests and holdout artifacts.
401
+
402
# Delete everything the holdout workflow created: the sealed directory, the
# manifest, the results file and the visible-tests list. Anything that does
# not exist is skipped.
holdout_cleanup() {
  if [[ -d "$HOLDOUT_SEALED_DIR" ]]; then
    rm -rf "$HOLDOUT_SEALED_DIR"
  fi

  local artifact
  for artifact in \
    "$HOLDOUT_MANIFEST" \
    "$HOLDOUT_RESULTS" \
    "$HOLDOUT_DIR/visible-tests.txt"; do
    if [[ -f "$artifact" ]]; then
      rm -f "$artifact"
    fi
  done
}
416
+
417
+ # ─── Status ──────────────────────────────────────────────────────────────────
418
+
419
# Print a one-line key=value summary of the holdout state.
# Without a manifest the line is "holdout_active=false"; otherwise the sealed
# and visible counts and the ratio are read from $HOLDOUT_MANIFEST with jq,
# falling back to 0 / 0 / 30 when jq fails.
holdout_status() {
  if [[ ! -f "$HOLDOUT_MANIFEST" ]]; then
    echo "holdout_active=false"
    return
  fi

  local n_sealed n_visible pct
  n_sealed=$(jq -r '.sealed_count // 0' "$HOLDOUT_MANIFEST" 2>/dev/null || echo "0")
  n_visible=$(jq -r '.visible_count // 0' "$HOLDOUT_MANIFEST" 2>/dev/null || echo "0")
  pct=$(jq -r '.ratio // 30' "$HOLDOUT_MANIFEST" 2>/dev/null || echo "30")

  echo "holdout_active=true sealed=${n_sealed} visible=${n_visible} ratio=${pct}%"
}