shipwright-cli 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. package/.claude/agents/code-reviewer.md +2 -0
  2. package/.claude/agents/devops-engineer.md +2 -0
  3. package/.claude/agents/doc-fleet-agent.md +2 -0
  4. package/.claude/agents/pipeline-agent.md +2 -0
  5. package/.claude/agents/shell-script-specialist.md +2 -0
  6. package/.claude/agents/test-specialist.md +2 -0
  7. package/.claude/hooks/agent-crash-capture.sh +32 -0
  8. package/.claude/hooks/post-tool-use.sh +3 -2
  9. package/.claude/hooks/pre-tool-use.sh +35 -3
  10. package/README.md +22 -8
  11. package/claude-code/hooks/config-change.sh +18 -0
  12. package/claude-code/hooks/instructions-reloaded.sh +7 -0
  13. package/claude-code/hooks/worktree-create.sh +25 -0
  14. package/claude-code/hooks/worktree-remove.sh +20 -0
  15. package/config/code-constitution.json +130 -0
  16. package/config/defaults.json +25 -2
  17. package/config/policy.json +1 -1
  18. package/dashboard/middleware/auth.ts +134 -0
  19. package/dashboard/middleware/constants.ts +21 -0
  20. package/dashboard/public/index.html +8 -6
  21. package/dashboard/public/styles.css +176 -97
  22. package/dashboard/routes/auth.ts +38 -0
  23. package/dashboard/server.ts +117 -25
  24. package/dashboard/services/config.ts +26 -0
  25. package/dashboard/services/db.ts +118 -0
  26. package/dashboard/src/canvas/pixel-agent.ts +298 -0
  27. package/dashboard/src/canvas/pixel-sprites.ts +440 -0
  28. package/dashboard/src/canvas/shipyard-effects.ts +367 -0
  29. package/dashboard/src/canvas/shipyard-scene.ts +616 -0
  30. package/dashboard/src/canvas/submarine-layout.ts +267 -0
  31. package/dashboard/src/components/header.ts +8 -7
  32. package/dashboard/src/core/api.ts +5 -0
  33. package/dashboard/src/core/router.ts +1 -0
  34. package/dashboard/src/design/submarine-theme.ts +253 -0
  35. package/dashboard/src/main.ts +2 -0
  36. package/dashboard/src/types/api.ts +12 -1
  37. package/dashboard/src/views/activity.ts +2 -1
  38. package/dashboard/src/views/metrics.ts +69 -1
  39. package/dashboard/src/views/shipyard.ts +39 -0
  40. package/dashboard/types/index.ts +166 -0
  41. package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
  42. package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
  43. package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
  44. package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
  45. package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
  46. package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
  47. package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
  48. package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
  49. package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
  50. package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
  51. package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
  52. package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
  53. package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
  54. package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
  55. package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
  56. package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
  57. package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
  58. package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
  59. package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
  60. package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
  61. package/docs/research/RESEARCH_INDEX.md +439 -0
  62. package/docs/research/RESEARCH_SOURCES.md +440 -0
  63. package/docs/research/RESEARCH_SUMMARY.txt +275 -0
  64. package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
  65. package/package.json +2 -2
  66. package/scripts/lib/adaptive-model.sh +427 -0
  67. package/scripts/lib/adaptive-timeout.sh +316 -0
  68. package/scripts/lib/audit-trail.sh +309 -0
  69. package/scripts/lib/auto-recovery.sh +471 -0
  70. package/scripts/lib/bandit-selector.sh +431 -0
  71. package/scripts/lib/bootstrap.sh +104 -2
  72. package/scripts/lib/causal-graph.sh +455 -0
  73. package/scripts/lib/compat.sh +126 -0
  74. package/scripts/lib/compound-audit.sh +337 -0
  75. package/scripts/lib/constitutional.sh +454 -0
  76. package/scripts/lib/context-budget.sh +359 -0
  77. package/scripts/lib/convergence.sh +594 -0
  78. package/scripts/lib/cost-optimizer.sh +634 -0
  79. package/scripts/lib/daemon-adaptive.sh +14 -2
  80. package/scripts/lib/daemon-dispatch.sh +106 -17
  81. package/scripts/lib/daemon-failure.sh +34 -4
  82. package/scripts/lib/daemon-patrol.sh +25 -4
  83. package/scripts/lib/daemon-poll-github.sh +361 -0
  84. package/scripts/lib/daemon-poll-health.sh +299 -0
  85. package/scripts/lib/daemon-poll.sh +27 -611
  86. package/scripts/lib/daemon-state.sh +119 -66
  87. package/scripts/lib/daemon-triage.sh +10 -0
  88. package/scripts/lib/dod-scorecard.sh +442 -0
  89. package/scripts/lib/error-actionability.sh +300 -0
  90. package/scripts/lib/formal-spec.sh +461 -0
  91. package/scripts/lib/helpers.sh +180 -5
  92. package/scripts/lib/intent-analysis.sh +409 -0
  93. package/scripts/lib/loop-convergence.sh +350 -0
  94. package/scripts/lib/loop-iteration.sh +682 -0
  95. package/scripts/lib/loop-progress.sh +48 -0
  96. package/scripts/lib/loop-restart.sh +185 -0
  97. package/scripts/lib/memory-effectiveness.sh +506 -0
  98. package/scripts/lib/mutation-executor.sh +352 -0
  99. package/scripts/lib/outcome-feedback.sh +521 -0
  100. package/scripts/lib/pipeline-cli.sh +336 -0
  101. package/scripts/lib/pipeline-commands.sh +1216 -0
  102. package/scripts/lib/pipeline-detection.sh +101 -3
  103. package/scripts/lib/pipeline-execution.sh +897 -0
  104. package/scripts/lib/pipeline-github.sh +28 -3
  105. package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
  106. package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
  107. package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
  108. package/scripts/lib/pipeline-intelligence.sh +104 -1138
  109. package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
  110. package/scripts/lib/pipeline-quality-checks.sh +17 -711
  111. package/scripts/lib/pipeline-quality-gates.sh +563 -0
  112. package/scripts/lib/pipeline-stages-build.sh +730 -0
  113. package/scripts/lib/pipeline-stages-delivery.sh +965 -0
  114. package/scripts/lib/pipeline-stages-intake.sh +1133 -0
  115. package/scripts/lib/pipeline-stages-monitor.sh +407 -0
  116. package/scripts/lib/pipeline-stages-review.sh +1022 -0
  117. package/scripts/lib/pipeline-stages.sh +161 -2901
  118. package/scripts/lib/pipeline-state.sh +36 -5
  119. package/scripts/lib/pipeline-util.sh +487 -0
  120. package/scripts/lib/policy-learner.sh +438 -0
  121. package/scripts/lib/process-reward.sh +493 -0
  122. package/scripts/lib/project-detect.sh +649 -0
  123. package/scripts/lib/quality-profile.sh +334 -0
  124. package/scripts/lib/recruit-commands.sh +885 -0
  125. package/scripts/lib/recruit-learning.sh +739 -0
  126. package/scripts/lib/recruit-roles.sh +648 -0
  127. package/scripts/lib/reward-aggregator.sh +458 -0
  128. package/scripts/lib/rl-optimizer.sh +362 -0
  129. package/scripts/lib/root-cause.sh +427 -0
  130. package/scripts/lib/scope-enforcement.sh +445 -0
  131. package/scripts/lib/session-restart.sh +493 -0
  132. package/scripts/lib/skill-memory.sh +300 -0
  133. package/scripts/lib/skill-registry.sh +775 -0
  134. package/scripts/lib/spec-driven.sh +476 -0
  135. package/scripts/lib/test-helpers.sh +18 -7
  136. package/scripts/lib/test-holdout.sh +429 -0
  137. package/scripts/lib/test-optimizer.sh +511 -0
  138. package/scripts/shipwright-file-suggest.sh +45 -0
  139. package/scripts/skills/adversarial-quality.md +61 -0
  140. package/scripts/skills/api-design.md +44 -0
  141. package/scripts/skills/architecture-design.md +50 -0
  142. package/scripts/skills/brainstorming.md +43 -0
  143. package/scripts/skills/data-pipeline.md +44 -0
  144. package/scripts/skills/deploy-safety.md +64 -0
  145. package/scripts/skills/documentation.md +38 -0
  146. package/scripts/skills/frontend-design.md +45 -0
  147. package/scripts/skills/generated/.gitkeep +0 -0
  148. package/scripts/skills/generated/_refinements/.gitkeep +0 -0
  149. package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
  150. package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
  151. package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
  152. package/scripts/skills/generated/cli-version-management.md +29 -0
  153. package/scripts/skills/generated/collection-system-validation.md +99 -0
  154. package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
  155. package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
  156. package/scripts/skills/generated/test-parallelization-detection.md +65 -0
  157. package/scripts/skills/observability.md +79 -0
  158. package/scripts/skills/performance.md +48 -0
  159. package/scripts/skills/pr-quality.md +49 -0
  160. package/scripts/skills/product-thinking.md +43 -0
  161. package/scripts/skills/security-audit.md +49 -0
  162. package/scripts/skills/systematic-debugging.md +40 -0
  163. package/scripts/skills/testing-strategy.md +47 -0
  164. package/scripts/skills/two-stage-review.md +52 -0
  165. package/scripts/skills/validation-thoroughness.md +55 -0
  166. package/scripts/sw +9 -3
  167. package/scripts/sw-activity.sh +9 -8
  168. package/scripts/sw-adaptive.sh +8 -7
  169. package/scripts/sw-adversarial.sh +2 -1
  170. package/scripts/sw-architecture-enforcer.sh +3 -1
  171. package/scripts/sw-auth.sh +12 -2
  172. package/scripts/sw-autonomous.sh +5 -1
  173. package/scripts/sw-changelog.sh +4 -1
  174. package/scripts/sw-checkpoint.sh +2 -1
  175. package/scripts/sw-ci.sh +15 -6
  176. package/scripts/sw-cleanup.sh +4 -26
  177. package/scripts/sw-code-review.sh +45 -20
  178. package/scripts/sw-connect.sh +2 -1
  179. package/scripts/sw-context.sh +2 -1
  180. package/scripts/sw-cost.sh +107 -5
  181. package/scripts/sw-daemon.sh +71 -11
  182. package/scripts/sw-dashboard.sh +3 -1
  183. package/scripts/sw-db.sh +71 -20
  184. package/scripts/sw-decide.sh +8 -2
  185. package/scripts/sw-decompose.sh +360 -17
  186. package/scripts/sw-deps.sh +4 -1
  187. package/scripts/sw-developer-simulation.sh +4 -1
  188. package/scripts/sw-discovery.sh +378 -5
  189. package/scripts/sw-doc-fleet.sh +4 -1
  190. package/scripts/sw-docs-agent.sh +3 -1
  191. package/scripts/sw-docs.sh +2 -1
  192. package/scripts/sw-doctor.sh +453 -2
  193. package/scripts/sw-dora.sh +4 -1
  194. package/scripts/sw-durable.sh +12 -7
  195. package/scripts/sw-e2e-orchestrator.sh +17 -16
  196. package/scripts/sw-eventbus.sh +13 -4
  197. package/scripts/sw-evidence.sh +364 -12
  198. package/scripts/sw-feedback.sh +550 -9
  199. package/scripts/sw-fix.sh +20 -1
  200. package/scripts/sw-fleet-discover.sh +6 -2
  201. package/scripts/sw-fleet-viz.sh +9 -4
  202. package/scripts/sw-fleet.sh +5 -1
  203. package/scripts/sw-github-app.sh +18 -4
  204. package/scripts/sw-github-checks.sh +3 -2
  205. package/scripts/sw-github-deploy.sh +3 -2
  206. package/scripts/sw-github-graphql.sh +18 -7
  207. package/scripts/sw-guild.sh +5 -1
  208. package/scripts/sw-heartbeat.sh +5 -30
  209. package/scripts/sw-hello.sh +67 -0
  210. package/scripts/sw-hygiene.sh +10 -3
  211. package/scripts/sw-incident.sh +273 -5
  212. package/scripts/sw-init.sh +18 -2
  213. package/scripts/sw-instrument.sh +10 -2
  214. package/scripts/sw-intelligence.sh +44 -7
  215. package/scripts/sw-jira.sh +5 -1
  216. package/scripts/sw-launchd.sh +2 -1
  217. package/scripts/sw-linear.sh +4 -1
  218. package/scripts/sw-logs.sh +4 -1
  219. package/scripts/sw-loop.sh +436 -1076
  220. package/scripts/sw-memory.sh +357 -3
  221. package/scripts/sw-mission-control.sh +6 -1
  222. package/scripts/sw-model-router.sh +483 -27
  223. package/scripts/sw-otel.sh +15 -4
  224. package/scripts/sw-oversight.sh +14 -5
  225. package/scripts/sw-patrol-meta.sh +334 -0
  226. package/scripts/sw-pipeline-composer.sh +7 -1
  227. package/scripts/sw-pipeline-vitals.sh +12 -6
  228. package/scripts/sw-pipeline.sh +54 -2653
  229. package/scripts/sw-pm.sh +16 -8
  230. package/scripts/sw-pr-lifecycle.sh +2 -1
  231. package/scripts/sw-predictive.sh +17 -5
  232. package/scripts/sw-prep.sh +185 -2
  233. package/scripts/sw-ps.sh +5 -25
  234. package/scripts/sw-public-dashboard.sh +17 -4
  235. package/scripts/sw-quality.sh +14 -6
  236. package/scripts/sw-reaper.sh +8 -25
  237. package/scripts/sw-recruit.sh +156 -2303
  238. package/scripts/sw-regression.sh +19 -12
  239. package/scripts/sw-release-manager.sh +3 -1
  240. package/scripts/sw-release.sh +4 -1
  241. package/scripts/sw-remote.sh +3 -1
  242. package/scripts/sw-replay.sh +7 -1
  243. package/scripts/sw-retro.sh +158 -1
  244. package/scripts/sw-review-rerun.sh +3 -1
  245. package/scripts/sw-scale.sh +14 -5
  246. package/scripts/sw-security-audit.sh +6 -1
  247. package/scripts/sw-self-optimize.sh +173 -6
  248. package/scripts/sw-session.sh +9 -3
  249. package/scripts/sw-setup.sh +3 -1
  250. package/scripts/sw-stall-detector.sh +406 -0
  251. package/scripts/sw-standup.sh +15 -7
  252. package/scripts/sw-status.sh +3 -1
  253. package/scripts/sw-strategic.sh +14 -6
  254. package/scripts/sw-stream.sh +13 -4
  255. package/scripts/sw-swarm.sh +20 -7
  256. package/scripts/sw-team-stages.sh +13 -6
  257. package/scripts/sw-templates.sh +7 -31
  258. package/scripts/sw-testgen.sh +17 -6
  259. package/scripts/sw-tmux-pipeline.sh +4 -1
  260. package/scripts/sw-tmux-role-color.sh +2 -0
  261. package/scripts/sw-tmux-status.sh +1 -1
  262. package/scripts/sw-tmux.sh +37 -1
  263. package/scripts/sw-trace.sh +3 -1
  264. package/scripts/sw-tracker-github.sh +3 -0
  265. package/scripts/sw-tracker-jira.sh +3 -0
  266. package/scripts/sw-tracker-linear.sh +3 -0
  267. package/scripts/sw-tracker.sh +3 -1
  268. package/scripts/sw-triage.sh +3 -2
  269. package/scripts/sw-upgrade.sh +3 -1
  270. package/scripts/sw-ux.sh +5 -2
  271. package/scripts/sw-webhook.sh +5 -2
  272. package/scripts/sw-widgets.sh +9 -4
  273. package/scripts/sw-worktree.sh +15 -3
  274. package/scripts/test-skill-injection.sh +1233 -0
  275. package/templates/pipelines/autonomous.json +27 -3
  276. package/templates/pipelines/cost-aware.json +34 -8
  277. package/templates/pipelines/deployed.json +12 -0
  278. package/templates/pipelines/enterprise.json +12 -0
  279. package/templates/pipelines/fast.json +6 -0
  280. package/templates/pipelines/full.json +27 -3
  281. package/templates/pipelines/hotfix.json +6 -0
  282. package/templates/pipelines/standard.json +12 -0
  283. package/templates/pipelines/tdd.json +12 -0
@@ -0,0 +1,50 @@
1
+ ## Architecture Design Expertise
2
+
3
+ Create an Architecture Decision Record (ADR) that future developers can use as a map.
4
+
5
+ ### Component Decomposition
6
+ - Identify the 3-5 key components this change touches
7
+ - Define clear boundaries — each component should have ONE reason to change
8
+ - Specify interfaces between components (function signatures, data contracts, event schemas)
9
+ - Dependencies should point inward — outer layers depend on inner, never the reverse
10
+
11
+ ### Interface Contracts
12
+ - Define input/output types for every public function or API boundary
13
+ - Specify error contracts — what errors can each component return?
14
+ - Document preconditions and postconditions
15
+ - Use types to enforce invariants — make invalid states unrepresentable
16
+
17
+ ### Design Decisions
18
+ For each non-obvious design decision, document:
19
+ 1. **Context** — What constraint or requirement drives this?
20
+ 2. **Decision** — What did you choose?
21
+ 3. **Alternatives** — What else was considered? Why rejected?
22
+ 4. **Consequences** — What trade-offs does this create?
23
+
24
+ ### Patterns to Apply
25
+ - **Dependency Injection** — Don't hardcode dependencies, accept them as parameters
26
+ - **Single Responsibility** — Each module does one thing well
27
+ - **Open/Closed** — Extend through composition, not modification
28
+ - **Interface Segregation** — Don't force consumers to depend on methods they don't use
29
+
30
+ ### Anti-Patterns to Flag
31
+ - God objects that know about everything
32
+ - Circular dependencies between modules
33
+ - Shared mutable state across components
34
+ - Leaky abstractions (implementation details in public interfaces)
35
+
36
+ ### Testing Architecture
37
+ - How will each component be tested in isolation?
38
+ - What are the integration test boundaries?
39
+ - Which external dependencies need mocking?
40
+
41
+ ### Required Output (Mandatory)
42
+
43
+ Your output MUST include these sections when this skill is active:
44
+
45
+ 1. **Component Diagram**: ASCII-art or structured text diagram showing 3-5 components and their dependencies
46
+ 2. **Interface Contracts**: TypeScript-style signatures for all public APIs/functions with input/output types and error contracts
47
+ 3. **Data Flow**: How data moves between components (request → processing → response)
48
+ 4. **Error Boundaries**: Which components handle which errors, and how errors propagate up the stack
49
+
50
+ If any section is not applicable, explicitly state why it's skipped.
@@ -0,0 +1,43 @@
1
+ ## Brainstorming: Socratic Design Refinement
2
+
3
+ **IMPORTANT: You are in an autonomous pipeline. Do NOT ask questions or wait for answers. Instead, answer each question yourself based on the issue context, codebase analysis, and your best judgment. Document your reasoning directly in the plan.**
4
+
5
+ Before writing the implementation plan, challenge your assumptions with these questions:
6
+
7
+ ### Requirements Clarity
8
+ - What is the **minimum viable change** that satisfies this issue?
9
+ - Are there implicit requirements not stated in the issue?
10
+ - What are the acceptance criteria? If none are stated, define them.
11
+
12
+ ### Design Alternatives
13
+ - What are at least 2 different approaches to solve this?
14
+ - What are the trade-offs of each? (complexity, performance, maintainability)
15
+ - Which approach minimizes the blast radius of changes?
16
+
17
+ ### Risk Assessment
18
+ - What could go wrong with the chosen approach?
19
+ - What existing functionality could break?
20
+ - Are there edge cases not covered by the issue description?
21
+
22
+ ### Dependency Analysis
23
+ - What existing code does this depend on?
24
+ - What other code depends on what you're changing?
25
+ - Are there any circular dependency risks?
26
+
27
+ ### Simplicity Check
28
+ - Can this be solved with fewer files changed?
29
+ - Is there existing infrastructure you can reuse?
30
+ - Would a simpler approach work for 90% of cases?
31
+
32
+ Document your reasoning in the plan. Show the alternatives you considered and why you chose this approach.
33
+
34
+ ### Required Output (Mandatory)
35
+
36
+ Your output MUST include these sections when this skill is active:
37
+
38
+ 1. **Task Decomposition**: Numbered list of concrete implementation tasks with explicit dependencies (e.g., "Task 3 blocks Task 5")
39
+ 2. **Risk Analysis**: For each identified risk, state what could break and your mitigation strategy
40
+ 3. **Definition of Done**: Specific, testable acceptance criteria that prove this issue is resolved
41
+ 4. **Alternatives Considered**: At least 2 approaches with explicit trade-offs (complexity, performance, maintainability, blast radius)
42
+
43
+ If any section is not applicable, explicitly state why it's skipped.
@@ -0,0 +1,44 @@
1
+ ## Data Pipeline Expertise
2
+
3
+ Apply these data engineering patterns:
4
+
5
+ ### Schema Design
6
+ - Define schemas explicitly — never rely on implicit structure
7
+ - Use migrations for all schema changes (never manual ALTER TABLE)
8
+ - Add indexes for frequently queried columns
9
+ - Consider denormalization for read-heavy paths
10
+
11
+ ### Data Integrity
12
+ - Use transactions for multi-step operations
13
+ - Implement idempotency keys for operations that could be retried
14
+ - Validate data at ingestion — reject bad data early
15
+ - Use constraints (NOT NULL, UNIQUE, FOREIGN KEY) in the database layer
16
+
17
+ ### Query Patterns
18
+ - Avoid N+1 queries — use JOINs or batch loading
19
+ - Use EXPLAIN to verify query plans for complex queries
20
+ - Paginate large result sets — never SELECT * without LIMIT
21
+ - Use parameterized queries — never string concatenation for SQL
22
+
23
+ ### Migration Safety
24
+ - Migrations must be reversible (include rollback steps)
25
+ - Test migrations on a copy of production data
26
+ - Add new columns as nullable, then backfill, then add NOT NULL
27
+ - Never drop columns in the same deploy as code changes
28
+
29
+ ### Backpressure & Resilience
30
+ - Implement circuit breakers for external data sources
31
+ - Use dead letter queues for failed processing
32
+ - Set timeouts on all external calls
33
+ - Monitor queue depths and processing latency
34
+
35
+ ### Required Output (Mandatory)
36
+
37
+ Your output MUST include these sections when this skill is active:
38
+
39
+ 1. **Schema Changes**: Full migration SQL with both forward and rollback scripts, plus data backfill strategy if required
40
+ 2. **Data Flow Diagram**: Text diagram showing data ingestion → processing → output with failure points marked
41
+ 3. **Idempotency Strategy**: How the system handles duplicate requests (idempotency keys, deduplication, side-effect safety)
42
+ 4. **Rollback Plan**: Step-by-step process to revert schema changes and restore data consistency
43
+
44
+ If any section is not applicable, explicitly state why it's skipped.
@@ -0,0 +1,64 @@
1
+ ## Deploy Safety: Ship Without Breaking Production
2
+
3
+ Every deploy is a controlled experiment. Verify before promoting.
4
+
5
+ ### Pre-Deploy Checklist
6
+ - [ ] All CI checks green on the exact commit being deployed
7
+ - [ ] No open critical/security review findings
8
+ - [ ] Database migrations are backward-compatible (old code can run with new schema)
9
+ - [ ] Feature flags are in place for risky changes
10
+ - [ ] Rollback plan is documented and tested
11
+
12
+ ### Blue-Green / Canary Strategy
13
+ 1. Deploy to inactive slot (green) — do NOT shift traffic yet
14
+ 2. Run health checks against green slot directly
15
+ 3. Run smoke tests against green slot
16
+ 4. Shift small percentage of traffic (canary: 5-10%)
17
+ 5. Monitor error rates for 5 minutes
18
+ 6. If clean, promote to 100%
19
+ 7. If errors spike, rollback immediately
20
+
21
+ ### Rollback Readiness
22
+ - Verify rollback command works BEFORE deploying
23
+ - Keep previous version running until new version is verified
24
+ - Database migrations must be reversible (never drop columns in same deploy)
25
+ - Cache invalidation: new version must handle old cached data
26
+
27
+ ### Deploy Risk by Issue Type
28
+
29
+ **Frontend deploys:**
30
+ - CDN cache invalidation timing
31
+ - Browser cache busting (new asset hashes)
32
+ - Progressive enhancement for users with old cached bundles
33
+
34
+ **API deploys:**
35
+ - Backward compatibility with existing clients
36
+ - API versioning if breaking changes
37
+ - Rate limit configuration for new endpoints
38
+
39
+ **Database deploys:**
40
+ - Migration order: schema first, then code, then cleanup
41
+ - Backfill operations should be idempotent
42
+ - Monitor query performance after index changes
43
+
44
+ **Infrastructure deploys:**
45
+ - DNS propagation delay
46
+ - Connection draining for load balancer changes
47
+ - Secret rotation: both old and new must work during transition
48
+
49
+ ### Incident Prevention
50
+ - Deploy during low-traffic windows when possible
51
+ - Have a human (or monitor) watching for 15 minutes post-deploy
52
+ - Set up alerts for error rate spikes before deploying
53
+ - Never deploy on Friday unless it's a hotfix
54
+
55
+ ### Required Output (Mandatory)
56
+
57
+ Your output MUST include these sections when this skill is active:
58
+
59
+ 1. **Pre-Deploy Checklist**: Verification of all items (CI green, no critical findings, migrations backward-compatible, feature flags in place, rollback plan tested)
60
+ 2. **Blue-Green Strategy**: Specific sequence of steps from green deployment through canary through full promotion
61
+ 3. **Rollback Verification**: Confirmation that rollback command has been tested and works (not just theoretical)
62
+ 4. **Deploy Risk Assessment**: Explicit identification of risks by issue type (frontend cache, API compatibility, database migration, infrastructure changes)
63
+
64
+ If any section is not applicable, explicitly state why it's skipped.
@@ -0,0 +1,38 @@
1
+ ## Documentation Expertise
2
+
3
+ For documentation-focused issues, apply a lightweight approach:
4
+
5
+ ### Scope
6
+ - Focus on accuracy over comprehensiveness
7
+ - Update only what's actually changed or incorrect
8
+ - Remove outdated information rather than marking it deprecated
9
+ - Keep examples current and runnable
10
+
11
+ ### Writing Style
12
+ - Use active voice and present tense
13
+ - Lead with the most important information
14
+ - Use code examples for anything technical
15
+ - Keep paragraphs short — 2-3 sentences max
16
+
17
+ ### Structure
18
+ - Start with a one-line summary of what this documents
19
+ - Include prerequisites and setup if applicable
20
+ - Provide a quick start / most common usage first
21
+ - Put advanced topics and edge cases later
22
+
23
+ ### Skip Heavy Stages
24
+ This is a documentation change. The following pipeline stages can be simplified:
25
+ - **Design stage**: Skip — documentation doesn't need architecture design
26
+ - **Build stage**: Focus on file edits only, no compilation needed
27
+ - **Test stage**: Verify links work and examples are syntactically correct
28
+ - **Review stage**: Focus on accuracy and clarity, not code patterns
29
+
30
+ ### Required Output (Mandatory)
31
+
32
+ Your output MUST include these sections when this skill is active:
33
+
34
+ 1. **What to Document**: List of documentation files created/modified with specific sections added to each
35
+ 2. **What to Skip**: Explicitly state which topics are NOT documented and why (e.g., "Advanced topic X is out of scope for this issue")
36
+ 3. **Audience**: Who will read this documentation (developers, users, operators) and what level of detail is appropriate
37
+
38
+ If any section is not applicable, explicitly state why it's skipped.
@@ -0,0 +1,45 @@
1
+ ## Frontend Design Expertise
2
+
3
+ Apply these frontend patterns to your implementation:
4
+
5
+ ### Accessibility (Required)
6
+ - All interactive elements must have keyboard support
7
+ - Use semantic HTML elements (button, nav, main, article)
8
+ - Include aria-labels for non-text interactive elements
9
+ - Ensure color contrast meets WCAG AA (4.5:1 for normal text, 3:1 for large text)
10
+ - Test with screen reader mental model: does the DOM order make sense?
11
+
12
+ ### Responsive Design
13
+ - Mobile-first: start with mobile layout, enhance for larger screens
14
+ - Use relative units (rem, %, vh/vw) instead of fixed pixels
15
+ - Test breakpoints: 320px, 768px, 1024px, 1440px
16
+ - Touch targets: minimum 44x44px
17
+
18
+ ### Component Patterns
19
+ - Keep components focused — one responsibility per component
20
+ - Lift state up only when siblings need to share it
21
+ - Use composition over inheritance
22
+ - Handle loading, error, and empty states for every data-dependent component
23
+
24
+ ### Performance
25
+ - Lazy-load below-the-fold content
26
+ - Optimize images (appropriate format, size, lazy loading)
27
+ - Minimize re-renders — check dependency arrays in effects
28
+ - Avoid layout thrashing — batch DOM reads and writes
29
+
30
+ ### User Experience
31
+ - Provide immediate feedback for user actions
32
+ - Show loading indicators for operations > 300ms
33
+ - Use optimistic updates where safe
34
+ - Preserve user input on errors — never clear forms on failed submit
35
+
36
+ ### Required Output (Mandatory)
37
+
38
+ Your output MUST include these sections when this skill is active:
39
+
40
+ 1. **Component Hierarchy**: Tree structure showing parent/child relationships and where state lives
41
+ 2. **State Management Approach**: How state flows (props, context, local state, external store) with explicit data flow
42
+ 3. **Accessibility Checklist**: WCAG AA compliance items checked (keyboard support, semantic HTML, color contrast, aria-labels)
43
+ 4. **Responsive Breakpoints**: Explicit breakpoints tested (320px, 768px, 1024px, 1440px) and how layout changes at each
44
+
45
+ If any section is not applicable, explicitly state why it's skipped.
File without changes
File without changes
@@ -0,0 +1,3 @@
1
+
2
+ ### Learned (2026-03-09)
3
+ Produce a manifest of identified edge cases with resolution status (fixed, deferred, acknowledged) so that future iterations can prioritize unresolved gaps.
@@ -0,0 +1,3 @@
1
+
2
+ ### Learned (2026-03-09)
3
+ Include a step to verify that the designed configuration is discoverable by the relevant CLI command (e.g., `shipwright templates list`) before design is considered complete.
@@ -0,0 +1,3 @@
1
+
2
+ ### Learned (2026-03-09)
3
+ For configuration/infrastructure tasks, brainstorming should explicitly produce time/complexity bounds and validate them against at least one real-world example of the target use case before proceeding to design.
@@ -0,0 +1,29 @@
1
+ ## CLI Version Management: Package.json Integration
2
+
3
+ **Pattern**: Read version from package.json, format for display, handle edge cases.
4
+
5
+ ### Version Source Resolution
6
+ - **Canonical source**: Always read from `package.json` at repo root or script-relative location
7
+ - **Path resolution**: Use `$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)` to find script dir, then traverse up to repo root (where package.json lives)
8
+ - **Validation**: Verify JSON is valid before parsing; provide actionable error if missing/corrupt
9
+
10
+ ### Version Extraction & Formatting
11
+ - **Parser**: Use `jq -r '.version' package.json` (safer than regex or sed; `-r` emits the raw string without JSON quotes)
12
+ - **Format standard**: `Shipwright vX.Y.Z` (prefix + space + v + semver)
13
+ - **Validation**: If the version doesn't match the semver `X.Y.Z` pattern, log a warning but don't fail
14
+
15
+ ### Error Handling
16
+ - Missing file: `error "package.json not found at <path>"` → exit 1
17
+ - Invalid JSON: Catch jq error → `error "package.json is malformed"` → exit 1
18
+ - Missing version field: `error "version field missing in package.json"` → exit 1
19
+
20
+ ### Testing Pattern
21
+ - **Unit test**: Create temp package.json with known version, invoke command, assert exact output
22
+ - **Isolation**: Don't depend on real package.json; create fixtures for each test case
23
+ - **Edge cases**: Test missing file, malformed JSON, missing version field, non-semver version
24
+ - **Bash 3.2 safe**: Use `$()` not `<()`, no `readarray`, no `declare -A`
25
+
26
+ ### CLI Display
27
+ - **Standard output**: Version string only (e.g., `Shipwright v1.2.3`), no extra whitespace or formatting
28
+ - **Exit code**: 0 on success, 1 on error
29
+ - **Integration**: `sw hello` should output version alongside any other greeting output
@@ -0,0 +1,99 @@
1
+ ## Collection System Validation & Auto-Repair
2
+
3
+ ### Core Responsibility
4
+ Design and implement validators that check heterogeneous data collection systems (events.jsonl, pipeline state, DORA metrics, cost tracking, memory patterns) for health, detect gaps systematically, and safely auto-repair broken collectors.
5
+
6
+ ### Multi-System Validation Architecture
7
+
8
+ **System-Specific Validators**
9
+ - Events system: Check events.jsonl writes, verify timestamps are recent, detect missing event types (pipeline_start, pipeline_complete, stage_start)
10
+ - Pipeline state: Verify .claude/pipeline-state.md writes work, timestamps are fresh
11
+ - Cost tracking: Validate ~/.shipwright/costs.json updates, compare against expected frequency
12
+ - DORA metrics: Check metrics.json is populated, has recent data points
13
+ - Memory system: Validate memory files created, readable, contain valid patterns
14
+
15
+ **Gap Detection Patterns**
16
+ - Missing events for active pipelines (derive the stages expected since spawn time, then flag any stage without a matching event)
17
+ - Stale timestamps (last write > threshold, e.g., 24h)
18
+ - Unreachable files (ENOENT, EPERM on expected paths)
19
+ - Incomplete writes (truncated JSON, missing closing braces)
20
+ - Permission issues (ls -l reveals 000 or other broken states)
21
+
22
+ **Health Scoring**
23
+ - Per-system: 0-100 based on recency, write success rate, completeness
24
+ - Overall: Weighted average (events 30%, state 25%, cost 15%, DORA 20%, memory 10%)
25
+ - Thresholds: Critical (<30), Warning (30-70), Healthy (>70)
26
+
27
+ ### Auto-Repair Strategies (Safety First)
28
+
29
+ **File System Repairs**
30
+ - Fix permissions: `chmod 755 ~/.shipwright/` (idempotent, safe)
31
+ - Create missing dirs: `mkdir -p` on standard paths (idempotent, safe)
32
+ - Clean up truncated files: Back up to `.bak`, then recreate as empty or restore the last-known-good version
33
+ - Rotate stale logs: Move logs >30d to archive (preserve data)
34
+
35
+ **Collector Restarts**
36
+ - Daemon restart: Signal SIGHUP, not SIGKILL (graceful)
37
+ - Loop restart: Only if process is hung (check for zombie)
38
+ - Checkpoint restore: Use last valid state from .claude/checkpoints/ before restart
39
+
40
+ **Data Restoration**
41
+ - Never delete data unilaterally—always preserve backups
42
+ - Restore from last checkpoint if available
43
+ - If repair requires data loss, alert and wait for manual approval
44
+
45
+ ### Health Reporting Format
46
+
47
+ ```json
48
+ {
49
+ "timestamp": "2026-03-10T14:23:00Z",
50
+ "overall_health": 85,
51
+ "systems": {
52
+ "events": {"health": 95, "last_write": "2026-03-10T14:22:00Z", "status": "healthy"},
53
+ "pipeline_state": {"health": 80, "last_write": "2026-03-10T14:21:00Z", "status": "healthy", "gaps": ["build stage missing"]},
54
+ "cost_tracking": {"health": 100, "last_write": "2026-03-10T14:20:00Z", "status": "healthy"},
55
+ "dora_metrics": {"health": 60, "last_write": "2026-03-10T12:00:00Z", "status": "warning", "stale_hours": 2},
56
+ "memory": {"health": 90, "status": "healthy"}
57
+ },
58
+ "repairs_attempted": [{"system": "dora_metrics", "action": "chmod 755", "success": true}],
59
+ "alerts": ["DORA metrics not updated in 2 hours"]
60
+ }
61
+ ```
62
+
63
+ ### Patrol Integration
64
+
65
+ **Daily Validation Run**
66
+ - Schedule: 02:00 UTC (off-peak, before metrics review)
67
+ - Runs: `shipwright metrics validate --repair` (auto-repair enabled in daemon)
68
+ - Output: JSON + summary logged to events.jsonl with type `metrics_validation`
69
+
70
+ **Alert Thresholds**
71
+ - Overall health < 70: Alert to patrol log, escalate for manual review
72
+ - Missing events > 5 consecutive runs: Critical alert
73
+ - Permission failures: Attempt repair, alert if repair fails
74
+
75
+ **Repair Decision Logic**
76
+ - Low-risk repairs (permissions, mkdir): Auto-execute
77
+ - Medium-risk (truncated file cleanup): Log and alert, wait 10 min for manual override, then auto-execute
78
+ - High-risk (collector restart): Alert and wait for approval, or skip if patrol is in critical path
79
+
80
+ ### Testing Strategy
81
+
82
+ **Unit Tests per Validator**
83
+ - events.jsonl: Simulate ENOENT, EPERM, truncated JSON, missing event types
84
+ - State file: Simulate stale timestamp, missing fields
85
+ - Cost tracker: Simulate missing file, zero events
86
+ - DORA: Simulate outdated metrics.json, malformed JSON
87
+ - Memory: Simulate unreadable patterns, corrupted files
88
+
89
+ **Integration Test (Proof of Repair)**
90
+ 1. Create healthy baseline (all systems populated)
91
+ 2. Inject failures (chmod 000, truncate file, stop daemon)
92
+ 3. Run validator with --repair
93
+ 4. Verify: All systems restored to healthy state, backups created, alerts fired
94
+ 5. Run again: Zero new repairs needed (idempotency proof)
95
+
96
+ **Negative Tests**
97
+ - High-risk repairs skipped correctly when approval not given
98
+ - Repair doesn't cause data loss (backups preserved)
99
+ - Validator doesn't create false positives on legitimate stale data (e.g., idle repos)
@@ -0,0 +1,97 @@
1
+ ## Large-Scale C Refactoring Phase Coordination
2
+
3
+ When refactoring 30+ files across a C codebase with strict testing requirements (3849+ tests, ASan compliance), poor phase planning causes cascading test failures, leaked allocations detected late, and scope creep that undermines velocity.
4
+
5
+ ### Phase Planning Discipline
6
+
7
+ 1. **Identify minimal-dependency phases** — Group files by coupling:
8
+ - Phase 1: New module + infrastructure (hu_data_loader, CMake xxd setup)
9
+ - Phase 2: Static/non-behavioral changes (word lists, prompts)
10
+ - Phase 3: Threshold configurations (no logic changes)
11
+ - Phase 4: Integrations (callers updated)
12
+ - Don't merge phases with circular dependencies or high rework risk
13
+
14
+ 2. **Test stability checkpoints** — After each phase:
15
+ - Run full suite: `./build/human_tests` (all 3849+)
16
+ - Run ASan: `./build/human_tests --asan-report` (0 errors)
17
+ - Diff test counts: ensure no tests skip or disappear
18
+ - A single regression fails the phase
19
+
20
+ 3. **Rollback points** — If a phase breaks tests:
21
+ - Never push through red tests hoping later phases fix them
22
+ - Revert the phase, fix root cause, re-test in isolation
23
+ - Document why it failed in your memory notes so similar failures can be recognized later
24
+
25
+ ### One-Concern-Per-Commit Rule
26
+
27
+ Large refactors tempt you to batch changes. Resist:
28
+
29
+ ```
30
+ ❌ WRONG: "Externalize data + refactor loader API + add config"
31
+ ✅ RIGHT: "Add hu_data_loader module with xxd embedding"
32
+ "Update CMakeLists.txt for xxd generation"
33
+ "Replace hardcoded word lists with hu_data_load() calls"
34
+ ```
35
+
36
+ Each commit should pass tests independently. If a later commit breaks something, bisect pinpoints the exact change.
37
+
38
+ ### ASan Leak Detection Between Phases
39
+
40
+ - After each phase, run: `ASAN_OPTIONS=detect_leaks=1 ./build/human_tests`
41
+ - New leaks in data loading must be fixed before moving forward
42
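+ - Track ASan/LSan suppressions in a dedicated suppressions file (e.g., `lsan.supp`, passed via `LSAN_OPTIONS=suppressions=lsan.supp`) or in the test config, and document why each one exists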
43
+ - Example: if xxd-embedded data needs special cleanup, add integration test to verify
44
+
45
+ ### Scope Creep Prevention
46
+
47
+ - **Resist refactoring temptation** — If you find ugly code while phasing, note it in MEMORY but don't fix it now. Separate PR later.
48
+ - **Document phase boundaries** — Write them in your task list and stick to scope.
49
+ - **Review diffs carefully** — Large phases hide changes. Keep phase PRs under 400 lines if possible.
50
+
51
+ ### Coordination Across Phases
52
+
53
+ - Use a shared checklist (`.claude/phase-checklist.md`) to track: data files created, config schema updated, tests passing, ASan clean
54
+ - If a later phase reveals earlier phase needs rework, update the phase and re-run its tests before continuing
55
+ - Don't hold uncommitted changes across phases—commit or stash between phases
56
+
57
+ ### Common Pitfalls
58
+
59
+ 1. **Building embedded defaults before measuring** — Measure original hardcoded values first (word count, threshold ranges, string encodings). Ensure embedded defaults match exactly.
60
+ 2. **Forgetting cleanup paths** — New data loader functions must free allocations. ASan will catch this at phase end, but better to test per-function.
61
+ 3. **CMake fragility** — xxd-based file generation can fail silently on some platforms. Test incremental rebuilds (`touch data/file.txt && cmake --build build`) on Linux + macOS.
62
+ 4. **Config backward compatibility** — If adding new required fields (e.g., `data_dir`), don't break existing deployments. Provide sensible defaults or environment variable overrides.
63
+ 5. **Mixing behaviors** — Don't change logic (e.g., "also apply new threshold") in the same phase as externalizing the threshold. Two phases: externalize first, change behavior second.
64
+
65
+ ### Example Phase Sequence
66
+
67
+ ```
68
+ Phase 0: Setup
69
+ - Create src/data/loader.c with hu_data_load() skeleton
70
+ - Create data/ directory structure
71
+ - Add CMake xxd command (doesn't embed anything yet)
72
+ - Tests: 3849 pass, ASan clean
73
+
74
+ Phase 1: Embedded Defaults
75
+ - Add a single data file (e.g., data/prompts/safety_rules.txt)
76
+ - Generate embedded_safety_rules.c via xxd
77
+ - Implement hu_data_load() to return embedded data
78
+ - Update one caller to use hu_data_load()
79
+ - Tests: 3849 pass, ASan clean, verify embedded data loads correctly
80
+
81
+ Phase 2: File Override Path
82
+ - Extend hu_data_load() to check ~/.human/data/ first
83
+ - Add unit test: hu_data_load() returns file override if present
84
+ - Tests: 3849 pass, ASan clean
85
+
86
+ Phase 3: Remaining Data Files
87
+ - Add remaining data files (word lists, prompts, etc.)
88
+ - Update all callers to hu_data_load()
89
+ - Tests: 3849 pass, ASan clean
90
+
91
+ Phase 4: Config Integration
92
+ - Add temp_dir, data_dir, threshold fields to config
93
+ - Update callers to use config fields instead of hardcoded values
94
+ - Tests: 3849 pass, ASan clean
95
+ ```
96
+
97
+ Each phase is independently testable and deployable.