shipwright-cli 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/.claude/agents/code-reviewer.md +2 -0
  2. package/.claude/agents/devops-engineer.md +2 -0
  3. package/.claude/agents/doc-fleet-agent.md +2 -0
  4. package/.claude/agents/pipeline-agent.md +2 -0
  5. package/.claude/agents/shell-script-specialist.md +2 -0
  6. package/.claude/agents/test-specialist.md +2 -0
  7. package/.claude/hooks/agent-crash-capture.sh +32 -0
  8. package/.claude/hooks/post-tool-use.sh +3 -2
  9. package/.claude/hooks/pre-tool-use.sh +35 -3
  10. package/README.md +4 -4
  11. package/claude-code/hooks/config-change.sh +18 -0
  12. package/claude-code/hooks/instructions-reloaded.sh +7 -0
  13. package/claude-code/hooks/worktree-create.sh +25 -0
  14. package/claude-code/hooks/worktree-remove.sh +20 -0
  15. package/config/code-constitution.json +130 -0
  16. package/dashboard/middleware/auth.ts +134 -0
  17. package/dashboard/middleware/constants.ts +21 -0
  18. package/dashboard/public/index.html +2 -6
  19. package/dashboard/public/styles.css +100 -97
  20. package/dashboard/routes/auth.ts +38 -0
  21. package/dashboard/server.ts +66 -25
  22. package/dashboard/services/config.ts +26 -0
  23. package/dashboard/services/db.ts +118 -0
  24. package/dashboard/src/canvas/pixel-agent.ts +298 -0
  25. package/dashboard/src/canvas/pixel-sprites.ts +440 -0
  26. package/dashboard/src/canvas/shipyard-effects.ts +367 -0
  27. package/dashboard/src/canvas/shipyard-scene.ts +616 -0
  28. package/dashboard/src/canvas/submarine-layout.ts +267 -0
  29. package/dashboard/src/components/header.ts +8 -7
  30. package/dashboard/src/core/router.ts +1 -0
  31. package/dashboard/src/design/submarine-theme.ts +253 -0
  32. package/dashboard/src/main.ts +2 -0
  33. package/dashboard/src/types/api.ts +2 -1
  34. package/dashboard/src/views/activity.ts +2 -1
  35. package/dashboard/src/views/shipyard.ts +39 -0
  36. package/dashboard/types/index.ts +166 -0
  37. package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
  38. package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
  39. package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
  40. package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
  41. package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
  42. package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
  43. package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
  44. package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
  45. package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
  46. package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
  47. package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
  48. package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
  49. package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
  50. package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
  51. package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
  52. package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
  53. package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
  54. package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
  55. package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
  56. package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
  57. package/docs/research/RESEARCH_INDEX.md +439 -0
  58. package/docs/research/RESEARCH_SOURCES.md +440 -0
  59. package/docs/research/RESEARCH_SUMMARY.txt +275 -0
  60. package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
  61. package/package.json +2 -2
  62. package/scripts/lib/adaptive-model.sh +427 -0
  63. package/scripts/lib/adaptive-timeout.sh +316 -0
  64. package/scripts/lib/audit-trail.sh +309 -0
  65. package/scripts/lib/auto-recovery.sh +471 -0
  66. package/scripts/lib/bandit-selector.sh +431 -0
  67. package/scripts/lib/bootstrap.sh +104 -2
  68. package/scripts/lib/causal-graph.sh +455 -0
  69. package/scripts/lib/compat.sh +126 -0
  70. package/scripts/lib/compound-audit.sh +337 -0
  71. package/scripts/lib/constitutional.sh +454 -0
  72. package/scripts/lib/context-budget.sh +359 -0
  73. package/scripts/lib/convergence.sh +594 -0
  74. package/scripts/lib/cost-optimizer.sh +634 -0
  75. package/scripts/lib/daemon-adaptive.sh +10 -0
  76. package/scripts/lib/daemon-dispatch.sh +106 -17
  77. package/scripts/lib/daemon-failure.sh +34 -4
  78. package/scripts/lib/daemon-patrol.sh +23 -2
  79. package/scripts/lib/daemon-poll-github.sh +361 -0
  80. package/scripts/lib/daemon-poll-health.sh +299 -0
  81. package/scripts/lib/daemon-poll.sh +27 -611
  82. package/scripts/lib/daemon-state.sh +112 -66
  83. package/scripts/lib/daemon-triage.sh +10 -0
  84. package/scripts/lib/dod-scorecard.sh +442 -0
  85. package/scripts/lib/error-actionability.sh +300 -0
  86. package/scripts/lib/formal-spec.sh +461 -0
  87. package/scripts/lib/helpers.sh +177 -4
  88. package/scripts/lib/intent-analysis.sh +409 -0
  89. package/scripts/lib/loop-convergence.sh +350 -0
  90. package/scripts/lib/loop-iteration.sh +682 -0
  91. package/scripts/lib/loop-progress.sh +48 -0
  92. package/scripts/lib/loop-restart.sh +185 -0
  93. package/scripts/lib/memory-effectiveness.sh +506 -0
  94. package/scripts/lib/mutation-executor.sh +352 -0
  95. package/scripts/lib/outcome-feedback.sh +521 -0
  96. package/scripts/lib/pipeline-cli.sh +336 -0
  97. package/scripts/lib/pipeline-commands.sh +1216 -0
  98. package/scripts/lib/pipeline-detection.sh +100 -2
  99. package/scripts/lib/pipeline-execution.sh +897 -0
  100. package/scripts/lib/pipeline-github.sh +28 -3
  101. package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
  102. package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
  103. package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
  104. package/scripts/lib/pipeline-intelligence.sh +100 -1136
  105. package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
  106. package/scripts/lib/pipeline-quality-checks.sh +17 -715
  107. package/scripts/lib/pipeline-quality-gates.sh +563 -0
  108. package/scripts/lib/pipeline-stages-build.sh +730 -0
  109. package/scripts/lib/pipeline-stages-delivery.sh +965 -0
  110. package/scripts/lib/pipeline-stages-intake.sh +1133 -0
  111. package/scripts/lib/pipeline-stages-monitor.sh +407 -0
  112. package/scripts/lib/pipeline-stages-review.sh +1022 -0
  113. package/scripts/lib/pipeline-stages.sh +59 -2929
  114. package/scripts/lib/pipeline-state.sh +36 -5
  115. package/scripts/lib/pipeline-util.sh +487 -0
  116. package/scripts/lib/policy-learner.sh +438 -0
  117. package/scripts/lib/process-reward.sh +493 -0
  118. package/scripts/lib/project-detect.sh +649 -0
  119. package/scripts/lib/quality-profile.sh +334 -0
  120. package/scripts/lib/recruit-commands.sh +885 -0
  121. package/scripts/lib/recruit-learning.sh +739 -0
  122. package/scripts/lib/recruit-roles.sh +648 -0
  123. package/scripts/lib/reward-aggregator.sh +458 -0
  124. package/scripts/lib/rl-optimizer.sh +362 -0
  125. package/scripts/lib/root-cause.sh +427 -0
  126. package/scripts/lib/scope-enforcement.sh +445 -0
  127. package/scripts/lib/session-restart.sh +493 -0
  128. package/scripts/lib/skill-memory.sh +300 -0
  129. package/scripts/lib/skill-registry.sh +775 -0
  130. package/scripts/lib/spec-driven.sh +476 -0
  131. package/scripts/lib/test-helpers.sh +18 -7
  132. package/scripts/lib/test-holdout.sh +429 -0
  133. package/scripts/lib/test-optimizer.sh +511 -0
  134. package/scripts/shipwright-file-suggest.sh +45 -0
  135. package/scripts/skills/adversarial-quality.md +61 -0
  136. package/scripts/skills/api-design.md +44 -0
  137. package/scripts/skills/architecture-design.md +50 -0
  138. package/scripts/skills/brainstorming.md +43 -0
  139. package/scripts/skills/data-pipeline.md +44 -0
  140. package/scripts/skills/deploy-safety.md +64 -0
  141. package/scripts/skills/documentation.md +38 -0
  142. package/scripts/skills/frontend-design.md +45 -0
  143. package/scripts/skills/generated/.gitkeep +0 -0
  144. package/scripts/skills/generated/_refinements/.gitkeep +0 -0
  145. package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
  146. package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
  147. package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
  148. package/scripts/skills/generated/cli-version-management.md +29 -0
  149. package/scripts/skills/generated/collection-system-validation.md +99 -0
  150. package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
  151. package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
  152. package/scripts/skills/generated/test-parallelization-detection.md +65 -0
  153. package/scripts/skills/observability.md +79 -0
  154. package/scripts/skills/performance.md +48 -0
  155. package/scripts/skills/pr-quality.md +49 -0
  156. package/scripts/skills/product-thinking.md +43 -0
  157. package/scripts/skills/security-audit.md +49 -0
  158. package/scripts/skills/systematic-debugging.md +40 -0
  159. package/scripts/skills/testing-strategy.md +47 -0
  160. package/scripts/skills/two-stage-review.md +52 -0
  161. package/scripts/skills/validation-thoroughness.md +55 -0
  162. package/scripts/sw +9 -3
  163. package/scripts/sw-activity.sh +9 -2
  164. package/scripts/sw-adaptive.sh +2 -1
  165. package/scripts/sw-adversarial.sh +2 -1
  166. package/scripts/sw-architecture-enforcer.sh +3 -1
  167. package/scripts/sw-auth.sh +12 -2
  168. package/scripts/sw-autonomous.sh +5 -1
  169. package/scripts/sw-changelog.sh +4 -1
  170. package/scripts/sw-checkpoint.sh +2 -1
  171. package/scripts/sw-ci.sh +5 -1
  172. package/scripts/sw-cleanup.sh +4 -26
  173. package/scripts/sw-code-review.sh +10 -4
  174. package/scripts/sw-connect.sh +2 -1
  175. package/scripts/sw-context.sh +2 -1
  176. package/scripts/sw-cost.sh +48 -3
  177. package/scripts/sw-daemon.sh +66 -9
  178. package/scripts/sw-dashboard.sh +3 -1
  179. package/scripts/sw-db.sh +59 -16
  180. package/scripts/sw-decide.sh +8 -2
  181. package/scripts/sw-decompose.sh +360 -17
  182. package/scripts/sw-deps.sh +4 -1
  183. package/scripts/sw-developer-simulation.sh +4 -1
  184. package/scripts/sw-discovery.sh +325 -2
  185. package/scripts/sw-doc-fleet.sh +4 -1
  186. package/scripts/sw-docs-agent.sh +3 -1
  187. package/scripts/sw-docs.sh +2 -1
  188. package/scripts/sw-doctor.sh +453 -2
  189. package/scripts/sw-dora.sh +4 -1
  190. package/scripts/sw-durable.sh +4 -3
  191. package/scripts/sw-e2e-orchestrator.sh +17 -16
  192. package/scripts/sw-eventbus.sh +7 -1
  193. package/scripts/sw-evidence.sh +364 -12
  194. package/scripts/sw-feedback.sh +550 -9
  195. package/scripts/sw-fix.sh +20 -1
  196. package/scripts/sw-fleet-discover.sh +6 -2
  197. package/scripts/sw-fleet-viz.sh +4 -1
  198. package/scripts/sw-fleet.sh +5 -1
  199. package/scripts/sw-github-app.sh +16 -3
  200. package/scripts/sw-github-checks.sh +3 -2
  201. package/scripts/sw-github-deploy.sh +3 -2
  202. package/scripts/sw-github-graphql.sh +18 -7
  203. package/scripts/sw-guild.sh +5 -1
  204. package/scripts/sw-heartbeat.sh +5 -30
  205. package/scripts/sw-hello.sh +67 -0
  206. package/scripts/sw-hygiene.sh +6 -1
  207. package/scripts/sw-incident.sh +265 -1
  208. package/scripts/sw-init.sh +18 -2
  209. package/scripts/sw-instrument.sh +10 -2
  210. package/scripts/sw-intelligence.sh +42 -6
  211. package/scripts/sw-jira.sh +5 -1
  212. package/scripts/sw-launchd.sh +2 -1
  213. package/scripts/sw-linear.sh +4 -1
  214. package/scripts/sw-logs.sh +4 -1
  215. package/scripts/sw-loop.sh +432 -1128
  216. package/scripts/sw-memory.sh +356 -2
  217. package/scripts/sw-mission-control.sh +6 -1
  218. package/scripts/sw-model-router.sh +481 -26
  219. package/scripts/sw-otel.sh +13 -4
  220. package/scripts/sw-oversight.sh +14 -5
  221. package/scripts/sw-patrol-meta.sh +334 -0
  222. package/scripts/sw-pipeline-composer.sh +5 -1
  223. package/scripts/sw-pipeline-vitals.sh +2 -1
  224. package/scripts/sw-pipeline.sh +53 -2664
  225. package/scripts/sw-pm.sh +12 -5
  226. package/scripts/sw-pr-lifecycle.sh +2 -1
  227. package/scripts/sw-predictive.sh +7 -1
  228. package/scripts/sw-prep.sh +185 -2
  229. package/scripts/sw-ps.sh +5 -25
  230. package/scripts/sw-public-dashboard.sh +15 -3
  231. package/scripts/sw-quality.sh +2 -1
  232. package/scripts/sw-reaper.sh +8 -25
  233. package/scripts/sw-recruit.sh +156 -2303
  234. package/scripts/sw-regression.sh +19 -12
  235. package/scripts/sw-release-manager.sh +3 -1
  236. package/scripts/sw-release.sh +4 -1
  237. package/scripts/sw-remote.sh +3 -1
  238. package/scripts/sw-replay.sh +7 -1
  239. package/scripts/sw-retro.sh +158 -1
  240. package/scripts/sw-review-rerun.sh +3 -1
  241. package/scripts/sw-scale.sh +10 -3
  242. package/scripts/sw-security-audit.sh +6 -1
  243. package/scripts/sw-self-optimize.sh +6 -3
  244. package/scripts/sw-session.sh +9 -3
  245. package/scripts/sw-setup.sh +3 -1
  246. package/scripts/sw-stall-detector.sh +406 -0
  247. package/scripts/sw-standup.sh +15 -7
  248. package/scripts/sw-status.sh +3 -1
  249. package/scripts/sw-strategic.sh +4 -1
  250. package/scripts/sw-stream.sh +7 -1
  251. package/scripts/sw-swarm.sh +18 -6
  252. package/scripts/sw-team-stages.sh +13 -6
  253. package/scripts/sw-templates.sh +5 -29
  254. package/scripts/sw-testgen.sh +7 -1
  255. package/scripts/sw-tmux-pipeline.sh +4 -1
  256. package/scripts/sw-tmux-role-color.sh +2 -0
  257. package/scripts/sw-tmux-status.sh +1 -1
  258. package/scripts/sw-tmux.sh +3 -1
  259. package/scripts/sw-trace.sh +3 -1
  260. package/scripts/sw-tracker-github.sh +3 -0
  261. package/scripts/sw-tracker-jira.sh +3 -0
  262. package/scripts/sw-tracker-linear.sh +3 -0
  263. package/scripts/sw-tracker.sh +3 -1
  264. package/scripts/sw-triage.sh +2 -1
  265. package/scripts/sw-upgrade.sh +3 -1
  266. package/scripts/sw-ux.sh +5 -2
  267. package/scripts/sw-webhook.sh +3 -1
  268. package/scripts/sw-widgets.sh +3 -1
  269. package/scripts/sw-worktree.sh +15 -3
  270. package/scripts/test-skill-injection.sh +1233 -0
  271. package/templates/pipelines/autonomous.json +27 -3
  272. package/templates/pipelines/cost-aware.json +34 -8
  273. package/templates/pipelines/deployed.json +12 -0
  274. package/templates/pipelines/enterprise.json +12 -0
  275. package/templates/pipelines/fast.json +6 -0
  276. package/templates/pipelines/full.json +27 -3
  277. package/templates/pipelines/hotfix.json +6 -0
  278. package/templates/pipelines/standard.json +12 -0
  279. package/templates/pipelines/tdd.json +12 -0
@@ -7,8 +7,10 @@
7
7
  set -euo pipefail
8
8
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
9
9
 
10
- VERSION="3.2.0"
10
+ # shellcheck disable=SC2034
11
+ VERSION="3.3.0"
11
12
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
13
+ # shellcheck disable=SC2034
12
14
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
13
15
 
14
16
  # ─── Cross-platform compatibility ──────────────────────────────────────────
@@ -30,7 +32,8 @@ fi
30
32
  if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
31
33
  emit_event() {
32
34
  local event_type="$1"; shift; mkdir -p "${HOME}/.shipwright"
33
- local payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
35
+ local payload
36
+ payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
34
37
  while [[ $# -gt 0 ]]; do local key="${1%%=*}" val="${1#*=}"; payload="${payload},\"${key}\":\"${val}\""; shift; done
35
38
  echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
36
39
  }
@@ -42,29 +45,61 @@ MODEL_ROUTING_OPTIMIZATION="${OPTIMIZATION_DIR}/model-routing.json"
42
45
  MODEL_ROUTING_LEGACY="${HOME}/.shipwright/model-routing.json"
43
46
  MODEL_USAGE_LOG="${OPTIMIZATION_DIR}/model-usage.jsonl"
44
47
  AB_RESULTS_FILE="${HOME}/.shipwright/ab-results.jsonl"
48
+ CHAIN_CONFIG_FILE="${OPTIMIZATION_DIR}/reasoning-chains.json"
49
+ CHAIN_EXECUTION_LOG="${OPTIMIZATION_DIR}/chain-executions.jsonl"
45
50
 
46
51
  # Resolve which config file to use (set by _resolve_routing_config)
47
52
  MODEL_ROUTING_CONFIG=""
48
53
 
49
- # ─── Model Costs (per million tokens) ───────────────────────────────────────
50
- HAIKU_INPUT_COST="0.80"
51
- HAIKU_OUTPUT_COST="4.00"
52
- SONNET_INPUT_COST="3.00"
53
- SONNET_OUTPUT_COST="15.00"
54
- OPUS_INPUT_COST="15.00"
55
- OPUS_OUTPUT_COST="75.00"
56
-
57
- # ─── Default Routing Rules ──────────────────────────────────────────────────
58
- # Stages that default to haiku (low complexity, fast)
54
+ # ─── Model Costs (per million tokens, config-driven) ──────────────────────
55
+ # Read from ~/.shipwright/pricing.json if exists, otherwise use defaults
56
# Load per-million-token model prices into the global *_COST variables.
#
# Globals written: HAIKU_INPUT_COST, HAIKU_OUTPUT_COST, SONNET_INPUT_COST,
#                  SONNET_OUTPUT_COST, OPUS_INPUT_COST, OPUS_OUTPUT_COST
# Reads:           ${HOME}/.shipwright/pricing.json (optional)
#
# Each price falls back to its built-in default when the file is missing,
# jq is unavailable or fails, the key is absent, or the value is not a plain
# non-negative decimal. The numeric check matters because these values are
# later used in awk arithmetic; a malformed pricing.json must not be able to
# break (or inject code into) the cost calculation.
_load_pricing() {
    local pricing_file="${HOME}/.shipwright/pricing.json"
    local numeric_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
    local spec var path fallback value

    # One row per price: VARIABLE|jq path|default
    for spec in \
        "HAIKU_INPUT_COST|.haiku.input|0.80" \
        "HAIKU_OUTPUT_COST|.haiku.output|4.00" \
        "SONNET_INPUT_COST|.sonnet.input|3.00" \
        "SONNET_OUTPUT_COST|.sonnet.output|15.00" \
        "OPUS_INPUT_COST|.opus.input|15.00" \
        "OPUS_OUTPUT_COST|.opus.output|75.00"; do
        IFS='|' read -r var path fallback <<< "$spec"
        value="$fallback"
        if [[ -f "$pricing_file" ]]; then
            # jq failure (missing binary, invalid JSON) yields an empty value,
            # which the numeric check below turns back into the default.
            value=$(jq -r "${path} // empty" "$pricing_file" 2>/dev/null || true)
            [[ "$value" =~ $numeric_re ]] || value="$fallback"
        fi
        # No local of this name exists, so printf -v assigns the global.
        printf -v "$var" '%s' "$value"
    done
}
74
+ _load_pricing
75
+
76
+ # ─── Default Routing Rules (config-driven) ────────────────────────────────
77
+ # Read from daemon-config model_routing.stages if configured
78
# Override the default stage→model routing patterns from the daemon config.
#
# Globals read:    DAEMON_CONFIG, WORK_DIR (both optional)
# Globals written: HAIKU_STAGES, SONNET_STAGES, OPUS_STAGES (only the ones
#                  that are configured; the others keep their current value)
# Returns:         always 0. An absent file, absent keys or a jq failure
#                  simply leave the current values in place.
#
# The overrides are applied with explicit `if` statements: a trailing
# `[[ ... ]] && VAR=...` would make the function return 1 whenever the last
# key is not configured, which is the normal case.
_load_routing_rules() {
    local cfg="${DAEMON_CONFIG:-${WORK_DIR:-.}/.claude/daemon-config.json}"
    [[ -f "$cfg" ]] || return 0

    local haiku_cfg sonnet_cfg opus_cfg
    haiku_cfg=$(jq -r '.model_routing.haiku_stages // empty' "$cfg" 2>/dev/null || true)
    sonnet_cfg=$(jq -r '.model_routing.sonnet_stages // empty' "$cfg" 2>/dev/null || true)
    opus_cfg=$(jq -r '.model_routing.opus_stages // empty' "$cfg" 2>/dev/null || true)

    if [[ -n "$haiku_cfg" && "$haiku_cfg" != "null" ]]; then
        HAIKU_STAGES="$haiku_cfg"
    fi
    if [[ -n "$sonnet_cfg" && "$sonnet_cfg" != "null" ]]; then
        SONNET_STAGES="$sonnet_cfg"
    fi
    if [[ -n "$opus_cfg" && "$opus_cfg" != "null" ]]; then
        OPUS_STAGES="$opus_cfg"
    fi
    return 0
}
59
90
  HAIKU_STAGES="intake|monitor"
60
- # Stages that default to sonnet (medium complexity)
61
91
  SONNET_STAGES="test|review"
62
- # Stages that default to opus (high complexity, needs deep thinking)
63
92
  OPUS_STAGES="plan|design|build|compound_quality"
64
-
65
- # ─── Complexity Thresholds ──────────────────────────────────────────────────
66
- COMPLEXITY_LOW=30 # Below this: use sonnet
67
- COMPLEXITY_HIGH=80 # Above this: use opus
93
+ _load_routing_rules 2>/dev/null || true
94
+
95
+ # ─── Complexity Thresholds (config-driven) ────────────────────────────────
96
+ if type _smart_int >/dev/null 2>&1; then
97
+ COMPLEXITY_LOW=$(_smart_int "model_routing.complexity_low" 30)
98
+ COMPLEXITY_HIGH=$(_smart_int "model_routing.complexity_high" 80)
99
+ else
100
+ COMPLEXITY_LOW=30
101
+ COMPLEXITY_HIGH=80
102
+ fi
68
103
 
69
104
  # ─── Resolve Routing Config Path ────────────────────────────────────────────
70
105
  # Priority: optimization (self-optimize writes) > legacy > create in optimization
@@ -243,6 +278,7 @@ set_config() {
243
278
  # Use jq to safely update the config
244
279
  local tmp_config
245
280
  tmp_config=$(mktemp)
281
+ # shellcheck disable=SC2064
246
282
  trap "rm -f '$tmp_config'" RETURN
247
283
 
248
284
  if [[ "$value" == "true" ]] || [[ "$value" == "false" ]]; then
@@ -353,7 +389,8 @@ record_usage() {
353
389
  ;;
354
390
  esac
355
391
 
356
- local record="{\"ts\":\"$(now_iso)\",\"stage\":\"$stage\",\"model\":\"$model\",\"input_tokens\":$input_tokens,\"output_tokens\":$output_tokens,\"cost\":$cost}"
392
+ local record
393
+ record="{\"ts\":\"$(now_iso)\",\"stage\":\"$stage\",\"model\":\"$model\",\"input_tokens\":$input_tokens,\"output_tokens\":$output_tokens,\"cost\":$cost}"
357
394
  echo "$record" >> "$MODEL_USAGE_LOG"
358
395
  }
359
396
 
@@ -376,6 +413,7 @@ configure_ab_test() {
376
413
 
377
414
  local tmp_config
378
415
  tmp_config=$(mktemp)
416
+ # shellcheck disable=SC2064
379
417
  trap "rm -f '$tmp_config'" RETURN
380
418
 
381
419
  jq ".a_b_test = {\"enabled\": true, \"percentage\": $percentage, \"variant\": \"$variant\"}" \
@@ -395,7 +433,8 @@ log_ab_result() {
395
433
 
396
434
  mkdir -p "${HOME}/.shipwright"
397
435
 
398
- local record="{\"ts\":\"$(now_iso)\",\"run_id\":\"$run_id\",\"variant\":\"$variant\",\"success\":$success_status,\"cost\":$cost,\"duration_seconds\":$duration}"
436
+ local record
437
+ record="{\"ts\":\"$(now_iso)\",\"run_id\":\"$run_id\",\"variant\":\"$variant\",\"success\":$success_status,\"cost\":$cost,\"duration_seconds\":$duration}"
399
438
  echo "$record" >> "$AB_RESULTS_FILE"
400
439
  }
401
440
 
@@ -492,6 +531,380 @@ show_ab_results() {
492
531
  ' "$AB_RESULTS_FILE" 2>/dev/null | jq -r '.[] | "\(.variant):\n Runs: \(.total_runs)\n Success: \(.successful)/\(.total_runs) (\(.success_rate | round)%)\n Avg Cost: $\(.avg_cost | round)\n Total Cost: $\(.total_cost | round)\n Avg Duration: \(.avg_duration | round)s"' || true
493
532
  }
494
533
 
534
+ # ─── Initialize Chain Templates ────────────────────────────────────────────
535
# Make sure the reasoning-chain config file exists, seeding it with the
# built-in templates when it does not.
#
# Globals read: CHAIN_CONFIG_FILE
# Returns:      0 when the file is present afterwards, non-zero if it could
#               not be written.
#
# A zero-byte file (for example left behind by an interrupted write) is
# treated as missing. The defaults are written to a sibling temp file and
# renamed into place so readers never observe a half-written config.
_ensure_chain_templates() {
    mkdir -p "$(dirname "$CHAIN_CONFIG_FILE")"

    if [[ -s "$CHAIN_CONFIG_FILE" ]]; then
        return 0
    fi

    local tmp_file="${CHAIN_CONFIG_FILE}.tmp.$$"
    if ! cat > "$tmp_file" <<'CHAINS'
{
  "version": "1.0",
  "templates": {
    "explore-decide": [
      {"step": "explore", "model": "haiku", "max_tokens": 4000, "description": "Fast exploration with haiku"},
      {"step": "decide", "model": "opus", "max_tokens": 8000, "description": "Final decision with opus"}
    ],
    "explore-synthesize-decide": [
      {"step": "explore", "model": "haiku", "max_tokens": 4000, "description": "Explore with haiku"},
      {"step": "synthesize", "model": "sonnet", "max_tokens": 6000, "description": "Synthesize with sonnet"},
      {"step": "decide", "model": "opus", "max_tokens": 8000, "description": "Decide with opus"}
    ],
    "fast-verify": [
      {"step": "generate", "model": "sonnet", "max_tokens": 6000, "description": "Generate with sonnet"},
      {"step": "verify", "model": "haiku", "max_tokens": 2000, "description": "Verify with haiku"}
    ],
    "deep-analysis": [
      {"step": "analyze", "model": "opus", "max_tokens": 8000, "description": "Deep analysis with opus"},
      {"step": "validate", "model": "opus", "max_tokens": 4000, "description": "Validate with opus"}
    ]
  },
  "confidence_threshold": 50,
  "escalation_threshold": 80,
  "max_escalations_per_step": 1,
  "custom_chains": {}
}
CHAINS
    then
        rm -f "$tmp_file"
        return 1
    fi

    mv "$tmp_file" "$CHAIN_CONFIG_FILE"
}
569
+
570
+ # ─── Define a Custom Reasoning Chain ────────────────────────────────────────
571
# Define (or replace) a custom reasoning chain in the chain config file.
#
# Arguments:
#   $1 - chain name
#   $2 - JSON array of step objects; each step needs string "step" and
#        "model" fields (the fields chain_execute reads for every step)
# Globals read: CHAIN_CONFIG_FILE
# Returns: 0 on success; 1 on usage error, missing jq, invalid steps or a
#          failed config update. The existing config is left untouched on
#          every failure path.
chain_define() {
    # ${N:-} so a missing argument reaches the usage message under `set -u`.
    local chain_name="${1:-}"
    local steps_json="${2:-}"

    if [[ -z "$chain_name" ]] || [[ -z "$steps_json" ]]; then
        error "Usage: chain_define <name> <steps_json>"
        return 1
    fi

    _ensure_chain_templates

    if ! command -v jq >/dev/null 2>&1; then
        error "jq is required for chain definitions"
        return 1
    fi

    # Validate that steps_json is valid JSON
    if ! jq empty <<< "$steps_json" 2>/dev/null; then
        error "Invalid JSON for chain steps"
        return 1
    fi

    # Well-formed JSON is not enough: reject shapes that could never execute.
    if ! jq -e 'type == "array" and length > 0 and all(.[]; type == "object" and (.step | type) == "string" and (.model | type) == "string")' \
        <<< "$steps_json" >/dev/null 2>&1; then
        error "Chain steps must be a non-empty JSON array of objects with \"step\" and \"model\""
        return 1
    fi

    local tmp_config
    if ! tmp_config=$(mktemp); then
        error "Could not create a temporary file"
        return 1
    fi

    # Only replace the config once jq has produced the complete new document.
    # Cleanup is explicit rather than via `trap ... RETURN`, which would stay
    # installed after this function returns.
    if ! jq --argjson steps "$steps_json" --arg name "$chain_name" \
        '.custom_chains[$name] = $steps' \
        "$CHAIN_CONFIG_FILE" > "$tmp_config"; then
        rm -f "$tmp_config"
        error "Failed to update $CHAIN_CONFIG_FILE"
        return 1
    fi

    mv "$tmp_config" "$CHAIN_CONFIG_FILE"
    success "Defined custom chain: $chain_name"
}
606
+
607
+ # ─── Score Confidence from Output ──────────────────────────────────────────
608
# Heuristically score how confident a step's output sounds, on a 0-100 scale.
#
# Arguments:
#   $1 - output text to score
#   $2 - step type; accepted for interface compatibility, not used by the
#        current heuristic
# Outputs: the integer score on stdout
#
# Scoring: start at 50, +15 if the text contains reasoning markers, +20 if
# it contains conclusion markers, -10 if it contains caveat markers. All
# matches are case-insensitive substring matches.
chain_score_confidence() {
    local output="${1:-}"
    local confidence=50

    if grep -qiE "(because|reason|since|based|due to)" <<< "$output"; then
        confidence=$((confidence + 15))
    fi

    if grep -qiE "(therefore|thus|conclud|result|found|identify)" <<< "$output"; then
        confidence=$((confidence + 20))
    fi

    if grep -qiE "(however|but|though|caveat|limitation|uncertain)" <<< "$output"; then
        confidence=$((confidence - 10))
    fi

    # Clamp to 0-100. The current weights cannot leave that range; the clamp
    # keeps the contract if the weights are tuned later.
    if [[ "$confidence" -lt 0 ]]; then confidence=0; fi
    if [[ "$confidence" -gt 100 ]]; then confidence=100; fi

    echo "$confidence"
}
642
+
643
+ # ─── Get Next Escalation Model ─────────────────────────────────────────────
644
# Print the next model tier above the given one (haiku → sonnet → opus).
#
# Arguments: $1 - current model name
# Outputs:   the escalated model name on stdout; "opus" maps to itself
#            because it is already the top tier
# Returns:   0 on success, 1 (after reporting via error) for an unknown or
#            missing model name
_get_escalation_model() {
    # ${1:-} so a missing argument reaches the error branch under `set -u`.
    local current_model="${1:-}"

    case "$current_model" in
        haiku)  echo "sonnet" ;;
        sonnet) echo "opus" ;;
        opus)   echo "opus" ;;  # Already at top
        *)
            error "Unknown model: $current_model"
            return 1
            ;;
    esac
}
653
+
654
+ # ─── Execute a Single Chain Step ───────────────────────────────────────────
655
# Print the current time as whole milliseconds since the epoch.
# GNU date supports %N (nanoseconds); where it does not (BSD/macOS prints a
# non-numeric stamp) fall back to second resolution.
_chain_now_ms() {
    local stamp
    stamp=$(date +%s%N 2>/dev/null || true)
    if [[ "$stamp" =~ ^[0-9]{13,}$ ]]; then
        echo "${stamp:0:13}"
    else
        echo "$(( $(date +%s) * 1000 ))"
    fi
}

# Execute one step of a reasoning chain and print its execution record.
#
# Arguments:
#   $1 - step name
#   $2 - model name
#   $3 - prompt (accepted but not used by the current mock implementation)
#   $4 - max tokens, default 4000 (accepted but not used by the mock)
# Globals read: CLAUDE_API_KEY, NO_GITHUB (both optional)
# Outputs: a JSON object on stdout with step, model, tokens_in, tokens_out,
#          duration_ms and output (the step's output as a string)
#
# Both branches currently return a synthetic response; no API call is made.
_execute_chain_step() {
    local step_name="$1"
    local model="$2"

    local output=""
    local tokens_in=0
    local tokens_out=0
    local start_time
    start_time=$(_chain_now_ms)

    # ${NO_GITHUB:-} keeps this safe under `set -u` when the flag is not set.
    if [[ -z "${CLAUDE_API_KEY:-}" ]] || [[ "${NO_GITHUB:-}" == "true" ]]; then
        # Mock/test mode
        output="{\"status\": \"success\", \"step\": \"$step_name\", \"model\": \"$model\", \"content\": \"Mock response from $model for step $step_name\"}"
        tokens_in=500
        tokens_out=300
    else
        # Real Claude API call would happen here
        # For now, return mock response
        output="{\"status\": \"success\", \"step\": \"$step_name\", \"model\": \"$model\", \"content\": \"Mock response from $model\"}"
        tokens_in=500
        tokens_out=300
    fi

    local end_time duration_ms
    end_time=$(_chain_now_ms)
    duration_ms=$((end_time - start_time))

    # Return execution record as JSON
    jq -n \
        --arg step "$step_name" \
        --arg model "$model" \
        --argjson tokens_in "$tokens_in" \
        --argjson tokens_out "$tokens_out" \
        --argjson duration_ms "$duration_ms" \
        --arg output "$output" \
        '{step: $step, model: $model, tokens_in: $tokens_in, tokens_out: $tokens_out, duration_ms: $duration_ms, output: $output}'
}
697
+
698
+ # ─── Execute a Complete Reasoning Chain ────────────────────────────────────
699
# Execute every step of a named reasoning chain, feeding each step's output
# to the next one as its prompt.
#
# Arguments:
#   $1 - chain name (looked up in .templates first, then .custom_chains)
#   $2 - initial prompt
# Globals read: CHAIN_CONFIG_FILE, CHAIN_EXECUTION_LOG
# Outputs: the execution record (id, chain, ts, steps, output, total_cost)
#          as JSON on stdout; the same record is appended to
#          CHAIN_EXECUTION_LOG as exactly one line
# Returns: 0 on success; 1 on usage error, missing jq or an unknown or
#          malformed chain
#
# Per-step policy (neither rule applies to the final step):
#   - confidence above escalation_threshold: stop early and return this output
#   - confidence below confidence_threshold: re-run the step once on the
#     next model tier
chain_execute() {
    # ${N:-} so a missing argument reaches the usage message under `set -u`.
    local chain_name="${1:-}"
    local prompt="${2:-}"

    if [[ -z "$chain_name" ]] || [[ -z "$prompt" ]]; then
        error "Usage: chain_execute <chain_name> <prompt>"
        return 1
    fi

    _ensure_chain_templates

    if ! command -v jq >/dev/null 2>&1; then
        error "jq is required for chain execution"
        return 1
    fi

    # The chain name is passed as a jq variable, never spliced into the
    # filter text, so quotes or brackets in the name cannot alter the query.
    local steps
    steps=$(jq -c --arg name "$chain_name" \
        '.templates[$name] // .custom_chains[$name] // empty' \
        "$CHAIN_CONFIG_FILE" 2>/dev/null || true)

    if [[ -z "$steps" || "$steps" == "null" ]]; then
        error "Chain not found: $chain_name"
        return 1
    fi

    if ! jq -e 'type == "array"' <<< "$steps" >/dev/null 2>&1; then
        error "Chain is not a list of steps: $chain_name"
        return 1
    fi

    mkdir -p "$(dirname "$CHAIN_EXECUTION_LOG")"

    local execution_id
    execution_id=$(date +%s)-$(od -An -N4 -tx4 /dev/urandom 2>/dev/null | tr -d ' ' | cut -c1-6 || echo "000000")

    # Thresholds feed integer comparisons below; fall back to the defaults
    # if the config holds anything that is not a plain integer.
    local confidence_threshold escalation_threshold
    confidence_threshold=$(jq -r '.confidence_threshold // 50' "$CHAIN_CONFIG_FILE" 2>/dev/null || true)
    escalation_threshold=$(jq -r '.escalation_threshold // 80' "$CHAIN_CONFIG_FILE" 2>/dev/null || true)
    [[ "$confidence_threshold" =~ ^[0-9]+$ ]] || confidence_threshold=50
    [[ "$escalation_threshold" =~ ^[0-9]+$ ]] || escalation_threshold=80

    local step_count
    step_count=$(jq 'length' <<< "$steps")

    local execution_trace="[]"
    # Must be initialised: a bare `local chain_output` leaves the variable
    # unset and the -z test after the loop aborts under `set -u`.
    local chain_output=""
    local current_prompt="$prompt"
    local total_cost="0"
    local i step_obj step_name model max_tokens
    local step_result step_output confidence next_model

    for ((i = 0; i < step_count; i++)); do
        step_obj=$(jq -c --argjson idx "$i" '.[$idx]' <<< "$steps")
        step_name=$(jq -r '.step' <<< "$step_obj")
        model=$(jq -r '.model' <<< "$step_obj")
        max_tokens=$(jq -r '.max_tokens // 4000' <<< "$step_obj")

        # Execute this step
        step_result=$(_execute_chain_step "$step_name" "$model" "$current_prompt" "$max_tokens")

        # Extract output for next step
        step_output=$(jq -r '.output' <<< "$step_result")

        # Score confidence and record it on the step
        confidence=$(chain_score_confidence "$step_output" "$step_name")
        step_result=$(jq --argjson conf "$confidence" '.confidence = $conf' <<< "$step_result")

        # Check for early termination
        if [[ "$confidence" -gt "$escalation_threshold" ]] && [[ "$i" -lt $((step_count - 1)) ]]; then
            # Confidence is high enough, skip remaining steps
            info "Step $step_name high confidence ($confidence%), skipping remaining steps"
            execution_trace=$(jq --argjson step "$step_result" '. += [$step]' <<< "$execution_trace")
            chain_output="$step_output"
            break
        fi

        # Check for low confidence escalation (only for first step typically)
        if [[ "$confidence" -lt "$confidence_threshold" ]] && [[ "$i" -lt $((step_count - 1)) ]]; then
            # An unknown model cannot be escalated; keep the result we have
            # instead of aborting the whole chain.
            next_model=$(_get_escalation_model "$model") || next_model="$model"
            if [[ "$next_model" != "$model" ]]; then
                warn "Step $step_name low confidence ($confidence%), escalating to $next_model"
                # Re-execute with escalated model
                step_result=$(_execute_chain_step "$step_name" "$next_model" "$current_prompt" "$max_tokens")
                # NOTE(review): the recorded confidence is the score that
                # triggered the escalation; the escalated output is not
                # re-scored. Confirm this is intended.
                step_result=$(jq --argjson conf "$confidence" '.confidence = $conf | .escalated = true' <<< "$step_result")
                step_output=$(jq -r '.output' <<< "$step_result")
            fi
        fi

        # Add step to trace
        execution_trace=$(jq --argjson step "$step_result" '. += [$step]' <<< "$execution_trace")

        # Use output as input to next step
        current_prompt="$step_output"
    done

    # Extract final output
    if [[ -z "$chain_output" ]]; then
        chain_output=$(jq -r '.[-1].output' <<< "$execution_trace")
    fi

    # NOTE(review): despite its name, total_cost is the sum of input and
    # output tokens across the recorded steps, not a dollar amount.
    total_cost=$(jq '[.[].tokens_in, .[].tokens_out] | add' <<< "$execution_trace" 2>/dev/null || echo "0")

    # Build the record in compact form so the .jsonl log gets exactly one
    # line per execution.
    local execution_record
    execution_record=$(jq -cn \
        --arg id "$execution_id" \
        --arg chain "$chain_name" \
        --argjson trace "$execution_trace" \
        --arg output "$chain_output" \
        --argjson total_cost "$total_cost" \
        --arg ts "$(now_iso)" \
        '{id: $id, chain: $chain, ts: $ts, steps: $trace, output: $output, total_cost: $total_cost}')

    echo "$execution_record" >> "$CHAIN_EXECUTION_LOG"

    # Return execution record (pretty-printed on stdout)
    jq . <<< "$execution_record"
}
825
+
826
+ # ─── Calculate Cost for a Single Step ──────────────────────────────────────
827
# Compute the dollar cost of one chain step from its token counts.
#
# Arguments:
#   $1 - input tokens  (default 0; non-integer values are treated as 0)
#   $2 - output tokens (default 0; non-integer values are treated as 0)
#   $3 - model: haiku | sonnet | opus (default sonnet)
# Globals read: HAIKU_/SONNET_/OPUS_ INPUT_COST and OUTPUT_COST, which are
#               prices per million tokens
# Outputs: the cost with six decimals on stdout, or "0" for an unknown model
#
# Prices reach awk through -v variables rather than being pasted into the
# program text, so a malformed price cannot change the program awk runs.
chain_step_cost() {
    local tokens_in="${1:-0}"
    local tokens_out="${2:-0}"
    local model="${3:-sonnet}"

    [[ "$tokens_in" =~ ^[0-9]+$ ]] || tokens_in=0
    [[ "$tokens_out" =~ ^[0-9]+$ ]] || tokens_out=0

    local price_in price_out
    case "$model" in
        haiku)
            price_in="$HAIKU_INPUT_COST"
            price_out="$HAIKU_OUTPUT_COST"
            ;;
        sonnet)
            price_in="$SONNET_INPUT_COST"
            price_out="$SONNET_OUTPUT_COST"
            ;;
        opus)
            price_in="$OPUS_INPUT_COST"
            price_out="$OPUS_OUTPUT_COST"
            ;;
        *)
            echo "0"
            return 0
            ;;
    esac

    awk -v tin="$tokens_in" -v tout="$tokens_out" \
        -v pin="$price_in" -v pout="$price_out" \
        'BEGIN { printf "%.6f\n", (tin * pin + tout * pout) / 1000000 }'
}
850
+
851
+ # ─── Show Chain Configuration ──────────────────────────────────────────────
852
# Print the reasoning-chain configuration, creating the default config
# first if none exists yet.
#
# Globals read: CHAIN_CONFIG_FILE
# Outputs: a heading followed by the config, pretty-printed with jq when
#          available and dumped verbatim otherwise
show_chain_config() {
    # Record whether the file exists BEFORE ensuring it; checking afterwards
    # can never detect a fresh creation.
    local was_missing=0
    if [[ ! -f "$CHAIN_CONFIG_FILE" ]]; then
        was_missing=1
    fi

    _ensure_chain_templates

    if [[ "$was_missing" -eq 1 && -f "$CHAIN_CONFIG_FILE" ]]; then
        success "Created default chain templates at $CHAIN_CONFIG_FILE"
    fi

    info "Reasoning Chain Configuration"
    echo ""

    if command -v jq >/dev/null 2>&1; then
        jq . "$CHAIN_CONFIG_FILE" 2>/dev/null || cat "$CHAIN_CONFIG_FILE"
    else
        cat "$CHAIN_CONFIG_FILE"
    fi
}
868
+
869
+ # ─── Show Chain Execution Report ───────────────────────────────────────────
870
# Print a summary of logged chain executions: the overall count, the summed
# total_cost, and a per-chain breakdown.
#
# Globals read: CHAIN_EXECUTION_LOG, BOLD, RESET
# Returns: 0 when there is no data yet or the report was printed; 1 when jq
#          is missing
show_chain_report() {
    info "Chain Execution Report"
    echo ""

    if [[ ! -f "$CHAIN_EXECUTION_LOG" ]]; then
        warn "No chain execution data yet."
        return 0
    fi

    if ! command -v jq >/dev/null 2>&1; then
        error "jq is required to view reports"
        return 1
    fi

    # Count JSON records rather than lines: a record that spans several
    # lines would otherwise be counted once per line, and `wc -l` output is
    # space-padded on some platforms.
    local total_executions
    total_executions=$(jq -s 'length' "$CHAIN_EXECUTION_LOG" 2>/dev/null || echo "0")

    local total_cost
    total_cost=$(jq -s 'map(.total_cost) | add // 0' "$CHAIN_EXECUTION_LOG" 2>/dev/null || echo "0")

    echo -e "${BOLD}Summary${RESET}"
    echo " Total chain executions: $total_executions"
    echo " Total cost: \$$total_cost"
    echo ""

    echo -e "${BOLD}Cost Per Chain${RESET}"
    # Best-effort section: a malformed log must not fail the whole report.
    jq -rs '
        group_by(.chain)
        | map({
            chain: .[0].chain,
            executions: length,
            total_cost: (map(.total_cost) | add),
            avg_cost: (map(.total_cost) | add / length)
          })
        | sort_by(.chain)
        | .[]
        | " \(.chain): \(.executions) executions, $\(.total_cost | tostring), avg $\(.avg_cost | round)"
    ' "$CHAIN_EXECUTION_LOG" 2>/dev/null || true
}
907
+
495
908
  # ─── Help Text ──────────────────────────────────────────────────────────────
496
909
  show_help() {
497
910
  echo -e "${BOLD}shipwright model${RESET} — Intelligent Model Routing & Optimization"
@@ -499,7 +912,7 @@ show_help() {
499
912
  echo -e "${BOLD}USAGE${RESET}"
500
913
  echo " ${CYAN}shipwright model${RESET} <subcommand> [options]"
501
914
  echo ""
502
- echo -e "${BOLD}SUBCOMMANDS${RESET}"
915
+ echo -e "${BOLD}SUBCOMMANDS — Routing${RESET}"
503
916
  echo " ${CYAN}route${RESET} <stage> [complexity] Route task to optimal model (returns: haiku|sonnet|opus)"
504
917
  echo " ${CYAN}escalate${RESET} <model> Get next tier model (haiku→sonnet→opus)"
505
918
  echo " ${CYAN}config${RESET} [show|set <key> <val>] Show/set routing configuration"
@@ -507,15 +920,28 @@ show_help() {
507
920
  echo " ${CYAN}ab-test${RESET} [enable|disable] [pct] [variant] Configure A/B testing"
508
921
  echo " ${CYAN}report${RESET} Show model usage and cost report"
509
922
  echo " ${CYAN}ab-results${RESET} Show A/B test results"
510
- echo " ${CYAN}help${RESET} Show this help message"
923
+ echo ""
924
+ echo -e "${BOLD}SUBCOMMANDS — Multi-Model Reasoning Chains${RESET}"
925
+ echo " ${CYAN}chain${RESET} [config|define|execute|report|step-cost]"
926
+ echo " ${CYAN}config${RESET} Show chain configuration & templates"
927
+ echo " ${CYAN}define${RESET} <name> <json> Define custom reasoning chain"
928
+ echo " ${CYAN}execute${RESET} <chain> <prompt> Execute a reasoning chain"
929
+ echo " ${CYAN}report${RESET} Show chain execution report"
930
+ echo " ${CYAN}step-cost${RESET} <in> <out> <model> Calculate cost for one step"
931
+ echo ""
932
+ echo -e "${BOLD}BUILT-IN CHAINS${RESET}"
933
+ echo " ${DIM}explore-decide${RESET} 2-step: haiku explores → opus decides"
934
+ echo " ${DIM}explore-synthesize-decide${RESET} 3-step: haiku → sonnet → opus"
935
+ echo " ${DIM}fast-verify${RESET} 2-step: sonnet generates → haiku verifies"
936
+ echo " ${DIM}deep-analysis${RESET} 2-step: opus analyzes → opus validates"
511
937
  echo ""
512
938
  echo -e "${BOLD}EXAMPLES${RESET}"
513
939
  echo " ${DIM}shipwright model route plan 65${RESET} # Route 'plan' stage with 65% complexity"
514
940
  echo " ${DIM}shipwright model escalate haiku${RESET} # Upgrade from haiku"
515
- echo " ${DIM}shipwright model config show${RESET} # View routing rules"
516
- echo " ${DIM}shipwright model estimate standard 50${RESET} # Estimate standard pipeline cost"
517
- echo " ${DIM}shipwright model ab-test enable 15 cost-optimized${RESET} # 15% A/B test"
518
- echo " ${DIM}shipwright model report${RESET} # Show usage stats"
941
+ echo " ${DIM}shipwright model chain config${RESET} # Show chain templates"
942
+ echo " ${DIM}shipwright model chain execute explore-decide \"analyze this code\"${RESET}"
943
+ echo " ${DIM}shipwright model chain report${RESET} # Show chain execution stats"
944
+ echo " ${DIM}shipwright model chain step-cost 1000 500 sonnet${RESET} # Cost for step"
519
945
  }
520
946
 
521
947
  # ─── Main ───────────────────────────────────────────────────────────────────
@@ -563,6 +989,7 @@ main() {
563
989
  if command -v jq >/dev/null 2>&1; then
564
990
  local tmp_config
565
991
  tmp_config=$(mktemp)
992
+ # shellcheck disable=SC2064
566
993
  trap "rm -f '$tmp_config'" RETURN
567
994
  jq ".a_b_test.enabled = false" "$MODEL_ROUTING_CONFIG" > "$tmp_config"
568
995
  mv "$tmp_config" "$MODEL_ROUTING_CONFIG"
@@ -582,6 +1009,34 @@ main() {
582
1009
  ab-results)
583
1010
  show_ab_results
584
1011
  ;;
1012
+ chain)
1013
+ shift 2>/dev/null || true
1014
+ case "${1:-config}" in
1015
+ config)
1016
+ show_chain_config
1017
+ ;;
1018
+ define)
1019
+ shift 2>/dev/null || true
1020
+ chain_define "$@"
1021
+ ;;
1022
+ execute)
1023
+ shift 2>/dev/null || true
1024
+ chain_execute "$@"
1025
+ ;;
1026
+ report)
1027
+ show_chain_report
1028
+ ;;
1029
+ step-cost)
1030
+ shift 2>/dev/null || true
1031
+ chain_step_cost "$@"
1032
+ ;;
1033
+ *)
1034
+ error "Unknown chain subcommand: ${1:-}"
1035
+ show_help
1036
+ exit 1
1037
+ ;;
1038
+ esac
1039
+ ;;
585
1040
  help|--help|-h)
586
1041
  show_help
587
1042
  ;;