shipwright-cli 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279):
  1. package/.claude/agents/code-reviewer.md +2 -0
  2. package/.claude/agents/devops-engineer.md +2 -0
  3. package/.claude/agents/doc-fleet-agent.md +2 -0
  4. package/.claude/agents/pipeline-agent.md +2 -0
  5. package/.claude/agents/shell-script-specialist.md +2 -0
  6. package/.claude/agents/test-specialist.md +2 -0
  7. package/.claude/hooks/agent-crash-capture.sh +32 -0
  8. package/.claude/hooks/post-tool-use.sh +3 -2
  9. package/.claude/hooks/pre-tool-use.sh +35 -3
  10. package/README.md +4 -4
  11. package/claude-code/hooks/config-change.sh +18 -0
  12. package/claude-code/hooks/instructions-reloaded.sh +7 -0
  13. package/claude-code/hooks/worktree-create.sh +25 -0
  14. package/claude-code/hooks/worktree-remove.sh +20 -0
  15. package/config/code-constitution.json +130 -0
  16. package/dashboard/middleware/auth.ts +134 -0
  17. package/dashboard/middleware/constants.ts +21 -0
  18. package/dashboard/public/index.html +2 -6
  19. package/dashboard/public/styles.css +100 -97
  20. package/dashboard/routes/auth.ts +38 -0
  21. package/dashboard/server.ts +66 -25
  22. package/dashboard/services/config.ts +26 -0
  23. package/dashboard/services/db.ts +118 -0
  24. package/dashboard/src/canvas/pixel-agent.ts +298 -0
  25. package/dashboard/src/canvas/pixel-sprites.ts +440 -0
  26. package/dashboard/src/canvas/shipyard-effects.ts +367 -0
  27. package/dashboard/src/canvas/shipyard-scene.ts +616 -0
  28. package/dashboard/src/canvas/submarine-layout.ts +267 -0
  29. package/dashboard/src/components/header.ts +8 -7
  30. package/dashboard/src/core/router.ts +1 -0
  31. package/dashboard/src/design/submarine-theme.ts +253 -0
  32. package/dashboard/src/main.ts +2 -0
  33. package/dashboard/src/types/api.ts +2 -1
  34. package/dashboard/src/views/activity.ts +2 -1
  35. package/dashboard/src/views/shipyard.ts +39 -0
  36. package/dashboard/types/index.ts +166 -0
  37. package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
  38. package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
  39. package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
  40. package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
  41. package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
  42. package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
  43. package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
  44. package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
  45. package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
  46. package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
  47. package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
  48. package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
  49. package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
  50. package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
  51. package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
  52. package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
  53. package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
  54. package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
  55. package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
  56. package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
  57. package/docs/research/RESEARCH_INDEX.md +439 -0
  58. package/docs/research/RESEARCH_SOURCES.md +440 -0
  59. package/docs/research/RESEARCH_SUMMARY.txt +275 -0
  60. package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
  61. package/package.json +2 -2
  62. package/scripts/lib/adaptive-model.sh +427 -0
  63. package/scripts/lib/adaptive-timeout.sh +316 -0
  64. package/scripts/lib/audit-trail.sh +309 -0
  65. package/scripts/lib/auto-recovery.sh +471 -0
  66. package/scripts/lib/bandit-selector.sh +431 -0
  67. package/scripts/lib/bootstrap.sh +104 -2
  68. package/scripts/lib/causal-graph.sh +455 -0
  69. package/scripts/lib/compat.sh +126 -0
  70. package/scripts/lib/compound-audit.sh +337 -0
  71. package/scripts/lib/constitutional.sh +454 -0
  72. package/scripts/lib/context-budget.sh +359 -0
  73. package/scripts/lib/convergence.sh +594 -0
  74. package/scripts/lib/cost-optimizer.sh +634 -0
  75. package/scripts/lib/daemon-adaptive.sh +10 -0
  76. package/scripts/lib/daemon-dispatch.sh +106 -17
  77. package/scripts/lib/daemon-failure.sh +34 -4
  78. package/scripts/lib/daemon-patrol.sh +23 -2
  79. package/scripts/lib/daemon-poll-github.sh +361 -0
  80. package/scripts/lib/daemon-poll-health.sh +299 -0
  81. package/scripts/lib/daemon-poll.sh +27 -611
  82. package/scripts/lib/daemon-state.sh +112 -66
  83. package/scripts/lib/daemon-triage.sh +10 -0
  84. package/scripts/lib/dod-scorecard.sh +442 -0
  85. package/scripts/lib/error-actionability.sh +300 -0
  86. package/scripts/lib/formal-spec.sh +461 -0
  87. package/scripts/lib/helpers.sh +177 -4
  88. package/scripts/lib/intent-analysis.sh +409 -0
  89. package/scripts/lib/loop-convergence.sh +350 -0
  90. package/scripts/lib/loop-iteration.sh +682 -0
  91. package/scripts/lib/loop-progress.sh +48 -0
  92. package/scripts/lib/loop-restart.sh +185 -0
  93. package/scripts/lib/memory-effectiveness.sh +506 -0
  94. package/scripts/lib/mutation-executor.sh +352 -0
  95. package/scripts/lib/outcome-feedback.sh +521 -0
  96. package/scripts/lib/pipeline-cli.sh +336 -0
  97. package/scripts/lib/pipeline-commands.sh +1216 -0
  98. package/scripts/lib/pipeline-detection.sh +100 -2
  99. package/scripts/lib/pipeline-execution.sh +897 -0
  100. package/scripts/lib/pipeline-github.sh +28 -3
  101. package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
  102. package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
  103. package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
  104. package/scripts/lib/pipeline-intelligence.sh +100 -1136
  105. package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
  106. package/scripts/lib/pipeline-quality-checks.sh +17 -715
  107. package/scripts/lib/pipeline-quality-gates.sh +563 -0
  108. package/scripts/lib/pipeline-stages-build.sh +730 -0
  109. package/scripts/lib/pipeline-stages-delivery.sh +965 -0
  110. package/scripts/lib/pipeline-stages-intake.sh +1133 -0
  111. package/scripts/lib/pipeline-stages-monitor.sh +407 -0
  112. package/scripts/lib/pipeline-stages-review.sh +1022 -0
  113. package/scripts/lib/pipeline-stages.sh +59 -2929
  114. package/scripts/lib/pipeline-state.sh +36 -5
  115. package/scripts/lib/pipeline-util.sh +487 -0
  116. package/scripts/lib/policy-learner.sh +438 -0
  117. package/scripts/lib/process-reward.sh +493 -0
  118. package/scripts/lib/project-detect.sh +649 -0
  119. package/scripts/lib/quality-profile.sh +334 -0
  120. package/scripts/lib/recruit-commands.sh +885 -0
  121. package/scripts/lib/recruit-learning.sh +739 -0
  122. package/scripts/lib/recruit-roles.sh +648 -0
  123. package/scripts/lib/reward-aggregator.sh +458 -0
  124. package/scripts/lib/rl-optimizer.sh +362 -0
  125. package/scripts/lib/root-cause.sh +427 -0
  126. package/scripts/lib/scope-enforcement.sh +445 -0
  127. package/scripts/lib/session-restart.sh +493 -0
  128. package/scripts/lib/skill-memory.sh +300 -0
  129. package/scripts/lib/skill-registry.sh +775 -0
  130. package/scripts/lib/spec-driven.sh +476 -0
  131. package/scripts/lib/test-helpers.sh +18 -7
  132. package/scripts/lib/test-holdout.sh +429 -0
  133. package/scripts/lib/test-optimizer.sh +511 -0
  134. package/scripts/shipwright-file-suggest.sh +45 -0
  135. package/scripts/skills/adversarial-quality.md +61 -0
  136. package/scripts/skills/api-design.md +44 -0
  137. package/scripts/skills/architecture-design.md +50 -0
  138. package/scripts/skills/brainstorming.md +43 -0
  139. package/scripts/skills/data-pipeline.md +44 -0
  140. package/scripts/skills/deploy-safety.md +64 -0
  141. package/scripts/skills/documentation.md +38 -0
  142. package/scripts/skills/frontend-design.md +45 -0
  143. package/scripts/skills/generated/.gitkeep +0 -0
  144. package/scripts/skills/generated/_refinements/.gitkeep +0 -0
  145. package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
  146. package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
  147. package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
  148. package/scripts/skills/generated/cli-version-management.md +29 -0
  149. package/scripts/skills/generated/collection-system-validation.md +99 -0
  150. package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
  151. package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
  152. package/scripts/skills/generated/test-parallelization-detection.md +65 -0
  153. package/scripts/skills/observability.md +79 -0
  154. package/scripts/skills/performance.md +48 -0
  155. package/scripts/skills/pr-quality.md +49 -0
  156. package/scripts/skills/product-thinking.md +43 -0
  157. package/scripts/skills/security-audit.md +49 -0
  158. package/scripts/skills/systematic-debugging.md +40 -0
  159. package/scripts/skills/testing-strategy.md +47 -0
  160. package/scripts/skills/two-stage-review.md +52 -0
  161. package/scripts/skills/validation-thoroughness.md +55 -0
  162. package/scripts/sw +9 -3
  163. package/scripts/sw-activity.sh +9 -2
  164. package/scripts/sw-adaptive.sh +2 -1
  165. package/scripts/sw-adversarial.sh +2 -1
  166. package/scripts/sw-architecture-enforcer.sh +3 -1
  167. package/scripts/sw-auth.sh +12 -2
  168. package/scripts/sw-autonomous.sh +5 -1
  169. package/scripts/sw-changelog.sh +4 -1
  170. package/scripts/sw-checkpoint.sh +2 -1
  171. package/scripts/sw-ci.sh +5 -1
  172. package/scripts/sw-cleanup.sh +4 -26
  173. package/scripts/sw-code-review.sh +10 -4
  174. package/scripts/sw-connect.sh +2 -1
  175. package/scripts/sw-context.sh +2 -1
  176. package/scripts/sw-cost.sh +48 -3
  177. package/scripts/sw-daemon.sh +66 -9
  178. package/scripts/sw-dashboard.sh +3 -1
  179. package/scripts/sw-db.sh +59 -16
  180. package/scripts/sw-decide.sh +8 -2
  181. package/scripts/sw-decompose.sh +360 -17
  182. package/scripts/sw-deps.sh +4 -1
  183. package/scripts/sw-developer-simulation.sh +4 -1
  184. package/scripts/sw-discovery.sh +325 -2
  185. package/scripts/sw-doc-fleet.sh +4 -1
  186. package/scripts/sw-docs-agent.sh +3 -1
  187. package/scripts/sw-docs.sh +2 -1
  188. package/scripts/sw-doctor.sh +453 -2
  189. package/scripts/sw-dora.sh +4 -1
  190. package/scripts/sw-durable.sh +4 -3
  191. package/scripts/sw-e2e-orchestrator.sh +17 -16
  192. package/scripts/sw-eventbus.sh +7 -1
  193. package/scripts/sw-evidence.sh +364 -12
  194. package/scripts/sw-feedback.sh +550 -9
  195. package/scripts/sw-fix.sh +20 -1
  196. package/scripts/sw-fleet-discover.sh +6 -2
  197. package/scripts/sw-fleet-viz.sh +4 -1
  198. package/scripts/sw-fleet.sh +5 -1
  199. package/scripts/sw-github-app.sh +16 -3
  200. package/scripts/sw-github-checks.sh +3 -2
  201. package/scripts/sw-github-deploy.sh +3 -2
  202. package/scripts/sw-github-graphql.sh +18 -7
  203. package/scripts/sw-guild.sh +5 -1
  204. package/scripts/sw-heartbeat.sh +5 -30
  205. package/scripts/sw-hello.sh +67 -0
  206. package/scripts/sw-hygiene.sh +6 -1
  207. package/scripts/sw-incident.sh +265 -1
  208. package/scripts/sw-init.sh +18 -2
  209. package/scripts/sw-instrument.sh +10 -2
  210. package/scripts/sw-intelligence.sh +42 -6
  211. package/scripts/sw-jira.sh +5 -1
  212. package/scripts/sw-launchd.sh +2 -1
  213. package/scripts/sw-linear.sh +4 -1
  214. package/scripts/sw-logs.sh +4 -1
  215. package/scripts/sw-loop.sh +432 -1128
  216. package/scripts/sw-memory.sh +356 -2
  217. package/scripts/sw-mission-control.sh +6 -1
  218. package/scripts/sw-model-router.sh +481 -26
  219. package/scripts/sw-otel.sh +13 -4
  220. package/scripts/sw-oversight.sh +14 -5
  221. package/scripts/sw-patrol-meta.sh +334 -0
  222. package/scripts/sw-pipeline-composer.sh +5 -1
  223. package/scripts/sw-pipeline-vitals.sh +2 -1
  224. package/scripts/sw-pipeline.sh +53 -2664
  225. package/scripts/sw-pm.sh +12 -5
  226. package/scripts/sw-pr-lifecycle.sh +2 -1
  227. package/scripts/sw-predictive.sh +7 -1
  228. package/scripts/sw-prep.sh +185 -2
  229. package/scripts/sw-ps.sh +5 -25
  230. package/scripts/sw-public-dashboard.sh +15 -3
  231. package/scripts/sw-quality.sh +2 -1
  232. package/scripts/sw-reaper.sh +8 -25
  233. package/scripts/sw-recruit.sh +156 -2303
  234. package/scripts/sw-regression.sh +19 -12
  235. package/scripts/sw-release-manager.sh +3 -1
  236. package/scripts/sw-release.sh +4 -1
  237. package/scripts/sw-remote.sh +3 -1
  238. package/scripts/sw-replay.sh +7 -1
  239. package/scripts/sw-retro.sh +158 -1
  240. package/scripts/sw-review-rerun.sh +3 -1
  241. package/scripts/sw-scale.sh +10 -3
  242. package/scripts/sw-security-audit.sh +6 -1
  243. package/scripts/sw-self-optimize.sh +6 -3
  244. package/scripts/sw-session.sh +9 -3
  245. package/scripts/sw-setup.sh +3 -1
  246. package/scripts/sw-stall-detector.sh +406 -0
  247. package/scripts/sw-standup.sh +15 -7
  248. package/scripts/sw-status.sh +3 -1
  249. package/scripts/sw-strategic.sh +4 -1
  250. package/scripts/sw-stream.sh +7 -1
  251. package/scripts/sw-swarm.sh +18 -6
  252. package/scripts/sw-team-stages.sh +13 -6
  253. package/scripts/sw-templates.sh +5 -29
  254. package/scripts/sw-testgen.sh +7 -1
  255. package/scripts/sw-tmux-pipeline.sh +4 -1
  256. package/scripts/sw-tmux-role-color.sh +2 -0
  257. package/scripts/sw-tmux-status.sh +1 -1
  258. package/scripts/sw-tmux.sh +3 -1
  259. package/scripts/sw-trace.sh +3 -1
  260. package/scripts/sw-tracker-github.sh +3 -0
  261. package/scripts/sw-tracker-jira.sh +3 -0
  262. package/scripts/sw-tracker-linear.sh +3 -0
  263. package/scripts/sw-tracker.sh +3 -1
  264. package/scripts/sw-triage.sh +2 -1
  265. package/scripts/sw-upgrade.sh +3 -1
  266. package/scripts/sw-ux.sh +5 -2
  267. package/scripts/sw-webhook.sh +3 -1
  268. package/scripts/sw-widgets.sh +3 -1
  269. package/scripts/sw-worktree.sh +15 -3
  270. package/scripts/test-skill-injection.sh +1233 -0
  271. package/templates/pipelines/autonomous.json +27 -3
  272. package/templates/pipelines/cost-aware.json +34 -8
  273. package/templates/pipelines/deployed.json +12 -0
  274. package/templates/pipelines/enterprise.json +12 -0
  275. package/templates/pipelines/fast.json +6 -0
  276. package/templates/pipelines/full.json +27 -3
  277. package/templates/pipelines/hotfix.json +6 -0
  278. package/templates/pipelines/standard.json +12 -0
  279. package/templates/pipelines/tdd.json +12 -0
@@ -0,0 +1,427 @@
1
+ #!/usr/bin/env bash
2
+ # ╔═══════════════════════════════════════════════════════════════════════════╗
3
+ # ║ adaptive-model.sh — Real-Time Adaptive Model Selection During Build Loop ║
4
+ # ║ ║
5
+ # ║ Changes model choice mid-iteration based on real-time signals: ║
6
+ # ║ - Tests passing + converging: downgrade to cheaper model (save cost) ║
7
+ # ║ - Tests failing + same error 2x: escalate to stronger model ║
8
+ # ║ - Convergence score dropping: escalate model ║
9
+ # ║ - Rate limit hit: fallback to next available model ║
10
+ # ║ ║
11
+ # ║ Usage: Source from sw-loop.sh, call adaptive_model_select() ║
12
+ # ║ before each Claude invocation in the build loop ║
13
+ # ╚═══════════════════════════════════════════════════════════════════════════╝
14
+
15
+ [[ -n "${_ADAPTIVE_MODEL_LOADED:-}" ]] && return 0  # include guard: stop here if this file has already been sourced
16
+ _ADAPTIVE_MODEL_LOADED=1
17
+
18
+ # ─── Defaults (a non-empty value already set by the caller is kept) ────────
19
+ ARTIFACTS_DIR="${ARTIFACTS_DIR:-.claude/pipeline-artifacts}"  # directory holding adaptive-model-history.json; the default is relative to the current directory
20
+ SCRIPT_DIR="${SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"  # default: parent of the directory containing this file
21
+
# ─── Load helpers ──────────────────────────────────────────────────────────
# Minimal stand-ins. Each group is defined only when the caller has not
# already provided a function with the probed name (info, now_iso, emit_event).

# Coloured one-line status printers. error() writes to stderr, the others to
# stdout.
if [[ "$(type -t info 2>/dev/null)" != "function" ]]; then
    info()    { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
    success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
    warn()    { echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"; }
    error()   { echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2; }
fi

# Clock helpers: UTC ISO-8601 timestamp and seconds since the epoch.
if [[ "$(type -t now_iso 2>/dev/null)" != "function" ]]; then
    now_iso()   { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
    now_epoch() { date +%s; }
fi

if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
    # emit_event TYPE [key=value ...]
    #
    # Appends one JSON object (a single line) to ~/.shipwright/events.jsonl
    # with "ts", "type" and one string member per key=value argument.
    # Values are escaped so that quotes, backslashes and line breaks inside
    # them cannot break the JSON or split the record over several lines.
    emit_event() {
        local event_type="$1"; shift
        mkdir -p "${HOME}/.shipwright"

        local payload key val
        payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"${event_type}\""
        while [[ $# -gt 0 ]]; do
            key="${1%%=*}"
            val="${1#*=}"
            # Backslash first, otherwise the escapes added below get doubled.
            val=${val//\\/\\\\}
            val=${val//\"/\\\"}
            val=${val//$'\n'/\\n}
            val=${val//$'\r'/\\r}
            val=${val//$'\t'/\\t}
            payload="${payload},\"${key}\":\"${val}\""
            shift
        done
        printf '%s\n' "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
    }
fi
41
+
42
+ # ─── Model Ranking & Escalation Chains ─────────────────────────────────────
43
+ MODEL_HIERARCHY=("haiku" "sonnet" "opus")  # same order adaptive_model_select escalates through (haiku → sonnet → opus); NOTE(review): no function in this file reads this array
44
+
45
+ # ─── Thresholds for Adaptive Behavior ──────────────────────────────────────
46
+ ESCALATION_ERROR_THRESHOLD=2 # Escalate once the same error has been seen at least this many times (read by adaptive_model_select)
47
+ DOWNGRADE_SUCCESS_THRESHOLD=3 # Downgrade after this many consecutive successes. NOTE(review): not read in this file; adaptive_model_select downgrades whenever the last result is "pass" and the score is >= 75
48
+ CONVERGENCE_DROP_THRESHOLD=10 # Escalate if convergence score drops by this. NOTE(review): not read in this file; adaptive_model_select compares the score against a fixed 30
49
+ ESCALATION_COOLDOWN=2 # Wait this many iterations before escalating again. NOTE(review): not read in this file
50
+ RATE_LIMIT_BACKOFF_SECONDS=60 # Cooldown on rate limit (handled by Claude CLI). NOTE(review): not read in this file
51
+
# ─── Initialize adaptive tracking for a pipeline run ──────────────────────
#
# Resets the per-run history to an empty JSON array and makes sure the
# cross-run preferences file exists with its skeleton structure.
#
# Globals: ARTIFACTS_DIR (read), HOME (read)
# Files:   ${ARTIFACTS_DIR}/adaptive-model-history.json          (overwritten)
#          ${HOME}/.shipwright/optimization/model-preferences.json
#                                        (written only when missing or empty)
# Returns: 0 — the trailing event emission is best-effort.
adaptive_model_init() {
    local history_file="${ARTIFACTS_DIR}/adaptive-model-history.json"
    mkdir -p "$ARTIFACTS_DIR"

    # Start fresh history for this pipeline run (write-then-rename so a
    # reader never sees a half-written file).
    echo "[]" > "${history_file}.tmp.$$"
    mv "${history_file}.tmp.$$" "$history_file"

    local preferences_file="${HOME}/.shipwright/optimization/model-preferences.json"
    mkdir -p "$(dirname "$preferences_file")"

    # -s rather than -f: a zero-byte file (e.g. left by an interrupted write)
    # is treated like a missing one and re-seeded. Non-empty files are kept.
    if [[ ! -s "$preferences_file" ]]; then
        cat > "${preferences_file}.tmp.$$" <<'JSON'
{
  "version": "1.0",
  "stage_priors": {},
  "learned_escalations": {},
  "learned_downgrades": {},
  "last_updated": ""
}
JSON
        mv "${preferences_file}.tmp.$$" "$preferences_file"
    fi

    emit_event "adaptive_model.init" "artifacts_dir=$ARTIFACTS_DIR" 2>/dev/null || true
}
79
+
# ─── Select model based on real-time signals ───────────────────────────────
#
# Arguments:
#   $1 stage              pipeline stage name (default "build"); accepted but
#                         not consulted by the rules below
#   $2 iteration_number   current iteration, 0-based (default 0)
#   $3 last_test_result   "pass" or "fail" (default "unknown")
#   $4 error_count        number of times the same error appeared (default 0)
#   $5 convergence_score  0-100 quality score (default 50)
#   $6 current_model      model currently in use (default "opus")
#
# Rules, first match wins:
#   1. iteration <= 0                              keep the current model
#   2. tests pass and score >= 75                  opus→sonnet, sonnet→haiku
#   3. tests fail and error_count >= threshold     haiku→sonnet, sonnet→opus
#   4. score < 30                                  haiku→sonnet, sonnet→opus
#   5. anything else                               keep the current model
#
# Globals: ESCALATION_ERROR_THRESHOLD (read; 2 when unset or non-numeric)
# Outputs: the selected model name on stdout (the current model is echoed
#          back unchanged when no rule moves it)
# Side effects: records the decision through adaptive_model_record
#
adaptive_model_select() {
    local stage="${1:-build}"
    local iteration_number="${2:-0}"
    local last_test_result="${3:-unknown}"
    local error_count="${4:-0}"
    local convergence_score="${5:-50}"
    local current_model="${6:-opus}"
    local error_threshold="${ESCALATION_ERROR_THRESHOLD:-2}"

    # Anything that is not a plain run of digits falls back to its default.
    [[ "$iteration_number" =~ ^[0-9]+$ ]] || iteration_number=0
    [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0
    [[ "$convergence_score" =~ ^[0-9]+$ ]] || convergence_score=50
    [[ "$error_threshold" =~ ^[0-9]+$ ]] || error_threshold=2

    # Force base 10: without this "08" or "09" is parsed as invalid octal and
    # the numeric comparisons below fail instead of comparing.
    iteration_number=$((10#$iteration_number))
    error_count=$((10#$error_count))
    convergence_score=$((10#$convergence_score))
    error_threshold=$((10#$error_threshold))

    local selected_model="$current_model"
    local reason="no_change"
    local escalated=false
    local downgraded=false
    local escalation_trigger=""   # set when rule 3 or 4 asks for a stronger model

    if (( iteration_number <= 0 )); then
        # First iteration: use default (don't adapt)
        reason="first_iteration"
    elif [[ "$last_test_result" == "pass" ]] && (( convergence_score >= 75 )); then
        # Tests passing + high convergence: step down one tier to save cost
        case "$current_model" in
            opus)
                selected_model="sonnet"
                reason="tests_passing_high_convergence_downgrade_opus_to_sonnet"
                downgraded=true
                ;;
            sonnet)
                selected_model="haiku"
                reason="tests_passing_high_convergence_downgrade_sonnet_to_haiku"
                downgraded=true
                ;;
            *)
                reason="already_at_minimum_model"
                ;;
        esac
    elif [[ "$last_test_result" == "fail" ]] && (( error_count >= error_threshold )); then
        escalation_trigger="test_failure_repeated_error"
    elif (( convergence_score < 30 )); then
        escalation_trigger="low_convergence_score"
    else
        reason="stable_conditions_keep_current"
    fi

    # Both escalation rules walk the same ladder; only the reason prefix
    # differs. A model name outside the ladder is left as is ("no_change").
    if [[ -n "$escalation_trigger" ]]; then
        case "$current_model" in
            haiku)
                selected_model="sonnet"
                reason="${escalation_trigger}_escalate_haiku_to_sonnet"
                escalated=true
                ;;
            sonnet)
                selected_model="opus"
                reason="${escalation_trigger}_escalate_sonnet_to_opus"
                escalated=true
                ;;
            opus)
                reason="already_at_maximum_model"
                ;;
        esac
    fi

    # Record selection. Callers capture this function's stdout as the model
    # name, so nothing the recorder prints may reach it.
    adaptive_model_record "$iteration_number" "$selected_model" "$last_test_result" \
        "$error_count" "$convergence_score" "$reason" "$escalated" "$downgraded" >/dev/null

    printf '%s\n' "$selected_model"
}
175
+
# ─── Record model selection and outcome ────────────────────────────────────
#
# Appends one entry to ${ARTIFACTS_DIR}/adaptive-model-history.json and emits
# an "adaptive_model.recorded" event.
#
# Arguments (all optional):
#   $1 iteration          non-negative integer (default 0)
#   $2 model_used         model name (default "opus")
#   $3 test_result        test result string (default "unknown")
#   $4 error_count        non-negative integer (default 0)
#   $5 convergence_score  non-negative integer (default 50)
#   $6 reason             free-text reason (default "unknown")
#   $7 escalated          "true" or "false" (anything else is stored as false)
#   $8 downgraded         "true" or "false" (anything else is stored as false)
#
# Globals: ARTIFACTS_DIR (read)
# Returns: 0 — a failed append is dropped silently and the event emission is
#          best-effort, so recording never aborts the caller.
adaptive_model_record() {
    local iteration="${1:-0}"
    local model_used="${2:-opus}"
    local test_result="${3:-unknown}"
    local error_count="${4:-0}"
    local convergence_score="${5:-50}"
    local reason="${6:-unknown}"
    local escalated="${7:-false}"
    local downgraded="${8:-false}"

    [[ "$iteration" =~ ^[0-9]+$ ]] || iteration=0
    [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0
    [[ "$convergence_score" =~ ^[0-9]+$ ]] || convergence_score=50
    # Strip leading zeros: "08" is not a valid JSON number.
    iteration=$((10#$iteration))
    error_count=$((10#$error_count))
    convergence_score=$((10#$convergence_score))
    # The flags are written as bare JSON literals, so only true/false may pass.
    [[ "$escalated" == "true" ]] || escalated=false
    [[ "$downgraded" == "true" ]] || downgraded=false

    local history_file="${ARTIFACTS_DIR}/adaptive-model-history.json"
    local tmp_hist="${history_file}.tmp.$$"
    local ts
    ts="$(now_iso)"
    mkdir -p "$ARTIFACTS_DIR"

    if command -v jq >/dev/null 2>&1; then
        # Recording may happen without a prior adaptive_model_init; seed the
        # array so the first record is not lost.
        [[ -s "$history_file" ]] || echo "[]" > "$history_file"

        # Values are passed as jq variables, never spliced into the program
        # text, so quotes or backslashes in them cannot break the filter.
        if jq \
            --arg ts "$ts" \
            --argjson iteration "$iteration" \
            --arg model "$model_used" \
            --arg test_result "$test_result" \
            --argjson error_count "$error_count" \
            --argjson convergence_score "$convergence_score" \
            --arg reason "$reason" \
            --argjson escalated "$escalated" \
            --argjson downgraded "$downgraded" \
            '. += [{
                "ts": $ts,
                "iteration": $iteration,
                "model": $model,
                "test_result": $test_result,
                "error_count": $error_count,
                "convergence_score": $convergence_score,
                "reason": $reason,
                "escalated": $escalated,
                "downgraded": $downgraded
            }]' "$history_file" > "$tmp_hist" 2>/dev/null; then
            mv "$tmp_hist" "$history_file"
        else
            rm -f "$tmp_hist"
        fi
    else
        # Fallback: simple append (the file as a whole may not be valid JSON).
        # Each appended object is at least well-formed on its own.
        local field escaped
        local -a safe=()
        for field in "$model_used" "$test_result" "$reason"; do
            escaped=${field//\\/\\\\}
            escaped=${escaped//\"/\\\"}
            escaped=${escaped//$'\n'/\\n}
            safe+=("$escaped")
        done
        printf '{\n  "ts": "%s",\n  "iteration": %s,\n  "model": "%s",\n  "test_result": "%s",\n  "error_count": %s,\n  "convergence_score": %s,\n  "reason": "%s",\n  "escalated": %s,\n  "downgraded": %s\n}\n' \
            "$ts" "$iteration" "${safe[0]}" "${safe[1]}" "$error_count" \
            "$convergence_score" "${safe[2]}" "$escalated" "$downgraded" >> "$history_file"
    fi

    emit_event "adaptive_model.recorded" \
        "iteration=$iteration" \
        "model=$model_used" \
        "test_result=$test_result" \
        "reason=$reason" \
        "escalated=$escalated" \
        "downgraded=$downgraded" 2>/dev/null || true
}
232
+
# ─── Learn from history after pipeline completes ───────────────────────────
#
# Summarises the run's adaptive history into
# ${HOME}/.shipwright/optimization/model-preferences.json:
#   .learned_escalations / .learned_downgrades = { total_attempts, successful,
#   success_rate } and .last_updated.
#
# An entry counts as an attempt when its "escalated" (or "downgraded") flag is
# true, and as successful when that same entry has test_result == "pass".
# NOTE(review): adaptive_model_select stores the test result that preceded the
# decision in that entry, so this is not the outcome of the model change —
# confirm this is the intended measure.
#
# Globals: ARTIFACTS_DIR (read), HOME (read)
# Returns: 0 when there is no history or the preferences were updated;
#          1 when jq is missing or the preferences could not be rewritten
#          (in which case the existing file is left untouched).
adaptive_model_learn() {
    local history_file="${ARTIFACTS_DIR}/adaptive-model-history.json"

    if [[ ! -f "$history_file" ]]; then
        return 0
    fi

    if ! command -v jq >/dev/null 2>&1; then
        warn "jq required for adaptive model learning"
        return 1
    fi

    local preferences_file="${HOME}/.shipwright/optimization/model-preferences.json"
    mkdir -p "$(dirname "$preferences_file")"

    # Learning may run without a prior adaptive_model_init; start from the
    # same skeleton when the file is missing or empty.
    if [[ ! -s "$preferences_file" ]]; then
        printf '%s\n' '{"version":"1.0","stage_priors":{},"learned_escalations":{},"learned_downgrades":{},"last_updated":""}' \
            > "$preferences_file"
    fi

    # One pass over the history yields all four counters.
    local counts=""
    counts="$(jq -r '
        [ .[] | select(type == "object") ] as $h
        | [ ($h | map(select(.escalated == true)) | length),
            ($h | map(select(.escalated == true and .test_result == "pass")) | length),
            ($h | map(select(.downgraded == true)) | length),
            ($h | map(select(.downgraded == true and .test_result == "pass")) | length)
          ]
        | map(tostring) | join(" ")' "$history_file" 2>/dev/null)" || counts=""

    local escalation_total=0 escalation_success=0
    local downgrade_total=0 downgrade_success=0
    read -r escalation_total escalation_success downgrade_total downgrade_success \
        <<< "${counts:-0 0 0 0}"

    # An unreadable history must degrade to zeros, not to arithmetic errors.
    [[ "$escalation_total" =~ ^[0-9]+$ ]] || escalation_total=0
    [[ "$escalation_success" =~ ^[0-9]+$ ]] || escalation_success=0
    [[ "$downgrade_total" =~ ^[0-9]+$ ]] || downgrade_total=0
    [[ "$downgrade_success" =~ ^[0-9]+$ ]] || downgrade_success=0

    # Effectiveness rates as whole percentages (integer division).
    local escalation_rate=0
    local downgrade_rate=0
    if (( escalation_total > 0 )); then
        escalation_rate=$((escalation_success * 100 / escalation_total))
    fi
    if (( downgrade_total > 0 )); then
        downgrade_rate=$((downgrade_success * 100 / downgrade_total))
    fi

    # Write next to the target so the final mv is a rename within one directory.
    local tmp_prefs
    if ! tmp_prefs="$(mktemp "${preferences_file}.XXXXXX")"; then
        warn "Could not create a temporary file next to $preferences_file"
        return 1
    fi

    # mktemp has already created the file, so its mere existence proves
    # nothing: only replace the preferences when jq succeeded and wrote output.
    if jq \
        --argjson esc_success "$escalation_success" \
        --argjson esc_total "$escalation_total" \
        --argjson down_success "$downgrade_success" \
        --argjson down_total "$downgrade_total" \
        --argjson esc_rate "$escalation_rate" \
        --argjson down_rate "$downgrade_rate" \
        --arg timestamp "$(now_iso)" \
        '.learned_escalations = {
            "total_attempts": $esc_total,
            "successful": $esc_success,
            "success_rate": $esc_rate
        } |
        .learned_downgrades = {
            "total_attempts": $down_total,
            "successful": $down_success,
            "success_rate": $down_rate
        } |
        .last_updated = $timestamp' \
        "$preferences_file" > "$tmp_prefs" 2>/dev/null && [[ -s "$tmp_prefs" ]]; then
        mv "$tmp_prefs" "$preferences_file"
        success "Learned model preferences: escalation=${escalation_rate}% success, downgrade=${downgrade_rate}% success"
        emit_event "adaptive_model.learned" \
            "escalation_rate=$escalation_rate" \
            "downgrade_rate=$downgrade_rate" 2>/dev/null || true
        return 0
    fi

    rm -f "$tmp_prefs"
    warn "Could not update model preferences (unreadable or invalid JSON): $preferences_file"
    return 1
}
316
+
# ─── Show adaptive selection stats ─────────────────────────────────────────
#
# Prints a human-readable summary of ${ARTIFACTS_DIR}/adaptive-model-history.json:
# model usage distribution, number of escalations/downgrades with their
# success rates, test result counts and the five most frequent reasons.
#
# An escalation/downgrade is counted as successful when the flagged entry
# itself has test_result == "pass".
# NOTE(review): adaptive_model_select stores the test result that preceded the
# decision in that entry — confirm this is the intended measure.
#
# Globals: ARTIFACTS_DIR (read)
# Outputs: report on stdout
# Returns: 0 normally (also when there is no history); 1 when jq is missing
adaptive_model_report() {
    local history_file="${ARTIFACTS_DIR}/adaptive-model-history.json"

    if [[ ! -f "$history_file" ]]; then
        info "No adaptive model history yet"
        return 0
    fi

    if ! command -v jq >/dev/null 2>&1; then
        error "jq required for reports"
        return 1
    fi

    echo ""
    info "Adaptive Model Selection Report"
    echo ""

    # The history file is already a single JSON array, so it is read as-is
    # (slurping would wrap it in a second array and break group_by).
    # The share is taken against the total number of entries, captured
    # before grouping.
    echo "▸ Model Usage Distribution:"
    jq -r 'length as $total
        | group_by(.model)
        | map({model: .[0].model, count: length})
        | sort_by(.count) | reverse | .[]
        | " \(.model): \(.count) iterations (\((.count * 100 / $total) | round)%)"' \
        "$history_file" 2>/dev/null || true

    echo ""
    echo "▸ Adaptive Actions:"
    local escalations
    escalations=$(jq '[.[] | select(.escalated == true)] | length' "$history_file" 2>/dev/null || echo "0")
    local downgrades
    downgrades=$(jq '[.[] | select(.downgraded == true)] | length' "$history_file" 2>/dev/null || echo "0")
    # Unreadable history must show up as zero, not as an arithmetic error.
    [[ "$escalations" =~ ^[0-9]+$ ]] || escalations=0
    [[ "$downgrades" =~ ^[0-9]+$ ]] || downgrades=0

    echo " Escalations: $escalations"
    echo " Downgrades: $downgrades"

    # Effectiveness
    if (( escalations > 0 )); then
        local esc_success
        esc_success=$(jq '[.[] | select(.escalated == true and .test_result == "pass")] | length' "$history_file" 2>/dev/null || echo "0")
        [[ "$esc_success" =~ ^[0-9]+$ ]] || esc_success=0
        local esc_rate=$((esc_success * 100 / escalations))
        echo " Escalation Success Rate: ${esc_rate}% ($esc_success/$escalations)"
    fi

    if (( downgrades > 0 )); then
        local down_success
        down_success=$(jq '[.[] | select(.downgraded == true and .test_result == "pass")] | length' "$history_file" 2>/dev/null || echo "0")
        [[ "$down_success" =~ ^[0-9]+$ ]] || down_success=0
        local down_rate=$((down_success * 100 / downgrades))
        echo " Downgrade Success Rate: ${down_rate}% ($down_success/$downgrades)"
    fi

    # Test result summary
    echo ""
    echo "▸ Test Result Summary:"
    jq -r 'group_by(.test_result) | .[]
        | " \(.[0].test_result): \(length)"' "$history_file" 2>/dev/null || true

    # Top reasons (at most five, most frequent first)
    echo ""
    echo "▸ Top Adaptation Reasons:"
    jq -r 'group_by(.reason)
        | map({reason: .[0].reason, count: length})
        | sort_by(.count) | reverse | .[0:5] | .[]
        | " \(.reason): \(.count) times"' "$history_file" 2>/dev/null || true

    echo ""
}
392
+
# ─── Load learned preferences from previous runs ────────────────────────────
#
# Reads the learned escalation/downgrade success rates from
# ${HOME}/.shipwright/optimization/model-preferences.json. The rates are not
# used to adjust any threshold yet; when the escalation rate is above zero
# they are only reported through an "adaptive_model.using_learned_preferences"
# event.
#
# Globals: HOME (read)
# Returns: 0 always (missing file, missing jq and unusable values are no-ops)
adaptive_model_apply_learned_preferences() {
    local preferences_file="${HOME}/.shipwright/optimization/model-preferences.json"

    [[ -f "$preferences_file" ]] || return 0
    command -v jq >/dev/null 2>&1 || return 0

    local learned_escalation_rate
    learned_escalation_rate=$(jq -r '.learned_escalations.success_rate // 0' "$preferences_file" 2>/dev/null || echo "0")

    local learned_downgrade_rate
    learned_downgrade_rate=$(jq -r '.learned_downgrades.success_rate // 0' "$preferences_file" 2>/dev/null || echo "0")

    # The file is free-form JSON on disk. Numeric comparison operators treat
    # their operands as arithmetic expressions, so only plain digit strings
    # may reach the comparison; anything else counts as "nothing learned".
    [[ "$learned_escalation_rate" =~ ^[0-9]+$ ]] || learned_escalation_rate=0
    [[ "$learned_downgrade_rate" =~ ^[0-9]+$ ]] || learned_downgrade_rate=0

    # Could use these to adjust thresholds dynamically in future runs.
    # For now, just log them.
    if (( 10#$learned_escalation_rate > 0 )); then
        emit_event "adaptive_model.using_learned_preferences" \
            "escalation_rate=$learned_escalation_rate" \
            "downgrade_rate=$learned_downgrade_rate" 2>/dev/null || true
    fi

    return 0
}
420
+
421
+ # ─── Export functions so child bash processes inherit them (for use by sw-loop.sh) ───
422
+ export -f adaptive_model_init  # NOTE(review): only these six functions are exported; the helpers they call (now_iso, emit_event, info, ...) and ESCALATION_ERROR_THRESHOLD are not exported in this file — confirm child shells provide them
423
+ export -f adaptive_model_select
424
+ export -f adaptive_model_record
425
+ export -f adaptive_model_learn
426
+ export -f adaptive_model_report
427
+ export -f adaptive_model_apply_learned_preferences