shipwright-cli 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. package/.claude/agents/code-reviewer.md +2 -0
  2. package/.claude/agents/devops-engineer.md +2 -0
  3. package/.claude/agents/doc-fleet-agent.md +2 -0
  4. package/.claude/agents/pipeline-agent.md +2 -0
  5. package/.claude/agents/shell-script-specialist.md +2 -0
  6. package/.claude/agents/test-specialist.md +2 -0
  7. package/.claude/hooks/agent-crash-capture.sh +32 -0
  8. package/.claude/hooks/post-tool-use.sh +3 -2
  9. package/.claude/hooks/pre-tool-use.sh +35 -3
  10. package/README.md +22 -8
  11. package/claude-code/hooks/config-change.sh +18 -0
  12. package/claude-code/hooks/instructions-reloaded.sh +7 -0
  13. package/claude-code/hooks/worktree-create.sh +25 -0
  14. package/claude-code/hooks/worktree-remove.sh +20 -0
  15. package/config/code-constitution.json +130 -0
  16. package/config/defaults.json +25 -2
  17. package/config/policy.json +1 -1
  18. package/dashboard/middleware/auth.ts +134 -0
  19. package/dashboard/middleware/constants.ts +21 -0
  20. package/dashboard/public/index.html +8 -6
  21. package/dashboard/public/styles.css +176 -97
  22. package/dashboard/routes/auth.ts +38 -0
  23. package/dashboard/server.ts +117 -25
  24. package/dashboard/services/config.ts +26 -0
  25. package/dashboard/services/db.ts +118 -0
  26. package/dashboard/src/canvas/pixel-agent.ts +298 -0
  27. package/dashboard/src/canvas/pixel-sprites.ts +440 -0
  28. package/dashboard/src/canvas/shipyard-effects.ts +367 -0
  29. package/dashboard/src/canvas/shipyard-scene.ts +616 -0
  30. package/dashboard/src/canvas/submarine-layout.ts +267 -0
  31. package/dashboard/src/components/header.ts +8 -7
  32. package/dashboard/src/core/api.ts +5 -0
  33. package/dashboard/src/core/router.ts +1 -0
  34. package/dashboard/src/design/submarine-theme.ts +253 -0
  35. package/dashboard/src/main.ts +2 -0
  36. package/dashboard/src/types/api.ts +12 -1
  37. package/dashboard/src/views/activity.ts +2 -1
  38. package/dashboard/src/views/metrics.ts +69 -1
  39. package/dashboard/src/views/shipyard.ts +39 -0
  40. package/dashboard/types/index.ts +166 -0
  41. package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
  42. package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
  43. package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
  44. package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
  45. package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
  46. package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
  47. package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
  48. package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
  49. package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
  50. package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
  51. package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
  52. package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
  53. package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
  54. package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
  55. package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
  56. package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
  57. package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
  58. package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
  59. package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
  60. package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
  61. package/docs/research/RESEARCH_INDEX.md +439 -0
  62. package/docs/research/RESEARCH_SOURCES.md +440 -0
  63. package/docs/research/RESEARCH_SUMMARY.txt +275 -0
  64. package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
  65. package/package.json +2 -2
  66. package/scripts/lib/adaptive-model.sh +427 -0
  67. package/scripts/lib/adaptive-timeout.sh +316 -0
  68. package/scripts/lib/audit-trail.sh +309 -0
  69. package/scripts/lib/auto-recovery.sh +471 -0
  70. package/scripts/lib/bandit-selector.sh +431 -0
  71. package/scripts/lib/bootstrap.sh +104 -2
  72. package/scripts/lib/causal-graph.sh +455 -0
  73. package/scripts/lib/compat.sh +126 -0
  74. package/scripts/lib/compound-audit.sh +337 -0
  75. package/scripts/lib/constitutional.sh +454 -0
  76. package/scripts/lib/context-budget.sh +359 -0
  77. package/scripts/lib/convergence.sh +594 -0
  78. package/scripts/lib/cost-optimizer.sh +634 -0
  79. package/scripts/lib/daemon-adaptive.sh +14 -2
  80. package/scripts/lib/daemon-dispatch.sh +106 -17
  81. package/scripts/lib/daemon-failure.sh +34 -4
  82. package/scripts/lib/daemon-patrol.sh +25 -4
  83. package/scripts/lib/daemon-poll-github.sh +361 -0
  84. package/scripts/lib/daemon-poll-health.sh +299 -0
  85. package/scripts/lib/daemon-poll.sh +27 -611
  86. package/scripts/lib/daemon-state.sh +119 -66
  87. package/scripts/lib/daemon-triage.sh +10 -0
  88. package/scripts/lib/dod-scorecard.sh +442 -0
  89. package/scripts/lib/error-actionability.sh +300 -0
  90. package/scripts/lib/formal-spec.sh +461 -0
  91. package/scripts/lib/helpers.sh +180 -5
  92. package/scripts/lib/intent-analysis.sh +409 -0
  93. package/scripts/lib/loop-convergence.sh +350 -0
  94. package/scripts/lib/loop-iteration.sh +682 -0
  95. package/scripts/lib/loop-progress.sh +48 -0
  96. package/scripts/lib/loop-restart.sh +185 -0
  97. package/scripts/lib/memory-effectiveness.sh +506 -0
  98. package/scripts/lib/mutation-executor.sh +352 -0
  99. package/scripts/lib/outcome-feedback.sh +521 -0
  100. package/scripts/lib/pipeline-cli.sh +336 -0
  101. package/scripts/lib/pipeline-commands.sh +1216 -0
  102. package/scripts/lib/pipeline-detection.sh +101 -3
  103. package/scripts/lib/pipeline-execution.sh +897 -0
  104. package/scripts/lib/pipeline-github.sh +28 -3
  105. package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
  106. package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
  107. package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
  108. package/scripts/lib/pipeline-intelligence.sh +104 -1138
  109. package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
  110. package/scripts/lib/pipeline-quality-checks.sh +17 -711
  111. package/scripts/lib/pipeline-quality-gates.sh +563 -0
  112. package/scripts/lib/pipeline-stages-build.sh +730 -0
  113. package/scripts/lib/pipeline-stages-delivery.sh +965 -0
  114. package/scripts/lib/pipeline-stages-intake.sh +1133 -0
  115. package/scripts/lib/pipeline-stages-monitor.sh +407 -0
  116. package/scripts/lib/pipeline-stages-review.sh +1022 -0
  117. package/scripts/lib/pipeline-stages.sh +161 -2901
  118. package/scripts/lib/pipeline-state.sh +36 -5
  119. package/scripts/lib/pipeline-util.sh +487 -0
  120. package/scripts/lib/policy-learner.sh +438 -0
  121. package/scripts/lib/process-reward.sh +493 -0
  122. package/scripts/lib/project-detect.sh +649 -0
  123. package/scripts/lib/quality-profile.sh +334 -0
  124. package/scripts/lib/recruit-commands.sh +885 -0
  125. package/scripts/lib/recruit-learning.sh +739 -0
  126. package/scripts/lib/recruit-roles.sh +648 -0
  127. package/scripts/lib/reward-aggregator.sh +458 -0
  128. package/scripts/lib/rl-optimizer.sh +362 -0
  129. package/scripts/lib/root-cause.sh +427 -0
  130. package/scripts/lib/scope-enforcement.sh +445 -0
  131. package/scripts/lib/session-restart.sh +493 -0
  132. package/scripts/lib/skill-memory.sh +300 -0
  133. package/scripts/lib/skill-registry.sh +775 -0
  134. package/scripts/lib/spec-driven.sh +476 -0
  135. package/scripts/lib/test-helpers.sh +18 -7
  136. package/scripts/lib/test-holdout.sh +429 -0
  137. package/scripts/lib/test-optimizer.sh +511 -0
  138. package/scripts/shipwright-file-suggest.sh +45 -0
  139. package/scripts/skills/adversarial-quality.md +61 -0
  140. package/scripts/skills/api-design.md +44 -0
  141. package/scripts/skills/architecture-design.md +50 -0
  142. package/scripts/skills/brainstorming.md +43 -0
  143. package/scripts/skills/data-pipeline.md +44 -0
  144. package/scripts/skills/deploy-safety.md +64 -0
  145. package/scripts/skills/documentation.md +38 -0
  146. package/scripts/skills/frontend-design.md +45 -0
  147. package/scripts/skills/generated/.gitkeep +0 -0
  148. package/scripts/skills/generated/_refinements/.gitkeep +0 -0
  149. package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
  150. package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
  151. package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
  152. package/scripts/skills/generated/cli-version-management.md +29 -0
  153. package/scripts/skills/generated/collection-system-validation.md +99 -0
  154. package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
  155. package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
  156. package/scripts/skills/generated/test-parallelization-detection.md +65 -0
  157. package/scripts/skills/observability.md +79 -0
  158. package/scripts/skills/performance.md +48 -0
  159. package/scripts/skills/pr-quality.md +49 -0
  160. package/scripts/skills/product-thinking.md +43 -0
  161. package/scripts/skills/security-audit.md +49 -0
  162. package/scripts/skills/systematic-debugging.md +40 -0
  163. package/scripts/skills/testing-strategy.md +47 -0
  164. package/scripts/skills/two-stage-review.md +52 -0
  165. package/scripts/skills/validation-thoroughness.md +55 -0
  166. package/scripts/sw +9 -3
  167. package/scripts/sw-activity.sh +9 -8
  168. package/scripts/sw-adaptive.sh +8 -7
  169. package/scripts/sw-adversarial.sh +2 -1
  170. package/scripts/sw-architecture-enforcer.sh +3 -1
  171. package/scripts/sw-auth.sh +12 -2
  172. package/scripts/sw-autonomous.sh +5 -1
  173. package/scripts/sw-changelog.sh +4 -1
  174. package/scripts/sw-checkpoint.sh +2 -1
  175. package/scripts/sw-ci.sh +15 -6
  176. package/scripts/sw-cleanup.sh +4 -26
  177. package/scripts/sw-code-review.sh +45 -20
  178. package/scripts/sw-connect.sh +2 -1
  179. package/scripts/sw-context.sh +2 -1
  180. package/scripts/sw-cost.sh +107 -5
  181. package/scripts/sw-daemon.sh +71 -11
  182. package/scripts/sw-dashboard.sh +3 -1
  183. package/scripts/sw-db.sh +71 -20
  184. package/scripts/sw-decide.sh +8 -2
  185. package/scripts/sw-decompose.sh +360 -17
  186. package/scripts/sw-deps.sh +4 -1
  187. package/scripts/sw-developer-simulation.sh +4 -1
  188. package/scripts/sw-discovery.sh +378 -5
  189. package/scripts/sw-doc-fleet.sh +4 -1
  190. package/scripts/sw-docs-agent.sh +3 -1
  191. package/scripts/sw-docs.sh +2 -1
  192. package/scripts/sw-doctor.sh +453 -2
  193. package/scripts/sw-dora.sh +4 -1
  194. package/scripts/sw-durable.sh +12 -7
  195. package/scripts/sw-e2e-orchestrator.sh +17 -16
  196. package/scripts/sw-eventbus.sh +13 -4
  197. package/scripts/sw-evidence.sh +364 -12
  198. package/scripts/sw-feedback.sh +550 -9
  199. package/scripts/sw-fix.sh +20 -1
  200. package/scripts/sw-fleet-discover.sh +6 -2
  201. package/scripts/sw-fleet-viz.sh +9 -4
  202. package/scripts/sw-fleet.sh +5 -1
  203. package/scripts/sw-github-app.sh +18 -4
  204. package/scripts/sw-github-checks.sh +3 -2
  205. package/scripts/sw-github-deploy.sh +3 -2
  206. package/scripts/sw-github-graphql.sh +18 -7
  207. package/scripts/sw-guild.sh +5 -1
  208. package/scripts/sw-heartbeat.sh +5 -30
  209. package/scripts/sw-hello.sh +67 -0
  210. package/scripts/sw-hygiene.sh +10 -3
  211. package/scripts/sw-incident.sh +273 -5
  212. package/scripts/sw-init.sh +18 -2
  213. package/scripts/sw-instrument.sh +10 -2
  214. package/scripts/sw-intelligence.sh +44 -7
  215. package/scripts/sw-jira.sh +5 -1
  216. package/scripts/sw-launchd.sh +2 -1
  217. package/scripts/sw-linear.sh +4 -1
  218. package/scripts/sw-logs.sh +4 -1
  219. package/scripts/sw-loop.sh +436 -1076
  220. package/scripts/sw-memory.sh +357 -3
  221. package/scripts/sw-mission-control.sh +6 -1
  222. package/scripts/sw-model-router.sh +483 -27
  223. package/scripts/sw-otel.sh +15 -4
  224. package/scripts/sw-oversight.sh +14 -5
  225. package/scripts/sw-patrol-meta.sh +334 -0
  226. package/scripts/sw-pipeline-composer.sh +7 -1
  227. package/scripts/sw-pipeline-vitals.sh +12 -6
  228. package/scripts/sw-pipeline.sh +54 -2653
  229. package/scripts/sw-pm.sh +16 -8
  230. package/scripts/sw-pr-lifecycle.sh +2 -1
  231. package/scripts/sw-predictive.sh +17 -5
  232. package/scripts/sw-prep.sh +185 -2
  233. package/scripts/sw-ps.sh +5 -25
  234. package/scripts/sw-public-dashboard.sh +17 -4
  235. package/scripts/sw-quality.sh +14 -6
  236. package/scripts/sw-reaper.sh +8 -25
  237. package/scripts/sw-recruit.sh +156 -2303
  238. package/scripts/sw-regression.sh +19 -12
  239. package/scripts/sw-release-manager.sh +3 -1
  240. package/scripts/sw-release.sh +4 -1
  241. package/scripts/sw-remote.sh +3 -1
  242. package/scripts/sw-replay.sh +7 -1
  243. package/scripts/sw-retro.sh +158 -1
  244. package/scripts/sw-review-rerun.sh +3 -1
  245. package/scripts/sw-scale.sh +14 -5
  246. package/scripts/sw-security-audit.sh +6 -1
  247. package/scripts/sw-self-optimize.sh +173 -6
  248. package/scripts/sw-session.sh +9 -3
  249. package/scripts/sw-setup.sh +3 -1
  250. package/scripts/sw-stall-detector.sh +406 -0
  251. package/scripts/sw-standup.sh +15 -7
  252. package/scripts/sw-status.sh +3 -1
  253. package/scripts/sw-strategic.sh +14 -6
  254. package/scripts/sw-stream.sh +13 -4
  255. package/scripts/sw-swarm.sh +20 -7
  256. package/scripts/sw-team-stages.sh +13 -6
  257. package/scripts/sw-templates.sh +7 -31
  258. package/scripts/sw-testgen.sh +17 -6
  259. package/scripts/sw-tmux-pipeline.sh +4 -1
  260. package/scripts/sw-tmux-role-color.sh +2 -0
  261. package/scripts/sw-tmux-status.sh +1 -1
  262. package/scripts/sw-tmux.sh +37 -1
  263. package/scripts/sw-trace.sh +3 -1
  264. package/scripts/sw-tracker-github.sh +3 -0
  265. package/scripts/sw-tracker-jira.sh +3 -0
  266. package/scripts/sw-tracker-linear.sh +3 -0
  267. package/scripts/sw-tracker.sh +3 -1
  268. package/scripts/sw-triage.sh +3 -2
  269. package/scripts/sw-upgrade.sh +3 -1
  270. package/scripts/sw-ux.sh +5 -2
  271. package/scripts/sw-webhook.sh +5 -2
  272. package/scripts/sw-widgets.sh +9 -4
  273. package/scripts/sw-worktree.sh +15 -3
  274. package/scripts/test-skill-injection.sh +1233 -0
  275. package/templates/pipelines/autonomous.json +27 -3
  276. package/templates/pipelines/cost-aware.json +34 -8
  277. package/templates/pipelines/deployed.json +12 -0
  278. package/templates/pipelines/enterprise.json +12 -0
  279. package/templates/pipelines/fast.json +6 -0
  280. package/templates/pipelines/full.json +27 -3
  281. package/templates/pipelines/hotfix.json +6 -0
  282. package/templates/pipelines/standard.json +12 -0
  283. package/templates/pipelines/tdd.json +12 -0
@@ -0,0 +1,493 @@
#!/usr/bin/env bash
# Module guard - prevent double-sourcing
[[ -n "${_PROCESS_REWARD_LOADED:-}" ]] && return 0
_PROCESS_REWARD_LOADED=1

# ╔═══════════════════════════════════════════════════════════════════════════╗
# ║ shipwright process-reward — Per-Step Iteration Scoring (Phase 3)          ║
# ║ Score each loop iteration on 5 dimensions for dense learning signals      ║
# ║ Weights: test_progress 30%, code_quality 25%, convergence 20%,            ║
# ║          architecture 15%, security 10%                                   ║
# ╚═══════════════════════════════════════════════════════════════════════════╝

# shellcheck disable=SC2034
VERSION="3.3.0"

# ─── Output Helpers ──────────────────────────────────────────────────────────
# Fallback implementations. Each helper is defined only when no function of
# that name exists yet, so helpers supplied by the sourcing script always win.
# Every helper is checked on its own: a host that provides now_iso but not
# now_epoch still gets a working now_epoch.
if [[ "$(type -t info 2>/dev/null)" != "function" ]]; then
  info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
fi
if [[ "$(type -t success 2>/dev/null)" != "function" ]]; then
  success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
fi
if [[ "$(type -t warn 2>/dev/null)" != "function" ]]; then
  warn() { echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"; }
fi
if [[ "$(type -t error 2>/dev/null)" != "function" ]]; then
  # Errors are the only helper that writes to stderr.
  error() { echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2; }
fi
if [[ "$(type -t now_iso 2>/dev/null)" != "function" ]]; then
  # UTC timestamp, e.g. 2026-03-10T12:34:56Z
  now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
fi
if [[ "$(type -t now_epoch 2>/dev/null)" != "function" ]]; then
  # Seconds since the Unix epoch
  now_epoch() { date +%s; }
fi
if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
  # No-op stand-in: events are dropped when no emitter is loaded.
  emit_event() { true; }
fi
26
+
# ─── Configuration ───────────────────────────────────────────────────────────

# JSONL file that iteration rewards are appended to. Set PROCESS_REWARD_FILE
# in the environment before sourcing to override it.
PROCESS_REWARD_FILE="${PROCESS_REWARD_FILE:-.claude/pipeline-artifacts/process-rewards.jsonl}"

# Dimension weights (must sum to 100)
REWARD_WEIGHT_TEST="${REWARD_WEIGHT_TEST:-30}"
REWARD_WEIGHT_CODE="${REWARD_WEIGHT_CODE:-25}"
REWARD_WEIGHT_CONVERGENCE="${REWARD_WEIGHT_CONVERGENCE:-20}"
REWARD_WEIGHT_ARCH="${REWARD_WEIGHT_ARCH:-15}"
REWARD_WEIGHT_SECURITY="${REWARD_WEIGHT_SECURITY:-10}"

# Replace any weight that is not a plain non-negative integer with its default.
# The weights are used in shell arithmetic, where a value such as "25%" is a
# syntax error and "08" is rejected as invalid octal; valid values are
# re-written in canonical decimal form. Safe to call again after changing a
# weight at runtime.
# Globals:  REWARD_WEIGHT_* (read and written)
# Outputs:  one diagnostic line on stderr per rejected value
_reward_validate_weights() {
  local spec name fallback value
  for spec in \
    REWARD_WEIGHT_TEST:30 \
    REWARD_WEIGHT_CODE:25 \
    REWARD_WEIGHT_CONVERGENCE:20 \
    REWARD_WEIGHT_ARCH:15 \
    REWARD_WEIGHT_SECURITY:10; do
    name="${spec%%:*}"
    fallback="${spec##*:}"
    value="${!name:-}"
    if [[ "$value" =~ ^[0-9]+$ ]]; then
      # 10# forces base 10 so leading zeros are not read as octal
      printf -v "$name" '%d' "$(( 10#$value ))"
    else
      printf '%s\n' "process-reward: ignoring invalid ${name}='${value}', using ${fallback}" >&2
      printf -v "$name" '%s' "$fallback"
    fi
  done
}
_reward_validate_weights
37
+
38
+ # ─── Dimension Scorers ──────────────────────────────────────────────────────
39
+
# Score test progress (0-100)
# Arguments:
#   $1 - "true"/"false" result of this iteration's test run; "" when no test
#        command was run
#   $2 - raw test output (only inspected when the tests failed)
#   $3 - "true"/"false" result of the previous iteration's test run, if known
# Outputs: integer score on stdout
#   50      no test result (or an unrecognised $1)
#   90/100  passing / passing after a previous failure
#   20/40   failing / failing with at least one passing-test line in the output
#   +5      added to a failing score when the previous run was failing as well
_reward_score_test_progress() {
  local test_passed="${1:-}"
  local test_output="${2:-}"
  local prev_passed="${3:-}"
  local score=50 # neutral default

  case "$test_passed" in
    true)
      score=90
      # Bonus if previously failing
      if [[ "$prev_passed" == "false" ]]; then
        score=100
      fi
      ;;
    false)
      score=20
      # Look for evidence that at least some tests passed (partial progress).
      # The keywords are matched as whole words so "token" or "broken" do not
      # count as "ok", and TAP "not ok" lines are dropped before counting.
      local pass_count=0
      if [[ -n "$test_output" ]]; then
        pass_count=$(printf '%s\n' "$test_output" \
          | grep -viE '(^|[^[:alnum:]_])not ok([^[:alnum:]_]|$)' \
          | grep -ciE '(^|[^[:alnum:]_])(pass(ed|es|ing)?|ok)([^[:alnum:]_]|$)|✓' || true)
        pass_count="${pass_count:-0}"
      fi
      if [[ "$pass_count" -gt 0 ]]; then
        # Some tests passing — partial credit
        score=40
      fi
      # Previously also failing — at least not regressing
      if [[ "$prev_passed" == "false" ]]; then
        score=$(( score + 5 ))
      fi
      ;;
  esac

  echo "$score"
}
80
+
# Score code quality (0-100)
# Heuristics over the diff of the working tree against HEAD~1: diff size,
# cleanup ratio, TODO-style markers and leftover debug statements.
# Arguments:
#   $1 - project root passed to `git -C` (default ".")
# Outputs: integer score on stdout; the base score of 70 when no diff is
#          available (including when git fails)
_reward_score_code_quality() {
  local project_root="${1:-.}"
  local score=70 # default: decent

  # Check recent diff for quality signals
  local diff_text
  diff_text=$(git -C "$project_root" diff HEAD~1 --unified=0 2>/dev/null || true)

  if [[ -z "$diff_text" ]]; then
    echo "$score"
    return 0
  fi

  # Added/removed content lines. The [^+] / [^-] second character skips the
  # "+++ b/path" and "--- a/path" file headers.
  local added_lines additions deletions
  added_lines=$(grep '^+[^+]' <<< "$diff_text" || true)
  additions=$(grep -c '^+[^+]' <<< "$diff_text" || true)
  additions="${additions:-0}"
  deletions=$(grep -c '^-[^-]' <<< "$diff_text" || true)
  deletions="${deletions:-0}"

  # Penalize very large diffs (>500 lines added = likely unfocused)
  if [[ "$additions" -gt 500 ]]; then
    score=$(( score - 15 ))
  elif [[ "$additions" -gt 200 ]]; then
    score=$(( score - 5 ))
  fi

  # Reward cleanup (more deletions than additions)
  if [[ "$deletions" -gt "$additions" ]] && [[ "$additions" -gt 0 ]]; then
    score=$(( score + 10 ))
  fi

  # TODO/FIXME/HACK markers in new code. Only added content lines are
  # searched, so a file header such as "+++ b/TODO.md" is not counted.
  local hack_count
  hack_count=$(grep -cE '(TODO|FIXME|HACK|XXX)' <<< "$added_lines" || true)
  hack_count="${hack_count:-0}"
  if [[ "$hack_count" -gt 3 ]]; then
    score=$(( score - 10 ))
  elif [[ "$hack_count" -gt 0 ]]; then
    score=$(( score - 5 ))
  fi

  # Debug/console statements left in new code
  local debug_count
  debug_count=$(grep -cE '(console\.log|debugger|print\(|echo "DEBUG)' <<< "$added_lines" || true)
  debug_count="${debug_count:-0}"
  if [[ "$debug_count" -gt 0 ]]; then
    score=$(( score - 10 ))
  fi

  # Clamp 0-100
  if [[ "$score" -lt 0 ]]; then score=0; fi
  if [[ "$score" -gt 100 ]]; then score=100; fi

  echo "$score"
}
139
+
# Score convergence (0-100)
# Is the diff getting smaller? Are we approaching the goal?
# Arguments:
#   $1 - iteration number (default 1; anything that is not a positive integer
#        is treated as the first iteration)
#   $2 - project root passed to `git -C` (default ".")
#   $3 - reward history JSONL file (default $PROCESS_REWARD_FILE)
# Outputs: integer score on stdout
_reward_score_convergence() {
  local iteration="${1:-1}"
  local project_root="${2:-.}"
  local reward_file="${3:-${PROCESS_REWARD_FILE:-}}"
  local score=50 # neutral default

  [[ "$iteration" =~ ^[0-9]+$ ]] || iteration=1

  # First iteration — no history to compare
  if (( 10#$iteration <= 1 )); then
    echo "60"
    return 0
  fi

  # Lines inserted + deleted versus HEAD~1. --numstat prints
  # "<added>\t<deleted>\t<path>" per file; awk sums the two counters, so no
  # `bc` is needed. Binary files report "-", which awk reads as 0. When git
  # fails the total stays 0, i.e. it is treated like an empty diff.
  local current_diff_lines
  current_diff_lines=$(git -C "$project_root" diff HEAD~1 --numstat 2>/dev/null \
    | awk '{ total += $1 + $2 } END { print total + 0 }' || true)
  [[ "$current_diff_lines" =~ ^[0-9]+$ ]] || current_diff_lines=0

  # Convergence score of the most recent history entry. Entries written from
  # the iteration JSON nest the dimensions under .scores.scores; a flat
  # .scores.<dimension> layout is accepted as well. Anything missing,
  # unreadable or non-numeric counts as the neutral 50.
  local prev_convergence
  prev_convergence=$(tail -n 1 "$reward_file" 2>/dev/null | jq -r '
      (.scores | if type == "object" then . else {} end) as $s
      | ($s.scores | if type == "object" then . else $s end) as $d
      | $d.convergence
      | if type == "number" then floor else 50 end
    ' 2>/dev/null || true)
  [[ "$prev_convergence" =~ ^-?[0-9]+$ ]] || prev_convergence=50

  # Smaller diffs = more convergent (likely finishing touches)
  if (( current_diff_lines < 20 )); then
    score=85
  elif (( current_diff_lines < 50 )); then
    score=70
  elif (( current_diff_lines < 100 )); then
    score=55
  elif (( current_diff_lines < 300 )); then
    score=40
  else
    score=25
  fi

  # Bonus for sustained convergence trend
  if (( prev_convergence >= 70 && score >= 70 )); then
    score=$(( score + 10 ))
  fi

  # Upper clamp (the tiers above never go below 25)
  if (( score > 100 )); then score=100; fi

  echo "$score"
}
186
+
# Score architecture adherence (0-100)
# Heuristics over the files changed versus HEAD~1: whether tests were touched,
# how many directories the change spans, and whether an architecture rules
# file exists for the project.
# Arguments:
#   $1 - project root passed to `git -C` (default ".")
# Outputs: integer score on stdout; the base score of 80 when no changed
#          files can be listed
_reward_score_architecture() {
  local project_root="${1:-.}"
  local score=80 # default: good

  # Get list of files changed since HEAD~1
  local changed_files
  changed_files=$(git -C "$project_root" diff --name-only HEAD~1 2>/dev/null || true)

  if [[ -z "$changed_files" ]]; then
    echo "$score"
    return 0
  fi

  # Check for test files alongside source (good practice)
  if grep -qE '(test|spec|_test\.)' <<< "$changed_files"; then
    score=$(( score + 10 ))
  fi

  # Penalize changes to too many directories (unfocused). Paths without a
  # slash are first mapped to "." so all root-level files count as one
  # directory instead of one directory per file.
  local dir_count
  dir_count=$(sed -e '/\//!s|.*|.|' -e 's|/[^/]*$||' <<< "$changed_files" \
    | sort -u | wc -l | tr -d ' ')
  dir_count="${dir_count:-0}"
  if [[ "$dir_count" -gt 10 ]]; then
    score=$(( score - 15 ))
  elif [[ "$dir_count" -gt 5 ]]; then
    score=$(( score - 5 ))
  fi

  # Memory directory key: first 12 hex characters of the SHA-256 of the
  # project root string exactly as passed in. sha256sum is used when shasum is
  # not installed; "unknown" when neither is available.
  local repo_hash=""
  if command -v shasum >/dev/null 2>&1; then
    repo_hash=$(printf '%s' "$project_root" | shasum -a 256 2>/dev/null | cut -c1-12 || true)
  elif command -v sha256sum >/dev/null 2>&1; then
    repo_hash=$(printf '%s' "$project_root" | sha256sum 2>/dev/null | cut -c1-12 || true)
  fi
  [[ -n "$repo_hash" ]] || repo_hash="unknown"

  # Having an architecture rules file is good. The rules themselves are not
  # evaluated here — only the file's presence earns the bonus.
  local arch_file="${HOME:-}/.shipwright/memory/${repo_hash}/architecture.json"
  if [[ -f "$arch_file" ]]; then
    score=$(( score + 5 ))
  fi

  # Clamp 0-100
  if [[ "$score" -lt 0 ]]; then score=0; fi
  if [[ "$score" -gt 100 ]]; then score=100; fi

  echo "$score"
}
237
+
# Score security (0-100)
# Grep the lines added versus HEAD~1 for obvious issues: hardcoded secrets,
# eval/exec, string-concatenated SQL and command injection patterns.
# Arguments:
#   $1 - project root passed to `git -C` (default ".")
# Outputs: integer score on stdout; the base score of 90 when there is no
#          diff or no added line
_reward_score_security() {
  local project_root="${1:-.}"
  local score=90 # default: no issues

  local diff_text
  diff_text=$(git -C "$project_root" diff HEAD~1 2>/dev/null || true)

  if [[ -z "$diff_text" ]]; then
    echo "$score"
    return 0
  fi

  # Only check added lines. [^+] skips the "+++ b/path" headers, and the diff
  # marker is stripped so that line-start anchors see the real code.
  local added_lines
  added_lines=$(grep '^+[^+]' <<< "$diff_text" | sed 's/^+//' || true)

  if [[ -z "$added_lines" ]]; then
    echo "$score"
    return 0
  fi

  # Pattern fragments. A backslash escape such as \x27 is not interpreted
  # inside a POSIX bracket expression, so the quote characters are spelled out
  # literally; [[:space:]] replaces the non-POSIX \s.
  local quote="[\"']"
  local not_quote="[^\"']"
  local ws='[[:space:]]*'

  # Check for hardcoded secrets patterns
  local secret_re="(password${ws}=${ws}${quote}${not_quote}+|api_key${ws}=${ws}${quote}|secret${ws}=${ws}${quote}${not_quote}+|token${ws}=${ws}${quote}[A-Za-z0-9])"
  local secret_count
  secret_count=$(grep -ciE -- "$secret_re" <<< "$added_lines" || true)
  secret_count="${secret_count:-0}"
  if [[ "$secret_count" -gt 0 ]]; then
    score=$(( score - 30 ))
  fi

  # Check for eval/exec usage
  local eval_count
  eval_count=$(grep -cE '(^|[[:space:]])(eval|exec)[[:space:]]' <<< "$added_lines" || true)
  eval_count="${eval_count:-0}"
  if [[ "$eval_count" -gt 0 ]]; then
    score=$(( score - 15 ))
  fi

  # Check for SQL injection patterns (string concat in queries)
  local sql_count
  sql_count=$(grep -ciE '(query\(.*\+|execute\(.*\+|sql.*\+.*\$)' <<< "$added_lines" || true)
  sql_count="${sql_count:-0}"
  if [[ "$sql_count" -gt 0 ]]; then
    score=$(( score - 20 ))
  fi

  # Check for command injection patterns
  local cmd_count
  cmd_count=$(grep -cE 'system\([[:space:]]*\$|`\$|exec\([[:space:]]*\$' <<< "$added_lines" || true)
  cmd_count="${cmd_count:-0}"
  if [[ "$cmd_count" -gt 0 ]]; then
    score=$(( score - 20 ))
  fi

  # Clamp 0-100
  if [[ "$score" -lt 0 ]]; then score=0; fi
  if [[ "$score" -gt 100 ]]; then score=100; fi

  echo "$score"
}
299
+
300
+ # ─── Core Functions ──────────────────────────────────────────────────────────
301
+
# Score a completed iteration on all 5 dimensions
# Arguments:
#   $1 - iteration number (default 1)
#   $2 - "true"/"false"/"" test result of this iteration
#   $3 - raw test output
#   $4 - "true"/"false"/"" test result of the previous iteration
#   $5 - project root (default ".")
# Globals: REWARD_WEIGHT_TEST/CODE/CONVERGENCE/ARCH/SECURITY, PROCESS_REWARD_FILE
# Outputs: JSON on stdout, no trailing newline:
#   {"composite":N, "scores":{"test_progress":N,...}}
process_reward_score_iteration() {
  local iteration="${1:-1}"
  local test_passed="${2:-}"
  local test_output="${3:-}"
  local prev_test_passed="${4:-}"
  local project_root="${5:-.}"

  local test_score code_score conv_score arch_score sec_score

  test_score=$(_reward_score_test_progress "$test_passed" "$test_output" "$prev_test_passed")
  code_score=$(_reward_score_code_quality "$project_root")
  conv_score=$(_reward_score_convergence "$iteration" "$project_root" "${PROCESS_REWARD_FILE:-}")
  arch_score=$(_reward_score_architecture "$project_root")
  sec_score=$(_reward_score_security "$project_root")

  # Parallel arrays, in the order: test, code, convergence, architecture,
  # security.
  local -a scores=("$test_score" "$code_score" "$conv_score" "$arch_score" "$sec_score")
  local -a weights=(
    "${REWARD_WEIGHT_TEST:-30}"
    "${REWARD_WEIGHT_CODE:-25}"
    "${REWARD_WEIGHT_CONVERGENCE:-20}"
    "${REWARD_WEIGHT_ARCH:-15}"
    "${REWARD_WEIGHT_SECURITY:-10}"
  )
  local -a default_weights=(30 25 20 15 10)

  # A scorer that printed nothing (or something non-numeric) counts as a
  # neutral 50, an invalid weight falls back to its default, and 10# keeps
  # values with leading zeros from being read as octal.
  local i weight_total=0
  for i in 0 1 2 3 4; do
    [[ "${scores[i]}" =~ ^[0-9]+$ ]] || scores[i]=50
    [[ "${weights[i]}" =~ ^[0-9]+$ ]] || weights[i]="${default_weights[i]}"
    scores[i]=$(( 10#${scores[i]} ))
    weights[i]=$(( 10#${weights[i]} ))
    weight_total=$(( weight_total + weights[i] ))
  done

  # All-zero weights would divide by zero; use the defaults instead.
  if (( weight_total == 0 )); then
    weights=("${default_weights[@]}")
    weight_total=100
  fi

  # Weighted composite (integer math). Dividing by the actual weight total
  # keeps the composite on a 0-100 scale even when overridden weights do not
  # sum to 100; with a total of 100 this is the plain percentage formula.
  local weighted_sum=0
  for i in 0 1 2 3 4; do
    weighted_sum=$(( weighted_sum + scores[i] * weights[i] ))
  done
  local composite=$(( weighted_sum / weight_total ))

  # Clamp
  if (( composite < 0 )); then composite=0; fi
  if (( composite > 100 )); then composite=100; fi

  # Return as JSON
  printf '{"composite":%d,"scores":{"test_progress":%d,"code_quality":%d,"convergence":%d,"architecture":%d,"security":%d}}' \
    "$composite" "${scores[0]}" "${scores[1]}" "${scores[2]}" "${scores[3]}" "${scores[4]}"
}
331
+
# Record iteration reward data to JSONL file
# Arguments:
#   $1 - iteration number (must be valid JSON, i.e. a number; default 1)
#   $2 - scores JSON, stored verbatim under "scores" (default {})
#   $3 - action taken (default "unknown")
#   $4 - outcome (default "unknown")
# Globals: PROCESS_REWARD_FILE (target file)
# Returns: 0 when the record was appended; 1 when the record could not be
#          built (e.g. $1 or $2 is not valid JSON) or could not be written
process_reward_record() {
  local iteration="${1:-1}"
  local scores_json="${2:-"{}"}"
  local action_taken="${3:-unknown}"
  local outcome="${4:-unknown}"
  local reward_file="${PROCESS_REWARD_FILE:-.claude/pipeline-artifacts/process-rewards.jsonl}"

  # Ensure directory exists
  local reward_dir
  reward_dir=$(dirname "$reward_file")
  if ! mkdir -p "$reward_dir" 2>/dev/null; then
    warn "process-reward: cannot create directory $reward_dir"
    return 1
  fi

  local timestamp
  timestamp=$(now_iso)

  # Build record using jq for safe JSON construction. `|| record=""` keeps a
  # jq failure from aborting the caller when errexit is active; the empty
  # check below reports it instead.
  local record
  record=$(jq -c -n \
    --arg ts "$timestamp" \
    --argjson iter "$iteration" \
    --argjson scores "$scores_json" \
    --arg action "$action_taken" \
    --arg outcome "$outcome" \
    '{timestamp: $ts, iteration: $iter, scores: $scores, action: $action, outcome: $outcome}' 2>/dev/null) || record=""

  if [[ -z "$record" ]]; then
    warn "process-reward: failed to build JSON record"
    return 1
  fi

  # Append the record as one line. Appending in place avoids copying the
  # whole log through a temp file on every call, keeps the log's permissions,
  # and cannot drop a record that another writer added between a copy and a
  # rename.
  if ! {
    # Repair a log whose last line lacks its newline so records never merge.
    if [[ -s "$reward_file" && -n "$(tail -c 1 "$reward_file")" ]]; then
      printf '\n' >> "$reward_file"
    fi
    printf '%s\n' "$record" >> "$reward_file"
  } 2>/dev/null; then
    warn "process-reward: failed to write $reward_file"
    return 1
  fi

  local composite
  composite=$(printf '%s' "$scores_json" | jq -r '.composite // 0' 2>/dev/null || echo 0)
  emit_event "process_reward.recorded" "iteration=$iteration" "composite=$composite"
}
373
+
# Suggest next action based on reward trajectory (last 3 iterations)
# Arguments:
#   $1 - reward history JSONL file (default $PROCESS_REWARD_FILE)
# Outputs: exactly one line of advice on stdout
# Returns: always 0
process_reward_suggest_action() {
  local reward_file="${1:-${PROCESS_REWARD_FILE:-}}"

  if [[ ! -f "$reward_file" ]]; then
    echo "No reward history yet — proceed with the goal."
    return 0
  fi

  local line_count
  line_count=$(wc -l < "$reward_file" 2>/dev/null | tr -d ' ')
  line_count="${line_count:-0}"

  if [[ "$line_count" -lt 2 ]]; then
    echo "Not enough history for suggestions — keep working on the goal."
    return 0
  fi

  # One jq pass over the last 3 records, emitting per record:
  #   "<composite> <test_progress> <code_quality> <convergence>"
  # Records are read as raw lines (-R) so a malformed line is skipped instead
  # of aborting the whole parse. Dimension scores are looked up under
  # .scores.scores (the shape stored when the iteration JSON is recorded as-is)
  # and fall back to a flat .scores.<dimension> layout. Missing or non-numeric
  # values become 0 for the composite and 50 for the dimensions.
  local rows
  rows=$(tail -n 3 "$reward_file" 2>/dev/null | jq -rR '
      def num(d): if type == "number" then floor else d end;
      select(length > 0)
      | (fromjson? | select(type == "object"))
      | (.scores | if type == "object" then . else {} end) as $s
      | ($s.scores | if type == "object" then . else $s end) as $d
      | "\(($s.composite // .composite) | num(0)) \($d.test_progress | num(50)) \($d.code_quality | num(50)) \($d.convergence | num(50))"
    ' 2>/dev/null || true)

  # Oldest composite comes from the first row; everything else from the last.
  local first_score="" last_score="" test_score="" code_score="" conv_score=""
  if [[ -n "$rows" ]]; then
    read -r first_score _ <<< "${rows%%$'\n'*}"
    read -r last_score test_score code_score conv_score <<< "${rows##*$'\n'}"
  fi

  # Anything that is not an integer (e.g. jq unavailable) becomes a neutral 50.
  local field
  for field in first_score last_score test_score code_score conv_score; do
    [[ "${!field}" =~ ^-?[0-9]+$ ]] || printf -v "$field" '%s' 50
  done

  # Decision logic — the first matching rule wins.
  if (( test_score <= 30 )); then
    echo "Tests are failing badly (score: ${test_score}/100). Focus on making tests pass before anything else."
  elif (( code_score <= 40 )); then
    echo "Code quality is low (score: ${code_score}/100). Refactor and clean up before adding more features."
  elif (( conv_score <= 30 )); then
    echo "Changes are diverging, not converging (score: ${conv_score}/100). Make smaller, more focused changes."
  elif (( first_score - last_score >= 10 )); then
    # Composite dropped by 10 or more points across the window
    echo "Reward trajectory is declining (${first_score} -> ${last_score}). Try a different approach — current strategy is making things worse."
  elif (( last_score >= 80 )); then
    echo "Strong progress (score: ${last_score}/100). Keep the current approach — you're converging well."
  else
    echo "Moderate progress (score: ${last_score}/100). Continue working toward the goal."
  fi
  return 0
}
443
+
# Format reward history as markdown for injection into iteration prompts
# Arguments:
#   $1 - reward history JSONL file (default $PROCESS_REWARD_FILE)
#   $2 - maximum number of most-recent entries to show (default 5)
# Outputs: a markdown table followed by the suggestion from
#          process_reward_suggest_action; nothing when the file is missing or
#          has no lines
# Returns: always 0
process_reward_inject_context() {
  local reward_file="${1:-${PROCESS_REWARD_FILE:-}}"
  local max_entries="${2:-5}"

  if [[ ! -f "$reward_file" ]]; then
    return 0
  fi

  local line_count
  line_count=$(wc -l < "$reward_file" 2>/dev/null | tr -d ' ')
  line_count="${line_count:-0}"

  if [[ "$line_count" -eq 0 ]]; then
    return 0
  fi

  [[ "$max_entries" =~ ^[0-9]+$ ]] || max_entries=5

  # Render every table row in a single jq pass. Lines are read raw (-R) so an
  # unparseable line yields a row of "?" instead of aborting the rest.
  # Dimension scores are looked up under .scores.scores (the shape stored when
  # the iteration JSON is recorded as-is) and fall back to a flat
  # .scores.<dimension> layout.
  local rows
  rows=$(tail -n "$max_entries" "$reward_file" 2>/dev/null | jq -rR '
      select(length > 0)
      | (fromjson? // {})
      | if type == "object" then . else {} end
      | (.scores | if type == "object" then . else {} end) as $s
      | ($s.scores | if type == "object" then . else $s end) as $d
      | "| \(.iteration // "?") | \($s.composite // .composite // "?") | \($d.test_progress // "?") | \($d.code_quality // "?") | \($d.convergence // "?") | \($d.architecture // "?") | \($d.security // "?") |"
    ' 2>/dev/null || true)

  local output="## Iteration Rewards (Process Reward Model)
| Iter | Composite | Test | Quality | Converge | Arch | Security |
|------|-----------|------|---------|----------|------|----------|"

  if [[ -n "$rows" ]]; then
    output="${output}
${rows}"
  fi

  # Add suggestion
  local suggestion
  suggestion=$(process_reward_suggest_action "$reward_file")
  if [[ -n "$suggestion" ]]; then
    output="${output}

**Reward signal:** ${suggestion}"
  fi

  echo "$output"
}