shipwright-cli 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +19 -19
  2. package/dashboard/public/index.html +224 -8
  3. package/dashboard/public/styles.css +1078 -4
  4. package/dashboard/server.ts +1100 -15
  5. package/dashboard/src/canvas/interactions.ts +74 -0
  6. package/dashboard/src/canvas/layout.ts +85 -0
  7. package/dashboard/src/canvas/overlays.ts +117 -0
  8. package/dashboard/src/canvas/particles.ts +105 -0
  9. package/dashboard/src/canvas/renderer.ts +191 -0
  10. package/dashboard/src/components/charts/bar.ts +54 -0
  11. package/dashboard/src/components/charts/donut.ts +25 -0
  12. package/dashboard/src/components/charts/pipeline-rail.ts +105 -0
  13. package/dashboard/src/components/charts/sparkline.ts +82 -0
  14. package/dashboard/src/components/header.ts +616 -0
  15. package/dashboard/src/components/modal.ts +413 -0
  16. package/dashboard/src/components/terminal.ts +144 -0
  17. package/dashboard/src/core/api.ts +381 -0
  18. package/dashboard/src/core/helpers.ts +118 -0
  19. package/dashboard/src/core/router.ts +190 -0
  20. package/dashboard/src/core/sse.ts +38 -0
  21. package/dashboard/src/core/state.ts +150 -0
  22. package/dashboard/src/core/ws.ts +143 -0
  23. package/dashboard/src/design/icons.ts +131 -0
  24. package/dashboard/src/design/tokens.ts +160 -0
  25. package/dashboard/src/main.ts +68 -0
  26. package/dashboard/src/types/api.ts +337 -0
  27. package/dashboard/src/views/activity.ts +185 -0
  28. package/dashboard/src/views/agent-cockpit.ts +236 -0
  29. package/dashboard/src/views/agents.ts +72 -0
  30. package/dashboard/src/views/fleet-map.ts +299 -0
  31. package/dashboard/src/views/insights.ts +298 -0
  32. package/dashboard/src/views/machines.ts +162 -0
  33. package/dashboard/src/views/metrics.ts +420 -0
  34. package/dashboard/src/views/overview.ts +409 -0
  35. package/dashboard/src/views/pipeline-theater.ts +219 -0
  36. package/dashboard/src/views/pipelines.ts +595 -0
  37. package/dashboard/src/views/team.ts +362 -0
  38. package/dashboard/src/views/timeline.ts +389 -0
  39. package/dashboard/tsconfig.json +21 -0
  40. package/docs/AGI-PLATFORM-PLAN.md +5 -5
  41. package/docs/AGI-WHATS-NEXT.md +19 -16
  42. package/docs/README.md +2 -0
  43. package/package.json +8 -1
  44. package/scripts/check-version-consistency.sh +72 -0
  45. package/scripts/lib/daemon-adaptive.sh +610 -0
  46. package/scripts/lib/daemon-dispatch.sh +489 -0
  47. package/scripts/lib/daemon-failure.sh +387 -0
  48. package/scripts/lib/daemon-patrol.sh +1113 -0
  49. package/scripts/lib/daemon-poll.sh +1202 -0
  50. package/scripts/lib/daemon-state.sh +550 -0
  51. package/scripts/lib/daemon-triage.sh +490 -0
  52. package/scripts/lib/helpers.sh +81 -0
  53. package/scripts/lib/pipeline-intelligence.sh +0 -6
  54. package/scripts/lib/pipeline-quality-checks.sh +3 -1
  55. package/scripts/lib/pipeline-stages.sh +20 -0
  56. package/scripts/sw +109 -168
  57. package/scripts/sw-activity.sh +1 -1
  58. package/scripts/sw-adaptive.sh +2 -2
  59. package/scripts/sw-adversarial.sh +1 -1
  60. package/scripts/sw-architecture-enforcer.sh +1 -1
  61. package/scripts/sw-auth.sh +14 -6
  62. package/scripts/sw-autonomous.sh +1 -1
  63. package/scripts/sw-changelog.sh +2 -2
  64. package/scripts/sw-checkpoint.sh +1 -1
  65. package/scripts/sw-ci.sh +1 -1
  66. package/scripts/sw-cleanup.sh +1 -1
  67. package/scripts/sw-code-review.sh +1 -1
  68. package/scripts/sw-connect.sh +1 -1
  69. package/scripts/sw-context.sh +1 -1
  70. package/scripts/sw-cost.sh +1 -1
  71. package/scripts/sw-daemon.sh +53 -4817
  72. package/scripts/sw-dashboard.sh +1 -1
  73. package/scripts/sw-db.sh +1 -1
  74. package/scripts/sw-decompose.sh +1 -1
  75. package/scripts/sw-deps.sh +1 -1
  76. package/scripts/sw-developer-simulation.sh +1 -1
  77. package/scripts/sw-discovery.sh +1 -1
  78. package/scripts/sw-doc-fleet.sh +1 -1
  79. package/scripts/sw-docs-agent.sh +1 -1
  80. package/scripts/sw-docs.sh +1 -1
  81. package/scripts/sw-doctor.sh +49 -1
  82. package/scripts/sw-dora.sh +1 -1
  83. package/scripts/sw-durable.sh +1 -1
  84. package/scripts/sw-e2e-orchestrator.sh +1 -1
  85. package/scripts/sw-eventbus.sh +1 -1
  86. package/scripts/sw-feedback.sh +1 -1
  87. package/scripts/sw-fix.sh +6 -5
  88. package/scripts/sw-fleet-discover.sh +1 -1
  89. package/scripts/sw-fleet-viz.sh +3 -3
  90. package/scripts/sw-fleet.sh +1 -1
  91. package/scripts/sw-github-app.sh +5 -2
  92. package/scripts/sw-github-checks.sh +1 -1
  93. package/scripts/sw-github-deploy.sh +1 -1
  94. package/scripts/sw-github-graphql.sh +1 -1
  95. package/scripts/sw-guild.sh +1 -1
  96. package/scripts/sw-heartbeat.sh +1 -1
  97. package/scripts/sw-hygiene.sh +1 -1
  98. package/scripts/sw-incident.sh +1 -1
  99. package/scripts/sw-init.sh +112 -9
  100. package/scripts/sw-instrument.sh +6 -1
  101. package/scripts/sw-intelligence.sh +5 -1
  102. package/scripts/sw-jira.sh +1 -1
  103. package/scripts/sw-launchd.sh +1 -1
  104. package/scripts/sw-linear.sh +20 -9
  105. package/scripts/sw-logs.sh +1 -1
  106. package/scripts/sw-loop.sh +2 -1
  107. package/scripts/sw-memory.sh +10 -1
  108. package/scripts/sw-mission-control.sh +1 -1
  109. package/scripts/sw-model-router.sh +4 -1
  110. package/scripts/sw-otel.sh +4 -4
  111. package/scripts/sw-oversight.sh +1 -1
  112. package/scripts/sw-pipeline-composer.sh +3 -1
  113. package/scripts/sw-pipeline-vitals.sh +4 -6
  114. package/scripts/sw-pipeline.sh +19 -56
  115. package/scripts/sw-pipeline.sh.mock +7 -0
  116. package/scripts/sw-pm.sh +5 -2
  117. package/scripts/sw-pr-lifecycle.sh +1 -1
  118. package/scripts/sw-predictive.sh +4 -1
  119. package/scripts/sw-prep.sh +3 -2
  120. package/scripts/sw-ps.sh +1 -1
  121. package/scripts/sw-public-dashboard.sh +10 -4
  122. package/scripts/sw-quality.sh +1 -1
  123. package/scripts/sw-reaper.sh +1 -1
  124. package/scripts/sw-recruit.sh +25 -1
  125. package/scripts/sw-regression.sh +2 -1
  126. package/scripts/sw-release-manager.sh +1 -1
  127. package/scripts/sw-release.sh +7 -5
  128. package/scripts/sw-remote.sh +1 -1
  129. package/scripts/sw-replay.sh +1 -1
  130. package/scripts/sw-retro.sh +1 -1
  131. package/scripts/sw-scale.sh +11 -5
  132. package/scripts/sw-security-audit.sh +1 -1
  133. package/scripts/sw-self-optimize.sh +172 -7
  134. package/scripts/sw-session.sh +1 -1
  135. package/scripts/sw-setup.sh +1 -1
  136. package/scripts/sw-standup.sh +4 -3
  137. package/scripts/sw-status.sh +1 -1
  138. package/scripts/sw-strategic.sh +2 -1
  139. package/scripts/sw-stream.sh +8 -2
  140. package/scripts/sw-swarm.sh +12 -10
  141. package/scripts/sw-team-stages.sh +1 -1
  142. package/scripts/sw-templates.sh +1 -1
  143. package/scripts/sw-testgen.sh +3 -2
  144. package/scripts/sw-tmux-pipeline.sh +2 -1
  145. package/scripts/sw-tmux.sh +1 -1
  146. package/scripts/sw-trace.sh +1 -1
  147. package/scripts/sw-tracker-jira.sh +1 -0
  148. package/scripts/sw-tracker-linear.sh +1 -0
  149. package/scripts/sw-tracker.sh +24 -6
  150. package/scripts/sw-triage.sh +1 -1
  151. package/scripts/sw-upgrade.sh +1 -1
  152. package/scripts/sw-ux.sh +1 -1
  153. package/scripts/sw-webhook.sh +1 -1
  154. package/scripts/sw-widgets.sh +2 -2
  155. package/scripts/sw-worktree.sh +1 -1
  156. package/dashboard/public/app.js +0 -4422
@@ -0,0 +1,610 @@
1
# daemon-adaptive.sh — Adaptive intervals, progress tracking, learning (for sw-daemon.sh)
# Source from sw-daemon.sh. Requires state, policy, helpers.
# Idempotent-source guard: a second `source` is a no-op.
if [[ -n "${_DAEMON_ADAPTIVE_LOADED:-}" ]]; then
  return 0
fi
_DAEMON_ADAPTIVE_LOADED=1
5
+
6
# Adapt poll interval based on queue state
# Empty queue 5+ cycles → 120s; queue has items → 30s; processing → 60s
# Globals:   POLL_INTERVAL (read), EMPTY_QUEUE_CYCLES (read/write),
#            ADAPTIVE_THRESHOLDS_ENABLED (read)
# Arguments: $1 - current queue depth, $2 - number of active jobs
# Outputs:   chosen poll interval in seconds on stdout
get_adaptive_poll_interval() {
  local queue_depth="$1"
  local active_count="$2"

  # Feature-gated: fall back to the static interval when learning is off.
  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    echo "$POLL_INTERVAL"
    return
  fi

  # Track consecutive idle cycles. Default the global to 0 so the first
  # increment cannot abort under `set -u` when the daemon never set it.
  if [[ "$queue_depth" -eq 0 && "$active_count" -eq 0 ]]; then
    EMPTY_QUEUE_CYCLES=$(( ${EMPTY_QUEUE_CYCLES:-0} + 1 ))
  else
    EMPTY_QUEUE_CYCLES=0
  fi

  local interval="$POLL_INTERVAL"
  if [[ "$EMPTY_QUEUE_CYCLES" -ge 5 ]]; then
    interval=120
  elif [[ "$queue_depth" -gt 0 ]]; then
    interval=30
  else
    interval=60
  fi

  # Persist current setting for dashboard visibility (best-effort: a jq
  # failure must not change the echoed interval).
  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  mkdir -p "$HOME/.shipwright/optimization"
  local tmp_tuning="${tuning_file}.tmp.$$"
  if [[ -f "$tuning_file" ]]; then
    jq --argjson pi "$interval" --argjson eqc "$EMPTY_QUEUE_CYCLES" \
      '.poll_interval = $pi | .empty_queue_cycles = $eqc' \
      "$tuning_file" > "$tmp_tuning" 2>/dev/null && mv "$tmp_tuning" "$tuning_file"
  else
    jq -n --argjson pi "$interval" --argjson eqc "$EMPTY_QUEUE_CYCLES" \
      '{poll_interval: $pi, empty_queue_cycles: $eqc}' > "$tmp_tuning" 2>/dev/null \
      && mv "$tmp_tuning" "$tuning_file"
  fi
  # If jq failed above, the temp file was never moved — don't leak it.
  rm -f "$tmp_tuning"

  echo "$interval"
}
48
+
49
# Rolling average cost per template from costs.json (last 10 runs)
# Prints the learned mean when available, otherwise EST_COST_PER_JOB.
get_adaptive_cost_estimate() {
  local tpl="${1:-autonomous}"

  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    echo "$EST_COST_PER_JOB"
    return
  fi

  local costs_file="$HOME/.shipwright/costs.json"
  if [[ ! -f "$costs_file" ]]; then
    echo "$EST_COST_PER_JOB"
    return
  fi

  local mean
  mean=$(jq -r --arg tpl "$tpl" '
    [.sessions // [] | .[] | select(.template == $tpl) | .total_cost_usd // 0] |
    .[-10:] | if length > 0 then (add / length) else null end
  ' "$costs_file" 2>/dev/null || echo "")

  # Only trust a real, non-zero number from the history.
  case "$mean" in
    ""|null|0) echo "$EST_COST_PER_JOB" ;;
    *)         echo "$mean" ;;
  esac
}
76
+
77
# Per-stage adaptive heartbeat timeout from learned stage durations
# Globals:   HEALTH_HEARTBEAT_TIMEOUT, ADAPTIVE_THRESHOLDS_ENABLED (read)
# Arguments: $1 - pipeline stage name (default "unknown")
# Outputs:   timeout in seconds on stdout
get_adaptive_heartbeat_timeout() {
  local stage="${1:-unknown}"

  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    echo "${HEALTH_HEARTBEAT_TIMEOUT:-120}"
    return
  fi

  # Stage-specific defaults (daemon-health.sh when sourced, else policy_get, else literal)
  local default_timeout="${HEALTH_HEARTBEAT_TIMEOUT:-120}"
  if type daemon_health_timeout_for_stage &>/dev/null; then
    default_timeout=$(daemon_health_timeout_for_stage "$stage" "$default_timeout")
  elif type policy_get &>/dev/null; then
    local policy_stage
    policy_stage=$(policy_get ".daemon.stage_timeouts.$stage" "")
    [[ -n "$policy_stage" && "$policy_stage" =~ ^[0-9]+$ ]] && default_timeout="$policy_stage"
  else
    case "$stage" in
      build) default_timeout=300 ;;
      test) default_timeout=180 ;;
      review|compound_quality) default_timeout=180 ;;
      lint|format|intake|plan|design) default_timeout=60 ;;
    esac
  fi
  [[ "$default_timeout" =~ ^[0-9]+$ ]] || default_timeout="${HEALTH_HEARTBEAT_TIMEOUT:-120}"

  local durations_file="$HOME/.shipwright/optimization/stage-durations.json"
  if [[ ! -f "$durations_file" ]]; then
    echo "$default_timeout"
    return
  fi

  local learned_duration
  learned_duration=$(jq -r --arg s "$stage" \
    '.stages[$s].p90_duration_s // 0' "$durations_file" 2>/dev/null || echo "0")
  # p90 may be stored as a float or come back as "null"; an integer test on
  # those would abort the function. Truncate, then validate.
  learned_duration=${learned_duration%%.*}
  [[ "$learned_duration" =~ ^[0-9]+$ ]] || learned_duration=0

  if [[ "$learned_duration" -gt 0 ]]; then
    # 150% of p90 duration, floor of 60s
    local adaptive_timeout=$(( (learned_duration * 3) / 2 ))
    [[ "$adaptive_timeout" -lt 60 ]] && adaptive_timeout=60
    echo "$adaptive_timeout"
  else
    echo "$default_timeout"
  fi
}
123
+
124
# Adaptive stale pipeline timeout using 95th percentile of historical durations
# Globals:   HEALTH_STALE_TIMEOUT, ADAPTIVE_THRESHOLDS_ENABLED (read)
# Arguments: $1 - pipeline template name (default "autonomous")
# Outputs:   timeout in seconds on stdout
get_adaptive_stale_timeout() {
  local template="${1:-autonomous}"

  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    echo "${HEALTH_STALE_TIMEOUT:-1800}"
    return
  fi

  local durations_file="$HOME/.shipwright/optimization/pipeline-durations.json"
  if [[ ! -f "$durations_file" ]]; then
    echo "${HEALTH_STALE_TIMEOUT:-1800}"
    return
  fi

  local p95_duration
  p95_duration=$(jq -r --arg tpl "$template" \
    '.templates[$tpl].p95_duration_s // 0' "$durations_file" 2>/dev/null || echo "0")
  # p95 may be a float or "null" in hand-edited files; the integer test
  # below would abort on those. Truncate, then validate.
  p95_duration=${p95_duration%%.*}
  [[ "$p95_duration" =~ ^[0-9]+$ ]] || p95_duration=0

  if [[ "$p95_duration" -gt 0 ]]; then
    # 1.5x safety margin, clamped 600s-7200s
    local adaptive_timeout=$(( (p95_duration * 3) / 2 ))
    [[ "$adaptive_timeout" -lt 600 ]] && adaptive_timeout=600
    [[ "$adaptive_timeout" -gt 7200 ]] && adaptive_timeout=7200
    echo "$adaptive_timeout"
  else
    echo "${HEALTH_STALE_TIMEOUT:-1800}"
  fi
}
153
+
154
# Record pipeline duration for future threshold learning
# Appends the run to a rolling 50-entry window and refreshes the stored p95.
record_pipeline_duration() {
  local template="$1" duration_s="$2" result="$3"

  [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]] || return 0
  [[ "$duration_s" =~ ^[0-9]+$ ]] || return 0

  local durations_file="$HOME/.shipwright/optimization/pipeline-durations.json"
  mkdir -p "$HOME/.shipwright/optimization"

  [[ -f "$durations_file" ]] || echo '{"templates":{}}' > "$durations_file"

  local tmp_dur="${durations_file}.tmp.$$"
  jq --arg tpl "$template" --argjson dur "$duration_s" --arg res "$result" --arg ts "$(now_iso)" '
    .templates[$tpl] = (
      (.templates[$tpl] // {durations: [], p95_duration_s: 0}) |
      .durations = ((.durations + [{duration_s: $dur, result: $res, ts: $ts}]) | .[-50:]) |
      .p95_duration_s = (
        [.durations[].duration_s] | sort |
        if length > 0 then .[((length * 95 / 100) | floor)] else 0 end
      )
    )
  ' "$durations_file" > "$tmp_dur" 2>/dev/null && mv "$tmp_dur" "$durations_file"
}
182
+
183
# ─── Progress-Based Health Monitoring ─────────────────────────────────────────
# Rather than killing jobs after a static timeout, we check for forward progress.
# Progress signals: stage transitions, iteration advances, git diff growth, new files.
# Graduated response: healthy → slowing → stalled → stuck → kill.

# Where per-issue progress snapshots live; callers may pre-set PROGRESS_DIR.
PROGRESS_DIR="${PROGRESS_DIR:-$HOME/.shipwright/progress}"
189
+
190
# Collect a progress snapshot for an active job
# Arguments: $1 - issue number, $2 - worktree path, $3 - pipeline PID
# Outputs:   JSON with stage, iteration, diff_lines, files_changed,
#            last_error, pid, ts on stdout
daemon_collect_snapshot() {
  local issue_num="$1" worktree="$2" pid="$3"

  local stage="" iteration=0 diff_lines=0 files_changed=0 last_error=""

  # Get stage and iteration from heartbeat (fastest source)
  local heartbeat_dir="$HOME/.shipwright/heartbeats"
  if [[ -d "$heartbeat_dir" ]]; then
    local hb_file
    for hb_file in "$heartbeat_dir"/*.json; do
      [[ ! -f "$hb_file" ]] && continue
      local hb_pid
      hb_pid=$(jq -r '.pid // 0' "$hb_file" 2>/dev/null || echo 0)
      if [[ "$hb_pid" == "$pid" ]]; then
        stage=$(jq -r '.stage // "unknown"' "$hb_file" 2>/dev/null || echo "unknown")
        iteration=$(jq -r '.iteration // 0' "$hb_file" 2>/dev/null || echo 0)
        [[ "$iteration" == "null" ]] && iteration=0
        break
      fi
    done
  fi
  # --argjson below aborts on anything non-numeric from a corrupt heartbeat.
  [[ "$iteration" =~ ^[0-9]+$ ]] || iteration=0

  # Fallback: read stage from pipeline-state.md in worktree
  if [[ -z "$stage" || "$stage" == "unknown" ]] && [[ -d "$worktree" ]]; then
    local state_file="$worktree/.claude/pipeline-state.md"
    if [[ -f "$state_file" ]]; then
      stage=$(grep -m1 '^current_stage:' "$state_file" 2>/dev/null | sed 's/^current_stage: *//' || echo "unknown")
    fi
  fi

  # Get git diff stats from worktree (how much code has been written)
  if [[ -d "$worktree/.git" ]] || [[ -f "$worktree/.git" ]]; then
    diff_lines=$(cd "$worktree" && git diff --stat 2>/dev/null | tail -1 | grep -o '[0-9]* insertion' | grep -o '[0-9]*' || echo "0")
    [[ -z "$diff_lines" ]] && diff_lines=0
    files_changed=$(cd "$worktree" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ' || echo "0")
    # Also count untracked files the agent has created
    local untracked
    untracked=$(cd "$worktree" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ' || echo "0")
    files_changed=$((files_changed + untracked))
  fi

  # Check last error from error log
  if [[ -d "$worktree" ]]; then
    local error_log="$worktree/.claude/pipeline-artifacts/error-log.jsonl"
    if [[ -f "$error_log" ]]; then
      last_error=$(tail -1 "$error_log" 2>/dev/null | jq -r '.signature // ""' 2>/dev/null || echo "")
    fi
  fi

  # Output JSON snapshot. Include the pid so daemon_assess_progress can run
  # its CPU-activity check straight from the snapshot (it falls back to the
  # daemon state file only when pid is absent).
  jq -n \
    --arg stage "$stage" \
    --argjson iteration "${iteration:-0}" \
    --argjson diff_lines "${diff_lines:-0}" \
    --argjson files_changed "${files_changed:-0}" \
    --arg last_error "$last_error" \
    --arg pid "$pid" \
    --arg ts "$(now_iso)" \
    '{
      stage: $stage,
      iteration: $iteration,
      diff_lines: $diff_lines,
      files_changed: $files_changed,
      last_error: $last_error,
      pid: $pid,
      ts: $ts
    }'
}
258
+
259
# Assess job progress by comparing current snapshot to previous
# Arguments: $1 - issue number, $2 - snapshot JSON from daemon_collect_snapshot
# Globals:   PROGRESS_DIR, STATE_FILE, PROGRESS_CHECKS_BEFORE_WARN,
#            PROGRESS_CHECKS_BEFORE_KILL (read)
# Outputs:   verdict on stdout: healthy | slowing | stalled | stuck
daemon_assess_progress() {
  local issue_num="$1" current_snapshot="$2"

  mkdir -p "$PROGRESS_DIR"
  local progress_file="$PROGRESS_DIR/issue-${issue_num}.json"

  # If no previous snapshot, store this one and return healthy
  if [[ ! -f "$progress_file" ]]; then
    jq -n \
      --argjson snap "$current_snapshot" \
      --arg issue "$issue_num" \
      '{
        issue: $issue,
        snapshots: [$snap],
        no_progress_count: 0,
        last_progress_at: $snap.ts,
        repeated_error_count: 0
      }' > "$progress_file"
    echo "healthy"
    return
  fi

  local prev_data
  prev_data=$(cat "$progress_file")

  # Get previous snapshot values
  local prev_stage prev_iteration prev_diff_lines prev_files prev_error prev_no_progress
  prev_stage=$(echo "$prev_data" | jq -r '.snapshots[-1].stage // "unknown"')
  prev_iteration=$(echo "$prev_data" | jq -r '.snapshots[-1].iteration // 0')
  prev_diff_lines=$(echo "$prev_data" | jq -r '.snapshots[-1].diff_lines // 0')
  prev_files=$(echo "$prev_data" | jq -r '.snapshots[-1].files_changed // 0')
  prev_error=$(echo "$prev_data" | jq -r '.snapshots[-1].last_error // ""')
  prev_no_progress=$(echo "$prev_data" | jq -r '.no_progress_count // 0')
  local prev_repeated_errors
  prev_repeated_errors=$(echo "$prev_data" | jq -r '.repeated_error_count // 0')

  # Get current values. The // defaults matter: a snapshot missing a key
  # would otherwise yield the string "null", which aborts the integer
  # comparisons below and miscounts "null" != prev_stage as progress.
  local cur_stage cur_iteration cur_diff cur_files cur_error
  cur_stage=$(echo "$current_snapshot" | jq -r '.stage // "unknown"')
  cur_iteration=$(echo "$current_snapshot" | jq -r '.iteration // 0')
  cur_diff=$(echo "$current_snapshot" | jq -r '.diff_lines // 0')
  cur_files=$(echo "$current_snapshot" | jq -r '.files_changed // 0')
  cur_error=$(echo "$current_snapshot" | jq -r '.last_error // ""')

  # Detect progress
  local has_progress=false

  # Stage advanced → clear progress
  if [[ "$cur_stage" != "$prev_stage" && "$cur_stage" != "unknown" ]]; then
    has_progress=true
    daemon_log INFO "Progress: issue #${issue_num} stage ${prev_stage} → ${cur_stage}"
  fi

  # Iteration increased → clear progress (agent is looping but advancing)
  if [[ "$cur_iteration" -gt "$prev_iteration" ]]; then
    has_progress=true
    daemon_log INFO "Progress: issue #${issue_num} iteration ${prev_iteration} → ${cur_iteration}"
  fi

  # Diff lines grew (agent is writing code)
  if [[ "$cur_diff" -gt "$prev_diff_lines" ]]; then
    has_progress=true
  fi

  # More files touched
  if [[ "$cur_files" -gt "$prev_files" ]]; then
    has_progress=true
  fi

  # Claude subprocess is alive and consuming CPU — agent is thinking/working
  # During build stage, Claude can spend 10+ minutes thinking before any
  # visible git changes appear. Detect this as progress.
  if [[ "$has_progress" != "true" ]]; then
    local _pid_for_check
    _pid_for_check=$(echo "$current_snapshot" | jq -r '.pid // empty' 2>/dev/null || true)
    if [[ -z "$_pid_for_check" ]]; then
      # Fallback: get PID from active_jobs
      _pid_for_check=$(jq -r --argjson num "$issue_num" \
        '.active_jobs[] | select(.issue == ($num | tonumber)) | .pid' "$STATE_FILE" 2>/dev/null | head -1 || true)
    fi
    if [[ -n "$_pid_for_check" ]]; then
      # Check if any child process (claude) is alive and using CPU
      local child_cpu=0
      child_cpu=$(ps -o pid=,pcpu= -p "$_pid_for_check" 2>/dev/null | awk '{sum+=$2} END{printf "%d", sum+0}' || echo "0")
      if [[ "$child_cpu" -eq 0 ]]; then
        # Check children of the pipeline process
        child_cpu=$(pgrep -P "$_pid_for_check" 2>/dev/null | xargs -I{} ps -o pcpu= -p {} 2>/dev/null | awk '{sum+=$1} END{printf "%d", sum+0}' || echo "0")
      fi
      if [[ "${child_cpu:-0}" -gt 0 ]]; then
        has_progress=true
      fi
    fi
  fi

  # Detect repeated errors (same error signature hitting again)
  local repeated_errors="$prev_repeated_errors"
  if [[ -n "$cur_error" && "$cur_error" == "$prev_error" ]]; then
    repeated_errors=$((repeated_errors + 1))
  elif [[ -n "$cur_error" && "$cur_error" != "$prev_error" ]]; then
    # Different error — reset counter (agent is making different mistakes, that's progress)
    repeated_errors=0
  fi

  # Update no_progress counter
  local no_progress_count
  if [[ "$has_progress" == "true" ]]; then
    no_progress_count=0
    repeated_errors=0
  else
    no_progress_count=$((prev_no_progress + 1))
  fi

  # Update progress file (keep last 10 snapshots)
  local tmp_progress="${progress_file}.tmp.$$"
  jq \
    --argjson snap "$current_snapshot" \
    --argjson npc "$no_progress_count" \
    --argjson rec "$repeated_errors" \
    --arg ts "$(now_iso)" \
    '
    .snapshots = ((.snapshots + [$snap]) | .[-10:]) |
    .no_progress_count = $npc |
    .repeated_error_count = $rec |
    if $npc == 0 then .last_progress_at = $ts else . end
    ' "$progress_file" > "$tmp_progress" 2>/dev/null && mv "$tmp_progress" "$progress_file"

  # ── Vitals-based verdict (preferred over static thresholds) ──
  if type pipeline_compute_vitals &>/dev/null && type pipeline_health_verdict &>/dev/null; then
    # Compute vitals using the worktree's pipeline state if available
    local _worktree_state=""
    local _worktree_artifacts=""
    local _worktree_dir
    _worktree_dir=$(jq -r --arg i "$issue_num" '.active_jobs[] | select(.issue == ($i | tonumber)) | .worktree // ""' "$STATE_FILE" 2>/dev/null || echo "")
    if [[ -n "$_worktree_dir" && -d "$_worktree_dir/.claude" ]]; then
      _worktree_state="$_worktree_dir/.claude/pipeline-state.md"
      _worktree_artifacts="$_worktree_dir/.claude/pipeline-artifacts"
    fi

    local _vitals_json
    _vitals_json=$(pipeline_compute_vitals "$_worktree_state" "$_worktree_artifacts" "$issue_num" 2>/dev/null) || true
    if [[ -n "$_vitals_json" && "$_vitals_json" != "{}" ]]; then
      local _health_verdict _health_score
      _health_verdict=$(echo "$_vitals_json" | jq -r '.verdict // "continue"' 2>/dev/null || echo "continue")
      _health_score=$(echo "$_vitals_json" | jq -r '.health_score // 50' 2>/dev/null || echo "50")

      emit_event "pipeline.vitals_check" \
        "issue=$issue_num" \
        "health_score=$_health_score" \
        "verdict=$_health_verdict" \
        "no_progress=$no_progress_count" \
        "repeated_errors=$repeated_errors"

      # Map vitals verdict to daemon verdict
      case "$_health_verdict" in
        continue)
          echo "healthy"
          return
          ;;
        warn)
          # Sluggish but not dead — equivalent to slowing
          echo "slowing"
          return
          ;;
        intervene)
          echo "stalled"
          return
          ;;
        abort)
          echo "stuck"
          return
          ;;
      esac
    fi
  fi

  # ── Fallback: static threshold verdict ──
  local warn_threshold="${PROGRESS_CHECKS_BEFORE_WARN:-3}"
  local kill_threshold="${PROGRESS_CHECKS_BEFORE_KILL:-6}"

  # Stuck in same error loop — accelerate to kill
  if [[ "$repeated_errors" -ge 3 ]]; then
    echo "stuck"
    return
  fi

  if [[ "$no_progress_count" -ge "$kill_threshold" ]]; then
    echo "stuck"
  elif [[ "$no_progress_count" -ge "$warn_threshold" ]]; then
    echo "stalled"
  elif [[ "$no_progress_count" -ge 1 ]]; then
    echo "slowing"
  else
    echo "healthy"
  fi
}
456
+
457
# Clean up progress tracking for a completed/failed job
daemon_clear_progress() {
  local issue="$1"
  rm -f "$PROGRESS_DIR/issue-${issue}.json"
}
462
+
463
# Learn actual worker memory from peak RSS of pipeline processes
# Sums RSS across live active-job PIDs, derives a padded per-worker figure,
# persists it to the tuning file, and updates WORKER_MEM_GB.
learn_worker_memory() {
  [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]] || return 0
  [[ -f "$STATE_FILE" ]] || return 0

  local total_rss=0 process_count=0
  local job pid rss_kb

  while IFS= read -r job; do
    pid=$(echo "$job" | jq -r '.pid // empty')
    [[ -z "$pid" || ! "$pid" =~ ^[0-9]+$ ]] && continue
    kill -0 "$pid" 2>/dev/null || continue
    rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null | tr -d ' ' || echo "0")
    [[ "$rss_kb" =~ ^[0-9]+$ ]] || rss_kb=0
    if [[ "$rss_kb" -gt 0 ]]; then
      total_rss=$(( total_rss + rss_kb ))
      process_count=$(( process_count + 1 ))
    fi
  done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)

  [[ "$process_count" -gt 0 ]] || return 0

  local avg_rss_gb=$(( total_rss / process_count / 1048576 ))
  # 125% headroom, minimum 1GB, max 16GB
  local learned_mem_gb=$(( (avg_rss_gb * 5 + 3) / 4 ))
  [[ "$learned_mem_gb" -lt 1 ]] && learned_mem_gb=1
  [[ "$learned_mem_gb" -gt 16 ]] && learned_mem_gb=16

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  mkdir -p "$HOME/.shipwright/optimization"
  local tmp_tuning="${tuning_file}.tmp.$$"
  if [[ -f "$tuning_file" ]]; then
    jq --argjson mem "$learned_mem_gb" --argjson rss "$total_rss" --argjson cnt "$process_count" \
      '.learned_worker_mem_gb = $mem | .last_rss_total_kb = $rss | .last_rss_process_count = $cnt' \
      "$tuning_file" > "$tmp_tuning" 2>/dev/null && mv "$tmp_tuning" "$tuning_file"
  else
    jq -n --argjson mem "$learned_mem_gb" \
      '{learned_worker_mem_gb: $mem}' > "$tmp_tuning" && mv "$tmp_tuning" "$tuning_file"
  fi

  WORKER_MEM_GB="$learned_mem_gb"
}
512
+
513
# Record scaling outcome for learning optimal parallelism
# Arguments: $1 - parallelism level (integer), $2 - result string
record_scaling_outcome() {
  local parallelism="$1" result="$2"

  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    return
  fi
  # --argjson needs a valid JSON number; skip bad input instead of letting
  # jq abort and leave a stray temp file behind.
  [[ "$parallelism" =~ ^[0-9]+$ ]] || return 0

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  mkdir -p "$HOME/.shipwright/optimization"
  local tmp_tuning="${tuning_file}.tmp.$$"
  if [[ -f "$tuning_file" ]]; then
    jq --argjson p "$parallelism" --arg r "$result" --arg ts "$(now_iso)" '
      .scaling_history = ((.scaling_history // []) + [{parallelism: $p, result: $r, ts: $ts}]) |
      .scaling_history |= .[-50:]
    ' "$tuning_file" > "$tmp_tuning" 2>/dev/null && mv "$tmp_tuning" "$tuning_file"
  else
    jq -n --argjson p "$parallelism" --arg r "$result" --arg ts "$(now_iso)" '
      {scaling_history: [{parallelism: $p, result: $r, ts: $ts}]}
    ' > "$tmp_tuning" 2>/dev/null && mv "$tmp_tuning" "$tuning_file"
  fi
  # Clean up the temp file if jq failed and the mv never ran.
  rm -f "$tmp_tuning"
}
535
+
536
# Get success rate at a given parallelism level (for gradual scaling decisions)
# Prints an integer percentage; optimistically 100 when there is no history.
get_success_rate_at_parallelism() {
  local target="$1"

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  if [[ ! -f "$tuning_file" ]]; then
    echo "100"
    return
  fi

  local rate
  rate=$(jq -r --argjson p "$target" '
    [.scaling_history // [] | .[] | select(.parallelism == $p)] |
    if length > 0 then
      ([.[] | select(.result == "success")] | length) * 100 / length | floor
    else 100 end
  ' "$tuning_file" 2>/dev/null || echo "100")

  echo "${rate:-100}"
}
556
+
557
# Adapt patrol limits based on hit rate
# Raises the cap (max 20) when patrol keeps hitting it; shrinks it toward a
# floor of 3 when patrol finds nothing. Persists the choice to the tuning file.
adapt_patrol_limits() {
  local findings="$1" max_issues="$2"

  [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]] || return 0

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  mkdir -p "$HOME/.shipwright/optimization"

  local new_max="$max_issues"
  if [[ "$findings" -ge "$max_issues" ]]; then
    # Consistently hitting limit — increase
    new_max=$(( max_issues + 2 ))
    [[ "$new_max" -gt 20 ]] && new_max=20
  elif [[ "$findings" -eq 0 ]]; then
    # Finds nothing — reduce
    if [[ "$max_issues" -gt 3 ]]; then
      new_max=$(( max_issues - 1 ))
    else
      new_max=3
    fi
  fi

  local tmp_tuning="${tuning_file}.tmp.$$"
  if [[ -f "$tuning_file" ]]; then
    jq --argjson pm "$new_max" --argjson lf "$findings" --arg ts "$(now_iso)" \
      '.patrol_max_issues = $pm | .last_patrol_findings = $lf | .patrol_adapted_at = $ts' \
      "$tuning_file" > "$tmp_tuning" 2>/dev/null && mv "$tmp_tuning" "$tuning_file"
  else
    jq -n --argjson pm "$new_max" --argjson lf "$findings" --arg ts "$(now_iso)" \
      '{patrol_max_issues: $pm, last_patrol_findings: $lf, patrol_adapted_at: $ts}' \
      > "$tmp_tuning" && mv "$tmp_tuning" "$tuning_file"
  fi
}
593
+
594
# Load adaptive patrol limits from tuning config
# Overrides PATROL_MAX_ISSUES when a positive learned value is stored.
load_adaptive_patrol_limits() {
  [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]] || return 0

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  [[ -f "$tuning_file" ]] || return 0

  local learned
  learned=$(jq -r '.patrol_max_issues // 0' "$tuning_file" 2>/dev/null || echo "0")
  if [[ "$learned" -gt 0 ]]; then
    PATROL_MAX_ISSUES="$learned"
  fi
}