shipwright-cli 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -19
- package/dashboard/public/index.html +224 -8
- package/dashboard/public/styles.css +1078 -4
- package/dashboard/server.ts +1100 -15
- package/dashboard/src/canvas/interactions.ts +74 -0
- package/dashboard/src/canvas/layout.ts +85 -0
- package/dashboard/src/canvas/overlays.ts +117 -0
- package/dashboard/src/canvas/particles.ts +105 -0
- package/dashboard/src/canvas/renderer.ts +191 -0
- package/dashboard/src/components/charts/bar.ts +54 -0
- package/dashboard/src/components/charts/donut.ts +25 -0
- package/dashboard/src/components/charts/pipeline-rail.ts +105 -0
- package/dashboard/src/components/charts/sparkline.ts +82 -0
- package/dashboard/src/components/header.ts +616 -0
- package/dashboard/src/components/modal.ts +413 -0
- package/dashboard/src/components/terminal.ts +144 -0
- package/dashboard/src/core/api.ts +381 -0
- package/dashboard/src/core/helpers.ts +118 -0
- package/dashboard/src/core/router.ts +190 -0
- package/dashboard/src/core/sse.ts +38 -0
- package/dashboard/src/core/state.ts +150 -0
- package/dashboard/src/core/ws.ts +143 -0
- package/dashboard/src/design/icons.ts +131 -0
- package/dashboard/src/design/tokens.ts +160 -0
- package/dashboard/src/main.ts +68 -0
- package/dashboard/src/types/api.ts +337 -0
- package/dashboard/src/views/activity.ts +185 -0
- package/dashboard/src/views/agent-cockpit.ts +236 -0
- package/dashboard/src/views/agents.ts +72 -0
- package/dashboard/src/views/fleet-map.ts +299 -0
- package/dashboard/src/views/insights.ts +298 -0
- package/dashboard/src/views/machines.ts +162 -0
- package/dashboard/src/views/metrics.ts +420 -0
- package/dashboard/src/views/overview.ts +409 -0
- package/dashboard/src/views/pipeline-theater.ts +219 -0
- package/dashboard/src/views/pipelines.ts +595 -0
- package/dashboard/src/views/team.ts +362 -0
- package/dashboard/src/views/timeline.ts +389 -0
- package/dashboard/tsconfig.json +21 -0
- package/docs/AGI-PLATFORM-PLAN.md +5 -5
- package/docs/AGI-WHATS-NEXT.md +19 -16
- package/docs/README.md +2 -0
- package/package.json +8 -1
- package/scripts/check-version-consistency.sh +72 -0
- package/scripts/lib/daemon-adaptive.sh +610 -0
- package/scripts/lib/daemon-dispatch.sh +489 -0
- package/scripts/lib/daemon-failure.sh +387 -0
- package/scripts/lib/daemon-patrol.sh +1113 -0
- package/scripts/lib/daemon-poll.sh +1202 -0
- package/scripts/lib/daemon-state.sh +550 -0
- package/scripts/lib/daemon-triage.sh +490 -0
- package/scripts/lib/helpers.sh +81 -0
- package/scripts/lib/pipeline-intelligence.sh +0 -6
- package/scripts/lib/pipeline-quality-checks.sh +3 -1
- package/scripts/lib/pipeline-stages.sh +20 -0
- package/scripts/sw +109 -168
- package/scripts/sw-activity.sh +1 -1
- package/scripts/sw-adaptive.sh +2 -2
- package/scripts/sw-adversarial.sh +1 -1
- package/scripts/sw-architecture-enforcer.sh +1 -1
- package/scripts/sw-auth.sh +14 -6
- package/scripts/sw-autonomous.sh +1 -1
- package/scripts/sw-changelog.sh +2 -2
- package/scripts/sw-checkpoint.sh +1 -1
- package/scripts/sw-ci.sh +1 -1
- package/scripts/sw-cleanup.sh +1 -1
- package/scripts/sw-code-review.sh +1 -1
- package/scripts/sw-connect.sh +1 -1
- package/scripts/sw-context.sh +1 -1
- package/scripts/sw-cost.sh +1 -1
- package/scripts/sw-daemon.sh +53 -4817
- package/scripts/sw-dashboard.sh +1 -1
- package/scripts/sw-db.sh +1 -1
- package/scripts/sw-decompose.sh +1 -1
- package/scripts/sw-deps.sh +1 -1
- package/scripts/sw-developer-simulation.sh +1 -1
- package/scripts/sw-discovery.sh +1 -1
- package/scripts/sw-doc-fleet.sh +1 -1
- package/scripts/sw-docs-agent.sh +1 -1
- package/scripts/sw-docs.sh +1 -1
- package/scripts/sw-doctor.sh +49 -1
- package/scripts/sw-dora.sh +1 -1
- package/scripts/sw-durable.sh +1 -1
- package/scripts/sw-e2e-orchestrator.sh +1 -1
- package/scripts/sw-eventbus.sh +1 -1
- package/scripts/sw-feedback.sh +1 -1
- package/scripts/sw-fix.sh +6 -5
- package/scripts/sw-fleet-discover.sh +1 -1
- package/scripts/sw-fleet-viz.sh +3 -3
- package/scripts/sw-fleet.sh +1 -1
- package/scripts/sw-github-app.sh +5 -2
- package/scripts/sw-github-checks.sh +1 -1
- package/scripts/sw-github-deploy.sh +1 -1
- package/scripts/sw-github-graphql.sh +1 -1
- package/scripts/sw-guild.sh +1 -1
- package/scripts/sw-heartbeat.sh +1 -1
- package/scripts/sw-hygiene.sh +1 -1
- package/scripts/sw-incident.sh +1 -1
- package/scripts/sw-init.sh +112 -9
- package/scripts/sw-instrument.sh +6 -1
- package/scripts/sw-intelligence.sh +5 -1
- package/scripts/sw-jira.sh +1 -1
- package/scripts/sw-launchd.sh +1 -1
- package/scripts/sw-linear.sh +20 -9
- package/scripts/sw-logs.sh +1 -1
- package/scripts/sw-loop.sh +2 -1
- package/scripts/sw-memory.sh +10 -1
- package/scripts/sw-mission-control.sh +1 -1
- package/scripts/sw-model-router.sh +4 -1
- package/scripts/sw-otel.sh +4 -4
- package/scripts/sw-oversight.sh +1 -1
- package/scripts/sw-pipeline-composer.sh +3 -1
- package/scripts/sw-pipeline-vitals.sh +4 -6
- package/scripts/sw-pipeline.sh +19 -56
- package/scripts/sw-pipeline.sh.mock +7 -0
- package/scripts/sw-pm.sh +5 -2
- package/scripts/sw-pr-lifecycle.sh +1 -1
- package/scripts/sw-predictive.sh +4 -1
- package/scripts/sw-prep.sh +3 -2
- package/scripts/sw-ps.sh +1 -1
- package/scripts/sw-public-dashboard.sh +10 -4
- package/scripts/sw-quality.sh +1 -1
- package/scripts/sw-reaper.sh +1 -1
- package/scripts/sw-recruit.sh +25 -1
- package/scripts/sw-regression.sh +2 -1
- package/scripts/sw-release-manager.sh +1 -1
- package/scripts/sw-release.sh +7 -5
- package/scripts/sw-remote.sh +1 -1
- package/scripts/sw-replay.sh +1 -1
- package/scripts/sw-retro.sh +1 -1
- package/scripts/sw-scale.sh +11 -5
- package/scripts/sw-security-audit.sh +1 -1
- package/scripts/sw-self-optimize.sh +172 -7
- package/scripts/sw-session.sh +1 -1
- package/scripts/sw-setup.sh +1 -1
- package/scripts/sw-standup.sh +4 -3
- package/scripts/sw-status.sh +1 -1
- package/scripts/sw-strategic.sh +2 -1
- package/scripts/sw-stream.sh +8 -2
- package/scripts/sw-swarm.sh +12 -10
- package/scripts/sw-team-stages.sh +1 -1
- package/scripts/sw-templates.sh +1 -1
- package/scripts/sw-testgen.sh +3 -2
- package/scripts/sw-tmux-pipeline.sh +2 -1
- package/scripts/sw-tmux.sh +1 -1
- package/scripts/sw-trace.sh +1 -1
- package/scripts/sw-tracker-jira.sh +1 -0
- package/scripts/sw-tracker-linear.sh +1 -0
- package/scripts/sw-tracker.sh +24 -6
- package/scripts/sw-triage.sh +1 -1
- package/scripts/sw-upgrade.sh +1 -1
- package/scripts/sw-ux.sh +1 -1
- package/scripts/sw-webhook.sh +1 -1
- package/scripts/sw-widgets.sh +2 -2
- package/scripts/sw-worktree.sh +1 -1
- package/dashboard/public/app.js +0 -4422
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
# daemon-adaptive.sh — Adaptive intervals, progress tracking, learning (for sw-daemon.sh)
# Source from sw-daemon.sh. Requires state, policy, helpers.
#
# Include guard: sourcing this file twice is a no-op.
if [[ -n "${_DAEMON_ADAPTIVE_LOADED:-}" ]]; then
  return 0
fi
_DAEMON_ADAPTIVE_LOADED=1
|
|
5
|
+
|
|
6
|
+
# Adapt poll interval based on queue state.
# Empty queue 5+ consecutive cycles → 120s; queue has items → 30s;
# otherwise (jobs in flight, queue empty) → 60s.
#
# Globals:   ADAPTIVE_THRESHOLDS_ENABLED (read), POLL_INTERVAL (read),
#            EMPTY_QUEUE_CYCLES (read/write), HOME (read)
# Arguments: $1 - queue depth, $2 - active job count
# Outputs:   chosen interval in seconds on stdout
#
# NOTE(review): EMPTY_QUEUE_CYCLES is mutated here, but the interval is echoed,
# so a caller using interval=$(get_adaptive_poll_interval …) mutates it in a
# subshell and loses the increment. The counter is also persisted to
# daemon-tuning.json — confirm the caller reloads it from there.
get_adaptive_poll_interval() {
  local queue_depth="$1"
  local active_count="$2"

  # Learning disabled: always use the statically configured interval.
  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    echo "$POLL_INTERVAL"
    return
  fi

  # Track consecutive fully-idle cycles (no queue, no active jobs).
  if [[ "$queue_depth" -eq 0 && "$active_count" -eq 0 ]]; then
    EMPTY_QUEUE_CYCLES=$((EMPTY_QUEUE_CYCLES + 1))
  else
    EMPTY_QUEUE_CYCLES=0
  fi

  local interval="$POLL_INTERVAL"
  if [[ "$EMPTY_QUEUE_CYCLES" -ge 5 ]]; then
    interval=120   # long idle — back off
  elif [[ "$queue_depth" -gt 0 ]]; then
    interval=30    # work waiting — poll aggressively
  else
    interval=60    # jobs in flight, queue empty
  fi

  # Persist current setting for dashboard visibility. Best-effort: jq failures
  # are tolerated, but must not leak temp files.
  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  mkdir -p "$HOME/.shipwright/optimization"
  local tmp_tuning="${tuning_file}.tmp.$$"
  if [[ -f "$tuning_file" ]]; then
    if jq --argjson pi "$interval" --argjson eqc "$EMPTY_QUEUE_CYCLES" \
      '.poll_interval = $pi | .empty_queue_cycles = $eqc' \
      "$tuning_file" > "$tmp_tuning" 2>/dev/null; then
      mv "$tmp_tuning" "$tuning_file"
    else
      rm -f "$tmp_tuning"   # FIX: previously leaked *.tmp.$$ when jq failed
    fi
  else
    if jq -n --argjson pi "$interval" --argjson eqc "$EMPTY_QUEUE_CYCLES" \
      '{poll_interval: $pi, empty_queue_cycles: $eqc}' > "$tmp_tuning" 2>/dev/null; then
      mv "$tmp_tuning" "$tuning_file"
    else
      rm -f "$tmp_tuning"   # FIX: same leak in the create path
    fi
  fi

  echo "$interval"
}
|
|
48
|
+
|
|
49
|
+
# Rolling average cost per template from costs.json (last 10 runs).
#
# Arguments: $1 - pipeline template name (default "autonomous")
# Outputs:   average cost in USD on stdout; falls back to EST_COST_PER_JOB
#            when learning is off, no history exists, or the average is 0/null.
get_adaptive_cost_estimate() {
  local tpl_name="${1:-autonomous}"
  local ledger="$HOME/.shipwright/costs.json"

  # Static estimate when learning is disabled or there is no cost history.
  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" || ! -f "$ledger" ]]; then
    echo "$EST_COST_PER_JOB"
    return
  fi

  local mean
  mean=$(jq -r --arg tpl "$tpl_name" '
    [.sessions // [] | .[] | select(.template == $tpl) | .total_cost_usd // 0] |
    .[-10:] | if length > 0 then (add / length) else null end
  ' "$ledger" 2>/dev/null || echo "")

  case "$mean" in
    "" | null | 0) echo "$EST_COST_PER_JOB" ;;
    *)             echo "$mean" ;;
  esac
}
|
|
76
|
+
|
|
77
|
+
# Per-stage adaptive heartbeat timeout from learned stage durations.
#
# Arguments: $1 - stage name (default "unknown")
# Outputs:   timeout in seconds on stdout.
# Resolution order for the default: daemon_health_timeout_for_stage (if the
# health module is sourced) → policy_get .daemon.stage_timeouts.<stage> →
# hard-coded per-stage literals → HEALTH_HEARTBEAT_TIMEOUT (120s).
get_adaptive_heartbeat_timeout() {
  local stage="${1:-unknown}"

  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    echo "${HEALTH_HEARTBEAT_TIMEOUT:-120}"
    return
  fi

  # Stage-specific defaults (daemon-health.sh when sourced, else policy_get,
  # else literal).
  local default_timeout="${HEALTH_HEARTBEAT_TIMEOUT:-120}"
  if type daemon_health_timeout_for_stage &>/dev/null; then
    default_timeout=$(daemon_health_timeout_for_stage "$stage" "$default_timeout")
  elif type policy_get &>/dev/null; then
    local policy_stage
    policy_stage=$(policy_get ".daemon.stage_timeouts.$stage" "")
    [[ -n "$policy_stage" && "$policy_stage" =~ ^[0-9]+$ ]] && default_timeout="$policy_stage"
  else
    case "$stage" in
      build) default_timeout=300 ;;
      test) default_timeout=180 ;;
      review|compound_quality) default_timeout=180 ;;
      lint|format|intake|plan|design) default_timeout=60 ;;
    esac
  fi
  [[ "$default_timeout" =~ ^[0-9]+$ ]] || default_timeout="${HEALTH_HEARTBEAT_TIMEOUT:-120}"

  local durations_file="$HOME/.shipwright/optimization/stage-durations.json"
  if [[ ! -f "$durations_file" ]]; then
    echo "$default_timeout"
    return
  fi

  local learned_duration
  learned_duration=$(jq -r --arg s "$stage" \
    '.stages[$s].p90_duration_s // 0' "$durations_file" 2>/dev/null || echo "0")
  # FIX: p90_duration_s may be a JSON float ("93.5") or "null"; bash integer
  # comparison ([[ -gt ]]) aborts on non-integers. Truncate and validate first.
  learned_duration="${learned_duration%%.*}"
  [[ "$learned_duration" =~ ^[0-9]+$ ]] || learned_duration=0

  if [[ "$learned_duration" -gt 0 ]]; then
    # 150% of p90 duration, floor of 60s
    local adaptive_timeout=$(( (learned_duration * 3) / 2 ))
    [[ "$adaptive_timeout" -lt 60 ]] && adaptive_timeout=60
    echo "$adaptive_timeout"
  else
    echo "$default_timeout"
  fi
}
|
|
123
|
+
|
|
124
|
+
# Adaptive stale pipeline timeout using 95th percentile of historical durations.
#
# Arguments: $1 - template name (default "autonomous")
# Outputs:   timeout in seconds on stdout; 1.5x p95 with a 600s-7200s clamp,
#            else HEALTH_STALE_TIMEOUT (1800s) when learning is off or no
#            history exists.
get_adaptive_stale_timeout() {
  local template="${1:-autonomous}"

  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    echo "${HEALTH_STALE_TIMEOUT:-1800}"
    return
  fi

  local durations_file="$HOME/.shipwright/optimization/pipeline-durations.json"
  if [[ ! -f "$durations_file" ]]; then
    echo "${HEALTH_STALE_TIMEOUT:-1800}"
    return
  fi

  local p95_duration
  p95_duration=$(jq -r --arg tpl "$template" \
    '.templates[$tpl].p95_duration_s // 0' "$durations_file" 2>/dev/null || echo "0")
  # FIX: p95_duration_s may be a JSON float or "null"; bash integer comparison
  # would abort on those. Truncate and validate before [[ -gt ]].
  p95_duration="${p95_duration%%.*}"
  [[ "$p95_duration" =~ ^[0-9]+$ ]] || p95_duration=0

  if [[ "$p95_duration" -gt 0 ]]; then
    # 1.5x safety margin, clamped 600s-7200s
    local adaptive_timeout=$(( (p95_duration * 3) / 2 ))
    [[ "$adaptive_timeout" -lt 600 ]] && adaptive_timeout=600
    [[ "$adaptive_timeout" -gt 7200 ]] && adaptive_timeout=7200
    echo "$adaptive_timeout"
  else
    echo "${HEALTH_STALE_TIMEOUT:-1800}"
  fi
}
|
|
153
|
+
|
|
154
|
+
# Record pipeline duration for future threshold learning.
#
# Arguments: $1 - template name, $2 - duration in whole seconds, $3 - result
# Appends to pipeline-durations.json (last 50 kept per template) and refreshes
# that template's p95_duration_s. No-op when learning is off or duration is
# not a non-negative integer.
record_pipeline_duration() {
  local template="$1" duration_s="$2" result="$3"

  [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]] || return 0
  [[ "$duration_s" =~ ^[0-9]+$ ]] || return 0

  local store="$HOME/.shipwright/optimization/pipeline-durations.json"
  mkdir -p "$HOME/.shipwright/optimization"
  [[ -f "$store" ]] || echo '{"templates":{}}' > "$store"

  local scratch="${store}.tmp.$$"
  jq --arg tpl "$template" --argjson dur "$duration_s" --arg res "$result" --arg ts "$(now_iso)" '
    .templates[$tpl] = (
      (.templates[$tpl] // {durations: [], p95_duration_s: 0}) |
      .durations = ((.durations + [{duration_s: $dur, result: $res, ts: $ts}]) | .[-50:]) |
      .p95_duration_s = (
        [.durations[].duration_s] | sort |
        if length > 0 then .[((length * 95 / 100) | floor)] else 0 end
      )
    )
  ' "$store" > "$scratch" 2>/dev/null && mv "$scratch" "$store"
}
|
|
182
|
+
|
|
183
|
+
# ─── Progress-Based Health Monitoring ─────────────────────────────────────────
# Instead of killing jobs after a static timeout, we check for forward progress.
# Progress signals: stage transitions, iteration advances, git diff growth, new files.
# Graduated response: healthy → slowing → stalled → stuck → kill.

# Where per-issue progress snapshots live; caller may override before sourcing.
: "${PROGRESS_DIR:=$HOME/.shipwright/progress}"
|
|
189
|
+
|
|
190
|
+
# Collect a progress snapshot for an active job.
# Returns JSON with stage, iteration, diff_lines, files_changed.
#
# Globals:   HOME (read — heartbeat directory)
# Arguments: $1 - issue number (not referenced in this body — NOTE(review):
#                 kept for call-site symmetry, confirm callers rely on arity)
#            $2 - worktree path
#            $3 - pipeline PID (matched against each heartbeat's .pid)
# Outputs:   one JSON object on stdout:
#            {stage, iteration, diff_lines, files_changed, last_error, ts}
# Requires:  jq, git, and the now_iso helper (defined elsewhere in the suite).
daemon_collect_snapshot() {
  local issue_num="$1" worktree="$2" pid="$3"

  local stage="" iteration=0 diff_lines=0 files_changed=0 last_error=""

  # Get stage and iteration from heartbeat (fastest source): scan every
  # heartbeat file and take the first whose .pid matches this job's PID.
  local heartbeat_dir="$HOME/.shipwright/heartbeats"
  if [[ -d "$heartbeat_dir" ]]; then
    local hb_file
    for hb_file in "$heartbeat_dir"/*.json; do
      [[ ! -f "$hb_file" ]] && continue   # unmatched glob leaves the literal pattern
      local hb_pid
      hb_pid=$(jq -r '.pid // 0' "$hb_file" 2>/dev/null || echo 0)
      if [[ "$hb_pid" == "$pid" ]]; then
        stage=$(jq -r '.stage // "unknown"' "$hb_file" 2>/dev/null || echo "unknown")
        iteration=$(jq -r '.iteration // 0' "$hb_file" 2>/dev/null || echo 0)
        [[ "$iteration" == "null" ]] && iteration=0
        break
      fi
    done
  fi

  # Fallback: read stage from pipeline-state.md in worktree.
  if [[ -z "$stage" || "$stage" == "unknown" ]] && [[ -d "$worktree" ]]; then
    local state_file="$worktree/.claude/pipeline-state.md"
    if [[ -f "$state_file" ]]; then
      stage=$(grep -m1 '^current_stage:' "$state_file" 2>/dev/null | sed 's/^current_stage: *//' || echo "unknown")
    fi
  fi

  # Get git diff stats from worktree (how much code has been written).
  # .git may be a file (worktree pointer) rather than a directory.
  if [[ -d "$worktree/.git" ]] || [[ -f "$worktree/.git" ]]; then
    # "N insertions" from the summary line of git diff --stat; 0 when absent.
    diff_lines=$(cd "$worktree" && git diff --stat 2>/dev/null | tail -1 | grep -o '[0-9]* insertion' | grep -o '[0-9]*' || echo "0")
    [[ -z "$diff_lines" ]] && diff_lines=0
    files_changed=$(cd "$worktree" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ' || echo "0")
    # Also count untracked files the agent has created
    local untracked
    untracked=$(cd "$worktree" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ' || echo "0")
    files_changed=$((files_changed + untracked))
  fi

  # Check last error from error log (most recent line's .signature field).
  if [[ -d "$worktree" ]]; then
    local error_log="$worktree/.claude/pipeline-artifacts/error-log.jsonl"
    if [[ -f "$error_log" ]]; then
      last_error=$(tail -1 "$error_log" 2>/dev/null | jq -r '.signature // ""' 2>/dev/null || echo "")
    fi
  fi

  # Output JSON snapshot. NOTE(review): no .pid field is emitted here, yet
  # daemon_assess_progress reads '.pid // empty' from the snapshot — it always
  # falls through to its STATE_FILE lookup; confirm whether .pid was intended.
  jq -n \
    --arg stage "$stage" \
    --argjson iteration "${iteration:-0}" \
    --argjson diff_lines "${diff_lines:-0}" \
    --argjson files_changed "${files_changed:-0}" \
    --arg last_error "$last_error" \
    --arg ts "$(now_iso)" \
    '{
      stage: $stage,
      iteration: $iteration,
      diff_lines: $diff_lines,
      files_changed: $files_changed,
      last_error: $last_error,
      ts: $ts
    }'
}
|
|
258
|
+
|
|
259
|
+
# Assess job progress by comparing current snapshot to previous.
# Returns: healthy | slowing | stalled | stuck
#
# Globals:   PROGRESS_DIR (read), STATE_FILE (read),
#            PROGRESS_CHECKS_BEFORE_WARN / PROGRESS_CHECKS_BEFORE_KILL (read)
# Arguments: $1 - issue number, $2 - current snapshot JSON (from
#            daemon_collect_snapshot)
# Outputs:   verdict word on stdout
# Requires:  jq, daemon_log; optionally pipeline_compute_vitals /
#            pipeline_health_verdict / emit_event when those are sourced.
daemon_assess_progress() {
  local issue_num="$1" current_snapshot="$2"

  mkdir -p "$PROGRESS_DIR"
  local progress_file="$PROGRESS_DIR/issue-${issue_num}.json"

  # If no previous snapshot, store this one and return healthy.
  if [[ ! -f "$progress_file" ]]; then
    jq -n \
      --argjson snap "$current_snapshot" \
      --arg issue "$issue_num" \
      '{
        issue: $issue,
        snapshots: [$snap],
        no_progress_count: 0,
        last_progress_at: $snap.ts,
        repeated_error_count: 0
      }' > "$progress_file"
    echo "healthy"
    return
  fi

  local prev_data
  prev_data=$(cat "$progress_file")

  # Get previous snapshot values (last element of the stored history).
  local prev_stage prev_iteration prev_diff_lines prev_files prev_error prev_no_progress
  prev_stage=$(echo "$prev_data" | jq -r '.snapshots[-1].stage // "unknown"')
  prev_iteration=$(echo "$prev_data" | jq -r '.snapshots[-1].iteration // 0')
  prev_diff_lines=$(echo "$prev_data" | jq -r '.snapshots[-1].diff_lines // 0')
  prev_files=$(echo "$prev_data" | jq -r '.snapshots[-1].files_changed // 0')
  prev_error=$(echo "$prev_data" | jq -r '.snapshots[-1].last_error // ""')
  prev_no_progress=$(echo "$prev_data" | jq -r '.no_progress_count // 0')
  local prev_repeated_errors
  prev_repeated_errors=$(echo "$prev_data" | jq -r '.repeated_error_count // 0')

  # Get current values
  local cur_stage cur_iteration cur_diff cur_files cur_error
  cur_stage=$(echo "$current_snapshot" | jq -r '.stage')
  cur_iteration=$(echo "$current_snapshot" | jq -r '.iteration')
  cur_diff=$(echo "$current_snapshot" | jq -r '.diff_lines')
  cur_files=$(echo "$current_snapshot" | jq -r '.files_changed')
  cur_error=$(echo "$current_snapshot" | jq -r '.last_error')

  # Detect progress — any one signal below marks the job as progressing.
  local has_progress=false

  # Stage advanced → clear progress
  if [[ "$cur_stage" != "$prev_stage" && "$cur_stage" != "unknown" ]]; then
    has_progress=true
    daemon_log INFO "Progress: issue #${issue_num} stage ${prev_stage} → ${cur_stage}"
  fi

  # Iteration increased → clear progress (agent is looping but advancing)
  if [[ "$cur_iteration" -gt "$prev_iteration" ]]; then
    has_progress=true
    daemon_log INFO "Progress: issue #${issue_num} iteration ${prev_iteration} → ${cur_iteration}"
  fi

  # Diff lines grew (agent is writing code)
  if [[ "$cur_diff" -gt "$prev_diff_lines" ]]; then
    has_progress=true
  fi

  # More files touched
  if [[ "$cur_files" -gt "$prev_files" ]]; then
    has_progress=true
  fi

  # Claude subprocess is alive and consuming CPU — agent is thinking/working.
  # During build stage, Claude can spend 10+ minutes thinking before any
  # visible git changes appear. Detect this as progress.
  if [[ "$has_progress" != "true" ]]; then
    local _pid_for_check
    # NOTE(review): daemon_collect_snapshot does not emit a .pid field, so
    # this first lookup yields empty and the STATE_FILE fallback always runs.
    _pid_for_check=$(echo "$current_snapshot" | jq -r '.pid // empty' 2>/dev/null || true)
    if [[ -z "$_pid_for_check" ]]; then
      # Fallback: get PID from active_jobs
      _pid_for_check=$(jq -r --argjson num "$issue_num" \
        '.active_jobs[] | select(.issue == ($num | tonumber)) | .pid' "$STATE_FILE" 2>/dev/null | head -1 || true)
    fi
    if [[ -n "$_pid_for_check" ]]; then
      # Check if any child process (claude) is alive and using CPU
      local child_cpu=0
      child_cpu=$(ps -o pid=,pcpu= -p "$_pid_for_check" 2>/dev/null | awk '{sum+=$2} END{printf "%d", sum+0}' || echo "0")
      if [[ "$child_cpu" -eq 0 ]]; then
        # Check children of the pipeline process
        child_cpu=$(pgrep -P "$_pid_for_check" 2>/dev/null | xargs -I{} ps -o pcpu= -p {} 2>/dev/null | awk '{sum+=$1} END{printf "%d", sum+0}' || echo "0")
      fi
      if [[ "${child_cpu:-0}" -gt 0 ]]; then
        has_progress=true
      fi
    fi
  fi

  # Detect repeated errors (same error signature hitting again)
  local repeated_errors="$prev_repeated_errors"
  if [[ -n "$cur_error" && "$cur_error" == "$prev_error" ]]; then
    repeated_errors=$((repeated_errors + 1))
  elif [[ -n "$cur_error" && "$cur_error" != "$prev_error" ]]; then
    # Different error — reset counter (agent is making different mistakes, that's progress)
    repeated_errors=0
  fi

  # Update no_progress counter
  local no_progress_count
  if [[ "$has_progress" == "true" ]]; then
    no_progress_count=0
    repeated_errors=0
  else
    no_progress_count=$((prev_no_progress + 1))
  fi

  # Update progress file (keep last 10 snapshots)
  local tmp_progress="${progress_file}.tmp.$$"
  jq \
    --argjson snap "$current_snapshot" \
    --argjson npc "$no_progress_count" \
    --argjson rec "$repeated_errors" \
    --arg ts "$(now_iso)" \
    '
    .snapshots = ((.snapshots + [$snap]) | .[-10:]) |
    .no_progress_count = $npc |
    .repeated_error_count = $rec |
    if $npc == 0 then .last_progress_at = $ts else . end
    ' "$progress_file" > "$tmp_progress" 2>/dev/null && mv "$tmp_progress" "$progress_file"

  # ── Vitals-based verdict (preferred over static thresholds) ──
  # Only taken when the vitals module is sourced alongside this one.
  if type pipeline_compute_vitals &>/dev/null 2>&1 && type pipeline_health_verdict &>/dev/null 2>&1; then
    # Compute vitals using the worktree's pipeline state if available
    local _worktree_state=""
    local _worktree_artifacts=""
    local _worktree_dir
    _worktree_dir=$(jq -r --arg i "$issue_num" '.active_jobs[] | select(.issue == ($i | tonumber)) | .worktree // ""' "$STATE_FILE" 2>/dev/null || echo "")
    if [[ -n "$_worktree_dir" && -d "$_worktree_dir/.claude" ]]; then
      _worktree_state="$_worktree_dir/.claude/pipeline-state.md"
      _worktree_artifacts="$_worktree_dir/.claude/pipeline-artifacts"
    fi

    local _vitals_json
    _vitals_json=$(pipeline_compute_vitals "$_worktree_state" "$_worktree_artifacts" "$issue_num" 2>/dev/null) || true
    if [[ -n "$_vitals_json" && "$_vitals_json" != "{}" ]]; then
      local _health_verdict _health_score
      _health_verdict=$(echo "$_vitals_json" | jq -r '.verdict // "continue"' 2>/dev/null || echo "continue")
      _health_score=$(echo "$_vitals_json" | jq -r '.health_score // 50' 2>/dev/null || echo "50")

      emit_event "pipeline.vitals_check" \
        "issue=$issue_num" \
        "health_score=$_health_score" \
        "verdict=$_health_verdict" \
        "no_progress=$no_progress_count" \
        "repeated_errors=$repeated_errors"

      # Map vitals verdict to daemon verdict. An unrecognized verdict falls
      # through to the static-threshold logic below.
      case "$_health_verdict" in
        continue)
          echo "healthy"
          return
          ;;
        warn)
          # Sluggish but not dead — equivalent to slowing
          echo "slowing"
          return
          ;;
        intervene)
          echo "stalled"
          return
          ;;
        abort)
          echo "stuck"
          return
          ;;
      esac
    fi
  fi

  # ── Fallback: static threshold verdict ──
  local warn_threshold="${PROGRESS_CHECKS_BEFORE_WARN:-3}"
  local kill_threshold="${PROGRESS_CHECKS_BEFORE_KILL:-6}"

  # Stuck in same error loop — accelerate to kill
  if [[ "$repeated_errors" -ge 3 ]]; then
    echo "stuck"
    return
  fi

  if [[ "$no_progress_count" -ge "$kill_threshold" ]]; then
    echo "stuck"
  elif [[ "$no_progress_count" -ge "$warn_threshold" ]]; then
    echo "stalled"
  elif [[ "$no_progress_count" -ge 1 ]]; then
    echo "slowing"
  else
    echo "healthy"
  fi
}
|
|
456
|
+
|
|
457
|
+
# Drop the saved progress history for a job that completed or was killed.
#
# Arguments: $1 - issue number
daemon_clear_progress() {
  local issue="$1"
  rm -f -- "$PROGRESS_DIR/issue-${issue}.json"
}
|
|
462
|
+
|
|
463
|
+
# Learn actual worker memory from RSS of live pipeline processes.
#
# Globals:   ADAPTIVE_THRESHOLDS_ENABLED (read), STATE_FILE (read),
#            WORKER_MEM_GB (written), HOME (read)
# Outputs:   none (updates daemon-tuning.json and WORKER_MEM_GB)
# Requires:  jq, ps
learn_worker_memory() {
  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    return
  fi
  if [[ ! -f "$STATE_FILE" ]]; then
    return
  fi

  local total_rss=0
  local process_count=0

  # Sum resident-set size (KB) across every still-alive active-job PID.
  while IFS= read -r job; do
    local pid
    pid=$(echo "$job" | jq -r '.pid // empty')
    [[ -z "$pid" || ! "$pid" =~ ^[0-9]+$ ]] && continue
    if kill -0 "$pid" 2>/dev/null; then
      local rss_kb
      rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null | tr -d ' ' || echo "0")
      [[ ! "$rss_kb" =~ ^[0-9]+$ ]] && rss_kb=0
      if [[ "$rss_kb" -gt 0 ]]; then
        total_rss=$((total_rss + rss_kb))
        process_count=$((process_count + 1))
      fi
    fi
  done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)

  if [[ "$process_count" -gt 0 ]]; then
    # FIX: the previous code truncated KB→GB *before* applying the 125%
    # headroom, so any sub-GB average collapsed to 0 and the headroom only
    # operated on whole GB. Apply the headroom in KB, then round up to GB.
    local avg_rss_kb=$(( total_rss / process_count ))
    local learned_mem_gb=$(( (avg_rss_kb * 5 / 4 + 1048575) / 1048576 ))
    # Clamp to a sane range: minimum 1GB, maximum 16GB.
    [[ "$learned_mem_gb" -lt 1 ]] && learned_mem_gb=1
    [[ "$learned_mem_gb" -gt 16 ]] && learned_mem_gb=16

    local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
    mkdir -p "$HOME/.shipwright/optimization"
    local tmp_tuning="${tuning_file}.tmp.$$"
    if [[ -f "$tuning_file" ]]; then
      jq --argjson mem "$learned_mem_gb" --argjson rss "$total_rss" --argjson cnt "$process_count" \
        '.learned_worker_mem_gb = $mem | .last_rss_total_kb = $rss | .last_rss_process_count = $cnt' \
        "$tuning_file" > "$tmp_tuning" 2>/dev/null && mv "$tmp_tuning" "$tuning_file"
    else
      jq -n --argjson mem "$learned_mem_gb" \
        '{learned_worker_mem_gb: $mem}' > "$tmp_tuning" 2>/dev/null && mv "$tmp_tuning" "$tuning_file"
    fi

    WORKER_MEM_GB="$learned_mem_gb"
  fi
}
|
|
512
|
+
|
|
513
|
+
# Record scaling outcome for learning optimal parallelism.
#
# Arguments: $1 - parallelism level (integer), $2 - result string
# Appends to scaling_history in daemon-tuning.json (last 50 entries kept).
# No-op when adaptive learning is disabled.
record_scaling_outcome() {
  local parallelism="$1" result="$2"

  [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]] || return 0

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  mkdir -p "$HOME/.shipwright/optimization"

  local scratch="${tuning_file}.tmp.$$"
  if [[ -f "$tuning_file" ]]; then
    jq --argjson p "$parallelism" --arg r "$result" --arg ts "$(now_iso)" '
      .scaling_history = ((.scaling_history // []) + [{parallelism: $p, result: $r, ts: $ts}]) |
      .scaling_history |= .[-50:]
    ' "$tuning_file" > "$scratch" 2>/dev/null && mv "$scratch" "$tuning_file"
  else
    jq -n --argjson p "$parallelism" --arg r "$result" --arg ts "$(now_iso)" '
      {scaling_history: [{parallelism: $p, result: $r, ts: $ts}]}
    ' > "$scratch" && mv "$scratch" "$tuning_file"
  fi
}
|
|
535
|
+
|
|
536
|
+
# Get success rate (%) observed at a given parallelism level, for gradual
# scaling decisions.
#
# Arguments: $1 - parallelism level to look up
# Outputs:   integer percentage on stdout; 100 when there is no history.
get_success_rate_at_parallelism() {
  local target_parallelism="$1"
  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"

  # Optimistic default when nothing has been recorded yet.
  [[ -f "$tuning_file" ]] || { echo "100"; return; }

  local rate
  rate=$(jq -r --argjson p "$target_parallelism" '
    [.scaling_history // [] | .[] | select(.parallelism == $p)] |
    if length > 0 then
      ([.[] | select(.result == "success")] | length) * 100 / length | floor
    else 100 end
  ' "$tuning_file" 2>/dev/null || echo "100")

  echo "${rate:-100}"
}
|
|
556
|
+
|
|
557
|
+
# Adapt patrol limits based on hit rate.
#
# Arguments: $1 - findings count from the last patrol, $2 - current max issues
# Raises the cap by 2 (max 20) when patrol keeps hitting it; lowers it by 1
# (floor 3) when patrol finds nothing. Persists to daemon-tuning.json.
adapt_patrol_limits() {
  local findings="$1" max_issues="$2"

  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    return
  fi
  # FIX: validate inputs before integer comparisons; a non-numeric argument
  # previously aborted the function with a bash arithmetic error.
  [[ "$findings" =~ ^[0-9]+$ && "$max_issues" =~ ^[0-9]+$ ]] || return 0

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  mkdir -p "$HOME/.shipwright/optimization"

  local new_max="$max_issues"
  if [[ "$findings" -ge "$max_issues" ]]; then
    # Consistently hitting limit — increase
    new_max=$((max_issues + 2))
    [[ "$new_max" -gt 20 ]] && new_max=20
  elif [[ "$findings" -eq 0 ]]; then
    # Finds nothing — reduce
    if [[ "$max_issues" -gt 3 ]]; then
      new_max=$((max_issues - 1))
    else
      new_max=3
    fi
  fi

  local tmp_tuning="${tuning_file}.tmp.$$"
  if [[ -f "$tuning_file" ]]; then
    if jq --argjson pm "$new_max" --argjson lf "$findings" --arg ts "$(now_iso)" \
      '.patrol_max_issues = $pm | .last_patrol_findings = $lf | .patrol_adapted_at = $ts' \
      "$tuning_file" > "$tmp_tuning" 2>/dev/null; then
      mv "$tmp_tuning" "$tuning_file"
    else
      rm -f "$tmp_tuning"   # FIX: previously leaked *.tmp.$$ on jq failure
    fi
  else
    # FIX: stderr suppression added for symmetry with the update path.
    if jq -n --argjson pm "$new_max" --argjson lf "$findings" --arg ts "$(now_iso)" \
      '{patrol_max_issues: $pm, last_patrol_findings: $lf, patrol_adapted_at: $ts}' \
      > "$tmp_tuning" 2>/dev/null; then
      mv "$tmp_tuning" "$tuning_file"
    else
      rm -f "$tmp_tuning"
    fi
  fi
}
|
|
593
|
+
|
|
594
|
+
# Load adaptive patrol limits from tuning config.
#
# Globals:   PATROL_MAX_ISSUES (written when a positive learned value exists)
# No-op when adaptive learning is disabled or no tuning file exists.
load_adaptive_patrol_limits() {
  if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
    return
  fi

  local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
  if [[ ! -f "$tuning_file" ]]; then
    return
  fi

  local adaptive_max_issues
  adaptive_max_issues=$(jq -r '.patrol_max_issues // 0' "$tuning_file" 2>/dev/null || echo "0")
  # FIX: jq can emit "null", a float, or garbage from a corrupt file; validate
  # before [[ -gt ]] so a bad tuning file cannot abort the daemon loop.
  [[ "$adaptive_max_issues" =~ ^[0-9]+$ ]] || return 0
  if [[ "$adaptive_max_issues" -gt 0 ]]; then
    PATROL_MAX_ISSUES="$adaptive_max_issues"
  fi
}
|