shipwright-cli 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +926 -0
  3. package/claude-code/CLAUDE.md.shipwright +125 -0
  4. package/claude-code/hooks/notify-idle.sh +35 -0
  5. package/claude-code/hooks/pre-compact-save.sh +57 -0
  6. package/claude-code/hooks/task-completed.sh +170 -0
  7. package/claude-code/hooks/teammate-idle.sh +68 -0
  8. package/claude-code/settings.json.template +184 -0
  9. package/completions/_shipwright +140 -0
  10. package/completions/shipwright.bash +89 -0
  11. package/completions/shipwright.fish +107 -0
  12. package/docs/KNOWN-ISSUES.md +199 -0
  13. package/docs/TIPS.md +331 -0
  14. package/docs/definition-of-done.example.md +16 -0
  15. package/docs/patterns/README.md +139 -0
  16. package/docs/patterns/audit-loop.md +149 -0
  17. package/docs/patterns/bug-hunt.md +183 -0
  18. package/docs/patterns/feature-implementation.md +159 -0
  19. package/docs/patterns/refactoring.md +183 -0
  20. package/docs/patterns/research-exploration.md +144 -0
  21. package/docs/patterns/test-generation.md +173 -0
  22. package/package.json +49 -0
  23. package/scripts/adapters/docker-deploy.sh +50 -0
  24. package/scripts/adapters/fly-deploy.sh +41 -0
  25. package/scripts/adapters/iterm2-adapter.sh +122 -0
  26. package/scripts/adapters/railway-deploy.sh +34 -0
  27. package/scripts/adapters/tmux-adapter.sh +87 -0
  28. package/scripts/adapters/vercel-deploy.sh +35 -0
  29. package/scripts/adapters/wezterm-adapter.sh +103 -0
  30. package/scripts/cct +242 -0
  31. package/scripts/cct-cleanup.sh +172 -0
  32. package/scripts/cct-cost.sh +590 -0
  33. package/scripts/cct-daemon.sh +3189 -0
  34. package/scripts/cct-doctor.sh +328 -0
  35. package/scripts/cct-fix.sh +478 -0
  36. package/scripts/cct-fleet.sh +904 -0
  37. package/scripts/cct-init.sh +282 -0
  38. package/scripts/cct-logs.sh +273 -0
  39. package/scripts/cct-loop.sh +1332 -0
  40. package/scripts/cct-memory.sh +1148 -0
  41. package/scripts/cct-pipeline.sh +3844 -0
  42. package/scripts/cct-prep.sh +1352 -0
  43. package/scripts/cct-ps.sh +168 -0
  44. package/scripts/cct-reaper.sh +390 -0
  45. package/scripts/cct-session.sh +284 -0
  46. package/scripts/cct-status.sh +169 -0
  47. package/scripts/cct-templates.sh +242 -0
  48. package/scripts/cct-upgrade.sh +422 -0
  49. package/scripts/cct-worktree.sh +405 -0
  50. package/scripts/postinstall.mjs +96 -0
  51. package/templates/pipelines/autonomous.json +71 -0
  52. package/templates/pipelines/cost-aware.json +95 -0
  53. package/templates/pipelines/deployed.json +79 -0
  54. package/templates/pipelines/enterprise.json +114 -0
  55. package/templates/pipelines/fast.json +63 -0
  56. package/templates/pipelines/full.json +104 -0
  57. package/templates/pipelines/hotfix.json +63 -0
  58. package/templates/pipelines/standard.json +91 -0
  59. package/tmux/claude-teams-overlay.conf +109 -0
  60. package/tmux/templates/architecture.json +19 -0
  61. package/tmux/templates/bug-fix.json +24 -0
  62. package/tmux/templates/code-review.json +24 -0
  63. package/tmux/templates/devops.json +19 -0
  64. package/tmux/templates/documentation.json +19 -0
  65. package/tmux/templates/exploration.json +19 -0
  66. package/tmux/templates/feature-dev.json +24 -0
  67. package/tmux/templates/full-stack.json +24 -0
  68. package/tmux/templates/migration.json +24 -0
  69. package/tmux/templates/refactor.json +19 -0
  70. package/tmux/templates/security-audit.json +24 -0
  71. package/tmux/templates/testing.json +24 -0
  72. package/tmux/tmux.conf +167 -0
@@ -0,0 +1,3189 @@
1
+ #!/usr/bin/env bash
2
+ # ╔═══════════════════════════════════════════════════════════════════════════╗
3
+ # ║ shipwright daemon — Autonomous GitHub Issue Watcher ║
4
+ # ║ Polls for labeled issues · Spawns pipelines · Manages worktrees ║
5
+ # ╚═══════════════════════════════════════════════════════════════════════════╝
6
+ set -euo pipefail
7
+
8
+ VERSION="1.7.0"
9
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10
+ REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
11
+
12
+ # ─── Colors (matches Seth's tmux theme) ─────────────────────────────────────
13
+ CYAN='\033[38;2;0;212;255m' # #00d4ff — primary accent
14
+ PURPLE='\033[38;2;124;58;237m' # #7c3aed — secondary
15
+ BLUE='\033[38;2;0;102;255m' # #0066ff — tertiary
16
+ GREEN='\033[38;2;74;222;128m' # success
17
+ YELLOW='\033[38;2;250;204;21m' # warning
18
+ RED='\033[38;2;248;113;113m' # error
19
+ DIM='\033[2m'
20
+ BOLD='\033[1m'
21
+ RESET='\033[0m'
22
+
23
# ─── Output Helpers ─────────────────────────────────────────────────────────
# Styled one-line printers; error() writes to stderr, the rest to stdout.
info() { echo -e "${CYAN}${BOLD}▸${RESET} $*"; }
success() { echo -e "${GREEN}${BOLD}✓${RESET} $*"; }
warn() { echo -e "${YELLOW}${BOLD}⚠${RESET} $*"; }
error() { echo -e "${RED}${BOLD}✗${RESET} $*" >&2; }

# Current time as UTC ISO-8601 / as Unix epoch seconds.
now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
now_epoch() { date +%s; }
+
32
epoch_to_iso() {
  # Convert a Unix epoch to UTC ISO-8601 ("%Y-%m-%dT%H:%M:%SZ").
  # Tries BSD date (-r), then GNU date (-d @), then python3; falls back
  # to the epoch origin string if every converter is unavailable.
  local ts="$1"
  local fmt="%Y-%m-%dT%H:%M:%SZ"
  if date -u -r "$ts" +"$fmt" 2>/dev/null; then
    return 0
  fi
  if date -u -d "@$ts" +"$fmt" 2>/dev/null; then
    return 0
  fi
  python3 -c "import datetime; print(datetime.datetime.utcfromtimestamp($ts).strftime('%Y-%m-%dT%H:%M:%SZ'))" 2>/dev/null || \
    echo "1970-01-01T00:00:00Z"
}
39
+
40
format_duration() {
  # Render a second count as "Xh Ym Zs", "Ym Zs", or "Zs" depending on size.
  local total="$1"
  if (( total >= 3600 )); then
    printf "%dh %dm %ds" $(( total / 3600 )) $(( total % 3600 / 60 )) $(( total % 60 ))
  elif (( total >= 60 )); then
    printf "%dm %ds" $(( total / 60 )) $(( total % 60 ))
  else
    printf "%ds" "$total"
  fi
}
50
+
51
# ─── Structured Event Log ──────────────────────────────────────────────────
EVENTS_FILE="${HOME}/.claude-teams/events.jsonl"

# emit_event TYPE [key=value ...]
# Appends one JSON object per line to $EVENTS_FILE. Values matching a
# strict JSON number are emitted unquoted; everything else is quoted with
# backslashes and double quotes escaped (backslashes first, or escaping
# the quotes would itself be re-escaped and the JSON would be invalid).
emit_event() {
  local event_type="$1"
  shift
  local json_fields=""
  local kv key val
  for kv in "$@"; do
    key="${kv%%=*}"
    val="${kv#*=}"
    # Strict JSON number: optional sign, digits, optional .digits.
    # (The old pattern accepted trailing-dot values like "5.", which is
    # not a valid JSON number.)
    if [[ "$val" =~ ^-?[0-9]+(\.[0-9]+)?$ ]]; then
      json_fields="${json_fields},\"${key}\":${val}"
    else
      val="${val//\\/\\\\}"
      val="${val//\"/\\\"}"
      json_fields="${json_fields},\"${key}\":\"${val}\""
    fi
  done
  mkdir -p "${HOME}/.claude-teams"
  echo "{\"ts\":\"$(now_iso)\",\"ts_epoch\":$(now_epoch),\"type\":\"${event_type}\"${json_fields}}" >> "$EVENTS_FILE"
}
71
+
72
# ─── GitHub API Retry with Backoff ────────────────────────────────────────
# gh_retry CMD [ARGS...]
# Runs CMD up to 3 times with exponential backoff (1s, 3s, 9s).
# Rate-limit (403/429) and server errors are logged distinctly from other
# transient failures, but both are retried. Prints the command's combined
# stdout/stderr and returns its real exit code.
gh_retry() {
  local max_retries=3
  local backoff=1
  local attempt=0
  local exit_code=0
  local output=""

  while [[ $attempt -lt $max_retries ]]; do
    attempt=$((attempt + 1))
    # Capture the command's exit code inside the else branch: reading $?
    # after an `if` with no else always yields 0 (the if's own status),
    # which made the original return success after exhausting retries.
    if output=$("$@" 2>&1); then
      echo "$output"
      return 0
    else
      exit_code=$?
    fi

    # Classify the failure for the log line.
    if echo "$output" | grep -qiE "rate limit|403|429|502|503"; then
      daemon_log WARN "gh_retry: rate limit / server error on attempt ${attempt}/${max_retries} — backoff ${backoff}s"
    else
      daemon_log WARN "gh_retry: transient error on attempt ${attempt}/${max_retries} (exit ${exit_code}) — backoff ${backoff}s"
    fi

    if [[ $attempt -lt $max_retries ]]; then
      sleep "$backoff"
      backoff=$((backoff * 3))
    fi
  done

  # Exhausted retries: surface the last output and the real exit code.
  echo "$output"
  return "$exit_code"
}
107
+
108
+ # ─── Defaults ───────────────────────────────────────────────────────────────
109
+ DAEMON_DIR="$HOME/.claude-teams"
110
+ PID_FILE="$DAEMON_DIR/daemon.pid"
111
+ SHUTDOWN_FLAG="$DAEMON_DIR/daemon.shutdown"
112
+ STATE_FILE=""
113
+ LOG_FILE=""
114
+ LOG_DIR=""
115
+ WORKTREE_DIR=""
116
+
117
+ # Config defaults (overridden by daemon-config.json)
118
+ WATCH_LABEL="ready-to-build"
119
+ POLL_INTERVAL=60
120
+ MAX_PARALLEL=2
121
+ PIPELINE_TEMPLATE="autonomous"
122
+ SKIP_GATES=true
123
+ MODEL="opus"
124
+ BASE_BRANCH="main"
125
+ ON_SUCCESS_REMOVE_LABEL="ready-to-build"
126
+ ON_SUCCESS_ADD_LABEL="pipeline/complete"
127
+ ON_SUCCESS_CLOSE_ISSUE=false
128
+ ON_FAILURE_ADD_LABEL="pipeline/failed"
129
+ ON_FAILURE_LOG_LINES=50
130
+ SLACK_WEBHOOK=""
131
+
132
+ # Priority lane defaults
133
+ PRIORITY_LANE=false
134
+ PRIORITY_LANE_LABELS="hotfix,incident,p0,urgent"
135
+ PRIORITY_LANE_MAX=1
136
+
137
+ # Org-wide daemon defaults
138
+ WATCH_MODE="repo"
139
+ ORG=""
140
+ REPO_FILTER=""
141
+
142
+ # Auto-scaling defaults
143
+ AUTO_SCALE=false
144
+ AUTO_SCALE_INTERVAL=5
145
+ MAX_WORKERS=8
146
+ MIN_WORKERS=1
147
+ WORKER_MEM_GB=4
148
+ EST_COST_PER_JOB=5.0
149
+ FLEET_MAX_PARALLEL=""
150
+
151
+ # Patrol defaults (overridden by daemon-config.json or env)
152
+ PATROL_INTERVAL="${PATROL_INTERVAL:-3600}"
153
+ PATROL_MAX_ISSUES="${PATROL_MAX_ISSUES:-5}"
154
+ PATROL_LABEL="${PATROL_LABEL:-auto-patrol}"
155
+ PATROL_DRY_RUN=false
156
+ LAST_PATROL_EPOCH=0
157
+
158
+ # Runtime
159
+ NO_GITHUB=false
160
+ CONFIG_PATH=""
161
+ DETACH=false
162
+ FOLLOW=false
163
+ BACKOFF_SECS=0
164
+
165
# ─── CLI Argument Parsing ──────────────────────────────────────────────────

# First positional argument is the subcommand; default to help.
SUBCOMMAND="${1:-help}"
shift 2>/dev/null || true

while [[ $# -gt 0 ]]; do
  case "$1" in
    --config)
      # Require a value: the original assigned "" and then ran `shift 2`,
      # which fails under `set -e` with no error message when the value
      # is missing.
      if [[ $# -lt 2 ]]; then
        error "--config requires a value"
        exit 1
      fi
      CONFIG_PATH="$2"
      shift 2
      ;;
    --config=*)
      CONFIG_PATH="${1#--config=}"
      shift
      ;;
    --detach|-d)
      DETACH=true
      shift
      ;;
    --follow|-f)
      FOLLOW=true
      shift
      ;;
    --no-github)
      NO_GITHUB=true
      shift
      ;;
    --help|-h)
      SUBCOMMAND="help"
      shift
      ;;
    *)
      # Pass unrecognized flags to subcommands (e.g. metrics --period 7)
      break
      ;;
  esac
done

# Remaining args available as "$@" for subcommands
204
+
205
# ─── Help ───────────────────────────────────────────────────────────────────

# Print the full usage/help screen to stdout. Pure output; reads only the
# color globals, VERSION, and WATCH_LABEL (so help reflects a loaded config).
show_help() {
  echo -e "${CYAN}${BOLD}shipwright daemon${RESET} ${DIM}v${VERSION}${RESET} — Autonomous GitHub Issue Watcher"
  echo ""
  echo -e "${BOLD}USAGE${RESET}"
  echo -e " ${CYAN}shipwright daemon${RESET} <command> [options]"
  echo ""
  echo -e "${BOLD}COMMANDS${RESET}"
  echo -e " ${CYAN}start${RESET} [--config path] [--detach] Start the issue watcher"
  echo -e " ${CYAN}stop${RESET} Graceful shutdown via PID file"
  echo -e " ${CYAN}status${RESET} Show active pipelines and queue"
  echo -e " ${CYAN}init${RESET} Generate default daemon-config.json"
  echo -e " ${CYAN}logs${RESET} [--follow] Tail daemon activity log"
  echo -e " ${CYAN}metrics${RESET} [--period N] [--json] DORA/DX metrics dashboard"
  echo -e " ${CYAN}triage${RESET} Show issue triage scores and priority"
  echo -e " ${CYAN}patrol${RESET} [--once] [--dry-run] Run proactive codebase patrol"
  echo ""
  echo -e "${BOLD}OPTIONS${RESET}"
  echo -e " ${CYAN}--config${RESET} <path> Path to daemon-config.json ${DIM}(default: .claude/daemon-config.json)${RESET}"
  echo -e " ${CYAN}--detach${RESET}, ${CYAN}-d${RESET} Run in a detached tmux session"
  echo -e " ${CYAN}--follow${RESET}, ${CYAN}-f${RESET} Follow log output (with ${CYAN}logs${RESET} command)"
  echo -e " ${CYAN}--no-github${RESET} Disable GitHub API calls (dry-run mode)"
  echo ""
  echo -e "${BOLD}EXAMPLES${RESET}"
  echo -e " ${DIM}shipwright daemon init${RESET} # Generate config file"
  echo -e " ${DIM}shipwright daemon start${RESET} # Start watching in foreground"
  echo -e " ${DIM}shipwright daemon start --detach${RESET} # Start in background tmux session"
  echo -e " ${DIM}shipwright daemon start --config my-config.json${RESET} # Custom config"
  echo -e " ${DIM}shipwright daemon status${RESET} # Show active jobs and queue"
  echo -e " ${DIM}shipwright daemon stop${RESET} # Graceful shutdown"
  echo -e " ${DIM}shipwright daemon logs --follow${RESET} # Tail the daemon log"
  echo -e " ${DIM}shipwright daemon metrics${RESET} # DORA + DX metrics (last 7 days)"
  echo -e " ${DIM}shipwright daemon metrics --period 30${RESET} # Last 30 days"
  echo -e " ${DIM}shipwright daemon metrics --json${RESET} # JSON output for dashboards"
  echo -e " ${DIM}shipwright daemon triage${RESET} # Show issue triage scores"
  echo -e " ${DIM}shipwright daemon patrol${RESET} # Run proactive codebase patrol"
  echo -e " ${DIM}shipwright daemon patrol --dry-run${RESET} # Show what patrol would find"
  echo -e " ${DIM}shipwright daemon patrol --once${RESET} # Run patrol once and exit"
  echo ""
  echo -e "${BOLD}CONFIG FILE${RESET} ${DIM}(.claude/daemon-config.json)${RESET}"
  echo -e " ${DIM}watch_label${RESET} GitHub label to watch for ${DIM}(default: ready-to-build)${RESET}"
  echo -e " ${DIM}poll_interval${RESET} Seconds between polls ${DIM}(default: 60)${RESET}"
  echo -e " ${DIM}max_parallel${RESET} Max concurrent pipeline jobs ${DIM}(default: 2)${RESET}"
  echo -e " ${DIM}pipeline_template${RESET} Pipeline template to use ${DIM}(default: autonomous)${RESET}"
  echo -e " ${DIM}base_branch${RESET} Branch to create worktrees from ${DIM}(default: main)${RESET}"
  echo ""
  echo -e " ${BOLD}Priority Lanes${RESET}"
  echo -e " ${DIM}priority_lane${RESET} Enable priority bypass queue ${DIM}(default: false)${RESET}"
  echo -e " ${DIM}priority_lane_labels${RESET} Labels that trigger priority ${DIM}(default: hotfix,incident,p0,urgent)${RESET}"
  echo -e " ${DIM}priority_lane_max${RESET} Max extra slots for priority ${DIM}(default: 1)${RESET}"
  echo ""
  echo -e " ${BOLD}Org-Wide Mode${RESET}"
  echo -e " ${DIM}watch_mode${RESET} \"repo\" or \"org\" ${DIM}(default: repo)${RESET}"
  echo -e " ${DIM}org${RESET} GitHub org name ${DIM}(required for org mode)${RESET}"
  echo -e " ${DIM}repo_filter${RESET} Regex filter for repo names ${DIM}(e.g. \"api-.*|web-.*\")${RESET}"
  echo ""
  echo -e "${BOLD}HOW IT WORKS${RESET}"
  echo -e " 1. Polls GitHub for issues with the ${CYAN}${WATCH_LABEL}${RESET} label"
  echo -e " 2. For each new issue, creates a git worktree and spawns a pipeline"
  echo -e " 3. On success: removes label, adds ${GREEN}pipeline/complete${RESET}, comments on issue"
  echo -e " 4. On failure: adds ${RED}pipeline/failed${RESET}, comments with log tail"
  echo -e " 5. Respects ${CYAN}max_parallel${RESET} limit — excess issues are queued"
  echo -e " 6. Priority lane: ${CYAN}hotfix${RESET}/${CYAN}incident${RESET} issues bypass the queue"
  echo -e " 7. Org mode: watches issues across all repos in a GitHub org"
  echo ""
  echo -e "${DIM}Docs: https://sethdford.github.io/shipwright | GitHub: https://github.com/sethdford/shipwright${RESET}"
}
273
+
274
# ─── Config Loading ─────────────────────────────────────────────────────────

# Read daemon-config.json (path from --config or the default location) and
# overwrite the global config variables. Missing file is non-fatal: defaults
# declared above remain in effect.
# NOTE(review): several variables below (HEALTH_STALE_TIMEOUT, PRIORITY_LABELS,
# DEGRADATION_*, AUTO_TEMPLATE, MAX_RETRIES, …) are assigned ONLY here — when
# no config file exists they stay unset, which would trip `set -u` at first
# use. Confirm every consumer has a ${VAR:-default} fallback.
load_config() {
  local config_file="${CONFIG_PATH:-.claude/daemon-config.json}"

  if [[ ! -f "$config_file" ]]; then
    warn "Config not found at $config_file — using defaults"
    warn "Run ${CYAN}shipwright daemon init${RESET} to generate a config file"
    return 0
  fi

  info "Loading config: ${DIM}${config_file}${RESET}"

  # Each value falls back to its documented default via jq's // operator.
  WATCH_LABEL=$(jq -r '.watch_label // "ready-to-build"' "$config_file")
  POLL_INTERVAL=$(jq -r '.poll_interval // 60' "$config_file")
  MAX_PARALLEL=$(jq -r '.max_parallel // 2' "$config_file")
  PIPELINE_TEMPLATE=$(jq -r '.pipeline_template // "autonomous"' "$config_file")
  SKIP_GATES=$(jq -r '.skip_gates // true' "$config_file")
  MODEL=$(jq -r '.model // "opus"' "$config_file")
  BASE_BRANCH=$(jq -r '.base_branch // "main"' "$config_file")

  # on_success settings
  ON_SUCCESS_REMOVE_LABEL=$(jq -r '.on_success.remove_label // "ready-to-build"' "$config_file")
  ON_SUCCESS_ADD_LABEL=$(jq -r '.on_success.add_label // "pipeline/complete"' "$config_file")
  ON_SUCCESS_CLOSE_ISSUE=$(jq -r '.on_success.close_issue // false' "$config_file")

  # on_failure settings
  ON_FAILURE_ADD_LABEL=$(jq -r '.on_failure.add_label // "pipeline/failed"' "$config_file")
  ON_FAILURE_LOG_LINES=$(jq -r '.on_failure.comment_log_lines // 50' "$config_file")

  # notifications
  SLACK_WEBHOOK=$(jq -r '.notifications.slack_webhook // ""' "$config_file")
  if [[ "$SLACK_WEBHOOK" == "null" ]]; then SLACK_WEBHOOK=""; fi

  # health monitoring
  HEALTH_STALE_TIMEOUT=$(jq -r '.health.stale_timeout_s // 1800' "$config_file")

  # priority labels
  PRIORITY_LABELS=$(jq -r '.priority_labels // "urgent,p0,high,p1,normal,p2,low,p3"' "$config_file")

  # degradation alerting
  DEGRADATION_WINDOW=$(jq -r '.alerts.degradation_window // 5' "$config_file")
  DEGRADATION_CFR_THRESHOLD=$(jq -r '.alerts.cfr_threshold // 30' "$config_file")
  DEGRADATION_SUCCESS_THRESHOLD=$(jq -r '.alerts.success_threshold // 50' "$config_file")

  # patrol settings
  PATROL_INTERVAL=$(jq -r '.patrol.interval // 3600' "$config_file")
  PATROL_MAX_ISSUES=$(jq -r '.patrol.max_issues // 5' "$config_file")
  PATROL_LABEL=$(jq -r '.patrol.label // "auto-patrol"' "$config_file")

  # adaptive template selection
  AUTO_TEMPLATE=$(jq -r '.auto_template // false' "$config_file")
  TEMPLATE_MAP=$(jq -r '.template_map // "{}" | @json' "$config_file" 2>/dev/null || echo '"{}"')

  # auto-retry with escalation
  MAX_RETRIES=$(jq -r '.max_retries // 2' "$config_file")
  RETRY_ESCALATION=$(jq -r '.retry_escalation // true' "$config_file")

  # self-optimization
  SELF_OPTIMIZE=$(jq -r '.self_optimize // false' "$config_file")
  OPTIMIZE_INTERVAL=$(jq -r '.optimize_interval // 10' "$config_file")

  # gh_retry: enable retry wrapper on critical GitHub API calls
  GH_RETRY_ENABLED=$(jq -r '.gh_retry // true' "$config_file")

  # stale state reaper: clean old worktrees, artifacts, state entries
  STALE_REAPER_ENABLED=$(jq -r '.stale_reaper // true' "$config_file")
  STALE_REAPER_INTERVAL=$(jq -r '.stale_reaper_interval // 10' "$config_file")
  STALE_REAPER_AGE_DAYS=$(jq -r '.stale_reaper_age_days // 7' "$config_file")

  # priority lane settings
  PRIORITY_LANE=$(jq -r '.priority_lane // false' "$config_file")
  PRIORITY_LANE_LABELS=$(jq -r '.priority_lane_labels // "hotfix,incident,p0,urgent"' "$config_file")
  PRIORITY_LANE_MAX=$(jq -r '.priority_lane_max // 1' "$config_file")

  # org-wide daemon mode
  WATCH_MODE=$(jq -r '.watch_mode // "repo"' "$config_file")
  ORG=$(jq -r '.org // ""' "$config_file")
  if [[ "$ORG" == "null" ]]; then ORG=""; fi
  REPO_FILTER=$(jq -r '.repo_filter // ""' "$config_file")
  if [[ "$REPO_FILTER" == "null" ]]; then REPO_FILTER=""; fi

  # auto-scaling
  AUTO_SCALE=$(jq -r '.auto_scale // false' "$config_file")
  AUTO_SCALE_INTERVAL=$(jq -r '.auto_scale_interval // 5' "$config_file")
  MAX_WORKERS=$(jq -r '.max_workers // 8' "$config_file")
  MIN_WORKERS=$(jq -r '.min_workers // 1' "$config_file")
  WORKER_MEM_GB=$(jq -r '.worker_mem_gb // 4' "$config_file")
  EST_COST_PER_JOB=$(jq -r '.estimated_cost_per_job_usd // 5.0' "$config_file")

  success "Config loaded"
}
366
+
367
# ─── Directory Setup ────────────────────────────────────────────────────────

# Derive all runtime paths from the daemon home directory and ensure the
# directories exist. Must run before anything writes to LOG_FILE/STATE_FILE.
setup_dirs() {
  STATE_FILE="${DAEMON_DIR}/daemon-state.json"
  LOG_FILE="${DAEMON_DIR}/daemon.log"
  LOG_DIR="${DAEMON_DIR}/logs"
  WORKTREE_DIR=".worktrees"

  mkdir -p "$DAEMON_DIR" "$LOG_DIR"
}
379
+
380
# ─── Logging ─────────────────────────────────────────────────────────────────

# daemon_log LEVEL MSG...
# Appends a timestamped line to $LOG_FILE and mirrors it to stdout/stderr
# via the styled helpers. Safe to call before setup_dirs(): when LOG_FILE
# is still empty the file write is skipped instead of producing a redirect
# error that would kill the daemon under `set -euo pipefail`.
daemon_log() {
  local level="$1"
  shift
  local msg="$*"
  local ts
  ts=$(now_iso)
  if [[ -n "${LOG_FILE:-}" ]]; then
    echo "[$ts] [$level] $msg" >> "$LOG_FILE"
  fi

  # Also print to stdout (stderr for ERROR, via error()).
  case "$level" in
    INFO) info "$msg" ;;
    SUCCESS) success "$msg" ;;
    WARN) warn "$msg" ;;
    ERROR) error "$msg" ;;
  esac
}
398
+
399
# ─── Notification Helper ────────────────────────────────────────────────────

# notify TITLE MESSAGE [LEVEL]
# Best-effort push to Slack (config webhook) and/or a generic webhook
# (SHIPWRIGHT_WEBHOOK_URL, legacy CCT_WEBHOOK_URL). Delivery failures are
# swallowed so notifications never block the daemon.
notify() {
  local title="$1" message="$2" level="${3:-info}"
  local icon="🔔"
  case "$level" in
    success) icon="✅" ;;
    error) icon="❌" ;;
    warn) icon="⚠️" ;;
  esac

  # Slack webhook
  if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
    local slack_payload
    slack_payload=$(jq -n \
      --arg text "${icon} *${title}*\n${message}" \
      '{text: $text}')
    curl -sf -X POST -H 'Content-Type: application/json' \
      -d "$slack_payload" "$SLACK_WEBHOOK" >/dev/null 2>&1 || true
  fi

  # Custom webhook (env var SHIPWRIGHT_WEBHOOK_URL, with CCT_WEBHOOK_URL fallback)
  local hook_url="${SHIPWRIGHT_WEBHOOK_URL:-${CCT_WEBHOOK_URL:-}}"
  if [[ -n "$hook_url" ]]; then
    local hook_payload
    hook_payload=$(jq -n \
      --arg title "$title" --arg message "$message" \
      --arg level "$level" \
      '{title:$title, message:$message, level:$level}')
    curl -sf -X POST -H 'Content-Type: application/json' \
      -d "$hook_payload" "$hook_url" >/dev/null 2>&1 || true
  fi
}
433
+
434
# ─── Pre-flight Checks ──────────────────────────────────────────────────────

# Verify the environment before the daemon starts: required/optional tools,
# git repo state, GitHub auth, the pipeline script, and free disk space.
# Prints a styled checklist; returns 1 if any required check failed.
preflight_checks() {
  local errors=0

  echo -e "${PURPLE}${BOLD}━━━ Pre-flight Checks ━━━${RESET}"
  echo ""

  # 1. Required tools
  local required_tools=("git" "jq" "gh" "claude")
  local optional_tools=("tmux" "curl")

  for tool in "${required_tools[@]}"; do
    if command -v "$tool" &>/dev/null; then
      echo -e " ${GREEN}✓${RESET} $tool"
    else
      echo -e " ${RED}✗${RESET} $tool ${RED}(required)${RESET}"
      errors=$((errors + 1))
    fi
  done

  # Optional tools are reported but never counted as errors.
  for tool in "${optional_tools[@]}"; do
    if command -v "$tool" &>/dev/null; then
      echo -e " ${GREEN}✓${RESET} $tool"
    else
      echo -e " ${DIM}○${RESET} $tool ${DIM}(optional — some features disabled)${RESET}"
    fi
  done

  # 2. Git state
  echo ""
  if git rev-parse --is-inside-work-tree &>/dev/null; then
    echo -e " ${GREEN}✓${RESET} Inside git repo"
  else
    echo -e " ${RED}✗${RESET} Not inside a git repository"
    errors=$((errors + 1))
  fi

  # Check base branch exists
  if git rev-parse --verify "$BASE_BRANCH" &>/dev/null; then
    echo -e " ${GREEN}✓${RESET} Base branch: $BASE_BRANCH"
  else
    echo -e " ${RED}✗${RESET} Base branch not found: $BASE_BRANCH"
    errors=$((errors + 1))
  fi

  # 3. GitHub auth (required for daemon — it needs to poll issues)
  if [[ "$NO_GITHUB" != "true" ]]; then
    if gh auth status &>/dev/null 2>&1; then
      echo -e " ${GREEN}✓${RESET} GitHub authenticated"
    else
      echo -e " ${RED}✗${RESET} GitHub not authenticated (required for daemon)"
      errors=$((errors + 1))
    fi
  else
    echo -e " ${DIM}○${RESET} GitHub disabled (--no-github)"
  fi

  # 4. Pipeline script
  if [[ -x "$SCRIPT_DIR/cct-pipeline.sh" ]]; then
    echo -e " ${GREEN}✓${RESET} cct-pipeline.sh available"
  else
    echo -e " ${RED}✗${RESET} cct-pipeline.sh not found at $SCRIPT_DIR"
    errors=$((errors + 1))
  fi

  # 5. Disk space check (warn if < 1GB free)
  # Low disk is a warning only — it does not increment $errors here.
  local free_space_kb
  free_space_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
  if [[ -n "$free_space_kb" ]] && [[ "$free_space_kb" -lt 1048576 ]] 2>/dev/null; then
    echo -e " ${YELLOW}⚠${RESET} Low disk space: $(( free_space_kb / 1024 ))MB free"
  fi

  echo ""

  if [[ "$errors" -gt 0 ]]; then
    error "Pre-flight failed: $errors error(s)"
    return 1
  fi

  success "Pre-flight passed"
  echo ""
  return 0
}
518
+
519
# ─── State Management ───────────────────────────────────────────────────────

# Atomically replace $STATE_FILE: stage the new content in a PID-suffixed
# temp file beside the target, then rename over it so concurrent readers
# never observe a partially written file.
atomic_write_state() {
  local new_content="$1"
  local staging="${STATE_FILE}.tmp.$$"
  printf '%s\n' "$new_content" > "$staging"
  mv "$staging" "$STATE_FILE"
}
528
+
529
# Create $STATE_FILE with an empty schema on first run, or refresh the
# pid/started_at fields when a state file already exists (preserving the
# job lists so a restart does not forget in-flight work).
init_state() {
  if [[ ! -f "$STATE_FILE" ]]; then
    jq -n \
      --arg pid "$$" \
      --arg started "$(now_iso)" \
      --argjson interval "$POLL_INTERVAL" \
      --argjson max_parallel "$MAX_PARALLEL" \
      --arg label "$WATCH_LABEL" \
      --arg watch_mode "$WATCH_MODE" \
      '{
        version: 1,
        pid: ($pid | tonumber),
        started_at: $started,
        last_poll: null,
        config: {
          poll_interval: $interval,
          max_parallel: $max_parallel,
          watch_label: $label,
          watch_mode: $watch_mode
        },
        active_jobs: [],
        queued: [],
        completed: [],
        retry_counts: {},
        priority_lane_active: []
      }' > "$STATE_FILE"
  else
    # Update PID and start time in existing state
    local tmp
    tmp=$(jq \
      --arg pid "$$" \
      --arg started "$(now_iso)" \
      '.pid = ($pid | tonumber) | .started_at = $started' \
      "$STATE_FILE")
    atomic_write_state "$tmp"
  fi
}
566
+
567
# update_state_field FIELD VALUE — set a top-level state field and rewrite
# the state file atomically.
# NOTE(review): $field is interpolated directly into the jq program, so
# callers must pass trusted, well-formed field names. Also, --arg always
# binds VALUE as a JSON *string* — numeric fields would be stringified;
# confirm all callers expect string values here.
update_state_field() {
  local field="$1" value="$2"
  local tmp
  tmp=$(jq --arg val "$value" ".${field} = \$val" "$STATE_FILE")
  atomic_write_state "$tmp"
}
573
+
574
# ─── Inflight Check ─────────────────────────────────────────────────────────

# daemon_is_inflight ISSUE_NUM
# Succeed (0) when the issue is already an active job or waiting in the
# queue; fail (1) otherwise or when no state file exists yet.
daemon_is_inflight() {
  local issue="$1"

  if [[ ! -f "$STATE_FILE" ]]; then
    return 1
  fi

  # Single jq pass over both the active job list and the queue.
  local hit
  hit=$(jq -r --argjson num "$issue" \
    '(.active_jobs[] | select(.issue == $num) | .issue), (.queued[] | select(. == $num))' \
    "$STATE_FILE" 2>/dev/null || true)

  if [[ -n "$hit" ]]; then
    return 0
  fi

  return 1
}
603
+
604
# ─── Active Job Count ───────────────────────────────────────────────────────

# Print the number of currently active pipeline jobs; 0 when there is no
# state file yet or it cannot be parsed.
get_active_count() {
  if [[ -f "$STATE_FILE" ]]; then
    jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo 0
  else
    echo 0
  fi
}
613
+
614
# ─── Queue Management ───────────────────────────────────────────────────────

# Append ISSUE_NUM to the FIFO queue, de-duplicating entries, and persist
# the state atomically.
enqueue_issue() {
  local issue="$1"
  local updated
  updated=$(jq --argjson num "$issue" \
    '.queued += [$num] | .queued |= unique' \
    "$STATE_FILE")
  atomic_write_state "$updated"
  daemon_log INFO "Queued issue #${issue} (at capacity)"
}
625
+
626
# Pop the head of the queue: prints the issue number and removes it from
# state. Prints nothing when the queue is empty or no state file exists.
dequeue_next() {
  if [[ ! -f "$STATE_FILE" ]]; then
    return
  fi

  local head
  head=$(jq -r '.queued[0] // empty' "$STATE_FILE" 2>/dev/null || true)
  if [[ -n "$head" ]]; then
    local remainder
    remainder=$(jq '.queued = .queued[1:]' "$STATE_FILE")
    atomic_write_state "$remainder"
    echo "$head"
  fi
}
641
+
642
# ─── Priority Lane Helpers ─────────────────────────────────────────────────

# is_priority_issue LABELS_CSV
# Succeed when any configured priority-lane label (PRIORITY_LANE_LABELS,
# comma-separated) appears in the issue's comma-separated label list.
is_priority_issue() {
  local labels_csv="$1"
  local lane_label
  local IFS=','
  local -a lane_labels
  read -ra lane_labels <<< "$PRIORITY_LANE_LABELS"
  for lane_label in "${lane_labels[@]}"; do
    # Trim ALL surrounding whitespace. The original used ${var## } /
    # ${var%% }, which strip at most ONE space, so config entries like
    # "hotfix,  urgent" never matched.
    lane_label="${lane_label#"${lane_label%%[![:space:]]*}"}"
    lane_label="${lane_label%"${lane_label##*[![:space:]]}"}"
    if [[ ",$labels_csv," == *",$lane_label,"* ]]; then
      return 0
    fi
  done
  return 1
}
659
+
660
# Print how many priority-lane jobs are currently tracked; 0 when no state
# file exists or the field is missing/unreadable.
get_priority_active_count() {
  if [[ -f "$STATE_FILE" ]]; then
    jq -r '.priority_lane_active // [] | length' "$STATE_FILE" 2>/dev/null || echo 0
  else
    echo 0
  fi
}
667
+
668
# Record ISSUE_NUM in the priority-lane active set (created on demand,
# kept unique) and persist the state atomically.
track_priority_job() {
  local issue="$1"
  local updated
  updated=$(jq --argjson num "$issue" \
    '.priority_lane_active = ((.priority_lane_active // []) + [$num] | unique)' \
    "$STATE_FILE")
  atomic_write_state "$updated"
}
676
+
677
# Remove ISSUE_NUM from the priority-lane active set; silently no-ops when
# no state file exists yet.
untrack_priority_job() {
  local issue="$1"
  if [[ ! -f "$STATE_FILE" ]]; then
    return
  fi
  local updated
  updated=$(jq --argjson num "$issue" \
    '.priority_lane_active = [(.priority_lane_active // [])[] | select(. != $num)]' \
    "$STATE_FILE")
  atomic_write_state "$updated"
}
688
+
689
# ─── Org-Wide Repo Management ─────────────────────────────────────────────

# daemon_ensure_repo OWNER REPO
# Shallow-clone the repo under $DAEMON_DIR/repos (or fast-forward an
# existing clone) and print its path on stdout.
# Callers capture stdout via $( … ), so ALL logging is redirected to
# stderr here — the original let daemon_log write to stdout, which
# corrupted the captured path with log lines.
daemon_ensure_repo() {
  local owner="$1" repo="$2"
  local repo_dir="$DAEMON_DIR/repos/${owner}/${repo}"

  if [[ -d "$repo_dir/.git" ]]; then
    # Pull latest; a failed pull is non-fatal (use the existing clone).
    (cd "$repo_dir" && git pull --ff-only 2>/dev/null) || {
      daemon_log WARN "Failed to update ${owner}/${repo} — using existing clone" >&2
    }
  else
    mkdir -p "$DAEMON_DIR/repos/${owner}"
    if ! git clone --depth=1 "https://github.com/${owner}/${repo}.git" "$repo_dir" 2>/dev/null; then
      daemon_log ERROR "Failed to clone ${owner}/${repo}" >&2
      return 1
    fi
    daemon_log INFO "Cloned ${owner}/${repo} to ${repo_dir}" >&2
  fi

  # The path is the ONLY thing written to stdout.
  echo "$repo_dir"
}
711
+
712
# ─── Spawn Pipeline ─────────────────────────────────────────────────────────

# daemon_spawn_pipeline ISSUE_NUM [ISSUE_TITLE] [OWNER/REPO]
# Prepare a working directory (org-mode clone or local git worktree),
# launch cct-pipeline.sh in the background, record the job in state, emit
# a structured event, and comment on the GitHub issue. Returns 1 when the
# working directory cannot be prepared or disk space is low.
daemon_spawn_pipeline() {
  local issue_num="$1"
  local issue_title="${2:-}"
  local repo_full_name="${3:-}" # owner/repo (org mode only)

  daemon_log INFO "Spawning pipeline for issue #${issue_num}: ${issue_title}"

  # Check disk space before spawning
  local free_space_kb
  free_space_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
  if [[ -n "$free_space_kb" ]] && [[ "$free_space_kb" -lt 1048576 ]] 2>/dev/null; then
    daemon_log WARN "Low disk space ($(( free_space_kb / 1024 ))MB) — skipping issue #${issue_num}"
    return 1
  fi

  local work_dir="" branch_name="daemon/issue-${issue_num}"

  if [[ "$WATCH_MODE" == "org" && -n "$repo_full_name" ]]; then
    # Org mode: use cloned repo directory
    local owner="${repo_full_name%%/*}"
    local repo="${repo_full_name##*/}"
    # NOTE(review): daemon_ensure_repo must write logging to stderr only,
    # or this captured path gets log lines prepended — verify.
    work_dir=$(daemon_ensure_repo "$owner" "$repo") || return 1

    # Create branch in the cloned repo
    (
      cd "$work_dir"
      git checkout -B "$branch_name" "${BASE_BRANCH}" 2>/dev/null
    ) || {
      daemon_log ERROR "Failed to create branch in ${repo_full_name}"
      return 1
    }
    daemon_log INFO "Org mode: working in ${work_dir} (${repo_full_name})"
  else
    # Standard mode: use git worktree
    work_dir="${WORKTREE_DIR}/daemon-issue-${issue_num}"

    # Clean up stale worktree if it exists
    if [[ -d "$work_dir" ]]; then
      git worktree remove "$work_dir" --force 2>/dev/null || true
    fi
    git branch -D "$branch_name" 2>/dev/null || true

    if ! git worktree add "$work_dir" -b "$branch_name" "$BASE_BRANCH" 2>/dev/null; then
      daemon_log ERROR "Failed to create worktree for issue #${issue_num}"
      return 1
    fi
    daemon_log INFO "Worktree created at ${work_dir}"
  fi

  # Build pipeline args
  local pipeline_args=("start" "--issue" "$issue_num" "--pipeline" "$PIPELINE_TEMPLATE")
  if [[ "$SKIP_GATES" == "true" ]]; then
    pipeline_args+=("--skip-gates")
  fi
  if [[ -n "$MODEL" ]]; then
    pipeline_args+=("--model" "$MODEL")
  fi
  if [[ "$NO_GITHUB" == "true" ]]; then
    pipeline_args+=("--no-github")
  fi

  # Run pipeline in work directory (background); all output goes to a
  # per-issue log under $LOG_DIR.
  (
    cd "$work_dir"
    "$SCRIPT_DIR/cct-pipeline.sh" "${pipeline_args[@]}"
  ) > "$LOG_DIR/issue-${issue_num}.log" 2>&1 &
  local pid=$!

  daemon_log INFO "Pipeline started for issue #${issue_num} (PID: ${pid})"

  # Track the job (include repo for org mode)
  daemon_track_job "$issue_num" "$pid" "$work_dir" "$issue_title" "$repo_full_name"
  emit_event "daemon.spawn" "issue=$issue_num" "pid=$pid" "repo=${repo_full_name:-local}"

  # Comment on the issue
  if [[ "$NO_GITHUB" != "true" ]]; then
    # NOTE(review): expanding "${gh_args[@]}" on an EMPTY array errors
    # under `set -u` in bash < 4.4 — confirm the supported bash versions.
    local gh_args=()
    if [[ -n "$repo_full_name" ]]; then
      gh_args+=("--repo" "$repo_full_name")
    fi
    gh issue comment "$issue_num" "${gh_args[@]}" --body "## 🤖 Pipeline Started

**Daemon** picked up this issue and started an autonomous pipeline.

| Field | Value |
|-------|-------|
| Template | \`${PIPELINE_TEMPLATE}\` |
| Branch | \`${branch_name}\` |
| Repo | \`${repo_full_name:-local}\` |
| Started | $(now_iso) |

_Progress updates will be posted as the pipeline advances._" 2>/dev/null || true
  fi
}
808
+
809
# ─── Track Job ───────────────────────────────────────────────────────────────

# Append a record for a newly spawned pipeline to .active_jobs in the daemon
# state file and persist it atomically.
#   $1 issue number   $2 pid   $3 worktree path   $4 title (opt)   $5 repo (opt)
daemon_track_job() {
  local issue_num="$1" pid="$2" worktree="$3" title="${4:-}" repo="${5:-}"
  local updated_state
  updated_state=$(jq \
    --argjson num "$issue_num" \
    --argjson pid "$pid" \
    --arg wt "$worktree" \
    --arg title "$title" \
    --arg started "$(now_iso)" \
    --arg repo "$repo" \
    '.active_jobs += [{
      issue: $num,
      pid: $pid,
      worktree: $wt,
      title: $title,
      started_at: $started,
      repo: $repo
    }]' \
    "$STATE_FILE")
  atomic_write_state "$updated_state"
}
832
+
833
# ─── Reap Completed Jobs ────────────────────────────────────────────────────

#######################################
# Scan .active_jobs for pipelines whose process has exited, dispatch the
# success/failure handlers, clean up their worktrees, and dequeue the next
# waiting issue.
# Globals: STATE_FILE (read; rewritten via atomic_write_state)
#######################################
daemon_reap_completed() {
  if [[ ! -f "$STATE_FILE" ]]; then
    return
  fi

  local jobs
  jobs=$(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)
  if [[ -z "$jobs" ]]; then
    return
  fi

  while IFS= read -r job; do
    local issue_num pid worktree
    issue_num=$(echo "$job" | jq -r '.issue')
    pid=$(echo "$job" | jq -r '.pid')
    worktree=$(echo "$job" | jq -r '.worktree')

    # Still running — leave it alone.
    if kill -0 "$pid" 2>/dev/null; then
      continue
    fi

    # Process is dead — determine exit code.
    # NOTE(review): `wait` only works for children of this shell; after a
    # daemon restart the PID is not ours and this reports non-zero (treated
    # as failure) — confirm that is the intended behavior.
    local exit_code=0
    wait "$pid" 2>/dev/null || exit_code=$?

    # Compute duration. Epochs default to 0 so the checks further down are
    # safe (and do not trip `set -u`) when started_at is missing — previously
    # start_epoch/end_epoch were only assigned inside this `if`.
    local started_at duration_str=""
    local start_epoch=0 end_epoch=0
    started_at=$(echo "$job" | jq -r '.started_at // empty')
    if [[ -n "$started_at" ]]; then
      # macOS date -j for parsing ISO dates (TZ=UTC to parse Z-suffix correctly)
      start_epoch=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started_at" +%s 2>/dev/null || date -d "$started_at" +%s 2>/dev/null || echo "0")
      end_epoch=$(now_epoch)
      if [[ "$start_epoch" -gt 0 ]]; then
        duration_str=$(format_duration $((end_epoch - start_epoch)))
      fi
    fi

    local result_str="success"
    [[ "$exit_code" -ne 0 ]] && result_str="failure"
    local dur_s=0
    [[ "$start_epoch" -gt 0 ]] && dur_s=$((end_epoch - start_epoch))
    emit_event "daemon.reap" "issue=$issue_num" "result=$result_str" "duration_s=$dur_s"

    if [[ "$exit_code" -eq 0 ]]; then
      daemon_on_success "$issue_num" "$duration_str"
    else
      daemon_on_failure "$issue_num" "$exit_code" "$duration_str"
    fi

    # Remove from active_jobs and priority lane tracking
    local tmp
    tmp=$(jq --argjson num "$issue_num" \
      '.active_jobs = [.active_jobs[] | select(.issue != $num)]' \
      "$STATE_FILE")
    atomic_write_state "$tmp"
    untrack_priority_job "$issue_num"

    # Clean up worktree (skip for org-mode clones — they persist)
    local job_repo
    job_repo=$(echo "$job" | jq -r '.repo // ""')
    if [[ -z "$job_repo" ]] && [[ -d "$worktree" ]]; then
      git worktree remove "$worktree" --force 2>/dev/null || true
      daemon_log INFO "Cleaned worktree: $worktree"
      git branch -D "daemon/issue-${issue_num}" 2>/dev/null || true
    elif [[ -n "$job_repo" ]]; then
      daemon_log INFO "Org-mode: preserving clone for ${job_repo}"
    fi

    # Dequeue next issue if available.
    # NOTE(review): no title/repo is re-passed here, so a dequeued org-mode
    # issue would spawn in standard (worktree) mode — verify dequeue_next
    # only yields local-repo issues.
    local next_issue
    next_issue=$(dequeue_next)
    if [[ -n "$next_issue" ]]; then
      daemon_log INFO "Dequeuing issue #${next_issue}"
      daemon_spawn_pipeline "$next_issue"
    fi
  done <<< "$jobs"
}
913
+
914
# ─── Success Handler ────────────────────────────────────────────────────────

# Record a successful pipeline run, update GitHub labels/comments, and fire
# a desktop notification.
#   $1 issue number   $2 human-readable duration (opt)
daemon_on_success() {
  local issue_num="$1" duration="${2:-}"

  daemon_log SUCCESS "Pipeline completed for issue #${issue_num} (${duration:-unknown})"

  # Persist the outcome into the state file's completed list.
  local new_state
  new_state=$(jq \
    --argjson num "$issue_num" \
    --arg result "success" \
    --arg dur "${duration:-unknown}" \
    --arg completed_at "$(now_iso)" \
    '.completed += [{
      issue: $num,
      result: $result,
      duration: $dur,
      completed_at: $completed_at
    }]' \
    "$STATE_FILE")
  atomic_write_state "$new_state"

  if [[ "$NO_GITHUB" != "true" ]]; then
    # Swap the watch label for the success label.
    gh issue edit "$issue_num" \
      --remove-label "$ON_SUCCESS_REMOVE_LABEL" \
      --add-label "$ON_SUCCESS_ADD_LABEL" 2>/dev/null || true

    # Leave a summary comment on the issue.
    local comment_body="## ✅ Pipeline Complete

The autonomous pipeline finished successfully.

| Field | Value |
|-------|-------|
| Duration | ${duration:-unknown} |
| Completed | $(now_iso) |

Check the associated PR for the implementation."
    gh issue comment "$issue_num" --body "$comment_body" 2>/dev/null || true

    # Optionally close the issue.
    if [[ "$ON_SUCCESS_CLOSE_ISSUE" == "true" ]]; then
      gh issue close "$issue_num" 2>/dev/null || true
    fi
  fi

  notify "Pipeline Complete — Issue #${issue_num}" \
    "Duration: ${duration:-unknown}" "success"
}
964
+
965
# ─── Failure Handler ────────────────────────────────────────────────────────

#######################################
# Handle a failed pipeline run: record the failure, optionally auto-retry
# with an escalated strategy, and otherwise report the final failure on
# GitHub and via desktop notification.
# Arguments:
#   $1 - issue number
#   $2 - pipeline exit code (default 1)
#   $3 - human-readable duration (optional)
#######################################
daemon_on_failure() {
  local issue_num="$1" exit_code="${2:-1}" duration="${3:-}"
  local max_retries="${MAX_RETRIES:-2}"   # hoisted: was re-expanded in six places

  daemon_log ERROR "Pipeline failed for issue #${issue_num} (exit: ${exit_code}, ${duration:-unknown})"

  # Record in completed list
  local tmp
  tmp=$(jq \
    --argjson num "$issue_num" \
    --arg result "failed" \
    --argjson code "$exit_code" \
    --arg dur "${duration:-unknown}" \
    --arg completed_at "$(now_iso)" \
    '.completed += [{
      issue: $num,
      result: $result,
      exit_code: $code,
      duration: $dur,
      completed_at: $completed_at
    }]' \
    "$STATE_FILE")
  atomic_write_state "$tmp"

  # ── Auto-retry with strategy escalation ──
  if [[ "${RETRY_ESCALATION:-true}" == "true" ]]; then
    local retry_count
    retry_count=$(jq -r --arg num "$issue_num" \
      '.retry_counts[$num] // 0' "$STATE_FILE" 2>/dev/null || echo "0")

    if [[ "$retry_count" -lt "$max_retries" ]]; then
      retry_count=$((retry_count + 1))

      # Update retry count in state
      local tmp_state
      tmp_state=$(jq --arg num "$issue_num" --argjson count "$retry_count" \
        '.retry_counts[$num] = $count' "$STATE_FILE")
      atomic_write_state "$tmp_state"

      daemon_log WARN "Auto-retry #${retry_count}/${max_retries} for issue #${issue_num}"
      emit_event "daemon.retry" "issue=$issue_num" "retry=$retry_count" "max=${max_retries}"

      # Build escalated pipeline args
      local retry_template="$PIPELINE_TEMPLATE"
      local retry_model="${MODEL:-opus}"
      local extra_args=()

      if [[ "$retry_count" -eq 1 ]]; then
        # Retry 1: same template, upgrade model, more iterations
        retry_model="opus"
        extra_args+=("--max-iterations" "30")
        daemon_log INFO "Escalation: model=opus, max_iterations=30"
      elif [[ "$retry_count" -ge 2 ]]; then
        # Retry 2: full template, compound quality max cycles
        retry_template="full"
        retry_model="opus"
        extra_args+=("--max-iterations" "30" "--compound-cycles" "5")
        daemon_log INFO "Escalation: template=full, compound_cycles=5"
      fi
      # FIXME(review): extra_args is built (and the log lines above claim the
      # escalation) but it is never forwarded — daemon_spawn_pipeline has no
      # parameter for extra pipeline flags, so --max-iterations and
      # --compound-cycles are silently dropped. Wiring it through requires a
      # daemon_spawn_pipeline interface change.
      : ${extra_args[*]+"${extra_args[*]}"}

      if [[ "$NO_GITHUB" != "true" ]]; then
        gh issue comment "$issue_num" --body "## 🔄 Auto-Retry #${retry_count}

Pipeline failed — retrying with escalated strategy.

| Field | Value |
|-------|-------|
| Retry | ${retry_count} / ${max_retries} |
| Template | \`${retry_template}\` |
| Model | \`${retry_model}\` |
| Started | $(now_iso) |

_Escalation: $(if [[ "$retry_count" -eq 1 ]]; then echo "upgraded model + increased iterations"; else echo "full template + compound quality"; fi)_" 2>/dev/null || true
      fi

      # Re-spawn with escalated strategy (spawn reads these globals).
      local orig_template="$PIPELINE_TEMPLATE"
      local orig_model="$MODEL"
      PIPELINE_TEMPLATE="$retry_template"
      MODEL="$retry_model"
      daemon_spawn_pipeline "$issue_num" "retry-${retry_count}"
      PIPELINE_TEMPLATE="$orig_template"
      MODEL="$orig_model"
      return
    fi

    daemon_log WARN "Max retries (${max_retries}) exhausted for issue #${issue_num}"
    emit_event "daemon.retry_exhausted" "issue=$issue_num" "retries=$retry_count"
  fi

  # ── No retry — report final failure ──
  if [[ "$NO_GITHUB" != "true" ]]; then
    # Add failure label
    gh issue edit "$issue_num" \
      --add-label "$ON_FAILURE_ADD_LABEL" 2>/dev/null || true

    # Comment with log tail
    local log_tail=""
    local log_path="$LOG_DIR/issue-${issue_num}.log"
    if [[ -f "$log_path" ]]; then
      log_tail=$(tail -"$ON_FAILURE_LOG_LINES" "$log_path" 2>/dev/null || true)
    fi

    local retry_info=""
    if [[ "${RETRY_ESCALATION:-true}" == "true" ]]; then
      local final_count
      final_count=$(jq -r --arg num "$issue_num" \
        '.retry_counts[$num] // 0' "$STATE_FILE" 2>/dev/null || echo "0")
      retry_info="| Retries | ${final_count} / ${max_retries} (exhausted) |"
    fi

    gh issue comment "$issue_num" --body "## ❌ Pipeline Failed

The autonomous pipeline encountered an error.

| Field | Value |
|-------|-------|
| Exit Code | ${exit_code} |
| Duration | ${duration:-unknown} |
| Failed At | $(now_iso) |
${retry_info}

<details>
<summary>Last ${ON_FAILURE_LOG_LINES} lines of log</summary>

\`\`\`
${log_tail}
\`\`\`

</details>

_Re-add the \`${WATCH_LABEL}\` label to retry._" 2>/dev/null || true
  fi

  notify "Pipeline Failed — Issue #${issue_num}" \
    "Exit code: ${exit_code}, Duration: ${duration:-unknown}" "error"
}
1103
+
1104
# ─── Intelligent Triage ──────────────────────────────────────────────────────

# Score an issue from 0-100 based on multiple signals for intelligent prioritization.
# Combines priority labels, age, complexity, dependencies, type, and memory signals.
#   $1 - one issue object from `gh issue list --json number,title,labels,body,createdAt`
# Prints the final integer score on stdout.
triage_score_issue() {
  local issue_json="$1"
  local issue_num issue_title issue_body labels_csv created_at
  issue_num=$(echo "$issue_json" | jq -r '.number')
  issue_title=$(echo "$issue_json" | jq -r '.title // ""')
  issue_body=$(echo "$issue_json" | jq -r '.body // ""')
  labels_csv=$(echo "$issue_json" | jq -r '[.labels[].name] | join(",")')
  created_at=$(echo "$issue_json" | jq -r '.createdAt // ""')

  local score=0

  # ── 1. Priority labels (0-30 points) ──
  local priority_score=0
  if echo "$labels_csv" | grep -qiE "urgent|p0"; then
    priority_score=30
  elif echo "$labels_csv" | grep -qiE "^high$|^high,|,high,|,high$|p1"; then
    priority_score=20
  elif echo "$labels_csv" | grep -qiE "normal|p2"; then
    priority_score=10
  elif echo "$labels_csv" | grep -qiE "^low$|^low,|,low,|,low$|p3"; then
    priority_score=5
  fi

  # ── 2. Issue age (0-15 points) — older issues boosted to prevent starvation ──
  local age_score=0
  if [[ -n "$created_at" ]]; then
    local created_epoch now_e age_secs
    # macOS date -j first, GNU date -d as fallback (TZ=UTC for the Z suffix)
    created_epoch=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$created_at" +%s 2>/dev/null || \
      date -d "$created_at" +%s 2>/dev/null || echo "0")
    now_e=$(now_epoch)
    if [[ "$created_epoch" -gt 0 ]]; then
      age_secs=$((now_e - created_epoch))
      if [[ "$age_secs" -gt 604800 ]]; then # > 7 days
        age_score=15
      elif [[ "$age_secs" -gt 259200 ]]; then # > 3 days
        age_score=10
      elif [[ "$age_secs" -gt 86400 ]]; then # > 1 day
        age_score=5
      fi
    fi
  fi

  # ── 3. Complexity estimate (0-20 points, INVERTED — simpler = higher) ──
  local complexity_score=0
  local body_len=${#issue_body}
  local file_refs
  # Count file-path occurrences. The previous `grep -coE` was wrong: -c
  # counts matching LINES and silently overrides -o, undercounting bodies
  # that mention several files on one line.
  file_refs=$(echo "$issue_body" \
    | grep -oE '[a-zA-Z0-9_/-]+\.(ts|js|py|go|rs|sh|json|yaml|yml|md)' \
    | wc -l | tr -d '[:space:]' || true)
  file_refs=${file_refs:-0}

  if [[ "$body_len" -lt 200 ]] && [[ "$file_refs" -lt 3 ]]; then
    complexity_score=20 # Short + few files = likely simple
  elif [[ "$body_len" -lt 1000 ]]; then
    complexity_score=10 # Medium
  elif [[ "$file_refs" -lt 5 ]]; then
    complexity_score=5 # Long but not many files
  fi
  # Long + many files = complex = 0 points (lower throughput)

  # ── 4. Dependencies (0-15 points / -15 for blocked) ──
  local dep_score=0
  local combined_text="${issue_title} ${issue_body}"

  # Check if this issue is blocked
  local blocked_refs
  blocked_refs=$(echo "$combined_text" | grep -oE '(blocked by|depends on) #[0-9]+' | grep -oE '#[0-9]+' || true)
  if [[ -n "$blocked_refs" ]] && [[ "$NO_GITHUB" != "true" ]]; then
    local all_closed=true
    while IFS= read -r ref; do
      local ref_num="${ref#\#}"
      local ref_state
      ref_state=$(gh issue view "$ref_num" --json state -q '.state' 2>/dev/null || echo "UNKNOWN")
      if [[ "$ref_state" != "CLOSED" ]]; then
        all_closed=false
        break
      fi
    done <<< "$blocked_refs"
    if [[ "$all_closed" == "false" ]]; then
      dep_score=-15
    fi
  fi

  # Check if this issue blocks others (search issue references).
  # Only award the bonus when the issue is not itself blocked — previously
  # this branch unconditionally overwrote the -15 blocked penalty.
  if [[ "$NO_GITHUB" != "true" ]] && [[ "$dep_score" -eq 0 ]]; then
    local mentions
    mentions=$(gh api "repos/{owner}/{repo}/issues/${issue_num}/timeline" --paginate -q '
      [.[] | select(.event == "cross-referenced") | .source.issue.body // ""] |
      map(select(test("blocked by #'"${issue_num}"'|depends on #'"${issue_num}"'"; "i"))) | length
    ' 2>/dev/null || echo "0")
    mentions=${mentions:-0}
    if [[ "$mentions" -gt 0 ]]; then
      dep_score=15
    fi
  fi

  # ── 5. Type bonus (0-10 points) ──
  local type_score=0
  if echo "$labels_csv" | grep -qiE "security"; then
    type_score=10
  elif echo "$labels_csv" | grep -qiE "bug"; then
    type_score=10
  elif echo "$labels_csv" | grep -qiE "feature|enhancement"; then
    type_score=5
  fi

  # ── 6. Memory bonus (0-10 points / -5 for prior failures) ──
  local memory_score=0
  if [[ -x "$SCRIPT_DIR/cct-memory.sh" ]]; then
    local memory_result
    memory_result=$("$SCRIPT_DIR/cct-memory.sh" search --issue "$issue_num" --json 2>/dev/null || true)
    if [[ -n "$memory_result" ]]; then
      local prior_result
      prior_result=$(echo "$memory_result" | jq -r '.last_result // ""' 2>/dev/null || true)
      if [[ "$prior_result" == "success" ]]; then
        memory_score=10
      elif [[ "$prior_result" == "failure" ]]; then
        memory_score=-5
      fi
    fi
  fi

  # ── Total ──
  score=$((priority_score + age_score + complexity_score + dep_score + type_score + memory_score))
  # Clamp to 0-100
  [[ "$score" -lt 0 ]] && score=0
  [[ "$score" -gt 100 ]] && score=100

  # NOTE(review): callers capture this function's stdout — emit_event must log
  # somewhere other than stdout or it would corrupt the captured score; verify.
  emit_event "daemon.triage" \
    "issue=$issue_num" \
    "score=$score" \
    "priority=$priority_score" \
    "age=$age_score" \
    "complexity=$complexity_score" \
    "dependency=$dep_score" \
    "type=$type_score" \
    "memory=$memory_score"

  echo "$score"
}
1246
+
1247
# Auto-select pipeline template based on issue labels.
#   $1 - comma-joined label list
#   $2 - triage score (default 50)
# Prints the chosen template name on stdout.
select_pipeline_template() {
  local labels="$1"
  local score="${2:-50}"

  # When auto_template is disabled, use default pipeline template
  if [[ "${AUTO_TEMPLATE:-false}" != "true" ]]; then
    echo "$PIPELINE_TEMPLATE"
    return
  fi

  # ── Label-based overrides (highest priority) ──
  # -E alternation: the previous BRE "hotfix\|incident" relies on a GNU
  # extension that BSD grep lacks; -E matches the rest of this file's usage.
  if echo "$labels" | grep -qiE "hotfix|incident"; then
    echo "hotfix"
    return
  fi
  if echo "$labels" | grep -qiE "security"; then
    echo "enterprise"
    return
  fi

  # ── Config-driven template_map overrides ──
  local map="${TEMPLATE_MAP:-\"{}\"}"
  # Unwrap double-encoded JSON if needed
  local decoded_map
  decoded_map=$(echo "$map" | jq -r 'if type == "string" then . else tostring end' 2>/dev/null || echo "{}")
  if [[ "$decoded_map" != "{}" ]]; then
    local matched
    # First map entry whose key (regex, case-insensitive) matches the labels
    matched=$(echo "$decoded_map" | jq -r --arg labels "$labels" '
      to_entries[] |
      select($labels | test(.key; "i")) |
      .value' 2>/dev/null | head -1)
    if [[ -n "$matched" ]]; then
      echo "$matched"
      return
    fi
  fi

  # ── Score-based selection ──
  if [[ "$score" -ge 70 ]]; then
    echo "fast"
  elif [[ "$score" -ge 40 ]]; then
    echo "standard"
  else
    echo "full"
  fi
}
1294
+
1295
# ─── Triage Display ──────────────────────────────────────────────────────────

# Render a table of triage scores for all open watch-labeled issues,
# sorted by score descending. Requires GitHub access.
daemon_triage_show() {
  if [[ "$NO_GITHUB" == "true" ]]; then
    error "Triage requires GitHub access (--no-github is set)"
    exit 1
  fi

  load_config

  echo -e "${PURPLE}${BOLD}━━━ Issue Triage Scores ━━━${RESET}"
  echo ""

  local issues_json
  issues_json=$(gh issue list \
    --label "$WATCH_LABEL" \
    --state open \
    --json number,title,labels,body,createdAt \
    --limit 50 2>/dev/null) || {
    error "Failed to fetch issues from GitHub"
    exit 1
  }

  local issue_count
  issue_count=$(echo "$issues_json" | jq 'length' 2>/dev/null || echo 0)

  if [[ "$issue_count" -eq 0 ]]; then
    echo -e " ${DIM}No open issues with label '${WATCH_LABEL}'${RESET}"
    return 0
  fi

  # Score each issue and collect '|'-delimited records:
  #   score|number|title|labels|template
  local scored_lines=()
  while IFS= read -r issue; do
    local num title labels_csv score template
    num=$(echo "$issue" | jq -r '.number')
    title=$(echo "$issue" | jq -r '.title // "—"')
    labels_csv=$(echo "$issue" | jq -r '[.labels[].name] | join(", ")')
    # '|' is the record delimiter below — a pipe inside a title would shift
    # every following column, so replace it with a lookalike.
    title="${title//|/¦}"
    score=$(triage_score_issue "$issue")
    template=$(select_pipeline_template "$labels_csv" "$score")

    scored_lines+=("${score}|${num}|${title}|${labels_csv}|${template}")
  done < <(echo "$issues_json" | jq -c '.[]')

  # Sort by score descending (-k1,1 limits the sort key to the score field;
  # bare -k1 would extend the key to end-of-line)
  local sorted
  sorted=$(printf '%s\n' "${scored_lines[@]}" | sort -t'|' -k1,1 -rn)

  # Print header
  printf " ${BOLD}%-6s %-7s %-45s %-12s %s${RESET}\n" "Score" "Issue" "Title" "Template" "Labels"
  echo -e " ${DIM}$(printf '%.0s─' {1..90})${RESET}"

  # labels_csv is last, so IFS='|' read safely absorbs any remaining pipes
  while IFS='|' read -r score num title labels_csv template; do
    # Color score by tier
    local score_color="$RED"
    [[ "$score" -ge 20 ]] && score_color="$YELLOW"
    [[ "$score" -ge 40 ]] && score_color="$CYAN"
    [[ "$score" -ge 60 ]] && score_color="$GREEN"

    # Truncate title
    [[ ${#title} -gt 42 ]] && title="${title:0:39}..."

    printf " ${score_color}%-6s${RESET} ${CYAN}#%-6s${RESET} %-45s ${DIM}%-12s %s${RESET}\n" \
      "$score" "$num" "$title" "$template" "$labels_csv"
  done <<< "$sorted"

  echo ""
  echo -e " ${DIM}${issue_count} issue(s) scored | Higher score = higher processing priority${RESET}"
  echo ""
}
1365
+
1366
+ # ─── Proactive Patrol Mode ───────────────────────────────────────────────────
1367
+
1368
+ daemon_patrol() {
1369
+ local once=false
1370
+ local dry_run="$PATROL_DRY_RUN"
1371
+
1372
+ while [[ $# -gt 0 ]]; do
1373
+ case "$1" in
1374
+ --once) once=true; shift ;;
1375
+ --dry-run) dry_run=true; shift ;;
1376
+ *) shift ;;
1377
+ esac
1378
+ done
1379
+
1380
+ echo -e "${PURPLE}${BOLD}━━━ Codebase Patrol ━━━${RESET}"
1381
+ echo ""
1382
+
1383
+ if [[ "$dry_run" == "true" ]]; then
1384
+ echo -e " ${YELLOW}DRY RUN${RESET} — findings will be reported but no issues created"
1385
+ echo ""
1386
+ fi
1387
+
1388
+ emit_event "patrol.started" "dry_run=$dry_run"
1389
+
1390
+ local total_findings=0
1391
+ local issues_created=0
1392
+
1393
+ # ── 1. Dependency Security Audit ──
1394
+ patrol_security_audit() {
1395
+ daemon_log INFO "Patrol: running dependency security audit"
1396
+ local findings=0
1397
+
1398
+ # npm audit
1399
+ if [[ -f "package.json" ]] && command -v npm &>/dev/null; then
1400
+ local audit_json
1401
+ audit_json=$(npm audit --json 2>/dev/null || true)
1402
+ if [[ -n "$audit_json" ]]; then
1403
+ while IFS= read -r vuln; do
1404
+ local severity name advisory_url title
1405
+ severity=$(echo "$vuln" | jq -r '.severity // "unknown"')
1406
+ name=$(echo "$vuln" | jq -r '.name // "unknown"')
1407
+ advisory_url=$(echo "$vuln" | jq -r '.url // ""')
1408
+ title=$(echo "$vuln" | jq -r '.title // "vulnerability"')
1409
+
1410
+ # Only report critical/high
1411
+ if [[ "$severity" != "critical" ]] && [[ "$severity" != "high" ]]; then
1412
+ continue
1413
+ fi
1414
+
1415
+ findings=$((findings + 1))
1416
+ emit_event "patrol.finding" "type=security" "severity=$severity" "package=$name"
1417
+
1418
+ # Check if issue already exists
1419
+ if [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
1420
+ local existing
1421
+ existing=$(gh issue list --label "$PATROL_LABEL" --label "security" \
1422
+ --search "Security: $name" --json number -q 'length' 2>/dev/null || echo "0")
1423
+ if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
1424
+ gh issue create \
1425
+ --title "Security: ${title} in ${name}" \
1426
+ --body "## Dependency Security Finding
1427
+
1428
+ | Field | Value |
1429
+ |-------|-------|
1430
+ | Package | \`${name}\` |
1431
+ | Severity | **${severity}** |
1432
+ | Advisory | ${advisory_url} |
1433
+ | Found by | Shipwright patrol |
1434
+ | Date | $(now_iso) |
1435
+
1436
+ Auto-detected by \`shipwright daemon patrol\`." \
1437
+ --label "security" --label "$PATROL_LABEL" 2>/dev/null || true
1438
+ issues_created=$((issues_created + 1))
1439
+ emit_event "patrol.issue_created" "type=security" "package=$name"
1440
+ fi
1441
+ else
1442
+ echo -e " ${RED}●${RESET} ${BOLD}${severity}${RESET}: ${title} in ${CYAN}${name}${RESET}"
1443
+ fi
1444
+ done < <(echo "$audit_json" | jq -c '.vulnerabilities | to_entries[] | .value' 2>/dev/null)
1445
+ fi
1446
+ fi
1447
+
1448
+ # pip-audit
1449
+ if [[ -f "requirements.txt" ]] && command -v pip-audit &>/dev/null; then
1450
+ local pip_json
1451
+ pip_json=$(pip-audit --format=json 2>/dev/null || true)
1452
+ if [[ -n "$pip_json" ]]; then
1453
+ local vuln_count
1454
+ vuln_count=$(echo "$pip_json" | jq '[.dependencies[] | select(.vulns | length > 0)] | length' 2>/dev/null || echo "0")
1455
+ findings=$((findings + ${vuln_count:-0}))
1456
+ fi
1457
+ fi
1458
+
1459
+ # cargo audit
1460
+ if [[ -f "Cargo.toml" ]] && command -v cargo-audit &>/dev/null; then
1461
+ local cargo_json
1462
+ cargo_json=$(cargo audit --json 2>/dev/null || true)
1463
+ if [[ -n "$cargo_json" ]]; then
1464
+ local vuln_count
1465
+ vuln_count=$(echo "$cargo_json" | jq '.vulnerabilities.found' 2>/dev/null || echo "0")
1466
+ findings=$((findings + ${vuln_count:-0}))
1467
+ fi
1468
+ fi
1469
+
1470
+ total_findings=$((total_findings + findings))
1471
+ if [[ "$findings" -gt 0 ]]; then
1472
+ daemon_log INFO "Patrol: found ${findings} security vulnerability(ies)"
1473
+ else
1474
+ daemon_log INFO "Patrol: no security vulnerabilities found"
1475
+ fi
1476
+ }
1477
+
1478
+ # ── 2. Stale Dependency Check ──
1479
+ patrol_stale_dependencies() {
1480
+ daemon_log INFO "Patrol: checking for stale dependencies"
1481
+ local findings=0
1482
+
1483
+ if [[ -f "package.json" ]] && command -v npm &>/dev/null; then
1484
+ local outdated_json
1485
+ outdated_json=$(npm outdated --json 2>/dev/null || true)
1486
+ if [[ -n "$outdated_json" ]] && [[ "$outdated_json" != "{}" ]]; then
1487
+ local stale_packages=""
1488
+ while IFS= read -r pkg; do
1489
+ local name current latest current_major latest_major
1490
+ name=$(echo "$pkg" | jq -r '.key')
1491
+ current=$(echo "$pkg" | jq -r '.value.current // "0.0.0"')
1492
+ latest=$(echo "$pkg" | jq -r '.value.latest // "0.0.0"')
1493
+ current_major="${current%%.*}"
1494
+ latest_major="${latest%%.*}"
1495
+
1496
+ # Only flag if > 2 major versions behind
1497
+ if [[ "$latest_major" =~ ^[0-9]+$ ]] && [[ "$current_major" =~ ^[0-9]+$ ]]; then
1498
+ local diff=$((latest_major - current_major))
1499
+ if [[ "$diff" -ge 2 ]]; then
1500
+ findings=$((findings + 1))
1501
+ stale_packages="${stale_packages}\n- \`${name}\`: ${current} → ${latest} (${diff} major versions behind)"
1502
+ emit_event "patrol.finding" "type=stale_dependency" "package=$name" "current=$current" "latest=$latest"
1503
+
1504
+ if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
1505
+ echo -e " ${YELLOW}●${RESET} ${CYAN}${name}${RESET}: ${current} → ${latest} (${diff} major versions behind)"
1506
+ fi
1507
+ fi
1508
+ fi
1509
+ done < <(echo "$outdated_json" | jq -c 'to_entries[]' 2>/dev/null)
1510
+
1511
+ # Create a single issue for all stale deps
1512
+ if [[ "$findings" -gt 0 ]] && [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
1513
+ local existing
1514
+ existing=$(gh issue list --label "$PATROL_LABEL" --label "dependencies" \
1515
+ --search "Stale dependencies" --json number -q 'length' 2>/dev/null || echo "0")
1516
+ if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
1517
+ gh issue create \
1518
+ --title "Update ${findings} stale dependencies" \
1519
+ --body "## Stale Dependencies
1520
+
1521
+ The following packages are 2+ major versions behind:
1522
+ $(echo -e "$stale_packages")
1523
+
1524
+ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
1525
+ --label "dependencies" --label "$PATROL_LABEL" 2>/dev/null || true
1526
+ issues_created=$((issues_created + 1))
1527
+ emit_event "patrol.issue_created" "type=stale_dependency" "count=$findings"
1528
+ fi
1529
+ fi
1530
+ fi
1531
+ fi
1532
+
1533
+ total_findings=$((total_findings + findings))
1534
+ daemon_log INFO "Patrol: found ${findings} stale dependency(ies)"
1535
+ }
1536
+
1537
+ # ── 3. Dead Code Detection ──
1538
+ patrol_dead_code() {
1539
+ daemon_log INFO "Patrol: scanning for dead code"
1540
+ local findings=0
1541
+ local dead_files=""
1542
+
1543
+ # For JS/TS projects: find exported files not imported anywhere
1544
+ if [[ -f "package.json" ]] || [[ -f "tsconfig.json" ]]; then
1545
+ local src_dirs=("src" "lib" "app")
1546
+ for dir in "${src_dirs[@]}"; do
1547
+ [[ -d "$dir" ]] || continue
1548
+ while IFS= read -r file; do
1549
+ local basename_no_ext
1550
+ basename_no_ext=$(basename "$file" | sed 's/\.\(ts\|js\|tsx\|jsx\)$//')
1551
+ # Skip index files and test files
1552
+ [[ "$basename_no_ext" == "index" ]] && continue
1553
+ [[ "$basename_no_ext" =~ \.(test|spec)$ ]] && continue
1554
+
1555
+ # Check if this file is imported anywhere
1556
+ local import_count
1557
+ import_count=$(grep -rlE "(from|require).*['\"].*${basename_no_ext}['\"]" \
1558
+ --include="*.ts" --include="*.js" --include="*.tsx" --include="*.jsx" \
1559
+ . 2>/dev/null | grep -cv "$file" || true)
1560
+ import_count=${import_count:-0}
1561
+
1562
+ if [[ "$import_count" -eq 0 ]]; then
1563
+ findings=$((findings + 1))
1564
+ dead_files="${dead_files}\n- \`${file}\`"
1565
+ if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
1566
+ echo -e " ${DIM}●${RESET} ${file} ${DIM}(not imported)${RESET}"
1567
+ fi
1568
+ fi
1569
+ done < <(find "$dir" -type f \( -name "*.ts" -o -name "*.js" -o -name "*.tsx" -o -name "*.jsx" \) \
1570
+ ! -name "*.test.*" ! -name "*.spec.*" ! -name "*.d.ts" 2>/dev/null)
1571
+ done
1572
+ fi
1573
+
1574
+ if [[ "$findings" -gt 0 ]] && [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
1575
+ local existing
1576
+ existing=$(gh issue list --label "$PATROL_LABEL" --label "tech-debt" \
1577
+ --search "Dead code candidates" --json number -q 'length' 2>/dev/null || echo "0")
1578
+ if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
1579
+ gh issue create \
1580
+ --title "Dead code candidates (${findings} files)" \
1581
+ --body "## Dead Code Detection
1582
+
1583
+ These files appear to have no importers — they may be unused:
1584
+ $(echo -e "$dead_files")
1585
+
1586
+ > **Note:** Some files may be entry points or dynamically loaded. Verify before removing.
1587
+
1588
+ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
1589
+ --label "tech-debt" --label "$PATROL_LABEL" 2>/dev/null || true
1590
+ issues_created=$((issues_created + 1))
1591
+ emit_event "patrol.issue_created" "type=dead_code" "count=$findings"
1592
+ fi
1593
+ fi
1594
+
1595
+ total_findings=$((total_findings + findings))
1596
+ daemon_log INFO "Patrol: found ${findings} dead code candidate(s)"
1597
+ }
1598
+
1599
+ # ── 4. Test Coverage Gaps ──
1600
patrol_coverage_gaps() {
  # Scan the most recent coverage-summary.json (Istanbul/nyc style: map of
  # file → {lines:{pct:…}}) for files below 50% line coverage, then file a
  # single deduplicated "testing" issue listing them.
  # Reads (enclosing scope): dry_run, NO_GITHUB, PATROL_LABEL, PATROL_MAX_ISSUES
  # Writes (enclosing scope): total_findings, issues_created
  daemon_log INFO "Patrol: checking test coverage gaps"
  local findings=0
  local low_cov_files=""

  # Look for coverage reports from last pipeline run (first match wins)
  local coverage_file=""
  for candidate in \
    ".claude/pipeline-artifacts/coverage/coverage-summary.json" \
    "coverage/coverage-summary.json" \
    ".coverage/coverage-summary.json"; do
    if [[ -f "$candidate" ]]; then
      coverage_file="$candidate"
      break
    fi
  done

  if [[ -z "$coverage_file" ]]; then
    daemon_log INFO "Patrol: no coverage report found — skipping"
    return
  fi

  # One jq pass over the whole report (path<TAB>pct per line) instead of two
  # jq spawns per entry. Process substitution keeps the counters in this shell.
  local file_path line_pct
  while IFS=$'\t' read -r file_path line_pct; do
    # Skip the aggregate entry and well-covered files
    [[ "$file_path" == "total" ]] && continue
    # Pass pct via -v rather than interpolating it into the awk program:
    # a non-numeric value then compares as 0 (flagged) instead of crashing awk.
    if awk -v pct="$line_pct" 'BEGIN{exit !(pct+0 >= 50)}' 2>/dev/null; then continue; fi

    findings=$((findings + 1))
    low_cov_files="${low_cov_files}\n- \`${file_path}\`: ${line_pct}% line coverage"

    if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
      echo -e " ${YELLOW}●${RESET} ${file_path}: ${line_pct}% coverage"
    fi
  done < <(jq -r 'to_entries[] | [.key, (.value.lines.pct // 100)] | @tsv' "$coverage_file" 2>/dev/null)

  # Create one issue for all gaps, unless an open patrol/testing issue already
  # matches or the per-run issue budget is exhausted.
  if [[ "$findings" -gt 0 ]] && [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
    local existing
    existing=$(gh issue list --label "$PATROL_LABEL" --label "testing" \
      --search "Test coverage gaps" --json number -q 'length' 2>/dev/null || echo "0")
    if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
      gh issue create \
        --title "Improve test coverage for ${findings} file(s)" \
        --body "## Test Coverage Gaps

These files have < 50% line coverage:
$(echo -e "$low_cov_files")

Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
        --label "testing" --label "$PATROL_LABEL" 2>/dev/null || true
      issues_created=$((issues_created + 1))
      emit_event "patrol.issue_created" "type=coverage" "count=$findings"
    fi
  fi

  total_findings=$((total_findings + findings))
  daemon_log INFO "Patrol: found ${findings} low-coverage file(s)"
}
1661
+
1662
+ # ── 5. Documentation Staleness ──
1663
patrol_doc_staleness() {
  # Flags a README.md lagging > 30 days behind source-code commits, and a
  # CHANGELOG.md that was not touched since the newest tag, then files one
  # deduplicated "documentation" issue (skipped in dry-run / --no-github mode).
  daemon_log INFO "Patrol: checking documentation staleness"
  local findings=0
  local stale_docs=""

  # README drift: compare last-commit epochs of README.md vs. source globs.
  if [[ -f "README.md" ]]; then
    local doc_ts code_ts
    doc_ts=$(git log -1 --format=%ct -- README.md 2>/dev/null || echo "0")
    code_ts=$(git log -1 --format=%ct -- "*.ts" "*.js" "*.py" "*.go" "*.rs" "*.sh" 2>/dev/null || echo "0")

    if [[ "$code_ts" -gt 0 && "$doc_ts" -gt 0 ]]; then
      local lag=$((code_ts - doc_ts))
      # Stale when more than 30 days (2592000s) behind the source tree
      if [[ "$lag" -gt 2592000 ]]; then
        local days_behind=$((lag / 86400))
        findings=$((findings + 1))
        stale_docs="${stale_docs}\n- \`README.md\`: ${days_behind} days behind source code"
        if [[ "$dry_run" == "true" || "$NO_GITHUB" == "true" ]]; then
          echo -e " ${YELLOW}●${RESET} README.md is ${days_behind} days behind source code"
        fi
      fi
    fi
  fi

  # CHANGELOG drift: stale when the newest tag post-dates its last commit.
  if [[ -f "CHANGELOG.md" ]]; then
    local newest_tag
    newest_tag=$(git describe --tags --abbrev=0 2>/dev/null || true)
    if [[ -n "$newest_tag" ]]; then
      local log_ts tag_ts
      log_ts=$(git log -1 --format=%ct -- CHANGELOG.md 2>/dev/null || echo "0")
      tag_ts=$(git log -1 --format=%ct "$newest_tag" 2>/dev/null || echo "0")
      if [[ "$tag_ts" -gt "$log_ts" && "$log_ts" -gt 0 ]]; then
        findings=$((findings + 1))
        stale_docs="${stale_docs}\n- \`CHANGELOG.md\`: not updated since tag \`${newest_tag}\`"
        if [[ "$dry_run" == "true" || "$NO_GITHUB" == "true" ]]; then
          echo -e " ${YELLOW}●${RESET} CHANGELOG.md not updated since ${newest_tag}"
        fi
      fi
    fi
  fi

  # One combined issue, deduped against open patrol/documentation issues and
  # capped by the per-run issue budget.
  if [[ "$findings" -gt 0 && "$NO_GITHUB" != "true" && "$dry_run" != "true" ]]; then
    local open_dupes
    open_dupes=$(gh issue list --label "$PATROL_LABEL" --label "documentation" \
      --search "Stale documentation" --json number -q 'length' 2>/dev/null || echo "0")
    if [[ "${open_dupes:-0}" -eq 0 && "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
      gh issue create \
        --title "Stale documentation detected" \
        --body "## Documentation Staleness

The following docs may need updating:
$(echo -e "$stale_docs")

Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
        --label "documentation" --label "$PATROL_LABEL" 2>/dev/null || true
      issues_created=$((issues_created + 1))
      emit_event "patrol.issue_created" "type=documentation" "count=$findings"
    fi
  fi

  total_findings=$((total_findings + findings))
  daemon_log INFO "Patrol: found ${findings} stale documentation item(s)"
}
1727
+
1728
+ # ── 6. Performance Baseline ──
1729
patrol_performance_baseline() {
  # Compare the most recent test-stage duration (from the events log) against
  # a stored baseline; flag a regression when the latest run is > 30% slower.
  # On regression: emits patrol.finding and (unless dry-run/--no-github) files
  # a deduplicated "performance" issue, then returns WITHOUT updating the
  # baseline — so the baseline stays pinned until the regression is resolved.
  # Otherwise the baseline is rewritten with the latest duration.
  daemon_log INFO "Patrol: checking performance baseline"

  # Look for test timing in recent pipeline events
  if [[ ! -f "$EVENTS_FILE" ]]; then
    daemon_log INFO "Patrol: no events file — skipping performance check"
    return
  fi

  local baseline_file="$DAEMON_DIR/patrol-perf-baseline.json"
  # Latest stage.completed/test duration among the last 500 events; "null"
  # when none is present (jq failure also maps to "null" via the fallback).
  local recent_test_dur
  recent_test_dur=$(tail -500 "$EVENTS_FILE" | \
    jq -s '[.[] | select(.type == "stage.completed" and .stage == "test") | .duration_s] | if length > 0 then .[-1] else null end' \
    2>/dev/null || echo "null")

  if [[ "$recent_test_dur" == "null" ]] || [[ -z "$recent_test_dur" ]]; then
    daemon_log INFO "Patrol: no recent test duration found — skipping"
    return
  fi

  if [[ -f "$baseline_file" ]]; then
    local baseline_dur
    baseline_dur=$(jq -r '.test_duration_s // 0' "$baseline_file" 2>/dev/null || echo "0")
    if [[ "$baseline_dur" -gt 0 ]]; then
      # NOTE(review): this arithmetic assumes duration_s is an integer; a
      # fractional value emitted by jq (e.g. 12.5) would break both $(( ))
      # and the [[ -gt ]] comparisons below — confirm the event producer.
      local threshold=$(( baseline_dur * 130 / 100 )) # 30% slower
      if [[ "$recent_test_dur" -gt "$threshold" ]]; then
        total_findings=$((total_findings + 1))
        local pct_slower=$(( (recent_test_dur - baseline_dur) * 100 / baseline_dur ))
        emit_event "patrol.finding" "type=performance" "baseline=${baseline_dur}s" "current=${recent_test_dur}s" "regression=${pct_slower}%"

        if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
          # Report-only modes: print instead of filing an issue
          echo -e " ${RED}●${RESET} Test suite ${pct_slower}% slower than baseline (${baseline_dur}s → ${recent_test_dur}s)"
        elif [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
          # Dedupe against any open patrol/performance issue before filing
          local existing
          existing=$(gh issue list --label "$PATROL_LABEL" --label "performance" \
            --search "Test suite performance regression" --json number -q 'length' 2>/dev/null || echo "0")
          if [[ "${existing:-0}" -eq 0 ]]; then
            gh issue create \
              --title "Test suite performance regression (${pct_slower}% slower)" \
              --body "## Performance Regression

| Metric | Value |
|--------|-------|
| Baseline | ${baseline_dur}s |
| Current | ${recent_test_dur}s |
| Regression | ${pct_slower}% |

Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
              --label "performance" --label "$PATROL_LABEL" 2>/dev/null || true
            issues_created=$((issues_created + 1))
            emit_event "patrol.issue_created" "type=performance"
          fi
        fi

        daemon_log WARN "Patrol: test suite ${pct_slower}% slower than baseline"
        # Early return: keep the old baseline while regressed
        return
      fi
    fi
  fi

  # Save/update baseline (first run, or latest run within threshold)
  jq -n --argjson dur "$recent_test_dur" --arg ts "$(now_iso)" \
    '{test_duration_s: $dur, updated_at: $ts}' > "$baseline_file"
  daemon_log INFO "Patrol: performance baseline updated (${recent_test_dur}s)"
}
1794
+
1795
+ # ── Run all patrol checks ──
1796
+ echo -e " ${BOLD}Security Audit${RESET}"
1797
+ patrol_security_audit
1798
+ echo ""
1799
+
1800
+ echo -e " ${BOLD}Stale Dependencies${RESET}"
1801
+ patrol_stale_dependencies
1802
+ echo ""
1803
+
1804
+ echo -e " ${BOLD}Dead Code Detection${RESET}"
1805
+ patrol_dead_code
1806
+ echo ""
1807
+
1808
+ echo -e " ${BOLD}Test Coverage Gaps${RESET}"
1809
+ patrol_coverage_gaps
1810
+ echo ""
1811
+
1812
+ echo -e " ${BOLD}Documentation Staleness${RESET}"
1813
+ patrol_doc_staleness
1814
+ echo ""
1815
+
1816
+ echo -e " ${BOLD}Performance Baseline${RESET}"
1817
+ patrol_performance_baseline
1818
+ echo ""
1819
+
1820
+ # ── Summary ──
1821
+ emit_event "patrol.completed" "findings=$total_findings" "issues_created=$issues_created" "dry_run=$dry_run"
1822
+
1823
+ echo -e "${PURPLE}${BOLD}━━━ Patrol Summary ━━━${RESET}"
1824
+ echo -e " Findings: ${total_findings}"
1825
+ echo -e " Issues created: ${issues_created}"
1826
+ if [[ "$dry_run" == "true" ]]; then
1827
+ echo -e " ${DIM}(dry run — no issues were created)${RESET}"
1828
+ fi
1829
+ echo ""
1830
+
1831
+ daemon_log INFO "Patrol complete: ${total_findings} findings, ${issues_created} issues created"
1832
+ }
1833
+
1834
+ # ─── Poll Issues ─────────────────────────────────────────────────────────────
1835
+
1836
daemon_poll_issues() {
  # One poll cycle: fetch open issues carrying WATCH_LABEL (single-repo or
  # org-wide), score them with triage_score_issue, then spawn/enqueue
  # pipelines in descending-score order. Mutates globals: BACKOFF_SECS
  # (API backoff), PIPELINE_TEMPLATE (temporarily, around each spawn), and
  # daemon state via enqueue_issue / track_priority_job / update_state_field.
  if [[ "$NO_GITHUB" == "true" ]]; then
    daemon_log INFO "Polling skipped (--no-github)"
    return
  fi

  local issues_json

  # Select gh command wrapper: gh_retry for critical poll calls when enabled
  local gh_cmd="gh"
  if [[ "${GH_RETRY_ENABLED:-true}" == "true" ]]; then
    gh_cmd="gh_retry gh"
  fi

  if [[ "$WATCH_MODE" == "org" && -n "$ORG" ]]; then
    # Org-wide mode: search issues across all org repos.
    # $gh_cmd is intentionally unquoted so "gh_retry gh" splits into two words.
    issues_json=$($gh_cmd search issues \
      --label "$WATCH_LABEL" \
      --owner "$ORG" \
      --state open \
      --json repository,number,title,labels,body,createdAt \
      --limit 20 2>/dev/null) || {
      # Handle rate limiting with exponential backoff: 30s, doubling, capped
      # at 300s. This poll cycle is abandoned after sleeping.
      if [[ $BACKOFF_SECS -eq 0 ]]; then
        BACKOFF_SECS=30
      elif [[ $BACKOFF_SECS -lt 300 ]]; then
        BACKOFF_SECS=$((BACKOFF_SECS * 2))
        if [[ $BACKOFF_SECS -gt 300 ]]; then
          BACKOFF_SECS=300
        fi
      fi
      daemon_log WARN "GitHub API error (org search) — backing off ${BACKOFF_SECS}s"
      sleep "$BACKOFF_SECS"
      return
    }

    # Filter by repo_filter regex if set (jq test() on owner/name)
    if [[ -n "$REPO_FILTER" ]]; then
      issues_json=$(echo "$issues_json" | jq -c --arg filter "$REPO_FILTER" \
        '[.[] | select(.repository.nameWithOwner | test($filter))]')
    fi
  else
    # Standard single-repo mode
    issues_json=$($gh_cmd issue list \
      --label "$WATCH_LABEL" \
      --state open \
      --json number,title,labels,body,createdAt \
      --limit 20 2>/dev/null) || {
      # Same exponential backoff as the org branch (30s → ×2 → cap 300s).
      # NOTE(review): this block duplicates the org-branch backoff verbatim —
      # a shared helper would keep the two in sync.
      if [[ $BACKOFF_SECS -eq 0 ]]; then
        BACKOFF_SECS=30
      elif [[ $BACKOFF_SECS -lt 300 ]]; then
        BACKOFF_SECS=$((BACKOFF_SECS * 2))
        if [[ $BACKOFF_SECS -gt 300 ]]; then
          BACKOFF_SECS=300
        fi
      fi
      daemon_log WARN "GitHub API error — backing off ${BACKOFF_SECS}s"
      sleep "$BACKOFF_SECS"
      return
    }
  fi

  # Reset backoff on success
  BACKOFF_SECS=0

  local issue_count
  issue_count=$(echo "$issues_json" | jq 'length' 2>/dev/null || echo 0)

  if [[ "$issue_count" -eq 0 ]]; then
    return
  fi

  local mode_label="repo"
  [[ "$WATCH_MODE" == "org" ]] && mode_label="org:${ORG}"
  daemon_log INFO "Found ${issue_count} issue(s) with label '${WATCH_LABEL}' (${mode_label})"
  emit_event "daemon.poll" "issues_found=$issue_count" "active=$(get_active_count)" "mode=$WATCH_MODE"

  # Score each issue using intelligent triage. Entries are "score|number|repo"
  # (repo empty outside org mode); issue_count > 0 guarantees a non-empty array.
  local scored_issues=()
  while IFS= read -r issue; do
    local num score
    num=$(echo "$issue" | jq -r '.number')
    score=$(triage_score_issue "$issue")
    # For org mode, include repo name in the scored entry
    local repo_name=""
    if [[ "$WATCH_MODE" == "org" ]]; then
      repo_name=$(echo "$issue" | jq -r '.repository.nameWithOwner // ""')
    fi
    scored_issues+=("${score}|${num}|${repo_name}")
  done < <(echo "$issues_json" | jq -c '.[]')

  # Sort by score descending (numeric on the first |-separated field)
  local sorted_order
  sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1 -rn)

  local active_count
  active_count=$(get_active_count)

  # Process each issue in triage order (process substitution keeps state in current shell)
  while IFS='|' read -r score issue_num repo_name; do
    [[ -z "$issue_num" ]] && continue

    # Re-derive title/labels from the fetched JSON by issue number
    local issue_title labels_csv
    issue_title=$(echo "$issues_json" | jq -r --argjson n "$issue_num" '.[] | select(.number == $n) | .title')
    labels_csv=$(echo "$issues_json" | jq -r --argjson n "$issue_num" '.[] | select(.number == $n) | [.labels[].name] | join(",")')

    # Skip if already inflight
    if daemon_is_inflight "$issue_num"; then
      continue
    fi

    # Priority lane: critical issues bypass the capacity queue, limited to
    # PRIORITY_LANE_MAX concurrent priority jobs.
    if [[ "$PRIORITY_LANE" == "true" ]]; then
      local priority_active
      priority_active=$(get_priority_active_count)
      if is_priority_issue "$labels_csv" && [[ "$priority_active" -lt "$PRIORITY_LANE_MAX" ]]; then
        daemon_log WARN "PRIORITY LANE: issue #${issue_num} bypassing queue (${labels_csv})"
        emit_event "daemon.priority_lane" "issue=$issue_num" "score=$score"

        local template
        template=$(select_pipeline_template "$labels_csv" "$score")
        daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template} [PRIORITY]"

        # Spawn with a temporary PIPELINE_TEMPLATE override, then restore it
        local orig_template="$PIPELINE_TEMPLATE"
        PIPELINE_TEMPLATE="$template"
        daemon_spawn_pipeline "$issue_num" "$issue_title" "$repo_name"
        PIPELINE_TEMPLATE="$orig_template"
        track_priority_job "$issue_num"
        continue
      fi
    fi

    # Check capacity; at the cap the issue is queued for a later cycle
    active_count=$(get_active_count)
    if [[ "$active_count" -ge "$MAX_PARALLEL" ]]; then
      enqueue_issue "$issue_num"
      continue
    fi

    # Auto-select pipeline template based on labels + triage score
    local template
    template=$(select_pipeline_template "$labels_csv" "$score")
    daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template}"

    # Spawn pipeline (template selection applied via PIPELINE_TEMPLATE override)
    local orig_template="$PIPELINE_TEMPLATE"
    PIPELINE_TEMPLATE="$template"
    daemon_spawn_pipeline "$issue_num" "$issue_title" "$repo_name"
    PIPELINE_TEMPLATE="$orig_template"
  done <<< "$sorted_order"

  # Update last poll
  update_state_field "last_poll" "$(now_iso)"
}
1991
+
1992
+ # ─── Health Check ─────────────────────────────────────────────────────────────
1993
+
1994
daemon_health_check() {
  # Periodic self-check: reap pipeline jobs running past the stale timeout,
  # warn on low disk space, and warn when the events log grows oversized.
  # Emits a daemon.health event only when at least one problem was found.
  local problem_count=0

  local timeout_s="${HEALTH_STALE_TIMEOUT:-1800}" # default 30min
  local now_ts
  now_ts=$(now_epoch)

  # Walk active jobs recorded in the state file and TERM any that exceeded
  # the timeout and are still alive.
  if [[ -f "$STATE_FILE" ]]; then
    local job_json
    while IFS= read -r job_json; do
      local pid issue_num started_at
      pid=$(echo "$job_json" | jq -r '.pid')
      issue_num=$(echo "$job_json" | jq -r '.issue')
      started_at=$(echo "$job_json" | jq -r '.started_at // empty')
      [[ -z "$started_at" ]] && continue

      # BSD date first (-j -f), then GNU date (-d), else epoch 0
      local start_ts
      start_ts=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started_at" +%s 2>/dev/null || date -d "$started_at" +%s 2>/dev/null || echo "0")
      local elapsed=$(( now_ts - start_ts ))
      if [[ "$elapsed" -gt "$timeout_s" ]] && kill -0 "$pid" 2>/dev/null; then
        daemon_log WARN "Stale job detected: issue #${issue_num} (${elapsed}s, PID $pid) — killing"
        kill "$pid" 2>/dev/null || true
        problem_count=$((problem_count + 1))
      fi
    done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null)
  fi

  # Warn when less than 1GB (1048576 KB) remains on the working filesystem
  local avail_kb
  avail_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
  if [[ -n "$avail_kb" ]] && [[ "$avail_kb" -lt 1048576 ]] 2>/dev/null; then
    daemon_log WARN "Low disk space: $(( avail_kb / 1024 ))MB free"
    problem_count=$((problem_count + 1))
  fi

  # Warn when the events log exceeds 100MB (104857600 bytes)
  if [[ -f "$EVENTS_FILE" ]]; then
    local log_bytes
    log_bytes=$(wc -c < "$EVENTS_FILE" 2>/dev/null || echo 0)
    if [[ "$log_bytes" -gt 104857600 ]]; then
      daemon_log WARN "Events file large ($(( log_bytes / 1048576 ))MB) — consider rotating"
      problem_count=$((problem_count + 1))
    fi
  fi

  if [[ "$problem_count" -gt 0 ]]; then
    emit_event "daemon.health" "findings=$problem_count"
  fi
}
2044
+
2045
+ # ─── Degradation Alerting ─────────────────────────────────────────────────────
2046
+
2047
daemon_check_degradation() {
  # Alert when the last N pipeline completions degrade: change-failure rate
  # above DEGRADATION_CFR_THRESHOLD or success rate below
  # DEGRADATION_SUCCESS_THRESHOLD. Emits daemon.alert and (if SLACK_WEBHOOK
  # is set) a Slack notification. No-op until N completions are recorded.
  if [[ ! -f "$EVENTS_FILE" ]]; then return; fi

  local window="${DEGRADATION_WINDOW:-5}"
  local cfr_threshold="${DEGRADATION_CFR_THRESHOLD:-30}"
  local success_threshold="${DEGRADATION_SUCCESS_THRESHOLD:-50}"
  # Guard against a non-numeric env override before it reaches jq/arithmetic
  [[ "$window" =~ ^[0-9]+$ ]] || window=5

  # Get last N pipeline completions. The window is passed with --argjson
  # instead of being interpolated into the jq program text.
  local recent
  recent=$(tail -200 "$EVENTS_FILE" | jq -s --argjson w "$window" \
    '[.[] | select(.type == "pipeline.completed")] | .[-$w:]' 2>/dev/null)
  local count
  count=$(echo "$recent" | jq 'length' 2>/dev/null || echo 0)
  # Normalize jq failures to 0 so the integer comparisons below cannot error
  [[ "$count" =~ ^[0-9]+$ ]] || count=0

  if [[ "$count" -lt "$window" ]]; then return; fi

  local failures successes
  failures=$(echo "$recent" | jq '[.[] | select(.result == "failure")] | length')
  successes=$(echo "$recent" | jq '[.[] | select(.result == "success")] | length')
  # count >= window >= 1 here, so the divisions are safe
  local cfr_pct=$(( failures * 100 / count ))
  local success_pct=$(( successes * 100 / count ))

  local alerts=""
  if [[ "$cfr_pct" -gt "$cfr_threshold" ]]; then
    alerts="CFR ${cfr_pct}% exceeds threshold ${cfr_threshold}%"
    daemon_log WARN "DEGRADATION: $alerts"
  fi
  if [[ "$success_pct" -lt "$success_threshold" ]]; then
    local msg="Success rate ${success_pct}% below threshold ${success_threshold}%"
    # Explicit if/else (not `[[ ]] && a || b`) to append or initialize
    if [[ -n "$alerts" ]]; then
      alerts="$alerts; $msg"
    else
      alerts="$msg"
    fi
    daemon_log WARN "DEGRADATION: $msg"
  fi

  if [[ -n "$alerts" ]]; then
    emit_event "daemon.alert" "alerts=$alerts" "cfr_pct=$cfr_pct" "success_pct=$success_pct"

    # Slack notification
    if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
      notify "Pipeline Degradation Alert" "$alerts" "warn"
    fi
  fi
}
2088
+
2089
+ # ─── Auto-Scaling ─────────────────────────────────────────────────────────
2090
+ # Dynamically adjusts MAX_PARALLEL based on CPU, memory, budget, and queue depth
2091
+
2092
daemon_auto_scale() {
  # Dynamically adjust MAX_PARALLEL each cycle from four independent ceilings:
  # CPU (75% of cores, collapsed to MIN_WORKERS under heavy load), available
  # memory (WORKER_MEM_GB per worker), remaining budget (EST_COST_PER_JOB per
  # worker), and demand (queue depth + active jobs). The lowest ceiling wins,
  # further capped by MAX_WORKERS and FLEET_MAX_PARALLEL, then clamped up to
  # MIN_WORKERS. No-op unless AUTO_SCALE=true; emits daemon.scale on change.
  if [[ "${AUTO_SCALE:-false}" != "true" ]]; then
    return
  fi

  local prev_max="$MAX_PARALLEL"

  # ── CPU cores ──
  local cpu_cores=2
  if [[ "$(uname -s)" == "Darwin" ]]; then
    cpu_cores=$(sysctl -n hw.ncpu 2>/dev/null || echo 2)
  else
    cpu_cores=$(nproc 2>/dev/null || echo 2)
  fi
  local max_by_cpu=$(( (cpu_cores * 3) / 4 )) # 75% utilization cap
  [[ "$max_by_cpu" -lt 1 ]] && max_by_cpu=1

  # ── Load average check (back off if system is stressed) ──
  # Parse the 1-minute load from `uptime`; "averages?" matches both the
  # macOS ("load averages:") and Linux ("load average:") spellings.
  local load_avg
  load_avg=$(uptime | awk -F'load averages?: ' '{print $2}' | awk -F'[, ]+' '{print $1}' 2>/dev/null || echo "0")
  # Validate numeric — a parse failure degrades to load 0 (no constraint)
  if [[ ! "$load_avg" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    load_avg="0"
  fi
  local load_ratio=0
  if [[ "$cpu_cores" -gt 0 ]]; then
    # Float math in awk (shell arithmetic is integer-only); percent of cores
    load_ratio=$(awk -v load="$load_avg" -v cores="$cpu_cores" 'BEGIN { printf "%.0f", (load / cores) * 100 }')
  fi
  if [[ "$load_ratio" -gt 90 ]]; then
    # System under heavy load — scale down to min
    max_by_cpu="$MIN_WORKERS"
    daemon_log WARN "Auto-scale: high load (${load_avg}/${cpu_cores} cores) — constraining to ${max_by_cpu}"
  fi

  # ── Available memory ──
  local avail_mem_gb=8
  if [[ "$(uname -s)" == "Darwin" ]]; then
    local page_size free_pages inactive_pages purgeable_pages speculative_pages
    # Page size is in format: "(page size of 16384 bytes)"
    # NOTE(review): vm_stat is spawned five times below; capturing its output
    # once and parsing with a single awk pass would be cheaper — confirm
    # nothing depends on re-reading between fields before changing.
    page_size=$(vm_stat | awk '/page size of/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+$/) print $i}')
    page_size="${page_size:-16384}"
    # vm_stat counters carry a trailing '.' — gsub strips it before use
    free_pages=$(vm_stat | awk '/^Pages free:/ {gsub(/\./, "", $NF); print $NF}')
    free_pages="${free_pages:-0}"
    speculative_pages=$(vm_stat | awk '/^Pages speculative:/ {gsub(/\./, "", $NF); print $NF}')
    speculative_pages="${speculative_pages:-0}"
    inactive_pages=$(vm_stat | awk '/^Pages inactive:/ {gsub(/\./, "", $NF); print $NF}')
    inactive_pages="${inactive_pages:-0}"
    purgeable_pages=$(vm_stat | awk '/^Pages purgeable:/ {gsub(/\./, "", $NF); print $NF}')
    purgeable_pages="${purgeable_pages:-0}"
    # Available ≈ free + speculative + inactive + purgeable
    local avail_pages=$(( free_pages + speculative_pages + inactive_pages + purgeable_pages ))
    if [[ "$avail_pages" -gt 0 && "$page_size" -gt 0 ]]; then
      local free_bytes=$(( avail_pages * page_size ))
      avail_mem_gb=$(( free_bytes / 1073741824 ))
    fi
  else
    # Linux: MemAvailable (KB) from /proc/meminfo, defaulting to 8GB
    local avail_kb
    avail_kb=$(awk '/MemAvailable/ {print $2}' /proc/meminfo 2>/dev/null || echo "8388608")
    avail_mem_gb=$(( avail_kb / 1048576 ))
  fi
  [[ "$avail_mem_gb" -lt 1 ]] && avail_mem_gb=1
  # One worker per WORKER_MEM_GB of available memory, floor 1
  local max_by_mem=$(( avail_mem_gb / WORKER_MEM_GB ))
  [[ "$max_by_mem" -lt 1 ]] && max_by_mem=1

  # ── Budget remaining ──
  # cct-cost.sh prints a dollar amount or the literal "unlimited"
  local max_by_budget="$MAX_WORKERS"
  local remaining_usd
  remaining_usd=$("$SCRIPT_DIR/cct-cost.sh" remaining-budget 2>/dev/null || echo "unlimited")
  if [[ "$remaining_usd" != "unlimited" && -n "$remaining_usd" ]]; then
    # awk handles the float compare; exit 0 (true) iff both values positive
    if awk -v r="$remaining_usd" -v c="$EST_COST_PER_JOB" 'BEGIN { exit !(r > 0 && c > 0) }'; then
      max_by_budget=$(awk -v r="$remaining_usd" -v c="$EST_COST_PER_JOB" 'BEGIN { printf "%.0f", r / c }')
      [[ "$max_by_budget" -lt 0 ]] && max_by_budget=0
    else
      max_by_budget=0
    fi
  fi

  # ── Queue depth (don't over-provision) ──
  # Demand ceiling: no point keeping more workers than queued + active jobs
  local queue_depth active_count
  queue_depth=$(jq -r '.queued | length' "$STATE_FILE" 2>/dev/null || echo 0)
  queue_depth="${queue_depth:-0}"
  [[ ! "$queue_depth" =~ ^[0-9]+$ ]] && queue_depth=0
  active_count=$(get_active_count)
  active_count="${active_count:-0}"
  [[ ! "$active_count" =~ ^[0-9]+$ ]] && active_count=0
  local max_by_queue=$(( queue_depth + active_count ))
  [[ "$max_by_queue" -lt 1 ]] && max_by_queue=1

  # ── Compute final value ── (minimum of all ceilings, incl. MAX_WORKERS)
  local computed="$max_by_cpu"
  [[ "$max_by_mem" -lt "$computed" ]] && computed="$max_by_mem"
  [[ "$max_by_budget" -lt "$computed" ]] && computed="$max_by_budget"
  [[ "$max_by_queue" -lt "$computed" ]] && computed="$max_by_queue"
  [[ "$MAX_WORKERS" -lt "$computed" ]] && computed="$MAX_WORKERS"

  # Respect fleet-assigned ceiling if set
  if [[ -n "${FLEET_MAX_PARALLEL:-}" && "$FLEET_MAX_PARALLEL" -lt "$computed" ]]; then
    computed="$FLEET_MAX_PARALLEL"
  fi

  # Clamp to min_workers (applied last, so it can override every ceiling)
  [[ "$computed" -lt "$MIN_WORKERS" ]] && computed="$MIN_WORKERS"

  MAX_PARALLEL="$computed"

  if [[ "$MAX_PARALLEL" -ne "$prev_max" ]]; then
    daemon_log INFO "Auto-scale: ${prev_max} → ${MAX_PARALLEL} (cpu=${max_by_cpu} mem=${max_by_mem} budget=${max_by_budget} queue=${max_by_queue})"
    emit_event "daemon.scale" \
      "from=$prev_max" \
      "to=$MAX_PARALLEL" \
      "max_by_cpu=$max_by_cpu" \
      "max_by_mem=$max_by_mem" \
      "max_by_budget=$max_by_budget" \
      "max_by_queue=$max_by_queue" \
      "cpu_cores=$cpu_cores" \
      "avail_mem_gb=$avail_mem_gb" \
      "remaining_usd=$remaining_usd"
  fi
}
2211
+
2212
+ # ─── Fleet Config Reload ──────────────────────────────────────────────────
2213
+ # Checks for fleet-reload.flag and reloads MAX_PARALLEL from fleet-managed config
2214
+
2215
daemon_reload_config() {
  # Hot-reload MAX_PARALLEL from the fleet-managed config when the fleet
  # controller drops a reload flag; the flag is consumed (deleted) either way.
  # Also records the new value as FLEET_MAX_PARALLEL, the fleet ceiling that
  # daemon_auto_scale respects.
  local flag_file="$HOME/.claude-teams/fleet-reload.flag"
  [[ -f "$flag_file" ]] || return 0

  local cfg_file=".claude/.fleet-daemon-config.json"
  if [[ -f "$cfg_file" ]]; then
    local requested
    requested=$(jq -r '.max_parallel // empty' "$cfg_file" 2>/dev/null || true)
    if [[ -n "$requested" && "$requested" != "null" ]]; then
      local prev="$MAX_PARALLEL"
      FLEET_MAX_PARALLEL="$requested"
      MAX_PARALLEL="$requested"
      daemon_log INFO "Fleet reload: max_parallel ${prev} → ${MAX_PARALLEL} (fleet ceiling: ${FLEET_MAX_PARALLEL})"
      emit_event "daemon.fleet_reload" "from=$prev" "to=$MAX_PARALLEL"
    fi
  fi

  rm -f "$flag_file"
}
2236
+
2237
+ # ─── Self-Optimizing Metrics Loop ──────────────────────────────────────────
2238
+
2239
daemon_self_optimize() {
  # Weekly-window feedback loop: compute DORA-style metrics (change-failure
  # rate, median cycle time, deploy frequency, MTTR) from the last 7 days of
  # events and adjust daemon tuning accordingly (PIPELINE_TEMPLATE,
  # MAX_PARALLEL, POLL_INTERVAL, AUTO_TEMPLATE). Adjustments are recorded in
  # the state file and persisted to daemon-config.json so they survive a
  # restart. No-op unless SELF_OPTIMIZE=true.
  if [[ "${SELF_OPTIMIZE:-false}" != "true" ]]; then
    return
  fi

  if [[ ! -f "$EVENTS_FILE" ]]; then
    return
  fi

  daemon_log INFO "Running self-optimization check"

  # Read DORA metrics from recent events (last 7 days)
  local cutoff_epoch
  cutoff_epoch=$(( $(now_epoch) - (7 * 86400) ))

  local period_events
  period_events=$(jq -c "select(.ts_epoch >= $cutoff_epoch)" "$EVENTS_FILE" 2>/dev/null || true)

  if [[ -z "$period_events" ]]; then
    daemon_log INFO "No recent events for optimization"
    return
  fi

  local total_completed successes failures
  total_completed=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed")] | length')
  successes=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success")] | length')
  failures=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "failure")] | length')

  # Change Failure Rate (%). Float division in awk via -v, rounded to int.
  local cfr=0
  if [[ "$total_completed" -gt 0 ]]; then
    cfr=$(awk -v f="$failures" -v t="$total_completed" 'BEGIN {printf "%.0f", (f / t) * 100}')
  fi

  # Cycle time (median, in seconds) across successful completions
  local cycle_time_median
  cycle_time_median=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success") | .duration_s // 0] | sort | if length > 0 then .[length/2 | floor] else 0 end')

  # Deploy frequency (per week) — the window already spans 7 days, so this is
  # just the success count formatted with one decimal.
  local deploy_freq
  deploy_freq=$(awk -v n="$successes" 'BEGIN {printf "%.1f", n}')

  # MTTR: mean seconds from each failure to the next success (0 if none)
  local mttr
  mttr=$(echo "$period_events" | \
    jq -s '
      [.[] | select(.type == "pipeline.completed")] | sort_by(.ts_epoch // 0) |
      [range(length) as $i |
        if .[$i].result == "failure" then
          [.[$i+1:][] | select(.result == "success")][0] as $next |
          if $next and $next.ts_epoch and .[$i].ts_epoch then
            ($next.ts_epoch - .[$i].ts_epoch)
          else null end
        else null end
      ] | map(select(. != null)) |
      if length > 0 then (add / length | floor) else 0 end
    ')

  local adjustments=()

  # ── CFR > 20%: enable compound_quality; > 40%: escalate to full template ──
  if [[ "$cfr" -gt 40 ]]; then
    PIPELINE_TEMPLATE="full"
    adjustments+=("template→full (CFR ${cfr}% > 40%)")
    daemon_log WARN "Self-optimize: CFR ${cfr}% critical — switching to full template"
  elif [[ "$cfr" -gt 20 ]]; then
    adjustments+=("compound_quality enabled (CFR ${cfr}% > 20%)")
    daemon_log WARN "Self-optimize: CFR ${cfr}% elevated — enabling compound quality"
  fi

  # ── Lead time > 4hrs: increase max_parallel, reduce poll_interval ──
  if [[ "$cycle_time_median" -gt 14400 ]]; then
    MAX_PARALLEL=$((MAX_PARALLEL + 1))
    if [[ "$POLL_INTERVAL" -gt 30 ]]; then
      POLL_INTERVAL=$((POLL_INTERVAL / 2))
    fi
    adjustments+=("max_parallel→${MAX_PARALLEL}, poll_interval→${POLL_INTERVAL}s (lead time > 4hrs)")
    daemon_log WARN "Self-optimize: lead time $(format_duration "$cycle_time_median") — increasing parallelism"
  elif [[ "$cycle_time_median" -gt 7200 ]]; then
    # ── Lead time > 2hrs: enable auto_template for fast-pathing ──
    AUTO_TEMPLATE="true"
    adjustments+=("auto_template enabled (lead time > 2hrs)")
    daemon_log INFO "Self-optimize: lead time $(format_duration "$cycle_time_median") — enabling adaptive templates"
  fi

  # ── Deploy freq < 1/day (< 7/week): enable merge stage ──
  # Float compare in awk (bc is not guaranteed to be installed; awk is
  # already a hard dependency of this script).
  if awk -v f="$deploy_freq" 'BEGIN {exit !(f < 7)}' 2>/dev/null; then
    adjustments+=("merge stage recommended (deploy freq ${deploy_freq}/week)")
    daemon_log INFO "Self-optimize: low deploy frequency — consider enabling merge stage"
  fi

  # ── MTTR > 2hrs: enable auto_rollback ──
  if [[ "$mttr" -gt 7200 ]]; then
    adjustments+=("auto_rollback recommended (MTTR $(format_duration "$mttr"))")
    daemon_log WARN "Self-optimize: high MTTR $(format_duration "$mttr") — consider enabling auto-rollback"
  fi

  # Write adjustments to state and persist to config
  if [[ ${#adjustments[@]} -gt 0 ]]; then
    local adj_str
    adj_str=$(printf '%s; ' "${adjustments[@]}")

    local tmp_state
    tmp_state=$(jq \
      --arg adj "$adj_str" \
      --arg ts "$(now_iso)" \
      '.last_optimization = {timestamp: $ts, adjustments: $adj}' \
      "$STATE_FILE")
    atomic_write_state "$tmp_state"

    # ── Persist adjustments to daemon-config.json (survives restart) ──
    local config_file="${CONFIG_PATH:-.claude/daemon-config.json}"
    if [[ -f "$config_file" ]]; then
      local tmp_config
      tmp_config=$(jq \
        --argjson max_parallel "$MAX_PARALLEL" \
        --argjson poll_interval "$POLL_INTERVAL" \
        --arg template "$PIPELINE_TEMPLATE" \
        --arg auto_template "${AUTO_TEMPLATE:-false}" \
        --arg ts "$(now_iso)" \
        --arg adj "$adj_str" \
        '.max_parallel = $max_parallel |
         .poll_interval = $poll_interval |
         .pipeline_template = $template |
         .auto_template = ($auto_template == "true") |
         .last_optimization = {timestamp: $ts, adjustments: $adj}' \
        "$config_file")
      # Atomic write: tmp file + mv
      local tmp_cfg_file="${config_file}.tmp.$$"
      echo "$tmp_config" > "$tmp_cfg_file"
      mv "$tmp_cfg_file" "$config_file"
      daemon_log INFO "Self-optimize: persisted adjustments to ${config_file}"
    fi

    emit_event "daemon.optimize" "adjustments=${adj_str}" "cfr=$cfr" "cycle_time=$cycle_time_median" "deploy_freq=$deploy_freq" "mttr=$mttr"
    daemon_log SUCCESS "Self-optimization applied ${#adjustments[@]} adjustment(s)"
  else
    daemon_log INFO "Self-optimization: all metrics within thresholds"
  fi
}
2380
+
2381
+ # ─── Stale State Reaper ──────────────────────────────────────────────────────
2382
+ # Cleans old worktrees, pipeline artifacts, and completed state entries.
2383
+ # Called every N poll cycles (configurable via stale_reaper_interval).
2384
+
2385
#######################################
# Stale state reaper: removes daemon-created git worktrees, old pipeline
# artifact directories, and completed state entries older than the cutoff.
# Globals:   STALE_REAPER_ENABLED (read), STALE_REAPER_AGE_DAYS (read),
#            STATE_FILE (read/rewritten via atomic_write_state)
# Outputs:   log lines via daemon_log; emits a daemon.cleanup event
#######################################
daemon_cleanup_stale() {
  if [[ "${STALE_REAPER_ENABLED:-true}" != "true" ]]; then
    return
  fi

  daemon_log INFO "Running stale state reaper"
  local cleaned=0
  local age_days="${STALE_REAPER_AGE_DAYS:-7}"
  local age_secs=$((age_days * 86400))
  local now_e
  now_e=$(now_epoch)

  # ── 1. Clean old git worktrees ──
  if command -v git &>/dev/null; then
    # The sed in the process substitution already strips the porcelain
    # "worktree " prefix, so $wt_path IS the full path. (Previously the line
    # was piped through awk '{print $1}', which both duplicated the stripping
    # and truncated any worktree path containing spaces.)
    while IFS= read -r wt_path; do
      [[ -n "$wt_path" ]] || continue
      # Only clean daemon-created worktrees
      [[ "$wt_path" == *"daemon-issue-"* ]] || continue
      # Check worktree age via directory mtime (BSD stat, then GNU fallback)
      local mtime
      mtime=$(stat -f '%m' "$wt_path" 2>/dev/null || stat -c '%Y' "$wt_path" 2>/dev/null || echo "0")
      if [[ $((now_e - mtime)) -gt $age_secs ]]; then
        daemon_log INFO "Removing stale worktree: ${wt_path}"
        git worktree remove "$wt_path" --force 2>/dev/null || true
        cleaned=$((cleaned + 1))
      fi
    done < <(git worktree list --porcelain 2>/dev/null | grep '^worktree ' | sed 's/^worktree //')
  fi

  # ── 2. Clean old pipeline artifacts ──
  local artifacts_dir=".claude/pipeline-artifacts"
  if [[ -d "$artifacts_dir" ]]; then
    while IFS= read -r artifact_dir; do
      [[ -d "$artifact_dir" ]] || continue
      local mtime
      mtime=$(stat -f '%m' "$artifact_dir" 2>/dev/null || stat -c '%Y' "$artifact_dir" 2>/dev/null || echo "0")
      if [[ $((now_e - mtime)) -gt $age_secs ]]; then
        daemon_log INFO "Removing stale artifact: ${artifact_dir}"
        rm -rf -- "$artifact_dir"
        cleaned=$((cleaned + 1))
      fi
    done < <(find "$artifacts_dir" -mindepth 1 -maxdepth 1 -type d 2>/dev/null)
  fi

  # ── 3. Prune completed/failed state entries older than age_days ──
  if [[ -f "$STATE_FILE" ]]; then
    local cutoff_iso
    cutoff_iso=$(epoch_to_iso $((now_e - age_secs)))
    local before_count after_count
    before_count=$(jq '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)
    local tmp_state
    # ISO-8601 strings compare chronologically, so a plain string > works here.
    tmp_state=$(jq --arg cutoff "$cutoff_iso" \
      '.completed = [.completed[] | select(.completed_at > $cutoff)]' \
      "$STATE_FILE" 2>/dev/null) || true
    if [[ -n "$tmp_state" ]]; then
      atomic_write_state "$tmp_state"
      after_count=$(jq '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)
      local pruned=$((before_count - after_count))
      if [[ "$pruned" -gt 0 ]]; then
        daemon_log INFO "Pruned ${pruned} old completed state entries"
        cleaned=$((cleaned + pruned))
      fi
    fi
  fi

  if [[ "$cleaned" -gt 0 ]]; then
    emit_event "daemon.cleanup" "cleaned=$cleaned" "age_days=$age_days"
    daemon_log SUCCESS "Stale reaper cleaned ${cleaned} item(s)"
  else
    daemon_log INFO "Stale reaper: nothing to clean"
  fi
}
2458
+
2459
+ # ─── Poll Loop ───────────────────────────────────────────────────────────────
2460
+
2461
# Cycle counter driving the modulo-scheduled maintenance hooks below.
POLL_CYCLE_COUNT=0

#######################################
# Main daemon loop. Each cycle: poll issues, reap finished jobs, health-check,
# then run periodic maintenance (config reload / degradation check /
# auto-scale / self-optimize / stale reaper) on their modulo schedules.
# Runs until SHUTDOWN_FLAG appears on disk.
# Globals: POLL_INTERVAL, MAX_PARALLEL, WATCH_LABEL, SHUTDOWN_FLAG,
#          STATE_FILE, LAST_PATROL_EPOCH, PATROL_INTERVAL (all read;
#          LAST_PATROL_EPOCH and POLL_CYCLE_COUNT written)
#######################################
daemon_poll_loop() {
  daemon_log INFO "Entering poll loop (interval: ${POLL_INTERVAL}s, max_parallel: ${MAX_PARALLEL})"
  daemon_log INFO "Watching for label: ${CYAN}${WATCH_LABEL}${RESET}"

  while [[ ! -f "$SHUTDOWN_FLAG" ]]; do
    daemon_poll_issues
    daemon_reap_completed
    daemon_health_check

    # Increment cycle counter (must be before all modulo checks)
    POLL_CYCLE_COUNT=$((POLL_CYCLE_COUNT + 1))

    # Fleet config reload every 3 cycles
    if [[ $((POLL_CYCLE_COUNT % 3)) -eq 0 ]]; then
      daemon_reload_config
    fi

    # Check degradation every 5 poll cycles
    if [[ $((POLL_CYCLE_COUNT % 5)) -eq 0 ]]; then
      daemon_check_degradation
    fi

    # Auto-scale every N cycles (default: 5)
    # NOTE(review): a zero-valued *_INTERVAL config would make these modulos
    # divide by zero — presumably config validation prevents that; confirm.
    if [[ $((POLL_CYCLE_COUNT % ${AUTO_SCALE_INTERVAL:-5})) -eq 0 ]]; then
      daemon_auto_scale
    fi

    # Self-optimize every N cycles (default: 10)
    if [[ $((POLL_CYCLE_COUNT % ${OPTIMIZE_INTERVAL:-10})) -eq 0 ]]; then
      daemon_self_optimize
    fi

    # Stale state reaper every N cycles (default: 10)
    if [[ $((POLL_CYCLE_COUNT % ${STALE_REAPER_INTERVAL:-10})) -eq 0 ]]; then
      daemon_cleanup_stale
    fi

    # Proactive patrol during quiet periods (no queued or active work, and
    # at least PATROL_INTERVAL seconds since the last patrol)
    local issue_count_now active_count_now
    issue_count_now=$(jq -r '.queued | length' "$STATE_FILE" 2>/dev/null || echo 0)
    active_count_now=$(get_active_count)
    if [[ "$issue_count_now" -eq 0 ]] && [[ "$active_count_now" -eq 0 ]]; then
      local now_e
      now_e=$(now_epoch)
      if [[ $((now_e - LAST_PATROL_EPOCH)) -ge "$PATROL_INTERVAL" ]]; then
        daemon_log INFO "No active work — running patrol"
        daemon_patrol --once
        LAST_PATROL_EPOCH=$now_e
      fi
    fi

    # Sleep in 1s intervals so we can catch shutdown quickly
    local i=0
    while [[ $i -lt $POLL_INTERVAL ]] && [[ ! -f "$SHUTDOWN_FLAG" ]]; do
      sleep 1
      i=$((i + 1))
    done
  done

  daemon_log INFO "Shutdown flag detected — exiting poll loop"
}
2524
+
2525
+ # ─── Graceful Shutdown Handler ───────────────────────────────────────────────
2526
+
2527
#######################################
# EXIT-trap handler for the foreground daemon (installed in daemon_start):
# removes the PID file and shutdown flag, then records a daemon.stopped event.
#######################################
cleanup_on_exit() {
  daemon_log INFO "Cleaning up..."
  rm -f "$PID_FILE" "$SHUTDOWN_FLAG"
  daemon_log INFO "Daemon stopped"
  emit_event "daemon.stopped" "pid=$$"
}
2533
+
2534
+ # ─── daemon start ───────────────────────────────────────────────────────────
2535
+
2536
#######################################
# Start the daemon: take the PID-file lock, run pre-flight checks, then either
# re-exec inside a detached tmux session (--detach) or run the foreground
# poll loop.
# Globals: PID_FILE, SHUTDOWN_FLAG, DETACH, CONFIG_PATH, NO_GITHUB,
#          SCRIPT_DIR, POLL_INTERVAL, MAX_PARALLEL, WATCH_LABEL
# Returns: 0 after launching tmux in detach mode; otherwise blocks in the
#          poll loop until shutdown. Exits non-zero on pre-flight failure or
#          when another daemon is already running.
#######################################
daemon_start() {
  echo -e "${PURPLE}${BOLD}━━━ shipwright daemon v${VERSION} ━━━${RESET}"
  echo ""

  # Acquire exclusive lock on PID file (prevents race between concurrent
  # starts). Open FD 9 in APPEND mode: 'exec 9>' would truncate the file on
  # open, destroying a live daemon's PID before the fallback below could read
  # it — the running daemon would then be misdetected as a stale PID file.
  exec 9>>"$PID_FILE"
  if ! flock -n 9 2>/dev/null; then
    # flock unavailable or lock held — fall back to PID check
    local existing_pid
    existing_pid=$(cat "$PID_FILE" 2>/dev/null || true)
    if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
      error "Daemon already running (PID: ${existing_pid})"
      info "Use ${CYAN}shipwright daemon stop${RESET} to stop it first"
      exit 1
    else
      warn "Stale PID file found — removing"
      rm -f "$PID_FILE"
      exec 9>>"$PID_FILE"
    fi
  fi

  # Load config
  load_config

  # Pre-flight
  if ! preflight_checks; then
    exit 1
  fi

  # Detach mode: re-exec in a tmux session
  if [[ "$DETACH" == "true" ]]; then
    if ! command -v tmux &>/dev/null; then
      error "tmux required for --detach mode"
      exit 1
    fi

    info "Starting daemon in detached tmux session: ${CYAN}cct-daemon${RESET}"

    # Build the command to run in tmux
    local cmd_args=("$SCRIPT_DIR/cct-daemon.sh" "start")
    if [[ -n "$CONFIG_PATH" ]]; then
      cmd_args+=("--config" "$CONFIG_PATH")
    fi
    if [[ "$NO_GITHUB" == "true" ]]; then
      cmd_args+=("--no-github")
    fi

    # Shell-quote each argument before flattening to the single command
    # string tmux expects. The previous "${cmd_args[*]}" form word-split
    # script paths / --config values containing spaces.
    local tmux_cmd
    tmux_cmd=$(printf '%q ' "${cmd_args[@]}")

    tmux new-session -d -s "cct-daemon" "$tmux_cmd" 2>/dev/null || {
      # Session may already exist — try killing and recreating
      tmux kill-session -t "cct-daemon" 2>/dev/null || true
      tmux new-session -d -s "cct-daemon" "$tmux_cmd"
    }

    success "Daemon started in tmux session ${CYAN}cct-daemon${RESET}"
    info "Attach with: ${DIM}tmux attach -t cct-daemon${RESET}"
    info "View logs: ${DIM}shipwright daemon logs --follow${RESET}"
    return 0
  fi

  # Foreground mode
  info "Starting daemon (PID: $$)"

  # Write PID file (we hold the flock, so the truncating write is safe here)
  echo "$$" > "$PID_FILE"

  # Remove stale shutdown flag
  rm -f "$SHUTDOWN_FLAG"

  # Initialize state
  init_state

  # Trap signals for graceful shutdown: signals only raise the flag; the
  # poll loop notices it and exits, then the EXIT trap cleans up.
  trap cleanup_on_exit EXIT
  trap 'touch "$SHUTDOWN_FLAG"' SIGINT SIGTERM

  # Reap any orphaned jobs from previous runs
  daemon_reap_completed

  daemon_log INFO "Daemon started successfully"
  daemon_log INFO "Config: poll_interval=${POLL_INTERVAL}s, max_parallel=${MAX_PARALLEL}, label=${WATCH_LABEL}"

  emit_event "daemon.started" \
    "pid=$$" \
    "poll_interval=$POLL_INTERVAL" \
    "max_parallel=$MAX_PARALLEL" \
    "watch_label=$WATCH_LABEL"

  # Enter poll loop
  daemon_poll_loop
}
2626
+
2627
+ # ─── daemon stop ─────────────────────────────────────────────────────────────
2628
+
2629
#######################################
# Stop a running daemon. Prefers a graceful shutdown via SHUTDOWN_FLAG,
# escalating to SIGTERM and finally SIGKILL if the process lingers, then
# removes the PID file, shutdown flag, and the cct-daemon tmux session.
# Globals: PID_FILE, SHUTDOWN_FLAG (read/removed)
#######################################
daemon_stop() {
  if [[ ! -f "$PID_FILE" ]]; then
    error "No daemon PID file found at $PID_FILE"
    info "Is the daemon running?"
    exit 1
  fi

  local daemon_pid
  daemon_pid=$(cat "$PID_FILE" 2>/dev/null || true)

  if [[ -z "$daemon_pid" ]]; then
    error "Empty PID file"
    rm -f "$PID_FILE"
    exit 1
  fi

  if ! kill -0 "$daemon_pid" 2>/dev/null; then
    warn "Daemon process (PID: ${daemon_pid}) is not running — cleaning up"
    rm -f "$PID_FILE" "$SHUTDOWN_FLAG"
    return 0
  fi

  info "Sending shutdown signal to daemon (PID: ${daemon_pid})..."

  # Raise the flag the poll loop watches — the graceful path.
  touch "$SHUTDOWN_FLAG"

  # Give the daemon up to 30 seconds to exit on its own.
  local elapsed
  for (( elapsed = 0; elapsed < 30; elapsed++ )); do
    kill -0 "$daemon_pid" 2>/dev/null || break
    sleep 1
  done

  # Escalate: TERM first, then KILL as a last resort.
  if kill -0 "$daemon_pid" 2>/dev/null; then
    warn "Daemon didn't stop gracefully — sending SIGTERM"
    kill "$daemon_pid" 2>/dev/null || true
    sleep 2
    if kill -0 "$daemon_pid" 2>/dev/null; then
      warn "Sending SIGKILL"
      kill -9 "$daemon_pid" 2>/dev/null || true
    fi
  fi

  rm -f "$PID_FILE" "$SHUTDOWN_FLAG"

  # Also kill tmux session if it exists
  tmux kill-session -t "cct-daemon" 2>/dev/null || true

  success "Daemon stopped"
}
2680
+
2681
+ # ─── daemon status ───────────────────────────────────────────────────────────
2682
+
2683
#######################################
# Print a human-readable status report: daemon liveness (PID-file + kill -0
# check) plus active, queued, and recently-completed jobs from STATE_FILE.
# Globals: PID_FILE, STATE_FILE, MAX_PARALLEL (all read)
# Outputs: formatted dashboard to stdout
#######################################
daemon_status() {
  echo -e "${PURPLE}${BOLD}━━━ Daemon Status ━━━${RESET}"
  echo ""

  # Check if running
  local running=false
  if [[ -f "$PID_FILE" ]]; then
    local pid
    pid=$(cat "$PID_FILE" 2>/dev/null || true)
    if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
      running=true
      echo -e " ${GREEN}●${RESET} ${BOLD}Running${RESET} ${DIM}(PID: ${pid})${RESET}"
    else
      echo -e " ${RED}●${RESET} ${BOLD}Stopped${RESET} ${DIM}(stale PID file)${RESET}"
    fi
  else
    echo -e " ${RED}●${RESET} ${BOLD}Stopped${RESET}"
  fi

  if [[ ! -f "$STATE_FILE" ]]; then
    echo ""
    echo -e " ${DIM}No state file found. Start the daemon first.${RESET}"
    return
  fi

  # Read state
  local last_poll started_at
  last_poll=$(jq -r '.last_poll // "never"' "$STATE_FILE" 2>/dev/null)
  started_at=$(jq -r '.started_at // "unknown"' "$STATE_FILE" 2>/dev/null)

  echo -e " Started: ${DIM}${started_at}${RESET}"
  echo -e " Last poll: ${DIM}${last_poll}${RESET}"
  echo ""

  # Active jobs
  local active_count
  active_count=$(jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo 0)

  echo -e "${BOLD} Active Jobs (${active_count}/${MAX_PARALLEL})${RESET}"
  if [[ "$active_count" -gt 0 ]]; then
    while IFS=$'\t' read -r num title started; do
      local age=""
      if [[ "$started" != "—" ]] && [[ "$running" == "true" ]]; then
        # NOTE(review): 'date -j -f' is BSD/macOS-only — on GNU date this
        # fails and the age silently falls back to 0/blank; confirm whether
        # a GNU fallback (date -d) is wanted here.
        local start_epoch
        start_epoch=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started" +%s 2>/dev/null || echo 0)
        if [[ "$start_epoch" -gt 0 ]]; then
          age=" ($(format_duration $(($(now_epoch) - start_epoch))))"
        fi
      fi
      echo -e " ${CYAN}#${num}${RESET} ${title} ${DIM}${age}${RESET}"
    done < <(jq -r '.active_jobs[] | " \(.issue)\t\(.title // "—")\t\(.started_at // "—")"' "$STATE_FILE" 2>/dev/null)
  else
    echo -e " ${DIM}None${RESET}"
  fi
  echo ""

  # Queue
  local queue_count
  queue_count=$(jq -r '.queued | length' "$STATE_FILE" 2>/dev/null || echo 0)

  echo -e "${BOLD} Queued (${queue_count})${RESET}"
  if [[ "$queue_count" -gt 0 ]]; then
    while read -r num; do
      echo -e " ${DIM}#${num}${RESET}"
    done < <(jq -r '.queued[]' "$STATE_FILE" 2>/dev/null)
  else
    echo -e " ${DIM}None${RESET}"
  fi
  echo ""

  # Recent completed
  local completed_count
  completed_count=$(jq -r '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)

  echo -e "${BOLD} Recently Completed (${completed_count})${RESET}"
  if [[ "$completed_count" -gt 0 ]]; then
    # Show last 10 (newest first)
    while IFS=$'\t' read -r num result dur; do
      local icon
      if [[ "$result" == "success" ]]; then
        icon="${GREEN}✓${RESET}"
      else
        icon="${RED}✗${RESET}"
      fi
      echo -e " ${icon} ${CYAN}#${num}${RESET} ${result} ${DIM}(${dur})${RESET}"
    done < <(jq -r '.completed | reverse | .[:10][] | "\(.issue)\t\(.result)\t\(.duration // "—")"' "$STATE_FILE" 2>/dev/null)
  else
    echo -e " ${DIM}None${RESET}"
  fi
  echo ""
}
2774
+
2775
+ # ─── daemon init ─────────────────────────────────────────────────────────────
2776
+
2777
#######################################
# Generate a default daemon config at .claude/daemon-config.json.
# Refuses to overwrite an existing config. The heredoc delimiter is quoted
# ('CONFIGEOF') so the JSON is written literally, with no expansion.
#######################################
daemon_init() {
  local config_dir=".claude"
  local config_file="${config_dir}/daemon-config.json"

  if [[ -f "$config_file" ]]; then
    warn "Config file already exists: $config_file"
    info "Delete it first if you want to regenerate"
    return 0
  fi

  mkdir -p "$config_dir"

  cat > "$config_file" << 'CONFIGEOF'
{
  "watch_label": "ready-to-build",
  "poll_interval": 60,
  "max_parallel": 2,
  "pipeline_template": "autonomous",
  "skip_gates": true,
  "model": "opus",
  "base_branch": "main",
  "on_success": {
    "remove_label": "ready-to-build",
    "add_label": "pipeline/complete",
    "close_issue": false
  },
  "on_failure": {
    "add_label": "pipeline/failed",
    "comment_log_lines": 50
  },
  "notifications": {
    "slack_webhook": null
  },
  "health": {
    "stale_timeout_s": 1800
  },
  "priority_labels": "urgent,p0,high,p1,normal,p2,low,p3",
  "alerts": {
    "degradation_window": 5,
    "cfr_threshold": 30,
    "success_threshold": 50
  },
  "patrol": {
    "interval": 3600,
    "max_issues": 5,
    "label": "auto-patrol"
  },
  "auto_template": false,
  "template_map": {
    "hotfix|incident": "hotfix",
    "security": "enterprise"
  },
  "max_retries": 2,
  "retry_escalation": true,
  "self_optimize": false,
  "optimize_interval": 10,
  "priority_lane": false,
  "priority_lane_labels": "hotfix,incident,p0,urgent",
  "priority_lane_max": 1,
  "watch_mode": "repo",
  "org": null,
  "repo_filter": null,
  "auto_scale": false,
  "auto_scale_interval": 5,
  "max_workers": 8,
  "min_workers": 1,
  "worker_mem_gb": 4,
  "estimated_cost_per_job_usd": 5.0
}
CONFIGEOF

  success "Generated config: ${config_file}"
  echo ""
  echo -e "${DIM}Edit this file to customize the daemon behavior, then run:${RESET}"
  echo -e " ${CYAN}shipwright daemon start${RESET}"
}
2853
+
2854
+ # ─── daemon logs ─────────────────────────────────────────────────────────────
2855
+
2856
#######################################
# Show the daemon log: the last 100 lines by default, or stream continuously
# when FOLLOW=true (set by the --follow flag).
# Globals: LOG_FILE, FOLLOW (read)
#######################################
daemon_logs() {
  if [[ ! -f "$LOG_FILE" ]]; then
    warn "No log file found at $LOG_FILE"
    info "Start the daemon first with ${CYAN}shipwright daemon start${RESET}"
    return 0
  fi

  # Non-follow mode: just dump the tail and return.
  if [[ "$FOLLOW" != "true" ]]; then
    tail -100 "$LOG_FILE"
    return 0
  fi

  info "Following daemon log (Ctrl-C to stop)..."
  echo ""
  tail -f "$LOG_FILE"
}
2871
+
2872
+ # ─── Metrics Dashboard ─────────────────────────────────────────────────────
2873
+
2874
#######################################
# Metrics dashboard: computes DORA four-keys plus effectiveness, throughput,
# autonomy, and patrol metrics from the JSONL events file.
# Flags:   --period N  look-back window in days (default 7)
#          --json      emit machine-readable JSON instead of the dashboard
# Globals: EVENTS_FILE (read)
# Outputs: dashboard or JSON document on stdout
#######################################
daemon_metrics() {
  local period_days=7
  local json_output=false

  # Parse metrics flags
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --period) period_days="${2:-7}"; shift 2 ;;
      --json) json_output=true; shift ;;
      *) shift ;;
    esac
  done

  if [[ ! -f "$EVENTS_FILE" ]]; then
    error "No events file found at $EVENTS_FILE"
    info "Events are generated when running ${CYAN}shipwright pipeline${RESET} or ${CYAN}shipwright daemon${RESET}"
    exit 1
  fi

  if ! command -v jq &>/dev/null; then
    error "jq is required for metrics. Install: brew install jq"
    exit 1
  fi

  # Calculate cutoff timestamp
  local cutoff_epoch
  cutoff_epoch=$(( $(now_epoch) - (period_days * 86400) ))
  local cutoff_iso
  cutoff_iso=$(epoch_to_iso "$cutoff_epoch")

  # Filter events within period (prefer ts_epoch when available).
  # NOTE(review): this relies on jq's `//` alternative operator falling back
  # to the ISO-string comparison when the ts_epoch comparison is false/null —
  # confirm the precedence of `//` vs `>=` does what is intended here.
  local period_events
  period_events=$(jq -c "select(.ts_epoch >= $cutoff_epoch // .ts >= \"$cutoff_iso\")" "$EVENTS_FILE" 2>/dev/null)

  if [[ -z "$period_events" ]]; then
    warn "No events in the last ${period_days} day(s)"
    return 0
  fi

  # ── DORA: Deployment Frequency ──
  local total_completed successes failures
  total_completed=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed")] | length')
  successes=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success")] | length')
  failures=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "failure")] | length')

  # Successful pipelines per week, normalized from the period length.
  local deploy_freq=""
  if [[ "$period_days" -gt 0 ]]; then
    deploy_freq=$(echo "$successes $period_days" | awk '{printf "%.1f", $1 / ($2 / 7)}')
  fi

  # ── DORA: Cycle Time (median pipeline duration for successes) ──
  local cycle_time_median cycle_time_p95
  cycle_time_median=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success") | .duration_s] | sort | if length > 0 then .[length/2 | floor] else 0 end')
  cycle_time_p95=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success") | .duration_s] | sort | if length > 0 then .[length * 95 / 100 | floor] else 0 end')

  # ── DORA: Change Failure Rate ──
  local cfr="0"
  if [[ "$total_completed" -gt 0 ]]; then
    cfr=$(echo "$failures $total_completed" | awk '{printf "%.1f", ($1 / $2) * 100}')
  fi

  # ── DORA: MTTR (average time between failure and next success) ──
  local mttr="0"
  # Real MTTR: time gap between each failure event and the next success event
  mttr=$(echo "$period_events" | \
    jq -s '
      [.[] | select(.type == "pipeline.completed")] | sort_by(.ts_epoch // 0) |
      [range(length) as $i |
        if .[$i].result == "failure" then
          [.[$i+1:][] | select(.result == "success")][0] as $next |
          if $next and $next.ts_epoch and .[$i].ts_epoch then
            ($next.ts_epoch - .[$i].ts_epoch)
          else null end
        else null end
      ] | map(select(. != null)) |
      if length > 0 then (add / length | floor) else 0 end
    ')

  # ── DX: Compound quality first-pass rate ──
  # NOTE(review): compound_events is declared but never assigned or used.
  local compound_events first_pass_total first_pass_success
  first_pass_total=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "compound.cycle" and .cycle == 1)] | length')
  first_pass_success=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "compound.cycle" and .cycle == 1 and .passed == "true")] | length')
  local first_pass_pct="0"
  [[ "$first_pass_total" -gt 0 ]] && first_pass_pct=$(echo "$first_pass_success $first_pass_total" | awk '{printf "%.0f", ($1/$2)*100}')

  # Average number of compound cycles per issue (one decimal place).
  local avg_cycles
  avg_cycles=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "compound.cycle")] | if length > 0 then (group_by(.issue) | map(max_by(.cycle) | .cycle) | add / length) else 0 end | . * 10 | floor / 10')

  # ── Throughput ──
  # NOTE(review): prs_created is assigned but not referenced again below.
  local issues_processed prs_created
  issues_processed=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.started") | .issue] | unique | length')
  prs_created=$successes

  # ── Stage Timings ──
  local avg_stage_timings
  avg_stage_timings=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "stage.completed")] | group_by(.stage) | map({stage: .[0].stage, avg: ([.[].duration_s] | add / length | floor)}) | sort_by(.avg) | reverse')

  # ── Autonomy ──
  local daemon_spawns daemon_reaps daemon_success
  daemon_spawns=$(echo "$period_events" | jq -s '[.[] | select(.type == "daemon.spawn")] | length')
  daemon_reaps=$(echo "$period_events" | jq -s '[.[] | select(.type == "daemon.reap")] | length')
  daemon_success=$(echo "$period_events" | jq -s '[.[] | select(.type == "daemon.reap" and .result == "success")] | length')
  local autonomy_pct="0"
  [[ "$daemon_reaps" -gt 0 ]] && autonomy_pct=$(echo "$daemon_success $daemon_reaps" | awk '{printf "%.1f", ($1/$2)*100}')

  # ── Patrol ──
  local patrol_runs patrol_findings patrol_issues_created patrol_auto_resolved
  patrol_runs=$(echo "$period_events" | jq -s '[.[] | select(.type == "patrol.completed")] | length')
  patrol_findings=$(echo "$period_events" | jq -s '[.[] | select(.type == "patrol.finding")] | length')
  patrol_issues_created=$(echo "$period_events" | jq -s '[.[] | select(.type == "patrol.issue_created")] | length')
  # Auto-resolved: patrol issues that were later fixed by a pipeline
  patrol_auto_resolved=$(echo "$period_events" | jq -s '
    [.[] | select(.type == "patrol.issue_created") | .issue // empty] as $patrol_issues |
    [.[] | select(.type == "daemon.reap" and .result == "success") | .issue // empty] as $completed |
    [$patrol_issues[] | select(. as $p | $completed | any(. == $p))] | length
  ' 2>/dev/null || echo "0")

  # ── DORA Scoring ──
  # Map a metric value to an Elite/High/Medium/Low grade. awk is used for the
  # float comparisons (deploy_freq / cfr are decimal strings).
  dora_grade() {
    local metric="$1" value="$2"
    case "$metric" in
      deploy_freq)
        if awk "BEGIN{exit !($value >= 7)}" 2>/dev/null; then echo "Elite"; return; fi
        if awk "BEGIN{exit !($value >= 1)}" 2>/dev/null; then echo "High"; return; fi
        if awk "BEGIN{exit !($value >= 0.25)}" 2>/dev/null; then echo "Medium"; return; fi
        echo "Low" ;;
      cycle_time)
        [[ "$value" -lt 3600 ]] && echo "Elite" && return
        [[ "$value" -lt 86400 ]] && echo "High" && return
        [[ "$value" -lt 604800 ]] && echo "Medium" && return
        echo "Low" ;;
      cfr)
        if awk "BEGIN{exit !($value < 5)}" 2>/dev/null; then echo "Elite"; return; fi
        if awk "BEGIN{exit !($value < 10)}" 2>/dev/null; then echo "High"; return; fi
        if awk "BEGIN{exit !($value < 15)}" 2>/dev/null; then echo "Medium"; return; fi
        echo "Low" ;;
      mttr)
        [[ "$value" -lt 3600 ]] && echo "Elite" && return
        [[ "$value" -lt 86400 ]] && echo "High" && return
        echo "Medium" ;;
    esac
  }

  local df_grade ct_grade cfr_grade mttr_grade
  df_grade=$(dora_grade deploy_freq "${deploy_freq:-0}")
  ct_grade=$(dora_grade cycle_time "${cycle_time_median:-0}")
  cfr_grade=$(dora_grade cfr "${cfr:-0}")
  mttr_grade=$(dora_grade mttr "${mttr:-0}")

  # Colored bullet for each grade level.
  grade_icon() {
    case "$1" in
      Elite) echo "${GREEN}★${RESET}" ;;
      High) echo "${CYAN}●${RESET}" ;;
      Medium) echo "${YELLOW}◐${RESET}" ;;
      Low) echo "${RED}○${RESET}" ;;
    esac
  }

  # ── JSON Output ──
  if [[ "$json_output" == "true" ]]; then
    jq -n \
      --arg period "${period_days}d" \
      --argjson deploy_freq "${deploy_freq:-0}" \
      --argjson cycle_time_median "${cycle_time_median:-0}" \
      --argjson cycle_time_p95 "${cycle_time_p95:-0}" \
      --arg cfr "$cfr" \
      --argjson mttr "${mttr:-0}" \
      --arg df_grade "$df_grade" \
      --arg ct_grade "$ct_grade" \
      --arg cfr_grade "$cfr_grade" \
      --arg mttr_grade "$mttr_grade" \
      --argjson total_completed "$total_completed" \
      --argjson successes "$successes" \
      --argjson failures "$failures" \
      --arg first_pass_pct "$first_pass_pct" \
      --arg avg_cycles "${avg_cycles:-0}" \
      --argjson issues_processed "$issues_processed" \
      --argjson daemon_spawns "$daemon_spawns" \
      --arg autonomy_pct "$autonomy_pct" \
      --argjson patrol_runs "$patrol_runs" \
      --argjson patrol_findings "$patrol_findings" \
      --argjson patrol_issues_created "$patrol_issues_created" \
      --argjson patrol_auto_resolved "${patrol_auto_resolved:-0}" \
      '{
        period: $period,
        dora: {
          deploy_frequency: { value: $deploy_freq, unit: "PRs/week", grade: $df_grade },
          cycle_time: { median_s: $cycle_time_median, p95_s: $cycle_time_p95, grade: $ct_grade },
          change_failure_rate: { pct: ($cfr | tonumber), grade: $cfr_grade },
          mttr: { avg_s: $mttr, grade: $mttr_grade }
        },
        effectiveness: {
          first_pass_pct: ($first_pass_pct | tonumber),
          avg_compound_cycles: ($avg_cycles | tonumber)
        },
        throughput: {
          issues_processed: $issues_processed,
          pipelines_completed: $total_completed,
          successes: $successes,
          failures: $failures
        },
        autonomy: {
          daemon_spawns: $daemon_spawns,
          autonomy_pct: ($autonomy_pct | tonumber)
        },
        patrol: {
          patrols_run: $patrol_runs,
          findings: $patrol_findings,
          issues_created: $patrol_issues_created,
          auto_resolved: $patrol_auto_resolved
        }
      }'
    return 0
  fi

  # ── Dashboard Output ──
  echo ""
  echo -e "${PURPLE}${BOLD}━━━ Autonomous Team Metrics ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  echo -e " Period: last ${period_days} day(s) ${DIM}$(now_iso)${RESET}"
  echo ""

  echo -e "${BOLD} DORA FOUR KEYS${RESET}"
  echo -e " Deploy Frequency ${deploy_freq:-0} PRs/week $(grade_icon "$df_grade") $df_grade"
  echo -e " Cycle Time (median) $(format_duration "${cycle_time_median:-0}") $(grade_icon "$ct_grade") $ct_grade"
  echo -e " Change Failure ${cfr}% (${failures}/${total_completed}) $(grade_icon "$cfr_grade") $cfr_grade"
  echo -e " MTTR $(format_duration "${mttr:-0}") $(grade_icon "$mttr_grade") $mttr_grade"
  echo ""

  echo -e "${BOLD} EFFECTIVENESS${RESET}"
  echo -e " First-pass quality ${first_pass_pct}% (${first_pass_success}/${first_pass_total})"
  echo -e " Compound cycles avg ${avg_cycles:-0}"
  echo ""

  echo -e "${BOLD} THROUGHPUT${RESET}"
  echo -e " Issues processed ${issues_processed}"
  echo -e " Pipelines completed ${total_completed} (${GREEN}${successes} passed${RESET}, ${RED}${failures} failed${RESET})"
  echo ""

  # Stage breakdown (only when stage.completed events exist)
  local stage_count
  stage_count=$(echo "$avg_stage_timings" | jq 'length' 2>/dev/null || echo 0)
  if [[ "$stage_count" -gt 0 ]]; then
    echo -e "${BOLD} STAGE TIMINGS (avg)${RESET}"
    echo "$avg_stage_timings" | jq -r '.[] | " \(.stage)\t\(.avg)s"' 2>/dev/null | \
      while IFS=$'\t' read -r stage dur; do
        printf " %-20s %s\n" "$stage" "$(format_duration "${dur%s}")"
      done
    echo ""
  fi

  echo -e "${BOLD} AUTONOMY${RESET}"
  echo -e " Daemon-spawned ${daemon_spawns} pipeline(s)"
  if [[ "$daemon_reaps" -gt 0 ]]; then
    echo -e " Success rate ${autonomy_pct}% (${daemon_success}/${daemon_reaps})"
  fi
  echo ""

  echo -e "${BOLD} PATROL${RESET}"
  echo -e " Patrols run ${patrol_runs}"
  echo -e " Findings ${patrol_findings}"
  echo -e " Issues created ${patrol_issues_created}"
  echo -e " Auto-resolved ${patrol_auto_resolved:-0}"
  echo ""

  echo -e "${PURPLE}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  echo ""
}
3147
+
3148
+ # ─── Command Router ─────────────────────────────────────────────────────────
3149
+
3150
# ─── Command Router ─────────────────────────────────────────────────────────

setup_dirs

# Dispatch the parsed subcommand; unknown commands print help and fail.
case "$SUBCOMMAND" in
  start)   daemon_start ;;
  stop)    daemon_stop ;;
  status)  daemon_status ;;
  init)    daemon_init ;;
  logs)    daemon_logs ;;
  metrics) daemon_metrics "$@" ;;
  triage)  daemon_triage_show "$@" ;;
  patrol)  daemon_patrol "$@" ;;
  test)    exec "$SCRIPT_DIR/cct-daemon-test.sh" "$@" ;;
  help|--help|-h) show_help ;;
  *)
    error "Unknown command: ${SUBCOMMAND}"
    echo ""
    show_help
    exit 1
    ;;
esac