shipwright-cli 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +926 -0
- package/claude-code/CLAUDE.md.shipwright +125 -0
- package/claude-code/hooks/notify-idle.sh +35 -0
- package/claude-code/hooks/pre-compact-save.sh +57 -0
- package/claude-code/hooks/task-completed.sh +170 -0
- package/claude-code/hooks/teammate-idle.sh +68 -0
- package/claude-code/settings.json.template +184 -0
- package/completions/_shipwright +140 -0
- package/completions/shipwright.bash +89 -0
- package/completions/shipwright.fish +107 -0
- package/docs/KNOWN-ISSUES.md +199 -0
- package/docs/TIPS.md +331 -0
- package/docs/definition-of-done.example.md +16 -0
- package/docs/patterns/README.md +139 -0
- package/docs/patterns/audit-loop.md +149 -0
- package/docs/patterns/bug-hunt.md +183 -0
- package/docs/patterns/feature-implementation.md +159 -0
- package/docs/patterns/refactoring.md +183 -0
- package/docs/patterns/research-exploration.md +144 -0
- package/docs/patterns/test-generation.md +173 -0
- package/package.json +49 -0
- package/scripts/adapters/docker-deploy.sh +50 -0
- package/scripts/adapters/fly-deploy.sh +41 -0
- package/scripts/adapters/iterm2-adapter.sh +122 -0
- package/scripts/adapters/railway-deploy.sh +34 -0
- package/scripts/adapters/tmux-adapter.sh +87 -0
- package/scripts/adapters/vercel-deploy.sh +35 -0
- package/scripts/adapters/wezterm-adapter.sh +103 -0
- package/scripts/cct +242 -0
- package/scripts/cct-cleanup.sh +172 -0
- package/scripts/cct-cost.sh +590 -0
- package/scripts/cct-daemon.sh +3189 -0
- package/scripts/cct-doctor.sh +328 -0
- package/scripts/cct-fix.sh +478 -0
- package/scripts/cct-fleet.sh +904 -0
- package/scripts/cct-init.sh +282 -0
- package/scripts/cct-logs.sh +273 -0
- package/scripts/cct-loop.sh +1332 -0
- package/scripts/cct-memory.sh +1148 -0
- package/scripts/cct-pipeline.sh +3844 -0
- package/scripts/cct-prep.sh +1352 -0
- package/scripts/cct-ps.sh +168 -0
- package/scripts/cct-reaper.sh +390 -0
- package/scripts/cct-session.sh +284 -0
- package/scripts/cct-status.sh +169 -0
- package/scripts/cct-templates.sh +242 -0
- package/scripts/cct-upgrade.sh +422 -0
- package/scripts/cct-worktree.sh +405 -0
- package/scripts/postinstall.mjs +96 -0
- package/templates/pipelines/autonomous.json +71 -0
- package/templates/pipelines/cost-aware.json +95 -0
- package/templates/pipelines/deployed.json +79 -0
- package/templates/pipelines/enterprise.json +114 -0
- package/templates/pipelines/fast.json +63 -0
- package/templates/pipelines/full.json +104 -0
- package/templates/pipelines/hotfix.json +63 -0
- package/templates/pipelines/standard.json +91 -0
- package/tmux/claude-teams-overlay.conf +109 -0
- package/tmux/templates/architecture.json +19 -0
- package/tmux/templates/bug-fix.json +24 -0
- package/tmux/templates/code-review.json +24 -0
- package/tmux/templates/devops.json +19 -0
- package/tmux/templates/documentation.json +19 -0
- package/tmux/templates/exploration.json +19 -0
- package/tmux/templates/feature-dev.json +24 -0
- package/tmux/templates/full-stack.json +24 -0
- package/tmux/templates/migration.json +24 -0
- package/tmux/templates/refactor.json +19 -0
- package/tmux/templates/security-audit.json +24 -0
- package/tmux/templates/testing.json +24 -0
- package/tmux/tmux.conf +167 -0
|
@@ -0,0 +1,3189 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# ╔═══════════════════════════════════════════════════════════════════════════╗
# ║ shipwright daemon — Autonomous GitHub Issue Watcher ║
# ║ Polls for labeled issues · Spawns pipelines · Manages worktrees ║
# ╚═══════════════════════════════════════════════════════════════════════════╝
set -euo pipefail

VERSION="1.7.0"
# Directory containing this script, resolved via BASH_SOURCE so it is correct
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Package/repo root — the scripts directory lives one level below it.
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
11
|
+
|
|
12
|
+
# ─── Colors (matches Seth's tmux theme) ─────────────────────────────────────
# 24-bit ANSI escape sequences, expanded by the `echo -e` output helpers below.
CYAN='\033[38;2;0;212;255m'    # #00d4ff — primary accent
PURPLE='\033[38;2;124;58;237m' # #7c3aed — secondary
BLUE='\033[38;2;0;102;255m'    # #0066ff — tertiary
GREEN='\033[38;2;74;222;128m'  # success
YELLOW='\033[38;2;250;204;21m' # warning
RED='\033[38;2;248;113;113m'   # error
DIM='\033[2m'
BOLD='\033[1m'
RESET='\033[0m'
|
|
22
|
+
|
|
23
|
+
# ─── Output Helpers ─────────────────────────────────────────────────────────
# Print a colored status glyph followed by the message. %b expands the ANSI
# escape sequences stored in the color variables (same as `echo -e`).
# error() writes to stderr; the others write to stdout.
info()    { printf '%b\n' "${CYAN}${BOLD}▸${RESET} $*"; }
success() { printf '%b\n' "${GREEN}${BOLD}✓${RESET} $*"; }
warn()    { printf '%b\n' "${YELLOW}${BOLD}⚠${RESET} $*"; }
error()   { printf '%b\n' "${RED}${BOLD}✗${RESET} $*" >&2; }
|
|
28
|
+
|
|
29
|
+
# Current UTC time as ISO-8601 (e.g. 2024-01-01T00:00:00Z).
now_iso() {
  date -u '+%Y-%m-%dT%H:%M:%SZ'
}

# Seconds since the Unix epoch.
now_epoch() {
  date '+%s'
}
|
|
31
|
+
|
|
32
|
+
# Convert a Unix epoch (seconds) to a UTC ISO-8601 timestamp.
# Tries BSD date (-r), then GNU date (-d @epoch), then Python 3; falls back
# to the epoch origin so callers always receive a parseable timestamp.
epoch_to_iso() {
  local epoch="$1"
  date -u -r "$epoch" +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || \
  date -u -d "@$epoch" +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || \
  # tz-aware fromtimestamp: datetime.utcfromtimestamp is deprecated (3.12+).
  python3 -c "import datetime; print(datetime.datetime.fromtimestamp($epoch, datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'))" 2>/dev/null || \
  echo "1970-01-01T00:00:00Z"
}
|
|
39
|
+
|
|
40
|
+
# Render a second count as a compact human duration:
# "2h 5m 3s", "4m 9s", or "42s". No trailing newline (matches printf).
format_duration() {
  local total="$1"
  if (( total >= 3600 )); then
    printf '%dh %dm %ds' $(( total / 3600 )) $(( (total % 3600) / 60 )) $(( total % 60 ))
  elif (( total >= 60 )); then
    printf '%dm %ds' $(( total / 60 )) $(( total % 60 ))
  else
    printf '%ds' "$total"
  fi
}
|
|
50
|
+
|
|
51
|
+
# ─── Structured Event Log ──────────────────────────────────────────────────
# Append-only JSONL stream of daemon events: one JSON object per line.
EVENTS_FILE="${HOME}/.claude-teams/events.jsonl"

# emit_event TYPE [key=value ...]
# Appends a timestamped JSON event. Values that look numeric are written as
# JSON numbers; everything else is written as a JSON string with backslashes
# and double quotes escaped. NOTE: values must be single-line — an embedded
# newline would still break the one-object-per-line format.
emit_event() {
  local event_type="$1"
  shift
  local json_fields=""
  local kv key val
  for kv in "$@"; do
    key="${kv%%=*}"
    val="${kv#*=}"
    if [[ "$val" =~ ^-?[0-9]+\.?[0-9]*$ ]]; then
      json_fields="${json_fields},\"${key}\":${val}"
    else
      # Escape backslashes BEFORE quotes: the original escaped only quotes,
      # so any backslash in a value produced invalid JSON.
      val="${val//\\/\\\\}"
      val="${val//\"/\\\"}"
      json_fields="${json_fields},\"${key}\":\"${val}\""
    fi
  done
  # Derive the directory from EVENTS_FILE instead of hardcoding it, so an
  # overridden EVENTS_FILE still gets its parent created.
  mkdir -p "$(dirname "$EVENTS_FILE")"
  echo "{\"ts\":\"$(now_iso)\",\"ts_epoch\":$(now_epoch),\"type\":\"${event_type}\"${json_fields}}" >> "$EVENTS_FILE"
}
|
|
71
|
+
|
|
72
|
+
# ─── GitHub API Retry with Backoff ────────────────────────────────────────
# Retries gh commands up to 3 times with exponential backoff (1s, 3s, 9s).
# Detects rate-limit (403/429) and transient errors. After exhausting retries
# it prints the last output and returns the command's real exit code.
gh_retry() {
  local max_retries=3
  local backoff=1
  local attempt=0
  local exit_code=0
  local output=""

  while [[ $attempt -lt $max_retries ]]; do
    attempt=$((attempt + 1))
    # Run the command, capturing stdout+stderr together.
    if output=$("$@" 2>&1); then
      echo "$output"
      return 0
    else
      # BUGFIX: must be captured in the else branch. Reading $? after `fi`
      # (as the original did) always yields 0 — the exit status of an `if`
      # whose condition is false and has no else is 0.
      exit_code=$?
    fi

    # Classification affects only the log message; both paths retry.
    if echo "$output" | grep -qiE "rate limit|403|429|502|503"; then
      daemon_log WARN "gh_retry: rate limit / server error on attempt ${attempt}/${max_retries} — backoff ${backoff}s"
    else
      daemon_log WARN "gh_retry: transient error on attempt ${attempt}/${max_retries} (exit ${exit_code}) — backoff ${backoff}s"
    fi

    if [[ $attempt -lt $max_retries ]]; then
      sleep "$backoff"
      backoff=$((backoff * 3))
    fi
  done

  # Retries exhausted: surface the last output and the real failure code.
  echo "$output"
  return "$exit_code"
}
|
|
107
|
+
|
|
108
|
+
# ─── Defaults ───────────────────────────────────────────────────────────────
DAEMON_DIR="$HOME/.claude-teams"            # root for all daemon state and logs
PID_FILE="$DAEMON_DIR/daemon.pid"           # written on start; used by `stop`
SHUTDOWN_FLAG="$DAEMON_DIR/daemon.shutdown" # touch-file requesting graceful exit
STATE_FILE=""   # resolved by setup_dirs()
LOG_FILE=""     # resolved by setup_dirs()
LOG_DIR=""      # resolved by setup_dirs()
WORKTREE_DIR="" # resolved by setup_dirs()

# Config defaults (overridden by daemon-config.json)
WATCH_LABEL="ready-to-build"        # GitHub label that marks issues to build
POLL_INTERVAL=60                    # seconds between GitHub polls
MAX_PARALLEL=2                      # max concurrent pipeline jobs
PIPELINE_TEMPLATE="autonomous"      # default pipeline template name
SKIP_GATES=true
MODEL="opus"
BASE_BRANCH="main"                  # branch worktrees are created from
ON_SUCCESS_REMOVE_LABEL="ready-to-build"
ON_SUCCESS_ADD_LABEL="pipeline/complete"
ON_SUCCESS_CLOSE_ISSUE=false
ON_FAILURE_ADD_LABEL="pipeline/failed"
ON_FAILURE_LOG_LINES=50             # log-tail lines posted in failure comments
SLACK_WEBHOOK=""

# Priority lane defaults
PRIORITY_LANE=false                 # enable the queue-bypass lane
PRIORITY_LANE_LABELS="hotfix,incident,p0,urgent"
PRIORITY_LANE_MAX=1                 # extra slots reserved for priority issues

# Org-wide daemon defaults
WATCH_MODE="repo"                   # "repo" (current repo) or "org" (whole org)
ORG=""
REPO_FILTER=""                      # optional regex filter for org-mode repos

# Auto-scaling defaults
AUTO_SCALE=false
AUTO_SCALE_INTERVAL=5
MAX_WORKERS=8
MIN_WORKERS=1
WORKER_MEM_GB=4
EST_COST_PER_JOB=5.0
FLEET_MAX_PARALLEL=""

# Patrol defaults (overridden by daemon-config.json or env)
PATROL_INTERVAL="${PATROL_INTERVAL:-3600}"   # seconds between patrol runs
PATROL_MAX_ISSUES="${PATROL_MAX_ISSUES:-5}"  # cap on issues filed per patrol
PATROL_LABEL="${PATROL_LABEL:-auto-patrol}"  # label applied to patrol issues
PATROL_DRY_RUN=false
LAST_PATROL_EPOCH=0

# Runtime
NO_GITHUB=false   # --no-github: skip all GitHub API calls (dry-run)
CONFIG_PATH=""    # --config: explicit config file path
DETACH=false      # --detach/-d: run inside a detached tmux session
FOLLOW=false      # --follow/-f: tail the log (logs subcommand)
BACKOFF_SECS=0
|
|
164
|
+
|
|
165
|
+
# ─── CLI Argument Parsing ──────────────────────────────────────────────────

# First positional argument is the subcommand; default to help.
SUBCOMMAND="${1:-help}"
shift 2>/dev/null || true

while [[ $# -gt 0 ]]; do
  case "$1" in
    --config)
      # Guard the value: a bare `shift 2` with no value fails and, under
      # `set -euo pipefail`, would kill the script with no diagnostic.
      if [[ -z "${2:-}" ]]; then
        error "--config requires a path argument"
        exit 2
      fi
      CONFIG_PATH="$2"
      shift 2
      ;;
    --config=*)
      CONFIG_PATH="${1#--config=}"
      shift
      ;;
    --detach|-d)
      DETACH=true
      shift
      ;;
    --follow|-f)
      FOLLOW=true
      shift
      ;;
    --no-github)
      NO_GITHUB=true
      shift
      ;;
    --help|-h)
      SUBCOMMAND="help"
      shift
      ;;
    *)
      # Pass unrecognized flags to subcommands (e.g. metrics --period 7)
      break
      ;;
  esac
done

# Remaining args available as "$@" for subcommands
|
205
|
+
# ─── Help ───────────────────────────────────────────────────────────────────

# Print the full CLI help: commands, options, examples, config-file keys and
# a how-it-works summary. Interpolates the *current* $WATCH_LABEL, so the
# text reflects any config already loaded when help is shown.
show_help() {
  echo -e "${CYAN}${BOLD}shipwright daemon${RESET} ${DIM}v${VERSION}${RESET} — Autonomous GitHub Issue Watcher"
  echo ""
  echo -e "${BOLD}USAGE${RESET}"
  echo -e " ${CYAN}shipwright daemon${RESET} <command> [options]"
  echo ""
  echo -e "${BOLD}COMMANDS${RESET}"
  echo -e " ${CYAN}start${RESET} [--config path] [--detach] Start the issue watcher"
  echo -e " ${CYAN}stop${RESET} Graceful shutdown via PID file"
  echo -e " ${CYAN}status${RESET} Show active pipelines and queue"
  echo -e " ${CYAN}init${RESET} Generate default daemon-config.json"
  echo -e " ${CYAN}logs${RESET} [--follow] Tail daemon activity log"
  echo -e " ${CYAN}metrics${RESET} [--period N] [--json] DORA/DX metrics dashboard"
  echo -e " ${CYAN}triage${RESET} Show issue triage scores and priority"
  echo -e " ${CYAN}patrol${RESET} [--once] [--dry-run] Run proactive codebase patrol"
  echo ""
  echo -e "${BOLD}OPTIONS${RESET}"
  echo -e " ${CYAN}--config${RESET} <path> Path to daemon-config.json ${DIM}(default: .claude/daemon-config.json)${RESET}"
  echo -e " ${CYAN}--detach${RESET}, ${CYAN}-d${RESET} Run in a detached tmux session"
  echo -e " ${CYAN}--follow${RESET}, ${CYAN}-f${RESET} Follow log output (with ${CYAN}logs${RESET} command)"
  echo -e " ${CYAN}--no-github${RESET} Disable GitHub API calls (dry-run mode)"
  echo ""
  echo -e "${BOLD}EXAMPLES${RESET}"
  echo -e " ${DIM}shipwright daemon init${RESET} # Generate config file"
  echo -e " ${DIM}shipwright daemon start${RESET} # Start watching in foreground"
  echo -e " ${DIM}shipwright daemon start --detach${RESET} # Start in background tmux session"
  echo -e " ${DIM}shipwright daemon start --config my-config.json${RESET} # Custom config"
  echo -e " ${DIM}shipwright daemon status${RESET} # Show active jobs and queue"
  echo -e " ${DIM}shipwright daemon stop${RESET} # Graceful shutdown"
  echo -e " ${DIM}shipwright daemon logs --follow${RESET} # Tail the daemon log"
  echo -e " ${DIM}shipwright daemon metrics${RESET} # DORA + DX metrics (last 7 days)"
  echo -e " ${DIM}shipwright daemon metrics --period 30${RESET} # Last 30 days"
  echo -e " ${DIM}shipwright daemon metrics --json${RESET} # JSON output for dashboards"
  echo -e " ${DIM}shipwright daemon triage${RESET} # Show issue triage scores"
  echo -e " ${DIM}shipwright daemon patrol${RESET} # Run proactive codebase patrol"
  echo -e " ${DIM}shipwright daemon patrol --dry-run${RESET} # Show what patrol would find"
  echo -e " ${DIM}shipwright daemon patrol --once${RESET} # Run patrol once and exit"
  echo ""
  echo -e "${BOLD}CONFIG FILE${RESET} ${DIM}(.claude/daemon-config.json)${RESET}"
  echo -e " ${DIM}watch_label${RESET} GitHub label to watch for ${DIM}(default: ready-to-build)${RESET}"
  echo -e " ${DIM}poll_interval${RESET} Seconds between polls ${DIM}(default: 60)${RESET}"
  echo -e " ${DIM}max_parallel${RESET} Max concurrent pipeline jobs ${DIM}(default: 2)${RESET}"
  echo -e " ${DIM}pipeline_template${RESET} Pipeline template to use ${DIM}(default: autonomous)${RESET}"
  echo -e " ${DIM}base_branch${RESET} Branch to create worktrees from ${DIM}(default: main)${RESET}"
  echo ""
  echo -e " ${BOLD}Priority Lanes${RESET}"
  echo -e " ${DIM}priority_lane${RESET} Enable priority bypass queue ${DIM}(default: false)${RESET}"
  echo -e " ${DIM}priority_lane_labels${RESET} Labels that trigger priority ${DIM}(default: hotfix,incident,p0,urgent)${RESET}"
  echo -e " ${DIM}priority_lane_max${RESET} Max extra slots for priority ${DIM}(default: 1)${RESET}"
  echo ""
  echo -e " ${BOLD}Org-Wide Mode${RESET}"
  echo -e " ${DIM}watch_mode${RESET} \"repo\" or \"org\" ${DIM}(default: repo)${RESET}"
  echo -e " ${DIM}org${RESET} GitHub org name ${DIM}(required for org mode)${RESET}"
  echo -e " ${DIM}repo_filter${RESET} Regex filter for repo names ${DIM}(e.g. \"api-.*|web-.*\")${RESET}"
  echo ""
  echo -e "${BOLD}HOW IT WORKS${RESET}"
  echo -e " 1. Polls GitHub for issues with the ${CYAN}${WATCH_LABEL}${RESET} label"
  echo -e " 2. For each new issue, creates a git worktree and spawns a pipeline"
  echo -e " 3. On success: removes label, adds ${GREEN}pipeline/complete${RESET}, comments on issue"
  echo -e " 4. On failure: adds ${RED}pipeline/failed${RESET}, comments with log tail"
  echo -e " 5. Respects ${CYAN}max_parallel${RESET} limit — excess issues are queued"
  echo -e " 6. Priority lane: ${CYAN}hotfix${RESET}/${CYAN}incident${RESET} issues bypass the queue"
  echo -e " 7. Org mode: watches issues across all repos in a GitHub org"
  echo ""
  echo -e "${DIM}Docs: https://sethdford.github.io/shipwright | GitHub: https://github.com/sethdford/shipwright${RESET}"
}
|
|
273
|
+
|
|
274
|
+
# ─── Config Loading ─────────────────────────────────────────────────────────

# Populate the global configuration variables from daemon-config.json.
# A missing config file is non-fatal: defaults are kept and 0 is returned.
# Every jq lookup supplies its default via `//`, so absent or null keys are
# safe; string fields that could legitimately be null additionally normalize
# the literal string "null" back to "" (SLACK_WEBHOOK, ORG, REPO_FILTER).
load_config() {
  local config_file="${CONFIG_PATH:-.claude/daemon-config.json}"

  if [[ ! -f "$config_file" ]]; then
    warn "Config not found at $config_file — using defaults"
    warn "Run ${CYAN}shipwright daemon init${RESET} to generate a config file"
    return 0
  fi

  info "Loading config: ${DIM}${config_file}${RESET}"

  # Core polling / pipeline settings
  WATCH_LABEL=$(jq -r '.watch_label // "ready-to-build"' "$config_file")
  POLL_INTERVAL=$(jq -r '.poll_interval // 60' "$config_file")
  MAX_PARALLEL=$(jq -r '.max_parallel // 2' "$config_file")
  PIPELINE_TEMPLATE=$(jq -r '.pipeline_template // "autonomous"' "$config_file")
  SKIP_GATES=$(jq -r '.skip_gates // true' "$config_file")
  MODEL=$(jq -r '.model // "opus"' "$config_file")
  BASE_BRANCH=$(jq -r '.base_branch // "main"' "$config_file")

  # on_success settings
  ON_SUCCESS_REMOVE_LABEL=$(jq -r '.on_success.remove_label // "ready-to-build"' "$config_file")
  ON_SUCCESS_ADD_LABEL=$(jq -r '.on_success.add_label // "pipeline/complete"' "$config_file")
  ON_SUCCESS_CLOSE_ISSUE=$(jq -r '.on_success.close_issue // false' "$config_file")

  # on_failure settings
  ON_FAILURE_ADD_LABEL=$(jq -r '.on_failure.add_label // "pipeline/failed"' "$config_file")
  ON_FAILURE_LOG_LINES=$(jq -r '.on_failure.comment_log_lines // 50' "$config_file")

  # notifications
  SLACK_WEBHOOK=$(jq -r '.notifications.slack_webhook // ""' "$config_file")
  if [[ "$SLACK_WEBHOOK" == "null" ]]; then SLACK_WEBHOOK=""; fi

  # health monitoring
  HEALTH_STALE_TIMEOUT=$(jq -r '.health.stale_timeout_s // 1800' "$config_file")

  # priority labels
  PRIORITY_LABELS=$(jq -r '.priority_labels // "urgent,p0,high,p1,normal,p2,low,p3"' "$config_file")

  # degradation alerting
  DEGRADATION_WINDOW=$(jq -r '.alerts.degradation_window // 5' "$config_file")
  DEGRADATION_CFR_THRESHOLD=$(jq -r '.alerts.cfr_threshold // 30' "$config_file")
  DEGRADATION_SUCCESS_THRESHOLD=$(jq -r '.alerts.success_threshold // 50' "$config_file")

  # patrol settings
  PATROL_INTERVAL=$(jq -r '.patrol.interval // 3600' "$config_file")
  PATROL_MAX_ISSUES=$(jq -r '.patrol.max_issues // 5' "$config_file")
  PATROL_LABEL=$(jq -r '.patrol.label // "auto-patrol"' "$config_file")

  # adaptive template selection
  AUTO_TEMPLATE=$(jq -r '.auto_template // false' "$config_file")
  # NOTE(review): this parses as (.template_map // "{}") | @json — an absent
  # key yields the quoted string "{}" after -r (matching the echo fallback);
  # verify that consumers of TEMPLATE_MAP expect the surrounding quotes.
  TEMPLATE_MAP=$(jq -r '.template_map // "{}" | @json' "$config_file" 2>/dev/null || echo '"{}"')

  # auto-retry with escalation
  MAX_RETRIES=$(jq -r '.max_retries // 2' "$config_file")
  RETRY_ESCALATION=$(jq -r '.retry_escalation // true' "$config_file")

  # self-optimization
  SELF_OPTIMIZE=$(jq -r '.self_optimize // false' "$config_file")
  OPTIMIZE_INTERVAL=$(jq -r '.optimize_interval // 10' "$config_file")

  # gh_retry: enable retry wrapper on critical GitHub API calls
  GH_RETRY_ENABLED=$(jq -r '.gh_retry // true' "$config_file")

  # stale state reaper: clean old worktrees, artifacts, state entries
  STALE_REAPER_ENABLED=$(jq -r '.stale_reaper // true' "$config_file")
  STALE_REAPER_INTERVAL=$(jq -r '.stale_reaper_interval // 10' "$config_file")
  STALE_REAPER_AGE_DAYS=$(jq -r '.stale_reaper_age_days // 7' "$config_file")

  # priority lane settings
  PRIORITY_LANE=$(jq -r '.priority_lane // false' "$config_file")
  PRIORITY_LANE_LABELS=$(jq -r '.priority_lane_labels // "hotfix,incident,p0,urgent"' "$config_file")
  PRIORITY_LANE_MAX=$(jq -r '.priority_lane_max // 1' "$config_file")

  # org-wide daemon mode
  WATCH_MODE=$(jq -r '.watch_mode // "repo"' "$config_file")
  ORG=$(jq -r '.org // ""' "$config_file")
  if [[ "$ORG" == "null" ]]; then ORG=""; fi
  REPO_FILTER=$(jq -r '.repo_filter // ""' "$config_file")
  if [[ "$REPO_FILTER" == "null" ]]; then REPO_FILTER=""; fi

  # auto-scaling
  AUTO_SCALE=$(jq -r '.auto_scale // false' "$config_file")
  AUTO_SCALE_INTERVAL=$(jq -r '.auto_scale_interval // 5' "$config_file")
  MAX_WORKERS=$(jq -r '.max_workers // 8' "$config_file")
  MIN_WORKERS=$(jq -r '.min_workers // 1' "$config_file")
  WORKER_MEM_GB=$(jq -r '.worker_mem_gb // 4' "$config_file")
  EST_COST_PER_JOB=$(jq -r '.estimated_cost_per_job_usd // 5.0' "$config_file")

  success "Config loaded"
}
|
|
366
|
+
|
|
367
|
+
# ─── Directory Setup ────────────────────────────────────────────────────────

# Resolve the daemon's state paths under $DAEMON_DIR and create the needed
# directories. Sets the globals STATE_FILE, LOG_FILE, LOG_DIR and WORKTREE_DIR.
setup_dirs() {
  STATE_FILE="$DAEMON_DIR/daemon-state.json"
  LOG_FILE="$DAEMON_DIR/daemon.log"
  LOG_DIR="$DAEMON_DIR/logs"
  # Worktrees live relative to the repository the daemon runs in.
  WORKTREE_DIR=".worktrees"

  # mkdir -p on LOG_DIR also creates DAEMON_DIR itself.
  mkdir -p "$DAEMON_DIR" "$LOG_DIR"
}
|
|
379
|
+
|
|
380
|
+
# ─── Logging ─────────────────────────────────────────────────────────────────

# daemon_log LEVEL MESSAGE...
# Appends "[iso-timestamp] [LEVEL] message" to $LOG_FILE, then mirrors the
# message to the terminal via the matching colored helper. Unknown levels are
# logged to the file only.
daemon_log() {
  local level="$1"; shift
  local msg="$*"
  echo "[$(now_iso)] [$level] $msg" >> "$LOG_FILE"

  # Mirror to the console with the appropriate glyph/color.
  case "$level" in
    INFO)    info "$msg" ;;
    SUCCESS) success "$msg" ;;
    WARN)    warn "$msg" ;;
    ERROR)   error "$msg" ;;
  esac
}
|
|
398
|
+
|
|
399
|
+
# ─── Notification Helper ────────────────────────────────────────────────────

# notify TITLE MESSAGE [LEVEL]
# Best-effort fan-out to Slack ($SLACK_WEBHOOK) and to a generic webhook
# (env SHIPWRIGHT_WEBHOOK_URL, falling back to CCT_WEBHOOK_URL). All curl
# failures are swallowed — a broken notifier must never take the daemon down.
notify() {
  local title="$1" message="$2" level="${3:-info}"

  # Pick a status glyph for the level (default bell).
  local emoji="🔔"
  case "$level" in
    success) emoji="✅" ;;
    error)   emoji="❌" ;;
    warn)    emoji="⚠️" ;;
  esac

  local payload

  # Slack webhook
  if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
    payload=$(jq -n \
      --arg text "${emoji} *${title}*\n${message}" \
      '{text: $text}')
    curl -sf -X POST -H 'Content-Type: application/json' \
      -d "$payload" "$SLACK_WEBHOOK" >/dev/null 2>&1 || true
  fi

  # Custom webhook (env var SHIPWRIGHT_WEBHOOK_URL, with CCT_WEBHOOK_URL fallback)
  local hook_url="${SHIPWRIGHT_WEBHOOK_URL:-${CCT_WEBHOOK_URL:-}}"
  if [[ -n "$hook_url" ]]; then
    payload=$(jq -n \
      --arg title "$title" --arg message "$message" \
      --arg level "$level" \
      '{title:$title, message:$message, level:$level}')
    curl -sf -X POST -H 'Content-Type: application/json' \
      -d "$payload" "$hook_url" >/dev/null 2>&1 || true
  fi
}
|
|
433
|
+
|
|
434
|
+
# ─── Pre-flight Checks ──────────────────────────────────────────────────────

# Verify the environment before the daemon starts: required/optional tools,
# git repository state, base branch, GitHub auth (unless --no-github), the
# pipeline script, and available disk space. Prints a checklist; returns 1
# when any *required* check fails (missing optional tools only warn).
preflight_checks() {
  local errors=0

  echo -e "${PURPLE}${BOLD}━━━ Pre-flight Checks ━━━${RESET}"
  echo ""

  # 1. Required tools
  local required_tools=("git" "jq" "gh" "claude")
  local optional_tools=("tmux" "curl")

  for tool in "${required_tools[@]}"; do
    if command -v "$tool" &>/dev/null; then
      echo -e " ${GREEN}✓${RESET} $tool"
    else
      echo -e " ${RED}✗${RESET} $tool ${RED}(required)${RESET}"
      errors=$((errors + 1))
    fi
  done

  for tool in "${optional_tools[@]}"; do
    if command -v "$tool" &>/dev/null; then
      echo -e " ${GREEN}✓${RESET} $tool"
    else
      # Missing optional tools do not count toward $errors.
      echo -e " ${DIM}○${RESET} $tool ${DIM}(optional — some features disabled)${RESET}"
    fi
  done

  # 2. Git state
  echo ""
  if git rev-parse --is-inside-work-tree &>/dev/null; then
    echo -e " ${GREEN}✓${RESET} Inside git repo"
  else
    echo -e " ${RED}✗${RESET} Not inside a git repository"
    errors=$((errors + 1))
  fi

  # Check base branch exists
  if git rev-parse --verify "$BASE_BRANCH" &>/dev/null; then
    echo -e " ${GREEN}✓${RESET} Base branch: $BASE_BRANCH"
  else
    echo -e " ${RED}✗${RESET} Base branch not found: $BASE_BRANCH"
    errors=$((errors + 1))
  fi

  # 3. GitHub auth (required for daemon — it needs to poll issues)
  if [[ "$NO_GITHUB" != "true" ]]; then
    # NOTE(review): `&>/dev/null 2>&1` — the second redirect is redundant,
    # &> already covers stderr.
    if gh auth status &>/dev/null 2>&1; then
      echo -e " ${GREEN}✓${RESET} GitHub authenticated"
    else
      echo -e " ${RED}✗${RESET} GitHub not authenticated (required for daemon)"
      errors=$((errors + 1))
    fi
  else
    echo -e " ${DIM}○${RESET} GitHub disabled (--no-github)"
  fi

  # 4. Pipeline script
  if [[ -x "$SCRIPT_DIR/cct-pipeline.sh" ]]; then
    echo -e " ${GREEN}✓${RESET} cct-pipeline.sh available"
  else
    echo -e " ${RED}✗${RESET} cct-pipeline.sh not found at $SCRIPT_DIR"
    errors=$((errors + 1))
  fi

  # 5. Disk space check (warn if < 1GB free)
  local free_space_kb
  free_space_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
  if [[ -n "$free_space_kb" ]] && [[ "$free_space_kb" -lt 1048576 ]] 2>/dev/null; then
    # Warning only — low disk does not fail pre-flight.
    echo -e " ${YELLOW}⚠${RESET} Low disk space: $(( free_space_kb / 1024 ))MB free"
  fi

  echo ""

  if [[ "$errors" -gt 0 ]]; then
    error "Pre-flight failed: $errors error(s)"
    return 1
  fi

  success "Pre-flight passed"
  echo ""
  return 0
}
|
|
518
|
+
|
|
519
|
+
# ─── State Management ───────────────────────────────────────────────────────

# Atomic write: write to a PID-suffixed temp file next to STATE_FILE, then
# mv over it. mv within one filesystem is atomic, so readers never observe a
# partially-written state file even if the daemon crashes mid-write.
atomic_write_state() {
  local content="$1"
  local tmp_file="${STATE_FILE}.tmp.$$"
  # printf instead of echo: echo would misparse content starting with -n/-e.
  printf '%s\n' "$content" > "$tmp_file"
  mv "$tmp_file" "$STATE_FILE"
}
|
|
528
|
+
|
|
529
|
+
# Create the daemon state file, or — when one already exists from a prior
# run — refresh only its pid and started_at so history (completed jobs,
# queue, retry counts) is preserved across restarts.
init_state() {
  if [[ ! -f "$STATE_FILE" ]]; then
    jq -n \
      --arg pid "$$" \
      --arg started "$(now_iso)" \
      --argjson interval "$POLL_INTERVAL" \
      --argjson max_parallel "$MAX_PARALLEL" \
      --arg label "$WATCH_LABEL" \
      --arg watch_mode "$WATCH_MODE" \
      '{
        version: 1,
        pid: ($pid | tonumber),
        started_at: $started,
        last_poll: null,
        config: {
          poll_interval: $interval,
          max_parallel: $max_parallel,
          watch_label: $label,
          watch_mode: $watch_mode
        },
        active_jobs: [],
        queued: [],
        completed: [],
        retry_counts: {},
        priority_lane_active: []
      }' > "$STATE_FILE"
  else
    # Update PID and start time in existing state
    local tmp
    tmp=$(jq \
      --arg pid "$$" \
      --arg started "$(now_iso)" \
      '.pid = ($pid | tonumber) | .started_at = $started' \
      "$STATE_FILE")
    atomic_write_state "$tmp"
  fi
}
|
|
566
|
+
|
|
567
|
+
# Set a single state field to a string value and persist atomically.
# NOTE(review): the value is bound with --arg, so numeric values are stored
# as JSON *strings*; and $field is interpolated directly into the jq program,
# so it must be a trusted, plain field path (internal callers only).
update_state_field() {
  local field="$1" value="$2"
  local tmp
  tmp=$(jq --arg val "$value" ".${field} = \$val" "$STATE_FILE")
  atomic_write_state "$tmp"
}
|
|
573
|
+
|
|
574
|
+
# ─── Inflight Check ─────────────────────────────────────────────────────────

# daemon_is_inflight ISSUE_NUM
# Returns 0 when the issue is already in flight — either running (an entry in
# .active_jobs with matching .issue) or waiting (its number in .queued).
# Returns 1 otherwise, including when no state file exists or jq fails.
daemon_is_inflight() {
  local issue_num="$1"

  [[ -f "$STATE_FILE" ]] || return 1

  # Single jq pass over both collections; count the matches.
  local matches
  matches=$(jq -r --argjson num "$issue_num" \
    '[(.active_jobs[]? | select(.issue == $num)), (.queued[]? | select(. == $num))] | length' \
    "$STATE_FILE" 2>/dev/null || echo 0)

  [[ "${matches:-0}" -gt 0 ]]
}
|
|
603
|
+
|
|
604
|
+
# ─── Active Job Count ───────────────────────────────────────────────────────

# Print the number of currently-active pipeline jobs; 0 when there is no
# state file or the state cannot be parsed.
get_active_count() {
  if [[ -f "$STATE_FILE" ]]; then
    jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo 0
  else
    echo 0
  fi
}
|
|
613
|
+
|
|
614
|
+
# ─── Queue Management ───────────────────────────────────────────────────────

# Append an issue number to the wait queue (deduplicated) and log it.
# Called when all pipeline slots are occupied.
# NOTE(review): jq's `unique` also SORTS, so the queue drains in ascending
# issue-number order rather than strict FIFO — confirm this is intended.
enqueue_issue() {
  local issue_num="$1"
  local tmp
  tmp=$(jq --argjson num "$issue_num" \
    '.queued += [$num] | .queued |= unique' \
    "$STATE_FILE")
  atomic_write_state "$tmp"
  daemon_log INFO "Queued issue #${issue_num} (at capacity)"
}
|
|
625
|
+
|
|
626
|
+
# Pop and print the next queued issue number (lowest first, since enqueue
# keeps .queued unique-sorted). Prints nothing — and removes nothing — when
# the queue is empty or no state file exists.
dequeue_next() {
  if [[ ! -f "$STATE_FILE" ]]; then
    return
  fi

  local next
  next=$(jq -r '.queued[0] // empty' "$STATE_FILE" 2>/dev/null || true)
  if [[ -n "$next" ]]; then
    # Remove from queue
    local tmp
    tmp=$(jq '.queued = .queued[1:]' "$STATE_FILE")
    atomic_write_state "$tmp"
    echo "$next"
  fi
}
|
|
641
|
+
|
|
642
|
+
# ─── Priority Lane Helpers ─────────────────────────────────────────────────

# is_priority_issue LABELS_CSV
# Returns 0 when any label in the issue's comma-separated label list exactly
# matches one of $PRIORITY_LANE_LABELS (comma-separated, whitespace-tolerant).
is_priority_issue() {
  local labels_csv="$1"
  local IFS=','
  local lane_labels
  read -ra lane_labels <<< "$PRIORITY_LANE_LABELS"
  local lane_label
  for lane_label in "${lane_labels[@]}"; do
    # Trim ALL leading/trailing whitespace. The original used
    # ${var## } / ${var%% }, which strips at most ONE space, so lists like
    # "hotfix,  incident" silently failed to match.
    lane_label="${lane_label#"${lane_label%%[![:space:]]*}"}"
    lane_label="${lane_label%"${lane_label##*[![:space:]]}"}"
    [[ -n "$lane_label" ]] || continue
    if [[ ",$labels_csv," == *",$lane_label,"* ]]; then
      return 0
    fi
  done
  return 1
}
|
|
659
|
+
|
|
660
|
+
# Count jobs currently occupying the priority lane.
# Globals:  STATE_FILE (read)
# Outputs:  length of .priority_lane_active on stdout; 0 when the state
#           file is missing or unreadable.
get_priority_active_count() {
  [[ -f "$STATE_FILE" ]] || { echo 0; return; }
  jq -r '.priority_lane_active // [] | length' "$STATE_FILE" 2>/dev/null || echo 0
}
|
|
667
|
+
|
|
668
|
+
# Record an issue as occupying a priority-lane slot in the state file
# (the slot list is kept de-duplicated).
# Arguments: $1 - issue number
# Globals:   STATE_FILE (read); state rewritten via atomic_write_state
track_priority_job() {
  local issue_num="$1" updated_state
  updated_state=$(jq --argjson num "$issue_num" \
    '.priority_lane_active = ((.priority_lane_active // []) + [$num] | unique)' \
    "$STATE_FILE")
  atomic_write_state "$updated_state"
}
|
|
676
|
+
|
|
677
|
+
# Release an issue's priority-lane slot. A missing state file is treated
# as "nothing to do".
# Arguments: $1 - issue number
# Globals:   STATE_FILE (read); state rewritten via atomic_write_state
untrack_priority_job() {
  local issue_num="$1"
  [[ -f "$STATE_FILE" ]] || return 0
  local updated_state
  updated_state=$(jq --argjson num "$issue_num" \
    '.priority_lane_active = [(.priority_lane_active // [])[] | select(. != $num)]' \
    "$STATE_FILE")
  atomic_write_state "$updated_state"
}
|
|
688
|
+
|
|
689
|
+
# ─── Org-Wide Repo Management ─────────────────────────────────────────────
|
|
690
|
+
|
|
691
|
+
# Ensure a local clone of owner/repo exists under $DAEMON_DIR/repos,
# updating it (fast-forward only) when it is already present.
# Arguments: $1 - owner, $2 - repo
# Globals:   DAEMON_DIR (read)
# Outputs:   the clone's directory path on stdout — callers capture this
#            with $(…), so nothing else may reach stdout.
# Returns:   0 on success, 1 when a fresh clone fails.
daemon_ensure_repo() {
  local owner="$1" repo="$2"
  local repo_dir="$DAEMON_DIR/repos/${owner}/${repo}"

  if [[ -d "$repo_dir/.git" ]]; then
    # Pull latest. BUGFIX: stdout must be silenced too — git pull prints
    # "Already up to date." / merge summaries to stdout, which previously
    # leaked into the captured return value and corrupted the path.
    (cd "$repo_dir" && git pull --ff-only >/dev/null 2>&1) || {
      daemon_log WARN "Failed to update ${owner}/${repo} — using existing clone"
    }
  else
    mkdir -p "$DAEMON_DIR/repos/${owner}"
    # Redirect clone's stdout as well, for the same reason as above.
    if ! git clone --depth=1 "https://github.com/${owner}/${repo}.git" "$repo_dir" >/dev/null 2>&1; then
      daemon_log ERROR "Failed to clone ${owner}/${repo}"
      return 1
    fi
    daemon_log INFO "Cloned ${owner}/${repo} to ${repo_dir}"
  fi

  echo "$repo_dir"
}
|
|
711
|
+
|
|
712
|
+
# ─── Spawn Pipeline ─────────────────────────────────────────────────────────
|
|
713
|
+
|
|
714
|
+
# Spawn an autonomous pipeline for one issue, in the background.
# Arguments: $1 - issue number
#            $2 - issue title (optional, used for logging/tracking)
#            $3 - owner/repo full name (optional, org mode only)
# Globals:   WATCH_MODE, BASE_BRANCH, WORKTREE_DIR, PIPELINE_TEMPLATE,
#            SKIP_GATES, MODEL, NO_GITHUB, SCRIPT_DIR, LOG_DIR (all read)
# Returns:   1 on low disk space or branch/worktree creation failure;
#            otherwise 0 after launching the pipeline and tracking it.
daemon_spawn_pipeline() {
  local issue_num="$1"
  local issue_title="${2:-}"
  local repo_full_name="${3:-}"  # owner/repo (org mode only)

  daemon_log INFO "Spawning pipeline for issue #${issue_num}: ${issue_title}"

  # Check disk space before spawning: refuse below 1 GiB (1048576 KiB)
  # free on the current filesystem. The trailing 2>/dev/null guards the
  # -lt comparison when df output is non-numeric.
  local free_space_kb
  free_space_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
  if [[ -n "$free_space_kb" ]] && [[ "$free_space_kb" -lt 1048576 ]] 2>/dev/null; then
    daemon_log WARN "Low disk space ($(( free_space_kb / 1024 ))MB) — skipping issue #${issue_num}"
    return 1
  fi

  local work_dir="" branch_name="daemon/issue-${issue_num}"

  if [[ "$WATCH_MODE" == "org" && -n "$repo_full_name" ]]; then
    # Org mode: use cloned repo directory
    local owner="${repo_full_name%%/*}"
    local repo="${repo_full_name##*/}"
    work_dir=$(daemon_ensure_repo "$owner" "$repo") || return 1

    # Create branch in the cloned repo (-B resets it if it already exists)
    (
      cd "$work_dir"
      git checkout -B "$branch_name" "${BASE_BRANCH}" 2>/dev/null
    ) || {
      daemon_log ERROR "Failed to create branch in ${repo_full_name}"
      return 1
    }
    daemon_log INFO "Org mode: working in ${work_dir} (${repo_full_name})"
  else
    # Standard mode: use git worktree
    work_dir="${WORKTREE_DIR}/daemon-issue-${issue_num}"

    # Clean up stale worktree if it exists
    if [[ -d "$work_dir" ]]; then
      git worktree remove "$work_dir" --force 2>/dev/null || true
    fi
    git branch -D "$branch_name" 2>/dev/null || true

    if ! git worktree add "$work_dir" -b "$branch_name" "$BASE_BRANCH" 2>/dev/null; then
      daemon_log ERROR "Failed to create worktree for issue #${issue_num}"
      return 1
    fi
    daemon_log INFO "Worktree created at ${work_dir}"
  fi

  # Build pipeline args
  local pipeline_args=("start" "--issue" "$issue_num" "--pipeline" "$PIPELINE_TEMPLATE")
  if [[ "$SKIP_GATES" == "true" ]]; then
    pipeline_args+=("--skip-gates")
  fi
  if [[ -n "$MODEL" ]]; then
    pipeline_args+=("--model" "$MODEL")
  fi
  if [[ "$NO_GITHUB" == "true" ]]; then
    pipeline_args+=("--no-github")
  fi

  # Run pipeline in work directory (background); all output goes to a
  # per-issue log consumed later by the failure handler's log tail.
  (
    cd "$work_dir"
    "$SCRIPT_DIR/cct-pipeline.sh" "${pipeline_args[@]}"
  ) > "$LOG_DIR/issue-${issue_num}.log" 2>&1 &
  local pid=$!

  daemon_log INFO "Pipeline started for issue #${issue_num} (PID: ${pid})"

  # Track the job (include repo for org mode)
  daemon_track_job "$issue_num" "$pid" "$work_dir" "$issue_title" "$repo_full_name"
  emit_event "daemon.spawn" "issue=$issue_num" "pid=$pid" "repo=${repo_full_name:-local}"

  # Comment on the issue
  if [[ "$NO_GITHUB" != "true" ]]; then
    # NOTE(review): "${gh_args[@]}" on an empty array errors under
    # `set -u` in bash < 4.4 — confirm the script's target bash version.
    local gh_args=()
    if [[ -n "$repo_full_name" ]]; then
      gh_args+=("--repo" "$repo_full_name")
    fi
    gh issue comment "$issue_num" "${gh_args[@]}" --body "## 🤖 Pipeline Started

**Daemon** picked up this issue and started an autonomous pipeline.

| Field | Value |
|-------|-------|
| Template | \`${PIPELINE_TEMPLATE}\` |
| Branch | \`${branch_name}\` |
| Repo | \`${repo_full_name:-local}\` |
| Started | $(now_iso) |

_Progress updates will be posted as the pipeline advances._" 2>/dev/null || true
  fi
}
|
|
808
|
+
|
|
809
|
+
# ─── Track Job ───────────────────────────────────────────────────────────────
|
|
810
|
+
|
|
811
|
+
# Append a job record to .active_jobs in the daemon state file.
# Arguments: $1 - issue number, $2 - pipeline PID, $3 - worktree path,
#            $4 - issue title (optional), $5 - owner/repo (optional, org mode)
# Globals:   STATE_FILE (read); state rewritten via atomic_write_state
daemon_track_job() {
  local issue_num="$1" pid="$2" worktree="$3" title="${4:-}" repo="${5:-}"
  local tmp
  # --argjson keeps issue/pid as JSON numbers; the rest are strings.
  tmp=$(jq \
    --argjson num "$issue_num" \
    --argjson pid "$pid" \
    --arg wt "$worktree" \
    --arg title "$title" \
    --arg started "$(now_iso)" \
    --arg repo "$repo" \
    '.active_jobs += [{
      issue: $num,
      pid: $pid,
      worktree: $wt,
      title: $title,
      started_at: $started,
      repo: $repo
    }]' \
    "$STATE_FILE")
  atomic_write_state "$tmp"
}
|
|
832
|
+
|
|
833
|
+
# ─── Reap Completed Jobs ────────────────────────────────────────────────────
|
|
834
|
+
|
|
835
|
+
# Scan .active_jobs for pipelines whose process has exited: record the
# result, run the success/failure handler, clean up worktrees, and
# backfill freed capacity from the queue.
# Globals: STATE_FILE (read/rewritten via atomic_write_state); handlers
#          read further config globals.
daemon_reap_completed() {
  if [[ ! -f "$STATE_FILE" ]]; then
    return
  fi

  local jobs
  jobs=$(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)
  if [[ -z "$jobs" ]]; then
    return
  fi

  # We iterate over a snapshot of active_jobs; the state file is
  # rewritten inside the loop (job removal, dequeue spawns), which is
  # safe because each iteration only removes its own entry.
  while IFS= read -r job; do
    local issue_num pid worktree
    issue_num=$(echo "$job" | jq -r '.issue')
    pid=$(echo "$job" | jq -r '.pid')
    worktree=$(echo "$job" | jq -r '.worktree')

    # Still running? Leave it alone.
    if kill -0 "$pid" 2>/dev/null; then
      continue
    fi

    # Process is dead — determine exit code. NOTE(review): `wait` only
    # yields the real status when $pid is a child of this shell (i.e.
    # the daemon itself spawned it); otherwise it fails and we treat
    # the job as failed — confirm that is the intended semantics.
    local exit_code=0
    wait "$pid" 2>/dev/null || exit_code=$?

    # BUGFIX: reset the epoch/duration vars every iteration. `local` is
    # function-scoped in bash, so previously a job with no started_at
    # reused the PREVIOUS job's start/end epochs (wrong duration_s in
    # the reap event) and tripped `set -u` on the very first job.
    local started_at duration_str start_epoch end_epoch
    duration_str=""
    start_epoch=0
    end_epoch=0
    started_at=$(echo "$job" | jq -r '.started_at // empty')
    if [[ -n "$started_at" ]]; then
      # macOS date -j for parsing ISO dates (TZ=UTC to parse Z-suffix
      # correctly); falls back to GNU `date -d`, then to 0.
      start_epoch=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started_at" +%s 2>/dev/null || date -d "$started_at" +%s 2>/dev/null || echo "0")
      end_epoch=$(now_epoch)
      if [[ "$start_epoch" -gt 0 ]]; then
        duration_str=$(format_duration $((end_epoch - start_epoch)))
      fi
    fi

    local result_str="success"
    [[ "$exit_code" -ne 0 ]] && result_str="failure"
    local dur_s=0
    [[ "$start_epoch" -gt 0 ]] && dur_s=$((end_epoch - start_epoch))
    emit_event "daemon.reap" "issue=$issue_num" "result=$result_str" "duration_s=$dur_s"

    if [[ "$exit_code" -eq 0 ]]; then
      daemon_on_success "$issue_num" "$duration_str"
    else
      daemon_on_failure "$issue_num" "$exit_code" "$duration_str"
    fi

    # Remove from active_jobs and priority lane tracking
    local tmp
    tmp=$(jq --argjson num "$issue_num" \
      '.active_jobs = [.active_jobs[] | select(.issue != $num)]' \
      "$STATE_FILE")
    atomic_write_state "$tmp"
    untrack_priority_job "$issue_num"

    # Clean up worktree (skip for org-mode clones — they persist)
    local job_repo
    job_repo=$(echo "$job" | jq -r '.repo // ""')
    if [[ -z "$job_repo" ]] && [[ -d "$worktree" ]]; then
      git worktree remove "$worktree" --force 2>/dev/null || true
      daemon_log INFO "Cleaned worktree: $worktree"
      git branch -D "daemon/issue-${issue_num}" 2>/dev/null || true
    elif [[ -n "$job_repo" ]]; then
      daemon_log INFO "Org-mode: preserving clone for ${job_repo}"
    fi

    # Dequeue next issue if available
    local next_issue
    next_issue=$(dequeue_next)
    if [[ -n "$next_issue" ]]; then
      daemon_log INFO "Dequeuing issue #${next_issue}"
      daemon_spawn_pipeline "$next_issue"
    fi
  done <<< "$jobs"
}
|
|
913
|
+
|
|
914
|
+
# ─── Success Handler ────────────────────────────────────────────────────────
|
|
915
|
+
|
|
916
|
+
# Handle a pipeline that exited 0: record it in .completed, swap the
# issue's labels, post a completion comment, optionally close the
# issue, and send a desktop notification.
# Arguments: $1 - issue number, $2 - human-readable duration (optional)
# Globals:   STATE_FILE, NO_GITHUB, ON_SUCCESS_REMOVE_LABEL,
#            ON_SUCCESS_ADD_LABEL, ON_SUCCESS_CLOSE_ISSUE (all read)
daemon_on_success() {
  local issue_num="$1" duration="${2:-}"

  daemon_log SUCCESS "Pipeline completed for issue #${issue_num} (${duration:-unknown})"

  # Record in completed list
  local tmp
  tmp=$(jq \
    --argjson num "$issue_num" \
    --arg result "success" \
    --arg dur "${duration:-unknown}" \
    --arg completed_at "$(now_iso)" \
    '.completed += [{
      issue: $num,
      result: $result,
      duration: $dur,
      completed_at: $completed_at
    }]' \
    "$STATE_FILE")
  atomic_write_state "$tmp"

  if [[ "$NO_GITHUB" != "true" ]]; then
    # Remove watch label, add success label (best-effort; gh errors
    # are deliberately swallowed — GitHub state is advisory here)
    gh issue edit "$issue_num" \
      --remove-label "$ON_SUCCESS_REMOVE_LABEL" \
      --add-label "$ON_SUCCESS_ADD_LABEL" 2>/dev/null || true

    # Comment on issue
    gh issue comment "$issue_num" --body "## ✅ Pipeline Complete

The autonomous pipeline finished successfully.

| Field | Value |
|-------|-------|
| Duration | ${duration:-unknown} |
| Completed | $(now_iso) |

Check the associated PR for the implementation." 2>/dev/null || true

    # Optionally close the issue
    if [[ "$ON_SUCCESS_CLOSE_ISSUE" == "true" ]]; then
      gh issue close "$issue_num" 2>/dev/null || true
    fi
  fi

  notify "Pipeline Complete — Issue #${issue_num}" \
    "Duration: ${duration:-unknown}" "success"
}
|
|
964
|
+
|
|
965
|
+
# ─── Failure Handler ────────────────────────────────────────────────────────
|
|
966
|
+
|
|
967
|
+
# Handle a pipeline that exited non-zero: record the failure, auto-retry
# with an escalated strategy (up to MAX_RETRIES), and when retries are
# exhausted (or disabled) report the final failure on the issue.
# Arguments: $1 - issue number, $2 - exit code (default 1),
#            $3 - human-readable duration (optional)
# Globals:   STATE_FILE, RETRY_ESCALATION, MAX_RETRIES, PIPELINE_TEMPLATE,
#            MODEL, NO_GITHUB, ON_FAILURE_ADD_LABEL, ON_FAILURE_LOG_LINES,
#            LOG_DIR, WATCH_LABEL (read; PIPELINE_TEMPLATE/MODEL are
#            temporarily overwritten around the retry spawn)
daemon_on_failure() {
  local issue_num="$1" exit_code="${2:-1}" duration="${3:-}"

  daemon_log ERROR "Pipeline failed for issue #${issue_num} (exit: ${exit_code}, ${duration:-unknown})"

  # Record in completed list
  local tmp
  tmp=$(jq \
    --argjson num "$issue_num" \
    --arg result "failed" \
    --argjson code "$exit_code" \
    --arg dur "${duration:-unknown}" \
    --arg completed_at "$(now_iso)" \
    '.completed += [{
      issue: $num,
      result: $result,
      exit_code: $code,
      duration: $dur,
      completed_at: $completed_at
    }]' \
    "$STATE_FILE")
  atomic_write_state "$tmp"

  # ── Auto-retry with strategy escalation ──
  if [[ "${RETRY_ESCALATION:-true}" == "true" ]]; then
    # retry_counts is keyed by the issue number as a STRING (--arg).
    local retry_count
    retry_count=$(jq -r --arg num "$issue_num" \
      '.retry_counts[$num] // 0' "$STATE_FILE" 2>/dev/null || echo "0")

    if [[ "$retry_count" -lt "${MAX_RETRIES:-2}" ]]; then
      retry_count=$((retry_count + 1))

      # Update retry count in state
      local tmp_state
      tmp_state=$(jq --arg num "$issue_num" --argjson count "$retry_count" \
        '.retry_counts[$num] = $count' "$STATE_FILE")
      atomic_write_state "$tmp_state"

      daemon_log WARN "Auto-retry #${retry_count}/${MAX_RETRIES:-2} for issue #${issue_num}"
      emit_event "daemon.retry" "issue=$issue_num" "retry=$retry_count" "max=${MAX_RETRIES:-2}"

      # Build escalated pipeline args.
      # NOTE(review): extra_args is populated below but never passed to
      # daemon_spawn_pipeline (which builds its own args from globals),
      # so --max-iterations / --compound-cycles currently have no
      # effect — confirm whether spawn should accept extra args.
      local retry_template="$PIPELINE_TEMPLATE"
      local retry_model="${MODEL:-opus}"
      local extra_args=()

      if [[ "$retry_count" -eq 1 ]]; then
        # Retry 1: same template, upgrade model, more iterations
        retry_model="opus"
        extra_args+=("--max-iterations" "30")
        daemon_log INFO "Escalation: model=opus, max_iterations=30"
      elif [[ "$retry_count" -ge 2 ]]; then
        # Retry 2: full template, compound quality max cycles
        retry_template="full"
        retry_model="opus"
        extra_args+=("--max-iterations" "30" "--compound-cycles" "5")
        daemon_log INFO "Escalation: template=full, compound_cycles=5"
      fi

      if [[ "$NO_GITHUB" != "true" ]]; then
        gh issue comment "$issue_num" --body "## 🔄 Auto-Retry #${retry_count}

Pipeline failed — retrying with escalated strategy.

| Field | Value |
|-------|-------|
| Retry | ${retry_count} / ${MAX_RETRIES:-2} |
| Template | \`${retry_template}\` |
| Model | \`${retry_model}\` |
| Started | $(now_iso) |

_Escalation: $(if [[ "$retry_count" -eq 1 ]]; then echo "upgraded model + increased iterations"; else echo "full template + compound quality"; fi)_" 2>/dev/null || true
      fi

      # Re-spawn with escalated strategy: temporarily override the
      # globals daemon_spawn_pipeline reads, then restore them.
      # NOTE(review): the title argument becomes "retry-N", discarding
      # the original issue title in tracking — confirm intended.
      local orig_template="$PIPELINE_TEMPLATE"
      local orig_model="$MODEL"
      PIPELINE_TEMPLATE="$retry_template"
      MODEL="$retry_model"
      daemon_spawn_pipeline "$issue_num" "retry-${retry_count}"
      PIPELINE_TEMPLATE="$orig_template"
      MODEL="$orig_model"
      return
    fi

    daemon_log WARN "Max retries (${MAX_RETRIES:-2}) exhausted for issue #${issue_num}"
    emit_event "daemon.retry_exhausted" "issue=$issue_num" "retries=$retry_count"
  fi

  # ── No retry — report final failure ──
  if [[ "$NO_GITHUB" != "true" ]]; then
    # Add failure label
    gh issue edit "$issue_num" \
      --add-label "$ON_FAILURE_ADD_LABEL" 2>/dev/null || true

    # Comment with log tail
    local log_tail=""
    local log_path="$LOG_DIR/issue-${issue_num}.log"
    if [[ -f "$log_path" ]]; then
      log_tail=$(tail -"$ON_FAILURE_LOG_LINES" "$log_path" 2>/dev/null || true)
    fi

    local retry_info=""
    if [[ "${RETRY_ESCALATION:-true}" == "true" ]]; then
      local final_count
      final_count=$(jq -r --arg num "$issue_num" \
        '.retry_counts[$num] // 0' "$STATE_FILE" 2>/dev/null || echo "0")
      retry_info="| Retries | ${final_count} / ${MAX_RETRIES:-2} (exhausted) |"
    fi

    gh issue comment "$issue_num" --body "## ❌ Pipeline Failed

The autonomous pipeline encountered an error.

| Field | Value |
|-------|-------|
| Exit Code | ${exit_code} |
| Duration | ${duration:-unknown} |
| Failed At | $(now_iso) |
${retry_info}

<details>
<summary>Last ${ON_FAILURE_LOG_LINES} lines of log</summary>

\`\`\`
${log_tail}
\`\`\`

</details>

_Re-add the \`${WATCH_LABEL}\` label to retry._" 2>/dev/null || true
  fi

  notify "Pipeline Failed — Issue #${issue_num}" \
    "Exit code: ${exit_code}, Duration: ${duration:-unknown}" "error"
}
|
|
1103
|
+
|
|
1104
|
+
# ─── Intelligent Triage ──────────────────────────────────────────────────────
|
|
1105
|
+
|
|
1106
|
+
# Score an issue from 0-100 based on multiple signals for intelligent prioritization.
|
|
1107
|
+
# Combines priority labels, age, complexity, dependencies, type, and memory signals.
|
|
1108
|
+
# Score an issue from 0-100 based on multiple signals for intelligent prioritization.
# Combines priority labels, age, complexity, dependencies, type, and memory signals.
# Arguments: $1 - issue JSON (gh fields: number, title, body, labels, createdAt)
# Outputs:   integer score (0-100) on stdout — callers capture it with $(…).
# NOTE(review): because stdout is captured by callers, this assumes
# emit_event does not write to stdout — confirm.
triage_score_issue() {
  local issue_json="$1"
  local issue_num issue_title issue_body labels_csv created_at
  issue_num=$(echo "$issue_json" | jq -r '.number')
  issue_title=$(echo "$issue_json" | jq -r '.title // ""')
  issue_body=$(echo "$issue_json" | jq -r '.body // ""')
  labels_csv=$(echo "$issue_json" | jq -r '[.labels[].name] | join(",")')
  created_at=$(echo "$issue_json" | jq -r '.createdAt // ""')

  local score=0

  # ── 1. Priority labels (0-30 points) ──
  local priority_score=0
  if echo "$labels_csv" | grep -qiE "urgent|p0"; then
    priority_score=30
  elif echo "$labels_csv" | grep -qiE "^high$|^high,|,high,|,high$|p1"; then
    priority_score=20
  elif echo "$labels_csv" | grep -qiE "normal|p2"; then
    priority_score=10
  elif echo "$labels_csv" | grep -qiE "^low$|^low,|,low,|,low$|p3"; then
    priority_score=5
  fi

  # ── 2. Issue age (0-15 points) — older issues boosted to prevent starvation ──
  local age_score=0
  if [[ -n "$created_at" ]]; then
    local created_epoch now_e age_secs
    # macOS `date -j` first, GNU `date -d` fallback, then 0 (no boost).
    created_epoch=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$created_at" +%s 2>/dev/null || \
      date -d "$created_at" +%s 2>/dev/null || echo "0")
    now_e=$(now_epoch)
    if [[ "$created_epoch" -gt 0 ]]; then
      age_secs=$((now_e - created_epoch))
      if [[ "$age_secs" -gt 604800 ]]; then # > 7 days
        age_score=15
      elif [[ "$age_secs" -gt 259200 ]]; then # > 3 days
        age_score=10
      elif [[ "$age_secs" -gt 86400 ]]; then # > 1 day
        age_score=5
      fi
    fi
  fi

  # ── 3. Complexity estimate (0-20 points, INVERTED — simpler = higher) ──
  local complexity_score=0
  local body_len=${#issue_body}
  local file_refs
  # NOTE(review): with -c, grep ignores -o and counts matching LINES,
  # not individual file references — confirm that is acceptable here.
  file_refs=$(echo "$issue_body" | grep -coE '[a-zA-Z0-9_/-]+\.(ts|js|py|go|rs|sh|json|yaml|yml|md)' || true)
  file_refs=${file_refs:-0}

  if [[ "$body_len" -lt 200 ]] && [[ "$file_refs" -lt 3 ]]; then
    complexity_score=20 # Short + few files = likely simple
  elif [[ "$body_len" -lt 1000 ]]; then
    complexity_score=10 # Medium
  elif [[ "$file_refs" -lt 5 ]]; then
    complexity_score=5 # Long but not many files
  fi
  # Long + many files = complex = 0 points (lower throughput)

  # ── 4. Dependencies (0-15 points / -15 for blocked) ──
  local dep_score=0
  local combined_text="${issue_title} ${issue_body}"

  # Check if this issue is blocked
  local blocked_refs
  blocked_refs=$(echo "$combined_text" | grep -oE '(blocked by|depends on) #[0-9]+' | grep -oE '#[0-9]+' || true)
  if [[ -n "$blocked_refs" ]] && [[ "$NO_GITHUB" != "true" ]]; then
    local all_closed=true
    while IFS= read -r ref; do
      local ref_num="${ref#\#}"
      local ref_state
      ref_state=$(gh issue view "$ref_num" --json state -q '.state' 2>/dev/null || echo "UNKNOWN")
      if [[ "$ref_state" != "CLOSED" ]]; then
        all_closed=false
        break
      fi
    done <<< "$blocked_refs"
    if [[ "$all_closed" == "false" ]]; then
      dep_score=-15
    fi
  fi

  # Check if this issue blocks others (search issue references).
  # NOTE(review): this assignment can OVERWRITE the -15 "blocked"
  # penalty above when the issue both blocks and is blocked — confirm
  # whether the scores should instead combine.
  if [[ "$NO_GITHUB" != "true" ]]; then
    local mentions
    mentions=$(gh api "repos/{owner}/{repo}/issues/${issue_num}/timeline" --paginate -q '
      [.[] | select(.event == "cross-referenced") | .source.issue.body // ""] |
      map(select(test("blocked by #'"${issue_num}"'|depends on #'"${issue_num}"'"; "i"))) | length
    ' 2>/dev/null || echo "0")
    mentions=${mentions:-0}
    if [[ "$mentions" -gt 0 ]]; then
      dep_score=15
    fi
  fi

  # ── 5. Type bonus (0-10 points) ──
  local type_score=0
  if echo "$labels_csv" | grep -qiE "security"; then
    type_score=10
  elif echo "$labels_csv" | grep -qiE "bug"; then
    type_score=10
  elif echo "$labels_csv" | grep -qiE "feature|enhancement"; then
    type_score=5
  fi

  # ── 6. Memory bonus (0-10 points / -5 for prior failures) ──
  local memory_score=0
  if [[ -x "$SCRIPT_DIR/cct-memory.sh" ]]; then
    local memory_result
    memory_result=$("$SCRIPT_DIR/cct-memory.sh" search --issue "$issue_num" --json 2>/dev/null || true)
    if [[ -n "$memory_result" ]]; then
      local prior_result
      prior_result=$(echo "$memory_result" | jq -r '.last_result // ""' 2>/dev/null || true)
      if [[ "$prior_result" == "success" ]]; then
        memory_score=10
      elif [[ "$prior_result" == "failure" ]]; then
        memory_score=-5
      fi
    fi
  fi

  # ── Total ──
  score=$((priority_score + age_score + complexity_score + dep_score + type_score + memory_score))
  # Clamp to 0-100
  [[ "$score" -lt 0 ]] && score=0
  [[ "$score" -gt 100 ]] && score=100

  emit_event "daemon.triage" \
    "issue=$issue_num" \
    "score=$score" \
    "priority=$priority_score" \
    "age=$age_score" \
    "complexity=$complexity_score" \
    "dependency=$dep_score" \
    "type=$type_score" \
    "memory=$memory_score"

  echo "$score"
}
|
|
1246
|
+
|
|
1247
|
+
# Auto-select pipeline template based on issue labels
|
|
1248
|
+
# Auto-select pipeline template based on issue labels.
# Arguments: $1 - comma-separated label list
#            $2 - triage score 0-100 (default 50)
# Globals:   AUTO_TEMPLATE, PIPELINE_TEMPLATE, TEMPLATE_MAP (all read)
# Outputs:   template name on stdout.
# Precedence: hardcoded label overrides > TEMPLATE_MAP config > score tier.
select_pipeline_template() {
  local labels="$1"
  local score="${2:-50}"

  # When auto_template is disabled, use default pipeline template
  if [[ "${AUTO_TEMPLATE:-false}" != "true" ]]; then
    echo "$PIPELINE_TEMPLATE"
    return
  fi

  # ── Label-based overrides (highest priority) ──
  if echo "$labels" | grep -qi "hotfix\|incident"; then
    echo "hotfix"
    return
  fi
  if echo "$labels" | grep -qi "security"; then
    echo "enterprise"
    return
  fi

  # ── Config-driven template_map overrides ──
  # Default is the JSON string "\"{}\"" so the unwrap below yields "{}".
  local map="${TEMPLATE_MAP:-\"{}\"}"
  # Unwrap double-encoded JSON if needed: a JSON string passes through
  # as its raw content; an object is re-serialized compactly.
  local decoded_map
  decoded_map=$(echo "$map" | jq -r 'if type == "string" then . else tostring end' 2>/dev/null || echo "{}")
  if [[ "$decoded_map" != "{}" ]]; then
    # Each map key is treated as a case-insensitive regex against the
    # label list; first matching entry wins.
    local matched
    matched=$(echo "$decoded_map" | jq -r --arg labels "$labels" '
      to_entries[] |
      select($labels | test(.key; "i")) |
      .value' 2>/dev/null | head -1)
    if [[ -n "$matched" ]]; then
      echo "$matched"
      return
    fi
  fi

  # ── Score-based selection ──
  # High score = likely simple/urgent → fast; low score → thorough.
  if [[ "$score" -ge 70 ]]; then
    echo "fast"
  elif [[ "$score" -ge 40 ]]; then
    echo "standard"
  else
    echo "full"
  fi
}
|
|
1294
|
+
|
|
1295
|
+
# ─── Triage Display ──────────────────────────────────────────────────────────
|
|
1296
|
+
|
|
1297
|
+
# Print a color-coded table of triage scores for all open issues that
# carry the watch label, sorted by score (highest first).
# Globals:  NO_GITHUB, WATCH_LABEL, plus color constants
#           (PURPLE/BOLD/DIM/RED/YELLOW/CYAN/GREEN/RESET)
# Exits:    1 when GitHub access is disabled or the issue list fails.
daemon_triage_show() {
  if [[ "$NO_GITHUB" == "true" ]]; then
    error "Triage requires GitHub access (--no-github is set)"
    exit 1
  fi

  load_config

  echo -e "${PURPLE}${BOLD}━━━ Issue Triage Scores ━━━${RESET}"
  echo ""

  local issues_json
  issues_json=$(gh issue list \
    --label "$WATCH_LABEL" \
    --state open \
    --json number,title,labels,body,createdAt \
    --limit 50 2>/dev/null) || {
    error "Failed to fetch issues from GitHub"
    exit 1
  }

  local issue_count
  issue_count=$(echo "$issues_json" | jq 'length' 2>/dev/null || echo 0)

  if [[ "$issue_count" -eq 0 ]]; then
    echo -e " ${DIM}No open issues with label '${WATCH_LABEL}'${RESET}"
    return 0
  fi

  # Score each issue and collect results.
  # NOTE(review): records are "|"-delimited, so a title or label that
  # itself contains "|" will shift the columns below — confirm whether
  # that is acceptable for display purposes.
  local scored_lines=()
  while IFS= read -r issue; do
    local num title labels_csv score template
    num=$(echo "$issue" | jq -r '.number')
    title=$(echo "$issue" | jq -r '.title // "—"')
    labels_csv=$(echo "$issue" | jq -r '[.labels[].name] | join(", ")')
    score=$(triage_score_issue "$issue")
    template=$(select_pipeline_template "$labels_csv" "$score")

    scored_lines+=("${score}|${num}|${title}|${labels_csv}|${template}")
  done < <(echo "$issues_json" | jq -c '.[]')

  # Sort by score descending
  local sorted
  sorted=$(printf '%s\n' "${scored_lines[@]}" | sort -t'|' -k1 -rn)

  # Print header
  printf " ${BOLD}%-6s %-7s %-45s %-12s %s${RESET}\n" "Score" "Issue" "Title" "Template" "Labels"
  echo -e " ${DIM}$(printf '%.0s─' {1..90})${RESET}"

  while IFS='|' read -r score num title labels_csv template; do
    # Color score by tier
    local score_color="$RED"
    [[ "$score" -ge 20 ]] && score_color="$YELLOW"
    [[ "$score" -ge 40 ]] && score_color="$CYAN"
    [[ "$score" -ge 60 ]] && score_color="$GREEN"

    # Truncate title
    [[ ${#title} -gt 42 ]] && title="${title:0:39}..."

    printf " ${score_color}%-6s${RESET} ${CYAN}#%-6s${RESET} %-45s ${DIM}%-12s %s${RESET}\n" \
      "$score" "$num" "$title" "$template" "$labels_csv"
  done <<< "$sorted"

  echo ""
  echo -e " ${DIM}${issue_count} issue(s) scored | Higher score = higher processing priority${RESET}"
  echo ""
}
|
|
1365
|
+
|
|
1366
|
+
# ─── Proactive Patrol Mode ───────────────────────────────────────────────────
|
|
1367
|
+
|
|
1368
|
+
daemon_patrol() {
|
|
1369
|
+
local once=false
|
|
1370
|
+
local dry_run="$PATROL_DRY_RUN"
|
|
1371
|
+
|
|
1372
|
+
while [[ $# -gt 0 ]]; do
|
|
1373
|
+
case "$1" in
|
|
1374
|
+
--once) once=true; shift ;;
|
|
1375
|
+
--dry-run) dry_run=true; shift ;;
|
|
1376
|
+
*) shift ;;
|
|
1377
|
+
esac
|
|
1378
|
+
done
|
|
1379
|
+
|
|
1380
|
+
echo -e "${PURPLE}${BOLD}━━━ Codebase Patrol ━━━${RESET}"
|
|
1381
|
+
echo ""
|
|
1382
|
+
|
|
1383
|
+
if [[ "$dry_run" == "true" ]]; then
|
|
1384
|
+
echo -e " ${YELLOW}DRY RUN${RESET} — findings will be reported but no issues created"
|
|
1385
|
+
echo ""
|
|
1386
|
+
fi
|
|
1387
|
+
|
|
1388
|
+
emit_event "patrol.started" "dry_run=$dry_run"
|
|
1389
|
+
|
|
1390
|
+
local total_findings=0
|
|
1391
|
+
local issues_created=0
|
|
1392
|
+
|
|
1393
|
+
# ── 1. Dependency Security Audit ──
|
|
1394
|
+
  # Patrol check 1: dependency security audit.
  # Runs npm audit (critical/high only), pip-audit, and cargo-audit when the
  # corresponding manifest and tool are present. Reads the enclosing
  # daemon_patrol locals dry_run / issues_created / total_findings via
  # bash dynamic scoping; npm findings may create one GitHub issue each
  # (deduplicated by search, capped at $PATROL_MAX_ISSUES).
  patrol_security_audit() {
    daemon_log INFO "Patrol: running dependency security audit"
    local findings=0

    # npm audit
    if [[ -f "package.json" ]] && command -v npm &>/dev/null; then
      local audit_json
      # npm audit exits non-zero when vulnerabilities exist; '|| true'
      # keeps the JSON output while suppressing the failure status.
      audit_json=$(npm audit --json 2>/dev/null || true)
      if [[ -n "$audit_json" ]]; then
        while IFS= read -r vuln; do
          local severity name advisory_url title
          severity=$(echo "$vuln" | jq -r '.severity // "unknown"')
          name=$(echo "$vuln" | jq -r '.name // "unknown"')
          advisory_url=$(echo "$vuln" | jq -r '.url // ""')
          title=$(echo "$vuln" | jq -r '.title // "vulnerability"')

          # Only report critical/high
          if [[ "$severity" != "critical" ]] && [[ "$severity" != "high" ]]; then
            continue
          fi

          findings=$((findings + 1))
          emit_event "patrol.finding" "type=security" "severity=$severity" "package=$name"

          # Check if issue already exists (search by package name) before
          # creating; in dry-run / no-github modes just print the finding.
          if [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
            local existing
            existing=$(gh issue list --label "$PATROL_LABEL" --label "security" \
              --search "Security: $name" --json number -q 'length' 2>/dev/null || echo "0")
            if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
              gh issue create \
                --title "Security: ${title} in ${name}" \
                --body "## Dependency Security Finding

| Field | Value |
|-------|-------|
| Package | \`${name}\` |
| Severity | **${severity}** |
| Advisory | ${advisory_url} |
| Found by | Shipwright patrol |
| Date | $(now_iso) |

Auto-detected by \`shipwright daemon patrol\`." \
                --label "security" --label "$PATROL_LABEL" 2>/dev/null || true
              issues_created=$((issues_created + 1))
              emit_event "patrol.issue_created" "type=security" "package=$name"
            fi
          else
            echo -e " ${RED}●${RESET} ${BOLD}${severity}${RESET}: ${title} in ${CYAN}${name}${RESET}"
          fi
        done < <(echo "$audit_json" | jq -c '.vulnerabilities | to_entries[] | .value' 2>/dev/null)
      fi
    fi

    # pip-audit: counted only, no per-vulnerability issue creation.
    if [[ -f "requirements.txt" ]] && command -v pip-audit &>/dev/null; then
      local pip_json
      pip_json=$(pip-audit --format=json 2>/dev/null || true)
      if [[ -n "$pip_json" ]]; then
        local vuln_count
        vuln_count=$(echo "$pip_json" | jq '[.dependencies[] | select(.vulns | length > 0)] | length' 2>/dev/null || echo "0")
        findings=$((findings + ${vuln_count:-0}))
      fi
    fi

    # cargo audit: counted only, no per-vulnerability issue creation.
    if [[ -f "Cargo.toml" ]] && command -v cargo-audit &>/dev/null; then
      local cargo_json
      cargo_json=$(cargo audit --json 2>/dev/null || true)
      if [[ -n "$cargo_json" ]]; then
        local vuln_count
        vuln_count=$(echo "$cargo_json" | jq '.vulnerabilities.found' 2>/dev/null || echo "0")
        findings=$((findings + ${vuln_count:-0}))
      fi
    fi

    total_findings=$((total_findings + findings))
    if [[ "$findings" -gt 0 ]]; then
      daemon_log INFO "Patrol: found ${findings} security vulnerability(ies)"
    else
      daemon_log INFO "Patrol: no security vulnerabilities found"
    fi
  }
|
|
1477
|
+
|
|
1478
|
+
# ── 2. Stale Dependency Check ──
|
|
1479
|
+
  # Patrol check 2: stale npm dependencies.
  # Flags packages that are 2+ major versions behind the latest release and
  # files a single aggregate GitHub issue (unless dry-run / no-github).
  # Reads dry_run / issues_created / total_findings from the enclosing
  # daemon_patrol scope via bash dynamic scoping.
  patrol_stale_dependencies() {
    daemon_log INFO "Patrol: checking for stale dependencies"
    local findings=0

    if [[ -f "package.json" ]] && command -v npm &>/dev/null; then
      local outdated_json
      # npm outdated exits non-zero when anything is outdated; keep output.
      outdated_json=$(npm outdated --json 2>/dev/null || true)
      if [[ -n "$outdated_json" ]] && [[ "$outdated_json" != "{}" ]]; then
        local stale_packages=""
        while IFS= read -r pkg; do
          local name current latest current_major latest_major
          name=$(echo "$pkg" | jq -r '.key')
          current=$(echo "$pkg" | jq -r '.value.current // "0.0.0"')
          latest=$(echo "$pkg" | jq -r '.value.latest // "0.0.0"')
          # Major version = everything before the first dot.
          current_major="${current%%.*}"
          latest_major="${latest%%.*}"

          # Only flag if 2+ major versions behind (non-numeric majors,
          # e.g. "linked" or missing installs, are skipped entirely).
          if [[ "$latest_major" =~ ^[0-9]+$ ]] && [[ "$current_major" =~ ^[0-9]+$ ]]; then
            local diff=$((latest_major - current_major))
            if [[ "$diff" -ge 2 ]]; then
              findings=$((findings + 1))
              # '\n' stays literal here; expanded later via echo -e.
              stale_packages="${stale_packages}\n- \`${name}\`: ${current} → ${latest} (${diff} major versions behind)"
              emit_event "patrol.finding" "type=stale_dependency" "package=$name" "current=$current" "latest=$latest"

              if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
                echo -e " ${YELLOW}●${RESET} ${CYAN}${name}${RESET}: ${current} → ${latest} (${diff} major versions behind)"
              fi
            fi
          fi
        done < <(echo "$outdated_json" | jq -c 'to_entries[]' 2>/dev/null)

        # Create a single issue for all stale deps (deduplicated by search,
        # capped by $PATROL_MAX_ISSUES).
        if [[ "$findings" -gt 0 ]] && [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
          local existing
          existing=$(gh issue list --label "$PATROL_LABEL" --label "dependencies" \
            --search "Stale dependencies" --json number -q 'length' 2>/dev/null || echo "0")
          if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
            gh issue create \
              --title "Update ${findings} stale dependencies" \
              --body "## Stale Dependencies

The following packages are 2+ major versions behind:
$(echo -e "$stale_packages")

Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
              --label "dependencies" --label "$PATROL_LABEL" 2>/dev/null || true
            issues_created=$((issues_created + 1))
            emit_event "patrol.issue_created" "type=stale_dependency" "count=$findings"
          fi
        fi
      fi
    fi

    total_findings=$((total_findings + findings))
    daemon_log INFO "Patrol: found ${findings} stale dependency(ies)"
  }
|
|
1536
|
+
|
|
1537
|
+
# ── 3. Dead Code Detection ──
|
|
1538
|
+
patrol_dead_code() {
|
|
1539
|
+
daemon_log INFO "Patrol: scanning for dead code"
|
|
1540
|
+
local findings=0
|
|
1541
|
+
local dead_files=""
|
|
1542
|
+
|
|
1543
|
+
# For JS/TS projects: find exported files not imported anywhere
|
|
1544
|
+
if [[ -f "package.json" ]] || [[ -f "tsconfig.json" ]]; then
|
|
1545
|
+
local src_dirs=("src" "lib" "app")
|
|
1546
|
+
for dir in "${src_dirs[@]}"; do
|
|
1547
|
+
[[ -d "$dir" ]] || continue
|
|
1548
|
+
while IFS= read -r file; do
|
|
1549
|
+
local basename_no_ext
|
|
1550
|
+
basename_no_ext=$(basename "$file" | sed 's/\.\(ts\|js\|tsx\|jsx\)$//')
|
|
1551
|
+
# Skip index files and test files
|
|
1552
|
+
[[ "$basename_no_ext" == "index" ]] && continue
|
|
1553
|
+
[[ "$basename_no_ext" =~ \.(test|spec)$ ]] && continue
|
|
1554
|
+
|
|
1555
|
+
# Check if this file is imported anywhere
|
|
1556
|
+
local import_count
|
|
1557
|
+
import_count=$(grep -rlE "(from|require).*['\"].*${basename_no_ext}['\"]" \
|
|
1558
|
+
--include="*.ts" --include="*.js" --include="*.tsx" --include="*.jsx" \
|
|
1559
|
+
. 2>/dev/null | grep -cv "$file" || true)
|
|
1560
|
+
import_count=${import_count:-0}
|
|
1561
|
+
|
|
1562
|
+
if [[ "$import_count" -eq 0 ]]; then
|
|
1563
|
+
findings=$((findings + 1))
|
|
1564
|
+
dead_files="${dead_files}\n- \`${file}\`"
|
|
1565
|
+
if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
|
|
1566
|
+
echo -e " ${DIM}●${RESET} ${file} ${DIM}(not imported)${RESET}"
|
|
1567
|
+
fi
|
|
1568
|
+
fi
|
|
1569
|
+
done < <(find "$dir" -type f \( -name "*.ts" -o -name "*.js" -o -name "*.tsx" -o -name "*.jsx" \) \
|
|
1570
|
+
! -name "*.test.*" ! -name "*.spec.*" ! -name "*.d.ts" 2>/dev/null)
|
|
1571
|
+
done
|
|
1572
|
+
fi
|
|
1573
|
+
|
|
1574
|
+
if [[ "$findings" -gt 0 ]] && [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
|
|
1575
|
+
local existing
|
|
1576
|
+
existing=$(gh issue list --label "$PATROL_LABEL" --label "tech-debt" \
|
|
1577
|
+
--search "Dead code candidates" --json number -q 'length' 2>/dev/null || echo "0")
|
|
1578
|
+
if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
|
|
1579
|
+
gh issue create \
|
|
1580
|
+
--title "Dead code candidates (${findings} files)" \
|
|
1581
|
+
--body "## Dead Code Detection
|
|
1582
|
+
|
|
1583
|
+
These files appear to have no importers — they may be unused:
|
|
1584
|
+
$(echo -e "$dead_files")
|
|
1585
|
+
|
|
1586
|
+
> **Note:** Some files may be entry points or dynamically loaded. Verify before removing.
|
|
1587
|
+
|
|
1588
|
+
Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
|
|
1589
|
+
--label "tech-debt" --label "$PATROL_LABEL" 2>/dev/null || true
|
|
1590
|
+
issues_created=$((issues_created + 1))
|
|
1591
|
+
emit_event "patrol.issue_created" "type=dead_code" "count=$findings"
|
|
1592
|
+
fi
|
|
1593
|
+
fi
|
|
1594
|
+
|
|
1595
|
+
total_findings=$((total_findings + findings))
|
|
1596
|
+
daemon_log INFO "Patrol: found ${findings} dead code candidate(s)"
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
# ── 4. Test Coverage Gaps ──
|
|
1600
|
+
patrol_coverage_gaps() {
|
|
1601
|
+
daemon_log INFO "Patrol: checking test coverage gaps"
|
|
1602
|
+
local findings=0
|
|
1603
|
+
local low_cov_files=""
|
|
1604
|
+
|
|
1605
|
+
# Look for coverage reports from last pipeline run
|
|
1606
|
+
local coverage_file=""
|
|
1607
|
+
for candidate in \
|
|
1608
|
+
".claude/pipeline-artifacts/coverage/coverage-summary.json" \
|
|
1609
|
+
"coverage/coverage-summary.json" \
|
|
1610
|
+
".coverage/coverage-summary.json"; do
|
|
1611
|
+
if [[ -f "$candidate" ]]; then
|
|
1612
|
+
coverage_file="$candidate"
|
|
1613
|
+
break
|
|
1614
|
+
fi
|
|
1615
|
+
done
|
|
1616
|
+
|
|
1617
|
+
if [[ -z "$coverage_file" ]]; then
|
|
1618
|
+
daemon_log INFO "Patrol: no coverage report found — skipping"
|
|
1619
|
+
return
|
|
1620
|
+
fi
|
|
1621
|
+
|
|
1622
|
+
while IFS= read -r entry; do
|
|
1623
|
+
local file_path line_pct
|
|
1624
|
+
file_path=$(echo "$entry" | jq -r '.key')
|
|
1625
|
+
line_pct=$(echo "$entry" | jq -r '.value.lines.pct // 100')
|
|
1626
|
+
|
|
1627
|
+
# Skip total and well-covered files
|
|
1628
|
+
[[ "$file_path" == "total" ]] && continue
|
|
1629
|
+
if awk "BEGIN{exit !($line_pct >= 50)}" 2>/dev/null; then continue; fi
|
|
1630
|
+
|
|
1631
|
+
findings=$((findings + 1))
|
|
1632
|
+
low_cov_files="${low_cov_files}\n- \`${file_path}\`: ${line_pct}% line coverage"
|
|
1633
|
+
|
|
1634
|
+
if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
|
|
1635
|
+
echo -e " ${YELLOW}●${RESET} ${file_path}: ${line_pct}% coverage"
|
|
1636
|
+
fi
|
|
1637
|
+
done < <(jq -c 'to_entries[]' "$coverage_file" 2>/dev/null)
|
|
1638
|
+
|
|
1639
|
+
if [[ "$findings" -gt 0 ]] && [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
|
|
1640
|
+
local existing
|
|
1641
|
+
existing=$(gh issue list --label "$PATROL_LABEL" --label "testing" \
|
|
1642
|
+
--search "Test coverage gaps" --json number -q 'length' 2>/dev/null || echo "0")
|
|
1643
|
+
if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
|
|
1644
|
+
gh issue create \
|
|
1645
|
+
--title "Improve test coverage for ${findings} file(s)" \
|
|
1646
|
+
--body "## Test Coverage Gaps
|
|
1647
|
+
|
|
1648
|
+
These files have < 50% line coverage:
|
|
1649
|
+
$(echo -e "$low_cov_files")
|
|
1650
|
+
|
|
1651
|
+
Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
|
|
1652
|
+
--label "testing" --label "$PATROL_LABEL" 2>/dev/null || true
|
|
1653
|
+
issues_created=$((issues_created + 1))
|
|
1654
|
+
emit_event "patrol.issue_created" "type=coverage" "count=$findings"
|
|
1655
|
+
fi
|
|
1656
|
+
fi
|
|
1657
|
+
|
|
1658
|
+
total_findings=$((total_findings + findings))
|
|
1659
|
+
daemon_log INFO "Patrol: found ${findings} low-coverage file(s)"
|
|
1660
|
+
}
|
|
1661
|
+
|
|
1662
|
+
# ── 5. Documentation Staleness ──
|
|
1663
|
+
  # Patrol check 5: documentation staleness.
  # Flags README.md when its last commit is > 30 days older than the most
  # recent source-file commit, and CHANGELOG.md when the newest git tag
  # post-dates its last change. Files one aggregate issue.
  # Reads dry_run / issues_created / total_findings from the enclosing
  # daemon_patrol scope via bash dynamic scoping.
  patrol_doc_staleness() {
    daemon_log INFO "Patrol: checking documentation staleness"
    local findings=0
    local stale_docs=""

    # Check if README is older than recent source changes
    if [[ -f "README.md" ]]; then
      local readme_epoch src_epoch
      # %ct = committer timestamp (epoch seconds) of the newest commit
      # touching each pathspec; 0 when there is no such commit.
      readme_epoch=$(git log -1 --format=%ct -- README.md 2>/dev/null || echo "0")
      # Quoted globs are git pathspecs matched across the whole tree.
      src_epoch=$(git log -1 --format=%ct -- "*.ts" "*.js" "*.py" "*.go" "*.rs" "*.sh" 2>/dev/null || echo "0")

      if [[ "$src_epoch" -gt 0 ]] && [[ "$readme_epoch" -gt 0 ]]; then
        local drift=$((src_epoch - readme_epoch))
        # Flag if README is > 30 days (2592000 s) behind source
        if [[ "$drift" -gt 2592000 ]]; then
          findings=$((findings + 1))
          local days_behind=$((drift / 86400))
          # '\n' stays literal here; expanded later via echo -e.
          stale_docs="${stale_docs}\n- \`README.md\`: ${days_behind} days behind source code"
          if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
            echo -e " ${YELLOW}●${RESET} README.md is ${days_behind} days behind source code"
          fi
        fi
      fi
    fi

    # Check if CHANGELOG is behind latest tag
    if [[ -f "CHANGELOG.md" ]]; then
      local latest_tag changelog_epoch tag_epoch
      latest_tag=$(git describe --tags --abbrev=0 2>/dev/null || true)
      if [[ -n "$latest_tag" ]]; then
        changelog_epoch=$(git log -1 --format=%ct -- CHANGELOG.md 2>/dev/null || echo "0")
        tag_epoch=$(git log -1 --format=%ct "$latest_tag" 2>/dev/null || echo "0")
        if [[ "$tag_epoch" -gt "$changelog_epoch" ]] && [[ "$changelog_epoch" -gt 0 ]]; then
          findings=$((findings + 1))
          stale_docs="${stale_docs}\n- \`CHANGELOG.md\`: not updated since tag \`${latest_tag}\`"
          if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
            echo -e " ${YELLOW}●${RESET} CHANGELOG.md not updated since ${latest_tag}"
          fi
        fi
      fi
    fi

    # File one aggregate issue (deduplicated by search, capped by
    # $PATROL_MAX_ISSUES; skipped in dry-run / no-github modes).
    if [[ "$findings" -gt 0 ]] && [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
      local existing
      existing=$(gh issue list --label "$PATROL_LABEL" --label "documentation" \
        --search "Stale documentation" --json number -q 'length' 2>/dev/null || echo "0")
      if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
        gh issue create \
          --title "Stale documentation detected" \
          --body "## Documentation Staleness

The following docs may need updating:
$(echo -e "$stale_docs")

Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
          --label "documentation" --label "$PATROL_LABEL" 2>/dev/null || true
        issues_created=$((issues_created + 1))
        emit_event "patrol.issue_created" "type=documentation" "count=$findings"
      fi
    fi

    total_findings=$((total_findings + findings))
    daemon_log INFO "Patrol: found ${findings} stale documentation item(s)"
  }
|
|
1727
|
+
|
|
1728
|
+
# ── 6. Performance Baseline ──
|
|
1729
|
+
  # Patrol check 6: test-suite performance baseline.
  # Pulls the most recent test-stage duration out of the event log, compares
  # it against a stored baseline, and flags (or files an issue for) a >30%
  # regression; otherwise the baseline is refreshed with the latest value.
  # Reads dry_run / issues_created / total_findings from the enclosing
  # daemon_patrol scope via bash dynamic scoping.
  patrol_performance_baseline() {
    daemon_log INFO "Patrol: checking performance baseline"

    # Look for test timing in recent pipeline events
    if [[ ! -f "$EVENTS_FILE" ]]; then
      daemon_log INFO "Patrol: no events file — skipping performance check"
      return
    fi

    local baseline_file="$DAEMON_DIR/patrol-perf-baseline.json"
    local recent_test_dur
    # Last duration_s among the 500 newest events of type stage.completed
    # for the "test" stage; "null" when none exist.
    recent_test_dur=$(tail -500 "$EVENTS_FILE" | \
      jq -s '[.[] | select(.type == "stage.completed" and .stage == "test") | .duration_s] | if length > 0 then .[-1] else null end' \
      2>/dev/null || echo "null")

    if [[ "$recent_test_dur" == "null" ]] || [[ -z "$recent_test_dur" ]]; then
      daemon_log INFO "Patrol: no recent test duration found — skipping"
      return
    fi

    # NOTE(review): the arithmetic below assumes duration_s is a whole
    # number of seconds; [[ -gt ]] and $(( )) will error on fractional
    # values — confirm the event emitters write integers.
    if [[ -f "$baseline_file" ]]; then
      local baseline_dur
      baseline_dur=$(jq -r '.test_duration_s // 0' "$baseline_file" 2>/dev/null || echo "0")
      if [[ "$baseline_dur" -gt 0 ]]; then
        local threshold=$(( baseline_dur * 130 / 100 )) # 30% slower
        if [[ "$recent_test_dur" -gt "$threshold" ]]; then
          total_findings=$((total_findings + 1))
          local pct_slower=$(( (recent_test_dur - baseline_dur) * 100 / baseline_dur ))
          emit_event "patrol.finding" "type=performance" "baseline=${baseline_dur}s" "current=${recent_test_dur}s" "regression=${pct_slower}%"

          # Report only in dry-run / no-github modes; otherwise file a
          # deduplicated issue subject to the $PATROL_MAX_ISSUES cap.
          if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
            echo -e " ${RED}●${RESET} Test suite ${pct_slower}% slower than baseline (${baseline_dur}s → ${recent_test_dur}s)"
          elif [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
            local existing
            existing=$(gh issue list --label "$PATROL_LABEL" --label "performance" \
              --search "Test suite performance regression" --json number -q 'length' 2>/dev/null || echo "0")
            if [[ "${existing:-0}" -eq 0 ]]; then
              gh issue create \
                --title "Test suite performance regression (${pct_slower}% slower)" \
                --body "## Performance Regression

| Metric | Value |
|--------|-------|
| Baseline | ${baseline_dur}s |
| Current | ${recent_test_dur}s |
| Regression | ${pct_slower}% |

Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
                --label "performance" --label "$PATROL_LABEL" 2>/dev/null || true
              issues_created=$((issues_created + 1))
              emit_event "patrol.issue_created" "type=performance"
            fi
          fi

          daemon_log WARN "Patrol: test suite ${pct_slower}% slower than baseline"
          # Early return keeps the slower run from becoming the new baseline.
          return
        fi
      fi
    fi

    # Save/update baseline (only reached when no regression was flagged)
    jq -n --argjson dur "$recent_test_dur" --arg ts "$(now_iso)" \
      '{test_duration_s: $dur, updated_at: $ts}' > "$baseline_file"
    daemon_log INFO "Patrol: performance baseline updated (${recent_test_dur}s)"
  }
|
|
1794
|
+
|
|
1795
|
+
# ── Run all patrol checks ──
|
|
1796
|
+
echo -e " ${BOLD}Security Audit${RESET}"
|
|
1797
|
+
patrol_security_audit
|
|
1798
|
+
echo ""
|
|
1799
|
+
|
|
1800
|
+
echo -e " ${BOLD}Stale Dependencies${RESET}"
|
|
1801
|
+
patrol_stale_dependencies
|
|
1802
|
+
echo ""
|
|
1803
|
+
|
|
1804
|
+
echo -e " ${BOLD}Dead Code Detection${RESET}"
|
|
1805
|
+
patrol_dead_code
|
|
1806
|
+
echo ""
|
|
1807
|
+
|
|
1808
|
+
echo -e " ${BOLD}Test Coverage Gaps${RESET}"
|
|
1809
|
+
patrol_coverage_gaps
|
|
1810
|
+
echo ""
|
|
1811
|
+
|
|
1812
|
+
echo -e " ${BOLD}Documentation Staleness${RESET}"
|
|
1813
|
+
patrol_doc_staleness
|
|
1814
|
+
echo ""
|
|
1815
|
+
|
|
1816
|
+
echo -e " ${BOLD}Performance Baseline${RESET}"
|
|
1817
|
+
patrol_performance_baseline
|
|
1818
|
+
echo ""
|
|
1819
|
+
|
|
1820
|
+
# ── Summary ──
|
|
1821
|
+
emit_event "patrol.completed" "findings=$total_findings" "issues_created=$issues_created" "dry_run=$dry_run"
|
|
1822
|
+
|
|
1823
|
+
echo -e "${PURPLE}${BOLD}━━━ Patrol Summary ━━━${RESET}"
|
|
1824
|
+
echo -e " Findings: ${total_findings}"
|
|
1825
|
+
echo -e " Issues created: ${issues_created}"
|
|
1826
|
+
if [[ "$dry_run" == "true" ]]; then
|
|
1827
|
+
echo -e " ${DIM}(dry run — no issues were created)${RESET}"
|
|
1828
|
+
fi
|
|
1829
|
+
echo ""
|
|
1830
|
+
|
|
1831
|
+
daemon_log INFO "Patrol complete: ${total_findings} findings, ${issues_created} issues created"
|
|
1832
|
+
}
|
|
1833
|
+
|
|
1834
|
+
# ─── Poll Issues ─────────────────────────────────────────────────────────────
|
|
1835
|
+
|
|
1836
|
+
# Poll GitHub for open issues labeled $WATCH_LABEL and dispatch pipelines.
# In org mode (WATCH_MODE=org with $ORG set) it searches across the whole
# organization, optionally filtered by the $REPO_FILTER regex; otherwise it
# lists the current repo. API failures trigger exponential backoff via the
# global BACKOFF_SECS (30s doubling, capped at 300s). Issues are triage-
# scored, sorted descending, and spawned — priority-labeled issues may
# bypass the capacity queue through the priority lane.
daemon_poll_issues() {
  if [[ "$NO_GITHUB" == "true" ]]; then
    daemon_log INFO "Polling skipped (--no-github)"
    return
  fi

  local issues_json

  # Select gh command wrapper: gh_retry for critical poll calls when enabled.
  # $gh_cmd is expanded unquoted below on purpose so "gh_retry gh" splits
  # into the wrapper plus its argument.
  local gh_cmd="gh"
  if [[ "${GH_RETRY_ENABLED:-true}" == "true" ]]; then
    gh_cmd="gh_retry gh"
  fi

  if [[ "$WATCH_MODE" == "org" && -n "$ORG" ]]; then
    # Org-wide mode: search issues across all org repos
    issues_json=$($gh_cmd search issues \
      --label "$WATCH_LABEL" \
      --owner "$ORG" \
      --state open \
      --json repository,number,title,labels,body,createdAt \
      --limit 20 2>/dev/null) || {
      # Handle rate limiting with exponential backoff: 30s, doubling to a
      # 300s ceiling; BACKOFF_SECS persists across poll cycles.
      if [[ $BACKOFF_SECS -eq 0 ]]; then
        BACKOFF_SECS=30
      elif [[ $BACKOFF_SECS -lt 300 ]]; then
        BACKOFF_SECS=$((BACKOFF_SECS * 2))
        if [[ $BACKOFF_SECS -gt 300 ]]; then
          BACKOFF_SECS=300
        fi
      fi
      daemon_log WARN "GitHub API error (org search) — backing off ${BACKOFF_SECS}s"
      sleep "$BACKOFF_SECS"
      return
    }

    # Filter by repo_filter regex if set
    if [[ -n "$REPO_FILTER" ]]; then
      issues_json=$(echo "$issues_json" | jq -c --arg filter "$REPO_FILTER" \
        '[.[] | select(.repository.nameWithOwner | test($filter))]')
    fi
  else
    # Standard single-repo mode
    issues_json=$($gh_cmd issue list \
      --label "$WATCH_LABEL" \
      --state open \
      --json number,title,labels,body,createdAt \
      --limit 20 2>/dev/null) || {
      # Handle rate limiting with exponential backoff (same policy as the
      # org-mode branch above).
      if [[ $BACKOFF_SECS -eq 0 ]]; then
        BACKOFF_SECS=30
      elif [[ $BACKOFF_SECS -lt 300 ]]; then
        BACKOFF_SECS=$((BACKOFF_SECS * 2))
        if [[ $BACKOFF_SECS -gt 300 ]]; then
          BACKOFF_SECS=300
        fi
      fi
      daemon_log WARN "GitHub API error — backing off ${BACKOFF_SECS}s"
      sleep "$BACKOFF_SECS"
      return
    }
  fi

  # Reset backoff on success
  BACKOFF_SECS=0

  local issue_count
  issue_count=$(echo "$issues_json" | jq 'length' 2>/dev/null || echo 0)

  if [[ "$issue_count" -eq 0 ]]; then
    return
  fi

  local mode_label="repo"
  [[ "$WATCH_MODE" == "org" ]] && mode_label="org:${ORG}"
  daemon_log INFO "Found ${issue_count} issue(s) with label '${WATCH_LABEL}' (${mode_label})"
  emit_event "daemon.poll" "issues_found=$issue_count" "active=$(get_active_count)" "mode=$WATCH_MODE"

  # Score each issue using intelligent triage and sort by descending score
  local scored_issues=()
  while IFS= read -r issue; do
    local num score
    num=$(echo "$issue" | jq -r '.number')
    score=$(echo "$issue" | false) 2>/dev/null || true
    score=$(triage_score_issue "$issue")
    # For org mode, include repo name in the scored entry
    local repo_name=""
    if [[ "$WATCH_MODE" == "org" ]]; then
      repo_name=$(echo "$issue" | jq -r '.repository.nameWithOwner // ""')
    fi
    scored_issues+=("${score}|${num}|${repo_name}")
  done < <(echo "$issues_json" | jq -c '.[]')

  # Sort by score descending
  # NOTE(review): -k1 extends the sort key to end-of-line; numeric parsing
  # stops at the first '|', so this works, but -k1,1 would be stricter.
  local sorted_order
  sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1 -rn)

  local active_count
  active_count=$(get_active_count)

  # Process each issue in triage order (process substitution keeps state in current shell)
  while IFS='|' read -r score issue_num repo_name; do
    [[ -z "$issue_num" ]] && continue

    local issue_title labels_csv
    issue_title=$(echo "$issues_json" | jq -r --argjson n "$issue_num" '.[] | select(.number == $n) | .title')
    labels_csv=$(echo "$issues_json" | jq -r --argjson n "$issue_num" '.[] | select(.number == $n) | [.labels[].name] | join(",")')

    # Skip if already inflight
    if daemon_is_inflight "$issue_num"; then
      continue
    fi

    # Priority lane: bypass queue for critical issues
    if [[ "$PRIORITY_LANE" == "true" ]]; then
      local priority_active
      priority_active=$(get_priority_active_count)
      if is_priority_issue "$labels_csv" && [[ "$priority_active" -lt "$PRIORITY_LANE_MAX" ]]; then
        daemon_log WARN "PRIORITY LANE: issue #${issue_num} bypassing queue (${labels_csv})"
        emit_event "daemon.priority_lane" "issue=$issue_num" "score=$score"

        local template
        template=$(select_pipeline_template "$labels_csv" "$score")
        daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template} [PRIORITY]"

        # Temporarily override the global template for this spawn only.
        local orig_template="$PIPELINE_TEMPLATE"
        PIPELINE_TEMPLATE="$template"
        daemon_spawn_pipeline "$issue_num" "$issue_title" "$repo_name"
        PIPELINE_TEMPLATE="$orig_template"
        track_priority_job "$issue_num"
        continue
      fi
    fi

    # Check capacity; over-capacity issues are queued for later cycles.
    active_count=$(get_active_count)
    if [[ "$active_count" -ge "$MAX_PARALLEL" ]]; then
      enqueue_issue "$issue_num"
      continue
    fi

    # Auto-select pipeline template based on labels + triage score
    local template
    template=$(select_pipeline_template "$labels_csv" "$score")
    daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template}"

    # Spawn pipeline (template selection applied via PIPELINE_TEMPLATE override)
    local orig_template="$PIPELINE_TEMPLATE"
    PIPELINE_TEMPLATE="$template"
    daemon_spawn_pipeline "$issue_num" "$issue_title" "$repo_name"
    PIPELINE_TEMPLATE="$orig_template"
  done <<< "$sorted_order"

  # Update last poll
  update_state_field "last_poll" "$(now_iso)"
}
|
|
1991
|
+
|
|
1992
|
+
# ─── Health Check ─────────────────────────────────────────────────────────────
|
|
1993
|
+
|
|
1994
|
+
daemon_health_check() {
  # Periodic health pass. Checks for:
  #   1. stale (hung) pipeline jobs older than HEALTH_STALE_TIMEOUT — killed;
  #   2. low disk space on the current filesystem (< 1 GB free);
  #   3. an oversized events file (> 100 MB).
  # Emits a daemon.health event when anything was found. Best-effort: all
  # probes tolerate missing files/commands and never abort the daemon.
  local findings=0

  # Stale jobs: kill processes running > timeout
  local stale_timeout="${HEALTH_STALE_TIMEOUT:-1800}" # default 30min
  local now_e
  now_e=$(now_epoch)

  if [[ -f "$STATE_FILE" ]]; then
    # One jq invocation per job instead of three: emit pid, started_at and
    # issue number as a single tab-separated record.
    while IFS=$'\t' read -r pid started_at issue_num; do
      [[ -n "$pid" ]] || continue
      if [[ -n "$started_at" ]]; then
        local start_e
        # BSD date (-j -f) first, then GNU date (-d); fall back to 0 so a
        # parse failure makes the job look ancient rather than crashing.
        start_e=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started_at" +%s 2>/dev/null || date -d "$started_at" +%s 2>/dev/null || echo "0")
        local elapsed=$(( now_e - start_e ))
        if [[ "$elapsed" -gt "$stale_timeout" ]] && kill -0 "$pid" 2>/dev/null; then
          daemon_log WARN "Stale job detected: issue #${issue_num} (${elapsed}s, PID $pid) — killing"
          kill "$pid" 2>/dev/null || true
          findings=$((findings + 1))
        fi
      fi
    done < <(jq -r '.active_jobs[] | [.pid, (.started_at // ""), .issue] | @tsv' "$STATE_FILE" 2>/dev/null)
  fi

  # Disk space warning (< 1 GB free). Validate that df produced a number
  # instead of suppressing [[ ]] errors with a redirect.
  local free_kb
  free_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
  if [[ "$free_kb" =~ ^[0-9]+$ ]] && [[ "$free_kb" -lt 1048576 ]]; then
    daemon_log WARN "Low disk space: $(( free_kb / 1024 ))MB free"
    findings=$((findings + 1))
  fi

  # Events file size warning
  if [[ -f "$EVENTS_FILE" ]]; then
    local events_size
    events_size=$(wc -c < "$EVENTS_FILE" 2>/dev/null || echo 0)
    if [[ "$events_size" -gt 104857600 ]]; then # 100MB
      daemon_log WARN "Events file large ($(( events_size / 1048576 ))MB) — consider rotating"
      findings=$((findings + 1))
    fi
  fi

  if [[ "$findings" -gt 0 ]]; then
    emit_event "daemon.health" "findings=$findings"
  fi
}
|
|
2044
|
+
|
|
2045
|
+
# ─── Degradation Alerting ─────────────────────────────────────────────────────
|
|
2046
|
+
|
|
2047
|
+
daemon_check_degradation() {
  # Alerts when recent pipeline quality degrades: computes the change-failure
  # rate (CFR) and success rate over the last $window pipeline.completed
  # events and warns/notifies when either crosses its threshold.
  # No-op until at least $window completions have accumulated.
  if [[ ! -f "$EVENTS_FILE" ]]; then return; fi

  local window="${DEGRADATION_WINDOW:-5}"
  local cfr_threshold="${DEGRADATION_CFR_THRESHOLD:-30}"
  local success_threshold="${DEGRADATION_SUCCESS_THRESHOLD:-50}"
  # $window is interpolated into the jq program below and used as a divisor
  # floor — require a positive integer so a garbage env var cannot inject
  # jq syntax or cause a divide-by-zero.
  [[ "$window" =~ ^[1-9][0-9]*$ ]] || window=5

  # Get last N pipeline completions
  local recent
  recent=$(tail -200 "$EVENTS_FILE" | jq -s "[.[] | select(.type == \"pipeline.completed\")] | .[-${window}:]" 2>/dev/null)
  local count
  count=$(echo "$recent" | jq 'length' 2>/dev/null || echo 0)
  [[ "$count" =~ ^[0-9]+$ ]] || count=0

  # Not enough history yet (window >= 1, so count > 0 past this point).
  if [[ "$count" -lt "$window" ]]; then return; fi

  local failures successes
  failures=$(echo "$recent" | jq '[.[] | select(.result == "failure")] | length')
  successes=$(echo "$recent" | jq '[.[] | select(.result == "success")] | length')
  local cfr_pct=$(( failures * 100 / count ))
  local success_pct=$(( successes * 100 / count ))

  local alerts=""
  if [[ "$cfr_pct" -gt "$cfr_threshold" ]]; then
    alerts="CFR ${cfr_pct}% exceeds threshold ${cfr_threshold}%"
    daemon_log WARN "DEGRADATION: $alerts"
  fi
  if [[ "$success_pct" -lt "$success_threshold" ]]; then
    local msg="Success rate ${success_pct}% below threshold ${success_threshold}%"
    # Append to any CFR alert already present.
    [[ -n "$alerts" ]] && alerts="$alerts; $msg" || alerts="$msg"
    daemon_log WARN "DEGRADATION: $msg"
  fi

  if [[ -n "$alerts" ]]; then
    emit_event "daemon.alert" "alerts=$alerts" "cfr_pct=$cfr_pct" "success_pct=$success_pct"

    # Slack notification
    if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
      notify "Pipeline Degradation Alert" "$alerts" "warn"
    fi
  fi
}
|
|
2088
|
+
|
|
2089
|
+
# ─── Auto-Scaling ─────────────────────────────────────────────────────────
|
|
2090
|
+
# Dynamically adjusts MAX_PARALLEL based on CPU, memory, budget, and queue depth
|
|
2091
|
+
|
|
2092
|
+
daemon_auto_scale() {
  # Recomputes MAX_PARALLEL as the minimum of several independent ceilings:
  # CPU (75% of cores, collapsed to MIN_WORKERS under high load), available
  # memory (WORKER_MEM_GB per worker), remaining budget (via cct-cost.sh),
  # actual demand (queue depth + active jobs), the static MAX_WORKERS cap,
  # and an optional fleet-assigned ceiling. Result is clamped to MIN_WORKERS.
  # Logs and emits daemon.scale only when the value actually changes.
  # No-op unless AUTO_SCALE=true.
  if [[ "${AUTO_SCALE:-false}" != "true" ]]; then
    return
  fi

  local prev_max="$MAX_PARALLEL"

  # ── CPU cores ──
  local cpu_cores=2
  if [[ "$(uname -s)" == "Darwin" ]]; then
    cpu_cores=$(sysctl -n hw.ncpu 2>/dev/null || echo 2)
  else
    cpu_cores=$(nproc 2>/dev/null || echo 2)
  fi
  local max_by_cpu=$(( (cpu_cores * 3) / 4 )) # 75% utilization cap
  [[ "$max_by_cpu" -lt 1 ]] && max_by_cpu=1

  # ── Load average check (back off if system is stressed) ──
  # 'load averages?' matches both macOS ("load averages:") and Linux
  # ("load average:") uptime output.
  local load_avg
  load_avg=$(uptime | awk -F'load averages?: ' '{print $2}' | awk -F'[, ]+' '{print $1}' 2>/dev/null || echo "0")
  # Validate numeric
  if [[ ! "$load_avg" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    load_avg="0"
  fi
  local load_ratio=0
  if [[ "$cpu_cores" -gt 0 ]]; then
    # 1-minute load as a percentage of core count, rounded via awk (shell
    # arithmetic is integer-only).
    load_ratio=$(awk -v load="$load_avg" -v cores="$cpu_cores" 'BEGIN { printf "%.0f", (load / cores) * 100 }')
  fi
  if [[ "$load_ratio" -gt 90 ]]; then
    # System under heavy load — scale down to min
    max_by_cpu="$MIN_WORKERS"
    daemon_log WARN "Auto-scale: high load (${load_avg}/${cpu_cores} cores) — constraining to ${max_by_cpu}"
  fi

  # ── Available memory ──
  local avail_mem_gb=8
  if [[ "$(uname -s)" == "Darwin" ]]; then
    local page_size free_pages inactive_pages purgeable_pages speculative_pages
    # Page size is in format: "(page size of 16384 bytes)"
    page_size=$(vm_stat | awk '/page size of/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+$/) print $i}')
    page_size="${page_size:-16384}"
    # vm_stat counters end with a '.' — gsub strips it before use.
    free_pages=$(vm_stat | awk '/^Pages free:/ {gsub(/\./, "", $NF); print $NF}')
    free_pages="${free_pages:-0}"
    speculative_pages=$(vm_stat | awk '/^Pages speculative:/ {gsub(/\./, "", $NF); print $NF}')
    speculative_pages="${speculative_pages:-0}"
    inactive_pages=$(vm_stat | awk '/^Pages inactive:/ {gsub(/\./, "", $NF); print $NF}')
    inactive_pages="${inactive_pages:-0}"
    purgeable_pages=$(vm_stat | awk '/^Pages purgeable:/ {gsub(/\./, "", $NF); print $NF}')
    purgeable_pages="${purgeable_pages:-0}"
    # Available ≈ free + speculative + inactive + purgeable
    local avail_pages=$(( free_pages + speculative_pages + inactive_pages + purgeable_pages ))
    if [[ "$avail_pages" -gt 0 && "$page_size" -gt 0 ]]; then
      local free_bytes=$(( avail_pages * page_size ))
      avail_mem_gb=$(( free_bytes / 1073741824 ))
    fi
  else
    local avail_kb
    avail_kb=$(awk '/MemAvailable/ {print $2}' /proc/meminfo 2>/dev/null || echo "8388608")
    avail_mem_gb=$(( avail_kb / 1048576 ))
  fi
  [[ "$avail_mem_gb" -lt 1 ]] && avail_mem_gb=1
  # NOTE(review): assumes WORKER_MEM_GB is a positive integer — a zero value
  # would make this a division by zero. Confirm it is validated at config load.
  local max_by_mem=$(( avail_mem_gb / WORKER_MEM_GB ))
  [[ "$max_by_mem" -lt 1 ]] && max_by_mem=1

  # ── Budget remaining ──
  local max_by_budget="$MAX_WORKERS"
  local remaining_usd
  remaining_usd=$("$SCRIPT_DIR/cct-cost.sh" remaining-budget 2>/dev/null || echo "unlimited")
  if [[ "$remaining_usd" != "unlimited" && -n "$remaining_usd" ]]; then
    # awk handles the floating-point budget math; 'exit !(…)' makes the awk
    # exit status usable directly as a shell condition.
    if awk -v r="$remaining_usd" -v c="$EST_COST_PER_JOB" 'BEGIN { exit !(r > 0 && c > 0) }'; then
      max_by_budget=$(awk -v r="$remaining_usd" -v c="$EST_COST_PER_JOB" 'BEGIN { printf "%.0f", r / c }')
      [[ "$max_by_budget" -lt 0 ]] && max_by_budget=0
    else
      # No budget left (or zero per-job cost estimate): stop taking work.
      max_by_budget=0
    fi
  fi

  # ── Queue depth (don't over-provision) ──
  local queue_depth active_count
  queue_depth=$(jq -r '.queued | length' "$STATE_FILE" 2>/dev/null || echo 0)
  queue_depth="${queue_depth:-0}"
  [[ ! "$queue_depth" =~ ^[0-9]+$ ]] && queue_depth=0
  active_count=$(get_active_count)
  active_count="${active_count:-0}"
  [[ ! "$active_count" =~ ^[0-9]+$ ]] && active_count=0
  local max_by_queue=$(( queue_depth + active_count ))
  [[ "$max_by_queue" -lt 1 ]] && max_by_queue=1

  # ── Compute final value ── (minimum of all ceilings)
  local computed="$max_by_cpu"
  [[ "$max_by_mem" -lt "$computed" ]] && computed="$max_by_mem"
  [[ "$max_by_budget" -lt "$computed" ]] && computed="$max_by_budget"
  [[ "$max_by_queue" -lt "$computed" ]] && computed="$max_by_queue"
  [[ "$MAX_WORKERS" -lt "$computed" ]] && computed="$MAX_WORKERS"

  # Respect fleet-assigned ceiling if set
  if [[ -n "${FLEET_MAX_PARALLEL:-}" && "$FLEET_MAX_PARALLEL" -lt "$computed" ]]; then
    computed="$FLEET_MAX_PARALLEL"
  fi

  # Clamp to min_workers
  [[ "$computed" -lt "$MIN_WORKERS" ]] && computed="$MIN_WORKERS"

  MAX_PARALLEL="$computed"

  if [[ "$MAX_PARALLEL" -ne "$prev_max" ]]; then
    daemon_log INFO "Auto-scale: ${prev_max} → ${MAX_PARALLEL} (cpu=${max_by_cpu} mem=${max_by_mem} budget=${max_by_budget} queue=${max_by_queue})"
    emit_event "daemon.scale" \
      "from=$prev_max" \
      "to=$MAX_PARALLEL" \
      "max_by_cpu=$max_by_cpu" \
      "max_by_mem=$max_by_mem" \
      "max_by_budget=$max_by_budget" \
      "max_by_queue=$max_by_queue" \
      "cpu_cores=$cpu_cores" \
      "avail_mem_gb=$avail_mem_gb" \
      "remaining_usd=$remaining_usd"
  fi
}
|
|
2211
|
+
|
|
2212
|
+
# ─── Fleet Config Reload ──────────────────────────────────────────────────
|
|
2213
|
+
# Checks for fleet-reload.flag and reloads MAX_PARALLEL from fleet-managed config
|
|
2214
|
+
|
|
2215
|
+
daemon_reload_config() {
  # Applies a fleet-pushed max_parallel override. The fleet manager signals
  # a reload by touching fleet-reload.flag; the new ceiling is read from the
  # fleet-managed JSON config in the repo. The flag is always consumed
  # (removed), even when no valid config value was found.
  local reload_flag="$HOME/.claude-teams/fleet-reload.flag"
  if [[ ! -f "$reload_flag" ]]; then
    return
  fi

  local fleet_config=".claude/.fleet-daemon-config.json"
  if [[ -f "$fleet_config" ]]; then
    local new_max
    new_max=$(jq -r '.max_parallel // empty' "$fleet_config" 2>/dev/null || true)
    # Accept only a positive integer: MAX_PARALLEL is used in arithmetic
    # comparisons throughout the daemon, so a corrupt config value (e.g.
    # "abc" or a float) must not be allowed to poison it.
    if [[ "$new_max" =~ ^[1-9][0-9]*$ ]]; then
      local prev="$MAX_PARALLEL"
      FLEET_MAX_PARALLEL="$new_max"
      MAX_PARALLEL="$new_max"
      daemon_log INFO "Fleet reload: max_parallel ${prev} → ${MAX_PARALLEL} (fleet ceiling: ${FLEET_MAX_PARALLEL})"
      emit_event "daemon.fleet_reload" "from=$prev" "to=$MAX_PARALLEL"
    fi
  fi

  rm -f "$reload_flag"
}
|
|
2236
|
+
|
|
2237
|
+
# ─── Self-Optimizing Metrics Loop ──────────────────────────────────────────
|
|
2238
|
+
|
|
2239
|
+
daemon_self_optimize() {
  # Self-tuning pass driven by DORA-style metrics computed from the last
  # 7 days of pipeline.completed events: change-failure rate (CFR), median
  # cycle time, deploy frequency, and MTTR. Depending on thresholds it may
  # mutate PIPELINE_TEMPLATE, MAX_PARALLEL, POLL_INTERVAL and AUTO_TEMPLATE
  # in the running daemon, record the adjustments in the state file, and
  # persist them to daemon-config.json so they survive a restart.
  # No-op unless SELF_OPTIMIZE=true and an events file exists.
  if [[ "${SELF_OPTIMIZE:-false}" != "true" ]]; then
    return
  fi

  if [[ ! -f "$EVENTS_FILE" ]]; then
    return
  fi

  daemon_log INFO "Running self-optimization check"

  # Read DORA metrics from recent events (last 7 days)
  local cutoff_epoch
  cutoff_epoch=$(( $(now_epoch) - (7 * 86400) ))

  # jq -c streams each JSONL event; only those newer than the cutoff pass.
  local period_events
  period_events=$(jq -c "select(.ts_epoch >= $cutoff_epoch)" "$EVENTS_FILE" 2>/dev/null || true)

  if [[ -z "$period_events" ]]; then
    daemon_log INFO "No recent events for optimization"
    return
  fi

  local total_completed successes failures
  total_completed=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed")] | length')
  successes=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success")] | length')
  failures=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "failure")] | length')

  # Change Failure Rate (percentage, rounded via awk)
  local cfr=0
  if [[ "$total_completed" -gt 0 ]]; then
    cfr=$(echo "$failures $total_completed" | awk '{printf "%.0f", ($1 / $2) * 100}')
  fi

  # Cycle time (median, in seconds) over successful completions
  local cycle_time_median
  cycle_time_median=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success") | .duration_s // 0] | sort | if length > 0 then .[length/2 | floor] else 0 end')

  # Deploy frequency (per week) — the window is already 7 days, so the
  # success count itself is the weekly rate.
  local deploy_freq
  deploy_freq=$(echo "$successes" | awk '{printf "%.1f", $1 / 1}') # Already 7 days

  # MTTR: for each failure, time until the next success (by ts_epoch);
  # averaged over all recovered failures, 0 when none recovered.
  local mttr
  mttr=$(echo "$period_events" | \
    jq -s '
      [.[] | select(.type == "pipeline.completed")] | sort_by(.ts_epoch // 0) |
      [range(length) as $i |
        if .[$i].result == "failure" then
          [.[$i+1:][] | select(.result == "success")][0] as $next |
          if $next and $next.ts_epoch and .[$i].ts_epoch then
            ($next.ts_epoch - .[$i].ts_epoch)
          else null end
        else null end
      ] | map(select(. != null)) |
      if length > 0 then (add / length | floor) else 0 end
    ')

  local adjustments=()

  # ── CFR > 20%: enable compound_quality, increase max_cycles ──
  if [[ "$cfr" -gt 40 ]]; then
    PIPELINE_TEMPLATE="full"
    adjustments+=("template→full (CFR ${cfr}% > 40%)")
    daemon_log WARN "Self-optimize: CFR ${cfr}% critical — switching to full template"
  elif [[ "$cfr" -gt 20 ]]; then
    # NOTE(review): only records/logs the recommendation — no variable is
    # actually flipped here; confirm whether compound_quality is applied
    # elsewhere from the persisted adjustments.
    adjustments+=("compound_quality enabled (CFR ${cfr}% > 20%)")
    daemon_log WARN "Self-optimize: CFR ${cfr}% elevated — enabling compound quality"
  fi

  # ── Lead time > 4hrs: increase max_parallel, reduce poll_interval ──
  if [[ "$cycle_time_median" -gt 14400 ]]; then
    MAX_PARALLEL=$((MAX_PARALLEL + 1))
    if [[ "$POLL_INTERVAL" -gt 30 ]]; then
      POLL_INTERVAL=$((POLL_INTERVAL / 2))
    fi
    adjustments+=("max_parallel→${MAX_PARALLEL}, poll_interval→${POLL_INTERVAL}s (lead time > 4hrs)")
    daemon_log WARN "Self-optimize: lead time $(format_duration "$cycle_time_median") — increasing parallelism"
  elif [[ "$cycle_time_median" -gt 7200 ]]; then
    # ── Lead time > 2hrs: enable auto_template for fast-pathing ──
    AUTO_TEMPLATE="true"
    adjustments+=("auto_template enabled (lead time > 2hrs)")
    daemon_log INFO "Self-optimize: lead time $(format_duration "$cycle_time_median") — enabling adaptive templates"
  fi

  # ── Deploy freq < 1/day (< 7/week): enable merge stage ──
  # bc handles the float comparison; missing bc degrades to "no alert".
  if [[ "$(echo "$deploy_freq < 7" | bc -l 2>/dev/null || echo 0)" == "1" ]]; then
    adjustments+=("merge stage recommended (deploy freq ${deploy_freq}/week)")
    daemon_log INFO "Self-optimize: low deploy frequency — consider enabling merge stage"
  fi

  # ── MTTR > 2hrs: enable auto_rollback ──
  if [[ "$mttr" -gt 7200 ]]; then
    adjustments+=("auto_rollback recommended (MTTR $(format_duration "$mttr"))")
    daemon_log WARN "Self-optimize: high MTTR $(format_duration "$mttr") — consider enabling auto-rollback"
  fi

  # Write adjustments to state and persist to config
  if [[ ${#adjustments[@]} -gt 0 ]]; then
    local adj_str
    adj_str=$(printf '%s; ' "${adjustments[@]}")

    local tmp_state
    tmp_state=$(jq \
      --arg adj "$adj_str" \
      --arg ts "$(now_iso)" \
      '.last_optimization = {timestamp: $ts, adjustments: $adj}' \
      "$STATE_FILE")
    atomic_write_state "$tmp_state"

    # ── Persist adjustments to daemon-config.json (survives restart) ──
    local config_file="${CONFIG_PATH:-.claude/daemon-config.json}"
    if [[ -f "$config_file" ]]; then
      local tmp_config
      tmp_config=$(jq \
        --argjson max_parallel "$MAX_PARALLEL" \
        --argjson poll_interval "$POLL_INTERVAL" \
        --arg template "$PIPELINE_TEMPLATE" \
        --arg auto_template "${AUTO_TEMPLATE:-false}" \
        --arg ts "$(now_iso)" \
        --arg adj "$adj_str" \
        '.max_parallel = $max_parallel |
         .poll_interval = $poll_interval |
         .pipeline_template = $template |
         .auto_template = ($auto_template == "true") |
         .last_optimization = {timestamp: $ts, adjustments: $adj}' \
        "$config_file")
      # Atomic write: tmp file + mv
      local tmp_cfg_file="${config_file}.tmp.$$"
      echo "$tmp_config" > "$tmp_cfg_file"
      mv "$tmp_cfg_file" "$config_file"
      daemon_log INFO "Self-optimize: persisted adjustments to ${config_file}"
    fi

    emit_event "daemon.optimize" "adjustments=${adj_str}" "cfr=$cfr" "cycle_time=$cycle_time_median" "deploy_freq=$deploy_freq" "mttr=$mttr"
    daemon_log SUCCESS "Self-optimization applied ${#adjustments[@]} adjustment(s)"
  else
    daemon_log INFO "Self-optimization: all metrics within thresholds"
  fi
}
|
|
2380
|
+
|
|
2381
|
+
# ─── Stale State Reaper ──────────────────────────────────────────────────────
|
|
2382
|
+
# Cleans old worktrees, pipeline artifacts, and completed state entries.
|
|
2383
|
+
# Called every N poll cycles (configurable via stale_reaper_interval).
|
|
2384
|
+
|
|
2385
|
+
daemon_cleanup_stale() {
  # Housekeeping reaper, run every stale_reaper_interval poll cycles.
  # Removes three classes of stale artifacts older than STALE_REAPER_AGE_DAYS
  # (default 7): daemon-created git worktrees, pipeline artifact directories,
  # and completed entries in the state file. Emits daemon.cleanup when
  # anything was removed. No-op when STALE_REAPER_ENABLED != true.
  if [[ "${STALE_REAPER_ENABLED:-true}" != "true" ]]; then
    return
  fi

  daemon_log INFO "Running stale state reaper"
  local cleaned=0
  local age_days="${STALE_REAPER_AGE_DAYS:-7}"
  local age_secs=$((age_days * 86400))
  local now_e
  now_e=$(now_epoch)

  # ── 1. Clean old git worktrees ──
  if command -v git &>/dev/null; then
    while IFS= read -r line; do
      local wt_path
      wt_path=$(echo "$line" | awk '{print $1}')
      # Only clean daemon-created worktrees
      [[ "$wt_path" == *"daemon-issue-"* ]] || continue
      # Check worktree age via directory mtime
      # (BSD stat -f first, then GNU stat -c; 0 on failure, which makes the
      # path look ancient — acceptable since only daemon worktrees match).
      local mtime
      mtime=$(stat -f '%m' "$wt_path" 2>/dev/null || stat -c '%Y' "$wt_path" 2>/dev/null || echo "0")
      if [[ $((now_e - mtime)) -gt $age_secs ]]; then
        daemon_log INFO "Removing stale worktree: ${wt_path}"
        git worktree remove "$wt_path" --force 2>/dev/null || true
        cleaned=$((cleaned + 1))
      fi
    done < <(git worktree list --porcelain 2>/dev/null | grep '^worktree ' | sed 's/^worktree //')
  fi

  # ── 2. Clean old pipeline artifacts ──
  local artifacts_dir=".claude/pipeline-artifacts"
  if [[ -d "$artifacts_dir" ]]; then
    while IFS= read -r artifact_dir; do
      [[ -d "$artifact_dir" ]] || continue
      local mtime
      mtime=$(stat -f '%m' "$artifact_dir" 2>/dev/null || stat -c '%Y' "$artifact_dir" 2>/dev/null || echo "0")
      if [[ $((now_e - mtime)) -gt $age_secs ]]; then
        daemon_log INFO "Removing stale artifact: ${artifact_dir}"
        rm -rf "$artifact_dir"
        cleaned=$((cleaned + 1))
      fi
    done < <(find "$artifacts_dir" -mindepth 1 -maxdepth 1 -type d 2>/dev/null)
  fi

  # ── 3. Prune completed/failed state entries older than age_days ──
  # Relies on completed_at being ISO-8601, so plain string comparison
  # against the cutoff orders correctly.
  if [[ -f "$STATE_FILE" ]]; then
    local cutoff_iso
    cutoff_iso=$(epoch_to_iso $((now_e - age_secs)))
    local before_count after_count
    before_count=$(jq '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)
    local tmp_state
    tmp_state=$(jq --arg cutoff "$cutoff_iso" \
      '.completed = [.completed[] | select(.completed_at > $cutoff)]' \
      "$STATE_FILE" 2>/dev/null) || true
    # Only write back when jq produced output — a failed/empty filter must
    # never truncate the state file.
    if [[ -n "$tmp_state" ]]; then
      atomic_write_state "$tmp_state"
      after_count=$(jq '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)
      local pruned=$((before_count - after_count))
      if [[ "$pruned" -gt 0 ]]; then
        daemon_log INFO "Pruned ${pruned} old completed state entries"
        cleaned=$((cleaned + pruned))
      fi
    fi
  fi

  if [[ "$cleaned" -gt 0 ]]; then
    emit_event "daemon.cleanup" "cleaned=$cleaned" "age_days=$age_days"
    daemon_log SUCCESS "Stale reaper cleaned ${cleaned} item(s)"
  else
    daemon_log INFO "Stale reaper: nothing to clean"
  fi
}
|
|
2458
|
+
|
|
2459
|
+
# ─── Poll Loop ───────────────────────────────────────────────────────────────
|
|
2460
|
+
|
|
2461
|
+
# Number of completed poll iterations; drives the modulo-based scheduling of
# periodic maintenance tasks inside daemon_poll_loop.
POLL_CYCLE_COUNT=0
|
|
2462
|
+
|
|
2463
|
+
daemon_poll_loop() {
  # Core scheduler loop. Every iteration: poll for issues, reap finished
  # jobs, run a health pass, then fire periodic maintenance tasks on modulo
  # schedules keyed off POLL_CYCLE_COUNT. When the system is fully idle it
  # runs a proactive patrol at most once per PATROL_INTERVAL. The loop ends
  # as soon as the shutdown flag file appears.
  daemon_log INFO "Entering poll loop (interval: ${POLL_INTERVAL}s, max_parallel: ${MAX_PARALLEL})"
  daemon_log INFO "Watching for label: ${CYAN}${WATCH_LABEL}${RESET}"

  until [[ -f "$SHUTDOWN_FLAG" ]]; do
    daemon_poll_issues
    daemon_reap_completed
    daemon_health_check

    # Bump the cycle counter before any of the modulo checks below.
    POLL_CYCLE_COUNT=$((POLL_CYCLE_COUNT + 1))

    # Fleet config reload every 3 cycles
    if (( POLL_CYCLE_COUNT % 3 == 0 )); then
      daemon_reload_config
    fi

    # Degradation check every 5 cycles
    if (( POLL_CYCLE_COUNT % 5 == 0 )); then
      daemon_check_degradation
    fi

    # Auto-scale every N cycles (default: 5)
    if (( POLL_CYCLE_COUNT % ${AUTO_SCALE_INTERVAL:-5} == 0 )); then
      daemon_auto_scale
    fi

    # Self-optimize every N cycles (default: 10)
    if (( POLL_CYCLE_COUNT % ${OPTIMIZE_INTERVAL:-10} == 0 )); then
      daemon_self_optimize
    fi

    # Stale state reaper every N cycles (default: 10)
    if (( POLL_CYCLE_COUNT % ${STALE_REAPER_INTERVAL:-10} == 0 )); then
      daemon_cleanup_stale
    fi

    # Proactive patrol during quiet periods (nothing queued, nothing running)
    local queued_now running_now
    queued_now=$(jq -r '.queued | length' "$STATE_FILE" 2>/dev/null || echo 0)
    running_now=$(get_active_count)
    if [[ "$queued_now" -eq 0 ]] && [[ "$running_now" -eq 0 ]]; then
      local ts_now
      ts_now=$(now_epoch)
      if [[ $((ts_now - LAST_PATROL_EPOCH)) -ge "$PATROL_INTERVAL" ]]; then
        daemon_log INFO "No active work — running patrol"
        daemon_patrol --once
        LAST_PATROL_EPOCH=$ts_now
      fi
    fi

    # Sleep in one-second slices so a shutdown request is noticed promptly.
    local slept
    for (( slept = 0; slept < POLL_INTERVAL; slept++ )); do
      if [[ -f "$SHUTDOWN_FLAG" ]]; then
        break
      fi
      sleep 1
    done
  done

  daemon_log INFO "Shutdown flag detected — exiting poll loop"
}
|
|
2524
|
+
|
|
2525
|
+
# ─── Graceful Shutdown Handler ───────────────────────────────────────────────
|
|
2526
|
+
|
|
2527
|
+
# EXIT-trap handler (installed by daemon_start): removes the PID file and
# any pending shutdown flag, logs the stop, and emits daemon.stopped so
# observers see a clean termination.
cleanup_on_exit() {
  daemon_log INFO "Cleaning up..."
  rm -f "$PID_FILE" "$SHUTDOWN_FLAG"
  daemon_log INFO "Daemon stopped"
  emit_event "daemon.stopped" "pid=$$"
}
|
|
2533
|
+
|
|
2534
|
+
# ─── daemon start ───────────────────────────────────────────────────────────
|
|
2535
|
+
|
|
2536
|
+
daemon_start() {
  # Starts the daemon. Acquires a singleton lock on the PID file (flock when
  # available, PID-liveness check as fallback), loads config, runs pre-flight
  # checks, then either re-execs itself inside a detached tmux session
  # (--detach) or writes its PID and enters the foreground poll loop.
  echo -e "${PURPLE}${BOLD}━━━ shipwright daemon v${VERSION} ━━━${RESET}"
  echo ""

  # Acquire exclusive lock on PID file (prevents race between concurrent starts)
  exec 9>"$PID_FILE"
  if ! flock -n 9 2>/dev/null; then
    # flock unavailable (e.g. macOS) or lock held — fall back to PID check
    local existing_pid
    existing_pid=$(cat "$PID_FILE" 2>/dev/null || true)
    if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
      error "Daemon already running (PID: ${existing_pid})"
      info "Use ${CYAN}shipwright daemon stop${RESET} to stop it first"
      exit 1
    else
      warn "Stale PID file found — removing"
      rm -f "$PID_FILE"
      exec 9>"$PID_FILE"
    fi
  fi

  # Load config
  load_config

  # Pre-flight
  if ! preflight_checks; then
    exit 1
  fi

  # Detach mode: re-exec in a tmux session
  if [[ "$DETACH" == "true" ]]; then
    if ! command -v tmux &>/dev/null; then
      error "tmux required for --detach mode"
      exit 1
    fi

    info "Starting daemon in detached tmux session: ${CYAN}cct-daemon${RESET}"

    # Build the command to run in tmux
    local cmd_args=("$SCRIPT_DIR/cct-daemon.sh" "start")
    if [[ -n "$CONFIG_PATH" ]]; then
      cmd_args+=("--config" "$CONFIG_PATH")
    fi
    if [[ "$NO_GITHUB" == "true" ]]; then
      cmd_args+=("--no-github")
    fi

    # Shell-quote each argument for tmux: "${cmd_args[*]}" would flatten the
    # array with plain spaces and break on a script path or --config value
    # containing whitespace.
    local cmd_str
    cmd_str=$(printf '%q ' "${cmd_args[@]}")

    tmux new-session -d -s "cct-daemon" "$cmd_str" 2>/dev/null || {
      # Session may already exist — try killing and recreating
      tmux kill-session -t "cct-daemon" 2>/dev/null || true
      tmux new-session -d -s "cct-daemon" "$cmd_str"
    }

    success "Daemon started in tmux session ${CYAN}cct-daemon${RESET}"
    info "Attach with: ${DIM}tmux attach -t cct-daemon${RESET}"
    info "View logs: ${DIM}shipwright daemon logs --follow${RESET}"
    return 0
  fi

  # Foreground mode
  info "Starting daemon (PID: $$)"

  # Write PID file
  echo "$$" > "$PID_FILE"

  # Remove stale shutdown flag
  rm -f "$SHUTDOWN_FLAG"

  # Initialize state
  init_state

  # Trap signals for graceful shutdown: signals only raise the flag; the
  # poll loop notices it and exits, then the EXIT trap cleans up.
  trap cleanup_on_exit EXIT
  trap 'touch "$SHUTDOWN_FLAG"' SIGINT SIGTERM

  # Reap any orphaned jobs from previous runs
  daemon_reap_completed

  daemon_log INFO "Daemon started successfully"
  daemon_log INFO "Config: poll_interval=${POLL_INTERVAL}s, max_parallel=${MAX_PARALLEL}, label=${WATCH_LABEL}"

  emit_event "daemon.started" \
    "pid=$$" \
    "poll_interval=$POLL_INTERVAL" \
    "max_parallel=$MAX_PARALLEL" \
    "watch_label=$WATCH_LABEL"

  # Enter poll loop
  daemon_poll_loop
}
|
|
2626
|
+
|
|
2627
|
+
# ─── daemon stop ─────────────────────────────────────────────────────────────
|
|
2628
|
+
|
|
2629
|
+
daemon_stop() {
  # Stops a running daemon with graceful escalation: raise the shutdown
  # flag, wait up to 30s for the poll loop to exit on its own, then SIGTERM,
  # and finally SIGKILL. Always clears the PID file, the flag, and any
  # leftover tmux session on the way out.
  if [[ ! -f "$PID_FILE" ]]; then
    error "No daemon PID file found at $PID_FILE"
    info "Is the daemon running?"
    exit 1
  fi

  local daemon_pid
  daemon_pid=$(cat "$PID_FILE" 2>/dev/null || true)

  if [[ -z "$daemon_pid" ]]; then
    error "Empty PID file"
    rm -f "$PID_FILE"
    exit 1
  fi

  if ! kill -0 "$daemon_pid" 2>/dev/null; then
    warn "Daemon process (PID: ${daemon_pid}) is not running — cleaning up"
    rm -f "$PID_FILE" "$SHUTDOWN_FLAG"
    return 0
  fi

  info "Sending shutdown signal to daemon (PID: ${daemon_pid})..."

  # Graceful path: the poll loop watches for this flag file.
  touch "$SHUTDOWN_FLAG"

  # Give the process up to 30 seconds to exit on its own.
  local waited
  for (( waited = 0; waited < 30; waited++ )); do
    kill -0 "$daemon_pid" 2>/dev/null || break
    sleep 1
  done

  if kill -0 "$daemon_pid" 2>/dev/null; then
    warn "Daemon didn't stop gracefully — sending SIGTERM"
    kill "$daemon_pid" 2>/dev/null || true
    sleep 2
    if kill -0 "$daemon_pid" 2>/dev/null; then
      warn "Sending SIGKILL"
      kill -9 "$daemon_pid" 2>/dev/null || true
    fi
  fi

  rm -f "$PID_FILE" "$SHUTDOWN_FLAG"

  # Best-effort: tear down the tmux session used by --detach mode.
  tmux kill-session -t "cct-daemon" 2>/dev/null || true

  success "Daemon stopped"
}
|
|
2680
|
+
|
|
2681
|
+
# ─── daemon status ───────────────────────────────────────────────────────────

#######################################
# Print a human-readable daemon status report.
# Globals:   PID_FILE, STATE_FILE, MAX_PARALLEL (read);
#            color vars PURPLE/BOLD/GREEN/RED/CYAN/DIM/RESET (read)
# Outputs:   run state, poll times, active jobs, queue, and the last 10
#            completed jobs (all read from $STATE_FILE via jq) to stdout
# Returns:   0 — including when no state file exists yet
#######################################
daemon_status() {
  echo -e "${PURPLE}${BOLD}━━━ Daemon Status ━━━${RESET}"
  echo ""

  # Check if running: PID file must exist AND the recorded PID must answer
  # `kill -0` (signal 0 is an existence probe; nothing is delivered).
  local running=false
  if [[ -f "$PID_FILE" ]]; then
    local pid
    pid=$(cat "$PID_FILE" 2>/dev/null || true)
    if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
      running=true
      echo -e " ${GREEN}●${RESET} ${BOLD}Running${RESET} ${DIM}(PID: ${pid})${RESET}"
    else
      echo -e " ${RED}●${RESET} ${BOLD}Stopped${RESET} ${DIM}(stale PID file)${RESET}"
    fi
  else
    echo -e " ${RED}●${RESET} ${BOLD}Stopped${RESET}"
  fi

  if [[ ! -f "$STATE_FILE" ]]; then
    echo ""
    echo -e " ${DIM}No state file found. Start the daemon first.${RESET}"
    return
  fi

  # Read state
  local last_poll started_at
  last_poll=$(jq -r '.last_poll // "never"' "$STATE_FILE" 2>/dev/null)
  started_at=$(jq -r '.started_at // "unknown"' "$STATE_FILE" 2>/dev/null)

  echo -e " Started: ${DIM}${started_at}${RESET}"
  echo -e " Last poll: ${DIM}${last_poll}${RESET}"
  echo ""

  # Active jobs
  local active_count
  active_count=$(jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo 0)

  echo -e "${BOLD} Active Jobs (${active_count}/${MAX_PARALLEL})${RESET}"
  if [[ "$active_count" -gt 0 ]]; then
    while IFS=$'\t' read -r num title started; do
      local age=""
      if [[ "$started" != "—" ]] && [[ "$running" == "true" ]]; then
        local start_epoch
        # FIX: the original used only the BSD/macOS form `date -j -f`, which
        # fails on GNU/Linux and silently hid the job age. Try BSD first,
        # then GNU `date -d`, then give up with 0.
        start_epoch=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started" +%s 2>/dev/null \
          || TZ=UTC date -d "$started" +%s 2>/dev/null \
          || echo 0)
        if [[ "$start_epoch" -gt 0 ]]; then
          age=" ($(format_duration $(($(now_epoch) - start_epoch))))"
        fi
      fi
      echo -e " ${CYAN}#${num}${RESET} ${title} ${DIM}${age}${RESET}"
    done < <(jq -r '.active_jobs[] | " \(.issue)\t\(.title // "—")\t\(.started_at // "—")"' "$STATE_FILE" 2>/dev/null)
  else
    echo -e " ${DIM}None${RESET}"
  fi
  echo ""

  # Queue
  local queue_count
  queue_count=$(jq -r '.queued | length' "$STATE_FILE" 2>/dev/null || echo 0)

  echo -e "${BOLD} Queued (${queue_count})${RESET}"
  if [[ "$queue_count" -gt 0 ]]; then
    while read -r num; do
      echo -e " ${DIM}#${num}${RESET}"
    done < <(jq -r '.queued[]' "$STATE_FILE" 2>/dev/null)
  else
    echo -e " ${DIM}None${RESET}"
  fi
  echo ""

  # Recent completed
  local completed_count
  completed_count=$(jq -r '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)

  echo -e "${BOLD} Recently Completed (${completed_count})${RESET}"
  if [[ "$completed_count" -gt 0 ]]; then
    # Show last 10 (newest first)
    while IFS=$'\t' read -r num result dur; do
      local icon
      if [[ "$result" == "success" ]]; then
        icon="${GREEN}✓${RESET}"
      else
        icon="${RED}✗${RESET}"
      fi
      echo -e " ${icon} ${CYAN}#${num}${RESET} ${result} ${DIM}(${dur})${RESET}"
    done < <(jq -r '.completed | reverse | .[:10][] | "\(.issue)\t\(.result)\t\(.duration // "—")"' "$STATE_FILE" 2>/dev/null)
  else
    echo -e " ${DIM}None${RESET}"
  fi
  echo ""
}
|
|
2774
|
+
|
|
2775
|
+
# ─── daemon init ─────────────────────────────────────────────────────────────

# Write a starter daemon-config.json under .claude/. Refuses to clobber an
# existing config — the user must delete it first to regenerate.
daemon_init() {
  local target_dir=".claude"
  local target="${target_dir}/daemon-config.json"

  if [[ -f "$target" ]]; then
    warn "Config file already exists: $target"
    info "Delete it first if you want to regenerate"
    return 0
  fi

  mkdir -p "$target_dir"

  # Quoted heredoc delimiter: contents are written verbatim, no expansion.
  cat > "$target" << 'CONFIGEOF'
{
  "watch_label": "ready-to-build",
  "poll_interval": 60,
  "max_parallel": 2,
  "pipeline_template": "autonomous",
  "skip_gates": true,
  "model": "opus",
  "base_branch": "main",
  "on_success": {
    "remove_label": "ready-to-build",
    "add_label": "pipeline/complete",
    "close_issue": false
  },
  "on_failure": {
    "add_label": "pipeline/failed",
    "comment_log_lines": 50
  },
  "notifications": {
    "slack_webhook": null
  },
  "health": {
    "stale_timeout_s": 1800
  },
  "priority_labels": "urgent,p0,high,p1,normal,p2,low,p3",
  "alerts": {
    "degradation_window": 5,
    "cfr_threshold": 30,
    "success_threshold": 50
  },
  "patrol": {
    "interval": 3600,
    "max_issues": 5,
    "label": "auto-patrol"
  },
  "auto_template": false,
  "template_map": {
    "hotfix|incident": "hotfix",
    "security": "enterprise"
  },
  "max_retries": 2,
  "retry_escalation": true,
  "self_optimize": false,
  "optimize_interval": 10,
  "priority_lane": false,
  "priority_lane_labels": "hotfix,incident,p0,urgent",
  "priority_lane_max": 1,
  "watch_mode": "repo",
  "org": null,
  "repo_filter": null,
  "auto_scale": false,
  "auto_scale_interval": 5,
  "max_workers": 8,
  "min_workers": 1,
  "worker_mem_gb": 4,
  "estimated_cost_per_job_usd": 5.0
}
CONFIGEOF

  success "Generated config: ${target}"
  echo ""
  echo -e "${DIM}Edit this file to customize the daemon behavior, then run:${RESET}"
  echo -e " ${CYAN}shipwright daemon start${RESET}"
}
|
|
2853
|
+
|
|
2854
|
+
# ─── daemon logs ─────────────────────────────────────────────────────────────

# Show the daemon log file: last 100 lines by default, or stream it live when
# $FOLLOW is "true" (set by the caller's flag parsing).
daemon_logs() {
  if [[ ! -f "$LOG_FILE" ]]; then
    warn "No log file found at $LOG_FILE"
    info "Start the daemon first with ${CYAN}shipwright daemon start${RESET}"
    return 0
  fi

  if [[ "$FOLLOW" != "true" ]]; then
    tail -100 "$LOG_FILE"
  else
    info "Following daemon log (Ctrl-C to stop)..."
    echo ""
    tail -f "$LOG_FILE"
  fi
}
|
|
2871
|
+
|
|
2872
|
+
# ─── Metrics Dashboard ─────────────────────────────────────────────────────

# Render a DORA/effectiveness/throughput metrics report from the JSONL event
# log ($EVENTS_FILE), either as a colored dashboard or as JSON (--json).
# Flags: --period N (look-back window in days, default 7), --json.
# Requires jq; exits 1 when jq or the events file is missing.
daemon_metrics() {
  local period_days=7
  local json_output=false

  # Parse metrics flags
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --period) period_days="${2:-7}"; shift 2 ;;
      --json) json_output=true; shift ;;
      *) shift ;;
    esac
  done

  if [[ ! -f "$EVENTS_FILE" ]]; then
    error "No events file found at $EVENTS_FILE"
    info "Events are generated when running ${CYAN}shipwright pipeline${RESET} or ${CYAN}shipwright daemon${RESET}"
    exit 1
  fi

  if ! command -v jq &>/dev/null; then
    error "jq is required for metrics. Install: brew install jq"
    exit 1
  fi

  # Calculate cutoff timestamp
  local cutoff_epoch
  cutoff_epoch=$(( $(now_epoch) - (period_days * 86400) ))
  local cutoff_iso
  cutoff_iso=$(epoch_to_iso "$cutoff_epoch")

  # Filter events within period (prefer ts_epoch when available).
  # NOTE(review): jq's `//` binds looser than `>=`, so this reads as
  # (.ts_epoch >= cutoff) // (.ts >= cutoff_iso) — i.e. the ISO-string
  # comparison is a fallback whenever the epoch test is false/null.
  local period_events
  period_events=$(jq -c "select(.ts_epoch >= $cutoff_epoch // .ts >= \"$cutoff_iso\")" "$EVENTS_FILE" 2>/dev/null)

  if [[ -z "$period_events" ]]; then
    warn "No events in the last ${period_days} day(s)"
    return 0
  fi

  # ── DORA: Deployment Frequency ──
  local total_completed successes failures
  total_completed=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed")] | length')
  successes=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success")] | length')
  failures=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.completed" and .result == "failure")] | length')

  # Successful pipelines normalized to a per-week rate.
  local deploy_freq=""
  if [[ "$period_days" -gt 0 ]]; then
    deploy_freq=$(echo "$successes $period_days" | awk '{printf "%.1f", $1 / ($2 / 7)}')
  fi

  # ── DORA: Cycle Time (median pipeline duration for successes) ──
  local cycle_time_median cycle_time_p95
  cycle_time_median=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success") | .duration_s] | sort | if length > 0 then .[length/2 | floor] else 0 end')
  cycle_time_p95=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "pipeline.completed" and .result == "success") | .duration_s] | sort | if length > 0 then .[length * 95 / 100 | floor] else 0 end')

  # ── DORA: Change Failure Rate ──
  local cfr="0"
  if [[ "$total_completed" -gt 0 ]]; then
    cfr=$(echo "$failures $total_completed" | awk '{printf "%.1f", ($1 / $2) * 100}')
  fi

  # ── DORA: MTTR (average time between failure and next success) ──
  local mttr="0"
  # Real MTTR: time gap between each failure event and the next success event.
  # Pairs only events that both carry ts_epoch; unmatched failures are dropped.
  mttr=$(echo "$period_events" | \
    jq -s '
    [.[] | select(.type == "pipeline.completed")] | sort_by(.ts_epoch // 0) |
    [range(length) as $i |
      if .[$i].result == "failure" then
        [.[$i+1:][] | select(.result == "success")][0] as $next |
        if $next and $next.ts_epoch and .[$i].ts_epoch then
          ($next.ts_epoch - .[$i].ts_epoch)
        else null end
      else null end
    ] | map(select(. != null)) |
    if length > 0 then (add / length | floor) else 0 end
    ')

  # ── DX: Compound quality first-pass rate ──
  # NOTE(review): compound_events is declared but never assigned/used here.
  local compound_events first_pass_total first_pass_success
  first_pass_total=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "compound.cycle" and .cycle == 1)] | length')
  # NOTE(review): assumes .passed is recorded as the STRING "true" (not a
  # JSON boolean) — confirm against the event emitter.
  first_pass_success=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "compound.cycle" and .cycle == 1 and .passed == "true")] | length')
  local first_pass_pct="0"
  [[ "$first_pass_total" -gt 0 ]] && first_pass_pct=$(echo "$first_pass_success $first_pass_total" | awk '{printf "%.0f", ($1/$2)*100}')

  # Average of each issue's highest compound cycle number, rounded to 1dp.
  local avg_cycles
  avg_cycles=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "compound.cycle")] | if length > 0 then (group_by(.issue) | map(max_by(.cycle) | .cycle) | add / length) else 0 end | . * 10 | floor / 10')

  # ── Throughput ──
  local issues_processed prs_created
  issues_processed=$(echo "$period_events" | jq -s '[.[] | select(.type == "pipeline.started") | .issue] | unique | length')
  # prs_created mirrors successes; currently not referenced by any output below.
  prs_created=$successes

  # ── Stage Timings ──
  # Per-stage average duration, sorted slowest-first (JSON array of objects).
  local avg_stage_timings
  avg_stage_timings=$(echo "$period_events" | \
    jq -s '[.[] | select(.type == "stage.completed")] | group_by(.stage) | map({stage: .[0].stage, avg: ([.[].duration_s] | add / length | floor)}) | sort_by(.avg) | reverse')

  # ── Autonomy ──
  local daemon_spawns daemon_reaps daemon_success
  daemon_spawns=$(echo "$period_events" | jq -s '[.[] | select(.type == "daemon.spawn")] | length')
  daemon_reaps=$(echo "$period_events" | jq -s '[.[] | select(.type == "daemon.reap")] | length')
  daemon_success=$(echo "$period_events" | jq -s '[.[] | select(.type == "daemon.reap" and .result == "success")] | length')
  local autonomy_pct="0"
  [[ "$daemon_reaps" -gt 0 ]] && autonomy_pct=$(echo "$daemon_success $daemon_reaps" | awk '{printf "%.1f", ($1/$2)*100}')

  # ── Patrol ──
  local patrol_runs patrol_findings patrol_issues_created patrol_auto_resolved
  patrol_runs=$(echo "$period_events" | jq -s '[.[] | select(.type == "patrol.completed")] | length')
  patrol_findings=$(echo "$period_events" | jq -s '[.[] | select(.type == "patrol.finding")] | length')
  patrol_issues_created=$(echo "$period_events" | jq -s '[.[] | select(.type == "patrol.issue_created")] | length')
  # Auto-resolved: patrol issues that were later fixed by a pipeline
  # (intersection of patrol-created issue numbers and successful daemon reaps).
  patrol_auto_resolved=$(echo "$period_events" | jq -s '
    [.[] | select(.type == "patrol.issue_created") | .issue // empty] as $patrol_issues |
    [.[] | select(.type == "daemon.reap" and .result == "success") | .issue // empty] as $completed |
    [$patrol_issues[] | select(. as $p | $completed | any(. == $p))] | length
  ' 2>/dev/null || echo "0")

  # ── DORA Scoring ──
  # Maps a metric value to an Elite/High/Medium/Low grade. awk is used for
  # the float comparisons (deploy_freq/cfr may be fractional); integer
  # metrics (cycle_time/mttr, in seconds) use plain [[ -lt ]].
  dora_grade() {
    local metric="$1" value="$2"
    case "$metric" in
      deploy_freq)
        if awk "BEGIN{exit !($value >= 7)}" 2>/dev/null; then echo "Elite"; return; fi
        if awk "BEGIN{exit !($value >= 1)}" 2>/dev/null; then echo "High"; return; fi
        if awk "BEGIN{exit !($value >= 0.25)}" 2>/dev/null; then echo "Medium"; return; fi
        echo "Low" ;;
      cycle_time)
        [[ "$value" -lt 3600 ]] && echo "Elite" && return
        [[ "$value" -lt 86400 ]] && echo "High" && return
        [[ "$value" -lt 604800 ]] && echo "Medium" && return
        echo "Low" ;;
      cfr)
        if awk "BEGIN{exit !($value < 5)}" 2>/dev/null; then echo "Elite"; return; fi
        if awk "BEGIN{exit !($value < 10)}" 2>/dev/null; then echo "High"; return; fi
        if awk "BEGIN{exit !($value < 15)}" 2>/dev/null; then echo "Medium"; return; fi
        echo "Low" ;;
      mttr)
        [[ "$value" -lt 3600 ]] && echo "Elite" && return
        [[ "$value" -lt 86400 ]] && echo "High" && return
        echo "Medium" ;;
    esac
  }

  local df_grade ct_grade cfr_grade mttr_grade
  df_grade=$(dora_grade deploy_freq "${deploy_freq:-0}")
  ct_grade=$(dora_grade cycle_time "${cycle_time_median:-0}")
  cfr_grade=$(dora_grade cfr "${cfr:-0}")
  mttr_grade=$(dora_grade mttr "${mttr:-0}")

  # Colored bullet for each grade level.
  grade_icon() {
    case "$1" in
      Elite) echo "${GREEN}★${RESET}" ;;
      High) echo "${CYAN}●${RESET}" ;;
      Medium) echo "${YELLOW}◐${RESET}" ;;
      Low) echo "${RED}○${RESET}" ;;
    esac
  }

  # ── JSON Output ──
  if [[ "$json_output" == "true" ]]; then
    jq -n \
      --arg period "${period_days}d" \
      --argjson deploy_freq "${deploy_freq:-0}" \
      --argjson cycle_time_median "${cycle_time_median:-0}" \
      --argjson cycle_time_p95 "${cycle_time_p95:-0}" \
      --arg cfr "$cfr" \
      --argjson mttr "${mttr:-0}" \
      --arg df_grade "$df_grade" \
      --arg ct_grade "$ct_grade" \
      --arg cfr_grade "$cfr_grade" \
      --arg mttr_grade "$mttr_grade" \
      --argjson total_completed "$total_completed" \
      --argjson successes "$successes" \
      --argjson failures "$failures" \
      --arg first_pass_pct "$first_pass_pct" \
      --arg avg_cycles "${avg_cycles:-0}" \
      --argjson issues_processed "$issues_processed" \
      --argjson daemon_spawns "$daemon_spawns" \
      --arg autonomy_pct "$autonomy_pct" \
      --argjson patrol_runs "$patrol_runs" \
      --argjson patrol_findings "$patrol_findings" \
      --argjson patrol_issues_created "$patrol_issues_created" \
      --argjson patrol_auto_resolved "${patrol_auto_resolved:-0}" \
      '{
        period: $period,
        dora: {
          deploy_frequency: { value: $deploy_freq, unit: "PRs/week", grade: $df_grade },
          cycle_time: { median_s: $cycle_time_median, p95_s: $cycle_time_p95, grade: $ct_grade },
          change_failure_rate: { pct: ($cfr | tonumber), grade: $cfr_grade },
          mttr: { avg_s: $mttr, grade: $mttr_grade }
        },
        effectiveness: {
          first_pass_pct: ($first_pass_pct | tonumber),
          avg_compound_cycles: ($avg_cycles | tonumber)
        },
        throughput: {
          issues_processed: $issues_processed,
          pipelines_completed: $total_completed,
          successes: $successes,
          failures: $failures
        },
        autonomy: {
          daemon_spawns: $daemon_spawns,
          autonomy_pct: ($autonomy_pct | tonumber)
        },
        patrol: {
          patrols_run: $patrol_runs,
          findings: $patrol_findings,
          issues_created: $patrol_issues_created,
          auto_resolved: $patrol_auto_resolved
        }
      }'
    return 0
  fi

  # ── Dashboard Output ──
  echo ""
  echo -e "${PURPLE}${BOLD}━━━ Autonomous Team Metrics ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  echo -e " Period: last ${period_days} day(s) ${DIM}$(now_iso)${RESET}"
  echo ""

  echo -e "${BOLD} DORA FOUR KEYS${RESET}"
  echo -e " Deploy Frequency ${deploy_freq:-0} PRs/week $(grade_icon "$df_grade") $df_grade"
  echo -e " Cycle Time (median) $(format_duration "${cycle_time_median:-0}") $(grade_icon "$ct_grade") $ct_grade"
  echo -e " Change Failure ${cfr}% (${failures}/${total_completed}) $(grade_icon "$cfr_grade") $cfr_grade"
  echo -e " MTTR $(format_duration "${mttr:-0}") $(grade_icon "$mttr_grade") $mttr_grade"
  echo ""

  echo -e "${BOLD} EFFECTIVENESS${RESET}"
  echo -e " First-pass quality ${first_pass_pct}% (${first_pass_success}/${first_pass_total})"
  echo -e " Compound cycles avg ${avg_cycles:-0}"
  echo ""

  echo -e "${BOLD} THROUGHPUT${RESET}"
  echo -e " Issues processed ${issues_processed}"
  echo -e " Pipelines completed ${total_completed} (${GREEN}${successes} passed${RESET}, ${RED}${failures} failed${RESET})"
  echo ""

  # Stage breakdown
  local stage_count
  stage_count=$(echo "$avg_stage_timings" | jq 'length' 2>/dev/null || echo 0)
  if [[ "$stage_count" -gt 0 ]]; then
    echo -e "${BOLD} STAGE TIMINGS (avg)${RESET}"
    # \(.avg)s carries an "s" suffix; ${dur%s} strips it back off for
    # format_duration, which expects a bare integer.
    echo "$avg_stage_timings" | jq -r '.[] | " \(.stage)\t\(.avg)s"' 2>/dev/null | \
      while IFS=$'\t' read -r stage dur; do
        printf " %-20s %s\n" "$stage" "$(format_duration "${dur%s}")"
      done
    echo ""
  fi

  echo -e "${BOLD} AUTONOMY${RESET}"
  echo -e " Daemon-spawned ${daemon_spawns} pipeline(s)"
  if [[ "$daemon_reaps" -gt 0 ]]; then
    echo -e " Success rate ${autonomy_pct}% (${daemon_success}/${daemon_reaps})"
  fi
  echo ""

  echo -e "${BOLD} PATROL${RESET}"
  echo -e " Patrols run ${patrol_runs}"
  echo -e " Findings ${patrol_findings}"
  echo -e " Issues created ${patrol_issues_created}"
  echo -e " Auto-resolved ${patrol_auto_resolved:-0}"
  echo ""

  echo -e "${PURPLE}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  echo ""
}
|
|
3147
|
+
|
|
3148
|
+
# ─── Command Router ─────────────────────────────────────────────────────────

# Ensure runtime directories exist, then dispatch on the parsed subcommand.
setup_dirs

case "$SUBCOMMAND" in
  start)   daemon_start ;;
  stop)    daemon_stop ;;
  status)  daemon_status ;;
  init)    daemon_init ;;
  logs)    daemon_logs ;;
  metrics) daemon_metrics "$@" ;;
  triage)  daemon_triage_show "$@" ;;
  patrol)  daemon_patrol "$@" ;;
  # Hand the whole process over to the test runner script.
  test)    exec "$SCRIPT_DIR/cct-daemon-test.sh" "$@" ;;
  help|--help|-h)
    show_help
    ;;
  *)
    error "Unknown command: ${SUBCOMMAND}"
    echo ""
    show_help
    exit 1
    ;;
esac