shipwright-cli 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +19 -19
  2. package/dashboard/public/index.html +224 -8
  3. package/dashboard/public/styles.css +1078 -4
  4. package/dashboard/server.ts +1100 -15
  5. package/dashboard/src/canvas/interactions.ts +74 -0
  6. package/dashboard/src/canvas/layout.ts +85 -0
  7. package/dashboard/src/canvas/overlays.ts +117 -0
  8. package/dashboard/src/canvas/particles.ts +105 -0
  9. package/dashboard/src/canvas/renderer.ts +191 -0
  10. package/dashboard/src/components/charts/bar.ts +54 -0
  11. package/dashboard/src/components/charts/donut.ts +25 -0
  12. package/dashboard/src/components/charts/pipeline-rail.ts +105 -0
  13. package/dashboard/src/components/charts/sparkline.ts +82 -0
  14. package/dashboard/src/components/header.ts +616 -0
  15. package/dashboard/src/components/modal.ts +413 -0
  16. package/dashboard/src/components/terminal.ts +144 -0
  17. package/dashboard/src/core/api.ts +381 -0
  18. package/dashboard/src/core/helpers.ts +118 -0
  19. package/dashboard/src/core/router.ts +190 -0
  20. package/dashboard/src/core/sse.ts +38 -0
  21. package/dashboard/src/core/state.ts +150 -0
  22. package/dashboard/src/core/ws.ts +143 -0
  23. package/dashboard/src/design/icons.ts +131 -0
  24. package/dashboard/src/design/tokens.ts +160 -0
  25. package/dashboard/src/main.ts +68 -0
  26. package/dashboard/src/types/api.ts +337 -0
  27. package/dashboard/src/views/activity.ts +185 -0
  28. package/dashboard/src/views/agent-cockpit.ts +236 -0
  29. package/dashboard/src/views/agents.ts +72 -0
  30. package/dashboard/src/views/fleet-map.ts +299 -0
  31. package/dashboard/src/views/insights.ts +298 -0
  32. package/dashboard/src/views/machines.ts +162 -0
  33. package/dashboard/src/views/metrics.ts +420 -0
  34. package/dashboard/src/views/overview.ts +409 -0
  35. package/dashboard/src/views/pipeline-theater.ts +219 -0
  36. package/dashboard/src/views/pipelines.ts +595 -0
  37. package/dashboard/src/views/team.ts +362 -0
  38. package/dashboard/src/views/timeline.ts +389 -0
  39. package/dashboard/tsconfig.json +21 -0
  40. package/docs/AGI-PLATFORM-PLAN.md +5 -5
  41. package/docs/AGI-WHATS-NEXT.md +19 -16
  42. package/docs/README.md +2 -0
  43. package/package.json +8 -1
  44. package/scripts/check-version-consistency.sh +72 -0
  45. package/scripts/lib/daemon-adaptive.sh +610 -0
  46. package/scripts/lib/daemon-dispatch.sh +489 -0
  47. package/scripts/lib/daemon-failure.sh +387 -0
  48. package/scripts/lib/daemon-patrol.sh +1113 -0
  49. package/scripts/lib/daemon-poll.sh +1202 -0
  50. package/scripts/lib/daemon-state.sh +550 -0
  51. package/scripts/lib/daemon-triage.sh +490 -0
  52. package/scripts/lib/helpers.sh +81 -0
  53. package/scripts/lib/pipeline-intelligence.sh +0 -6
  54. package/scripts/lib/pipeline-quality-checks.sh +3 -1
  55. package/scripts/lib/pipeline-stages.sh +20 -0
  56. package/scripts/sw +109 -168
  57. package/scripts/sw-activity.sh +1 -1
  58. package/scripts/sw-adaptive.sh +2 -2
  59. package/scripts/sw-adversarial.sh +1 -1
  60. package/scripts/sw-architecture-enforcer.sh +1 -1
  61. package/scripts/sw-auth.sh +14 -6
  62. package/scripts/sw-autonomous.sh +1 -1
  63. package/scripts/sw-changelog.sh +2 -2
  64. package/scripts/sw-checkpoint.sh +1 -1
  65. package/scripts/sw-ci.sh +1 -1
  66. package/scripts/sw-cleanup.sh +1 -1
  67. package/scripts/sw-code-review.sh +1 -1
  68. package/scripts/sw-connect.sh +1 -1
  69. package/scripts/sw-context.sh +1 -1
  70. package/scripts/sw-cost.sh +1 -1
  71. package/scripts/sw-daemon.sh +53 -4817
  72. package/scripts/sw-dashboard.sh +1 -1
  73. package/scripts/sw-db.sh +1 -1
  74. package/scripts/sw-decompose.sh +1 -1
  75. package/scripts/sw-deps.sh +1 -1
  76. package/scripts/sw-developer-simulation.sh +1 -1
  77. package/scripts/sw-discovery.sh +1 -1
  78. package/scripts/sw-doc-fleet.sh +1 -1
  79. package/scripts/sw-docs-agent.sh +1 -1
  80. package/scripts/sw-docs.sh +1 -1
  81. package/scripts/sw-doctor.sh +49 -1
  82. package/scripts/sw-dora.sh +1 -1
  83. package/scripts/sw-durable.sh +1 -1
  84. package/scripts/sw-e2e-orchestrator.sh +1 -1
  85. package/scripts/sw-eventbus.sh +1 -1
  86. package/scripts/sw-feedback.sh +1 -1
  87. package/scripts/sw-fix.sh +6 -5
  88. package/scripts/sw-fleet-discover.sh +1 -1
  89. package/scripts/sw-fleet-viz.sh +3 -3
  90. package/scripts/sw-fleet.sh +1 -1
  91. package/scripts/sw-github-app.sh +5 -2
  92. package/scripts/sw-github-checks.sh +1 -1
  93. package/scripts/sw-github-deploy.sh +1 -1
  94. package/scripts/sw-github-graphql.sh +1 -1
  95. package/scripts/sw-guild.sh +1 -1
  96. package/scripts/sw-heartbeat.sh +1 -1
  97. package/scripts/sw-hygiene.sh +1 -1
  98. package/scripts/sw-incident.sh +1 -1
  99. package/scripts/sw-init.sh +112 -9
  100. package/scripts/sw-instrument.sh +6 -1
  101. package/scripts/sw-intelligence.sh +5 -1
  102. package/scripts/sw-jira.sh +1 -1
  103. package/scripts/sw-launchd.sh +1 -1
  104. package/scripts/sw-linear.sh +20 -9
  105. package/scripts/sw-logs.sh +1 -1
  106. package/scripts/sw-loop.sh +2 -1
  107. package/scripts/sw-memory.sh +10 -1
  108. package/scripts/sw-mission-control.sh +1 -1
  109. package/scripts/sw-model-router.sh +4 -1
  110. package/scripts/sw-otel.sh +4 -4
  111. package/scripts/sw-oversight.sh +1 -1
  112. package/scripts/sw-pipeline-composer.sh +3 -1
  113. package/scripts/sw-pipeline-vitals.sh +4 -6
  114. package/scripts/sw-pipeline.sh +19 -56
  115. package/scripts/sw-pipeline.sh.mock +7 -0
  116. package/scripts/sw-pm.sh +5 -2
  117. package/scripts/sw-pr-lifecycle.sh +1 -1
  118. package/scripts/sw-predictive.sh +4 -1
  119. package/scripts/sw-prep.sh +3 -2
  120. package/scripts/sw-ps.sh +1 -1
  121. package/scripts/sw-public-dashboard.sh +10 -4
  122. package/scripts/sw-quality.sh +1 -1
  123. package/scripts/sw-reaper.sh +1 -1
  124. package/scripts/sw-recruit.sh +25 -1
  125. package/scripts/sw-regression.sh +2 -1
  126. package/scripts/sw-release-manager.sh +1 -1
  127. package/scripts/sw-release.sh +7 -5
  128. package/scripts/sw-remote.sh +1 -1
  129. package/scripts/sw-replay.sh +1 -1
  130. package/scripts/sw-retro.sh +1 -1
  131. package/scripts/sw-scale.sh +11 -5
  132. package/scripts/sw-security-audit.sh +1 -1
  133. package/scripts/sw-self-optimize.sh +172 -7
  134. package/scripts/sw-session.sh +1 -1
  135. package/scripts/sw-setup.sh +1 -1
  136. package/scripts/sw-standup.sh +4 -3
  137. package/scripts/sw-status.sh +1 -1
  138. package/scripts/sw-strategic.sh +2 -1
  139. package/scripts/sw-stream.sh +8 -2
  140. package/scripts/sw-swarm.sh +12 -10
  141. package/scripts/sw-team-stages.sh +1 -1
  142. package/scripts/sw-templates.sh +1 -1
  143. package/scripts/sw-testgen.sh +3 -2
  144. package/scripts/sw-tmux-pipeline.sh +2 -1
  145. package/scripts/sw-tmux.sh +1 -1
  146. package/scripts/sw-trace.sh +1 -1
  147. package/scripts/sw-tracker-jira.sh +1 -0
  148. package/scripts/sw-tracker-linear.sh +1 -0
  149. package/scripts/sw-tracker.sh +24 -6
  150. package/scripts/sw-triage.sh +1 -1
  151. package/scripts/sw-upgrade.sh +1 -1
  152. package/scripts/sw-ux.sh +1 -1
  153. package/scripts/sw-webhook.sh +1 -1
  154. package/scripts/sw-widgets.sh +2 -2
  155. package/scripts/sw-worktree.sh +1 -1
  156. package/dashboard/public/app.js +0 -4422
@@ -0,0 +1,387 @@
1
+ # daemon-failure.sh — Failure classification, retry, backoff (for sw-daemon.sh)
2
+ # Source from sw-daemon.sh. Requires state, helpers.
3
# Double-source guard: if this library was already sourced into the current
# shell, stop here so the definitions below are not evaluated a second time.
if [[ -n "${_DAEMON_FAILURE_LOADED:-}" ]]; then
  return 0
fi
_DAEMON_FAILURE_LOADED=1
5
+
6
# classify_failure ISSUE_NUM
#
# Classify why the pipeline for ISSUE_NUM failed by scanning the last 200
# lines of "$LOG_DIR/issue-<ISSUE_NUM>.log" and, for context exhaustion, the
# loop progress file inside the issue's worktree.
#
# Globals read: LOG_DIR, WORKTREE_DIR (optional), REPO_DIR (fallback base for
#               the worktree path when WORKTREE_DIR is unset/empty).
# Output:       exactly one class name on stdout:
#               auth_error | api_error | invalid_issue | context_exhaustion |
#               build_failure | unknown
# Checks are ordered; the first matching class wins.
classify_failure() {
  local issue_num="$1"
  if [[ -z "${LOG_DIR:-}" ]]; then
    echo "unknown"
    return
  fi
  local log_path="$LOG_DIR/issue-${issue_num}.log"
  if [[ ! -f "$log_path" ]]; then
    echo "unknown"
    return
  fi
  local tail_content
  tail_content=$(tail -n 200 "$log_path" 2>/dev/null || true)

  # NOTE: patterns are fed through a here-string rather than `echo | grep -q`.
  # With `set -o pipefail` in the sourcing script, grep -q exiting on the first
  # match can SIGPIPE the writer and turn a positive match into a failure.

  # Auth errors
  if grep -qiE 'not logged in|unauthorized|auth.*fail|401 |invalid.*token|CLAUDE_CODE_OAUTH_TOKEN|api key.*invalid|authentication required' <<<"$tail_content"; then
    echo "auth_error"
    return
  fi

  # API errors (rate limits, timeouts, server errors)
  if grep -qiE 'rate limit|429 |503 |502 |overloaded|timeout|ETIMEDOUT|ECONNRESET|socket hang up|service unavailable' <<<"$tail_content"; then
    echo "api_error"
    return
  fi

  # Invalid issue (not found, empty body)
  if grep -qiE 'issue not found|404 |no body|could not resolve|GraphQL.*not found|issue.*does not exist' <<<"$tail_content"; then
    echo "invalid_issue"
    return
  fi

  # Context exhaustion — check progress file
  local issue_worktree_path="${WORKTREE_DIR:-${REPO_DIR:-}/.worktrees}/daemon-issue-${issue_num}"
  local progress_file="${issue_worktree_path}/.claude/loop-logs/progress.md"
  if [[ -f "$progress_file" ]]; then
    # Only the most recent "Iteration:" / "Tests passing:" entries matter.
    local cf_iter
    cf_iter=$(grep -oE 'Iteration: [0-9]+' "$progress_file" 2>/dev/null | tail -n 1 | grep -oE '[0-9]+' || true)
    if ! [[ "${cf_iter:-}" =~ ^[0-9]+$ ]]; then cf_iter="0"; fi

    local cf_tests
    cf_tests=$(grep -oE 'Tests passing: (true|false)' "$progress_file" 2>/dev/null | tail -n 1 | awk '{print $NF}' || true)
    # awk succeeds even when grep found nothing, so default explicitly instead
    # of relying on the pipeline's exit status.
    cf_tests="${cf_tests:-unknown}"

    # 10# forces base 10 so a zero-padded iteration ("08") is not read as octal.
    if (( 10#$cf_iter > 0 )) && [[ "$cf_tests" == "false" || "$cf_tests" == "unknown" ]]; then
      echo "context_exhaustion"
      return
    fi
  fi

  # Build failure (test errors, compile errors)
  if grep -qiE 'test.*fail|FAIL|build.*error|compile.*error|lint.*fail|npm ERR|exit code [1-9]' <<<"$tail_content"; then
    echo "build_failure"
    return
  fi

  echo "unknown"
}
56
+
57
+ # ─── Consecutive Failure Tracking (persisted + adaptive) ─────────────────────
58
+
59
# In-memory view of the current failure streak:
#   DAEMON_CONSECUTIVE_FAILURE_COUNT — length of the streak
#   DAEMON_CONSECUTIVE_FAILURE_CLASS — failure class the streak consists of
DAEMON_CONSECUTIVE_FAILURE_COUNT=0
DAEMON_CONSECUTIVE_FAILURE_CLASS=''
61
+
62
+ # Max retries per failure class (adaptive retry strategy)
63
# get_max_retries_for_class [CLASS]
#
# Print the retry budget for a failure class (CLASS defaults to "unknown").
# auth_error and invalid_issue are never retried; the other classes read
# their limit from an environment override with a built-in default:
#   api_error          -> MAX_RETRIES_API_ERROR          (default 4)
#   context_exhaustion -> MAX_RETRIES_CONTEXT_EXHAUSTION (default 2)
#   build_failure      -> MAX_RETRIES_BUILD              (default 2)
#   anything else      -> MAX_RETRIES                    (default 2)
get_max_retries_for_class() {
  local failure_kind="${1:-unknown}"
  local limit

  if [[ "$failure_kind" == "auth_error" || "$failure_kind" == "invalid_issue" ]]; then
    limit=0
  elif [[ "$failure_kind" == "api_error" ]]; then
    limit="${MAX_RETRIES_API_ERROR:-4}"
  elif [[ "$failure_kind" == "context_exhaustion" ]]; then
    limit="${MAX_RETRIES_CONTEXT_EXHAUSTION:-2}"
  elif [[ "$failure_kind" == "build_failure" ]]; then
    limit="${MAX_RETRIES_BUILD:-2}"
  else
    limit="${MAX_RETRIES:-2}"
  fi

  echo "$limit"
}
73
+
74
+ # Append failure to persisted history and compute consecutive count; smart pause with exponential backoff
75
# record_failure_class FAILURE_CLASS
#
# Track a failure of the given class and auto-pause the daemon when the same
# class has failed 3 or more times in a row.
#
#  1. Updates the in-memory streak (DAEMON_CONSECUTIVE_FAILURE_CLASS/_COUNT).
#  2. If STATE_FILE exists, appends {ts, class} to .failure_history (last 100
#     entries kept) via locked_state_update, then recomputes the streak as the
#     unbroken run of FAILURE_CLASS at the newest end of that history.
#  3. At a streak of >= 3, writes a JSON pause record to PAUSE_FLAG with a
#     resume_after timestamp: 5 minutes doubling per extra failure, capped at
#     480 minutes, and emits a daemon.auto_pause event.
#
# Globals read:    STATE_FILE, PAUSE_FLAG, TMPDIR
# Globals written: DAEMON_CONSECUTIVE_FAILURE_CLASS, DAEMON_CONSECUTIVE_FAILURE_COUNT
# Helpers used:    now_iso, epoch_to_iso, locked_state_update, daemon_log, emit_event
record_failure_class() {
  local failure_class="$1"

  # In-memory consecutive (for backward compat)
  if [[ "$failure_class" == "${DAEMON_CONSECUTIVE_FAILURE_CLASS:-}" ]]; then
    DAEMON_CONSECUTIVE_FAILURE_COUNT=$(( ${DAEMON_CONSECUTIVE_FAILURE_COUNT:-0} + 1 ))
  else
    DAEMON_CONSECUTIVE_FAILURE_CLASS="$failure_class"
    DAEMON_CONSECUTIVE_FAILURE_COUNT=1
  fi

  # Persist failure to state (failure_history) for pattern tracking.
  # Best effort: a failed state write must not abort failure handling.
  if [[ -f "${STATE_FILE:-}" ]]; then
    local entry
    entry=$(jq -n --arg ts "$(now_iso)" --arg class "$failure_class" '{ts: $ts, class: $class}')
    locked_state_update --argjson entry "$entry" \
      '.failure_history = ((.failure_history // []) + [$entry] | .[-100:])' 2>/dev/null || true
  fi

  # Consecutive count from persisted tail: count only the unbroken run of
  # $failure_class from the newest entry backwards (not total occurrences).
  local consecutive="$DAEMON_CONSECUTIVE_FAILURE_COUNT"
  if [[ -f "${STATE_FILE:-}" ]]; then
    local from_state
    from_state=$(jq -r --arg c "$failure_class" '
      (.failure_history // []) | [.[].class] | reverse |
      if length == 0 then 0
      elif .[0] != $c then 0
      else
        reduce .[] as $x (
          {count: 0, done: false};
          if .done then . elif $x == $c then .count += 1 else .done = true end
        ) | .count
      end
    ' "$STATE_FILE" 2>/dev/null || true)
    # Trust the persisted value only when it is a positive integer; an
    # unreadable or empty result keeps the in-memory streak instead of
    # silently resetting it.
    if [[ "$from_state" =~ ^[0-9]+$ ]] && (( 10#$from_state > 0 )); then
      consecutive=$(( 10#$from_state ))
    fi
    DAEMON_CONSECUTIVE_FAILURE_COUNT="$consecutive"
  fi

  # Smart pause: exponential backoff instead of hard stop (resume_after so
  # the daemon can auto-resume).
  if [[ "$consecutive" -ge 3 ]]; then
    # Clamp the exponent before shifting: 5 * 2^7 = 640 already exceeds the
    # 480-minute cap, and an unclamped shift overflows 64-bit arithmetic for
    # long streaks (yielding a negative or tiny pause).
    local exponent=$(( consecutive - 3 ))
    if (( exponent > 7 )); then
      exponent=7
    fi
    local pause_mins=$(( 5 * (1 << exponent) ))
    if (( pause_mins > 480 )); then
      pause_mins=480
    fi

    local resume_ts resume_after
    resume_ts=$(( $(date +%s) + pause_mins * 60 ))
    resume_after=$(epoch_to_iso "$resume_ts")
    daemon_log ERROR "${consecutive} consecutive failures (class: ${failure_class}) — auto-pausing until ${resume_after} (${pause_mins}m backoff)"

    local pause_json
    pause_json=$(jq -n \
      --arg reason "consecutive_${failure_class}" \
      --arg ts "$(now_iso)" \
      --arg resume "$resume_after" \
      --argjson count "$consecutive" \
      '{reason: $reason, timestamp: $ts, resume_after: $resume, consecutive_count: $count}')

    # Write via a temp file + mv so readers never see a half-written flag;
    # fall back to a direct write if no temp file can be created.
    local _tmp_pause=""
    _tmp_pause=$(mktemp "${TMPDIR:-/tmp}/sw-pause.XXXXXX" 2>/dev/null) || _tmp_pause=""
    if [[ -n "$_tmp_pause" ]]; then
      printf '%s\n' "$pause_json" > "$_tmp_pause"
      mv "$_tmp_pause" "$PAUSE_FLAG"
    else
      printf '%s\n' "$pause_json" > "$PAUSE_FLAG"
    fi

    emit_event "daemon.auto_pause" "reason=consecutive_failures" "class=$failure_class" "count=$consecutive" "resume_after=$resume_after"
  fi
}
136
+
137
# reset_failure_tracking
#
# Forget the in-memory failure streak: zero the counter and clear the class.
reset_failure_tracking() {
  DAEMON_CONSECUTIVE_FAILURE_COUNT=0
  DAEMON_CONSECUTIVE_FAILURE_CLASS=''
}
141
+
142
+ # ─── Failure Handler ────────────────────────────────────────────────────────
143
+
144
# daemon_on_failure ISSUE_NUM [EXIT_CODE] [DURATION]
#
# Handle a failed pipeline run for an issue.
#
#  1. Logs the failure and, when DURATION parses to > 0 seconds, records it
#     via record_pipeline_duration / record_scaling_outcome.
#  2. Appends a "failed" entry to .completed in the state file (last 500 kept).
#  3. Classifies the failure (classify_failure) and records the class
#     (record_failure_class).
#  4. If RETRY_ESCALATION is "true" (default) and the class is retryable with
#     retries remaining: bumps .retry_counts[ISSUE_NUM], escalates model /
#     template / iteration limits, comments on the issue, sleeps for an
#     exponential backoff, re-spawns the pipeline via daemon_spawn_pipeline,
#     sets _retry_spawned_for, and returns.
#  5. Otherwise reports final failure: sw-pm.sh learn, failure label, closes a
#     draft PR for the issue branch, posts a log-tail comment, sends
#     notifications.
#
# Arguments:
#   $1 ISSUE_NUM  issue number
#   $2 EXIT_CODE  pipeline exit code (default 1); must be valid JSON (a number)
#                 because it is passed to jq via --argjson
#   $3 DURATION   human-readable duration containing <N>h / <N>m / <N>s
#                 components, or empty / "unknown"
#
# Globals read:    PIPELINE_TEMPLATE, MODEL, MAX_PARALLEL, STATE_FILE,
#                  RETRY_ESCALATION, CHECKPOINT_ENABLED, WORKTREE_DIR, REPO_DIR,
#                  MAX_RESTARTS_CFG, NO_GITHUB, SCRIPT_DIR, LOG_DIR,
#                  ON_FAILURE_ADD_LABEL, ON_FAILURE_LOG_LINES, WATCH_LABEL
# Globals written: _retry_spawned_for (only when a retry was spawned);
#                  PIPELINE_TEMPLATE and MODEL are overridden for the re-spawn
#                  and restored afterwards.
daemon_on_failure() {
  local issue_num="$1" exit_code="${2:-1}" duration="${3:-}"

  daemon_log ERROR "Pipeline failed for issue #${issue_num} (exit: ${exit_code}, ${duration:-unknown})"

  # Record pipeline duration for adaptive threshold learning
  if [[ -n "$duration" && "$duration" != "unknown" ]]; then
    local dur_secs=0
    local _h=0 _m=0 _s=0
    if [[ "$duration" =~ ([0-9]+)h ]]; then _h="${BASH_REMATCH[1]}"; fi
    if [[ "$duration" =~ ([0-9]+)m ]]; then _m="${BASH_REMATCH[1]}"; fi
    if [[ "$duration" =~ ([0-9]+)s ]]; then _s="${BASH_REMATCH[1]}"; fi
    # 10# forces base 10: a zero-padded component such as "08" would otherwise
    # be rejected as an invalid octal number.
    dur_secs=$(( 10#${_h} * 3600 + 10#${_m} * 60 + 10#${_s} ))
    if [[ "$dur_secs" -gt 0 ]]; then
      record_pipeline_duration "$PIPELINE_TEMPLATE" "$dur_secs" "failure"
      record_scaling_outcome "$MAX_PARALLEL" "failure"
    fi
  fi

  # Record in completed list
  locked_state_update \
    --argjson num "$issue_num" \
    --arg result "failed" \
    --argjson code "$exit_code" \
    --arg dur "${duration:-unknown}" \
    --arg completed_at "$(now_iso)" \
    '.completed += [{
      issue: $num,
      result: $result,
      exit_code: $code,
      duration: $dur,
      completed_at: $completed_at
    }] | .completed = .completed[-500:]'

  # ── Classify failure and decide retry strategy ──
  local failure_class
  failure_class=$(classify_failure "$issue_num")
  daemon_log INFO "Failure classified as: ${failure_class} for issue #${issue_num}"
  emit_event "daemon.failure_classified" "issue=$issue_num" "class=$failure_class"
  record_failure_class "$failure_class"

  # ── Auto-retry with strategy escalation ──
  if [[ "${RETRY_ESCALATION:-true}" == "true" ]]; then
    local retry_count
    retry_count=$(jq -r --arg num "$issue_num" \
      '.retry_counts[$num] // 0' "$STATE_FILE" 2>/dev/null || echo "0")

    # Non-retryable failures — skip retry entirely
    case "$failure_class" in
      auth_error)
        daemon_log ERROR "Auth error for issue #${issue_num} — skipping retry"
        emit_event "daemon.skip_retry" "issue=$issue_num" "reason=auth_error"
        if [[ "$NO_GITHUB" != "true" ]]; then
          gh issue edit "$issue_num" --add-label "pipeline/auth-error" 2>/dev/null || true
        fi
        ;;
      invalid_issue)
        daemon_log ERROR "Invalid issue #${issue_num} — skipping retry"
        emit_event "daemon.skip_retry" "issue=$issue_num" "reason=invalid_issue"
        if [[ "$NO_GITHUB" != "true" ]]; then
          gh issue comment "$issue_num" --body "Pipeline skipped retry: issue appears invalid or has no body." 2>/dev/null || true
        fi
        ;;
      *)
        # Retryable failures — per-class max retries and escalation
        local effective_max
        effective_max=$(get_max_retries_for_class "$failure_class")
        if [[ "$retry_count" -lt "$effective_max" ]]; then
          retry_count=$((retry_count + 1))

          # Update retry count in state (locked to prevent race)
          locked_state_update \
            --arg num "$issue_num" --argjson count "$retry_count" \
            '.retry_counts[$num] = $count'

          daemon_log WARN "Auto-retry #${retry_count}/${effective_max} for issue #${issue_num} (class: ${failure_class})"
          emit_event "daemon.retry" "issue=$issue_num" "retry=$retry_count" "max=$effective_max" "class=$failure_class"

          # Check for checkpoint to enable resume-from-checkpoint.
          # The worktree base honours WORKTREE_DIR, matching the path that
          # classify_failure inspects; it falls back to $REPO_DIR/.worktrees.
          local checkpoint_args=()
          if [[ "${CHECKPOINT_ENABLED:-true}" == "true" ]]; then
            local issue_worktree="${WORKTREE_DIR:-${REPO_DIR:-}/.worktrees}/daemon-issue-${issue_num}"
            if [[ -d "$issue_worktree/.claude/pipeline-artifacts/checkpoints" ]]; then
              local latest_checkpoint="" cp_file
              # Glob order is lexical; the last existing match is used.
              for cp_file in "$issue_worktree/.claude/pipeline-artifacts/checkpoints"/*-checkpoint.json; do
                if [[ -f "$cp_file" ]]; then
                  latest_checkpoint="$cp_file"
                fi
              done
              if [[ -n "$latest_checkpoint" ]]; then
                daemon_log INFO "Found checkpoint: $latest_checkpoint"
                emit_event "daemon.recovery" "issue=$issue_num" "checkpoint=$latest_checkpoint"
                checkpoint_args+=("--resume")
              fi
            fi
          fi

          # Build escalated pipeline args
          local retry_template="$PIPELINE_TEMPLATE"
          local retry_model="${MODEL:-opus}"
          local extra_args=()

          if [[ "$retry_count" -eq 1 ]]; then
            retry_model="opus"
            extra_args+=("--max-iterations" "30")
            daemon_log INFO "Escalation: model=opus, max_iterations=30"
          elif [[ "$retry_count" -ge 2 ]]; then
            retry_template="full"
            retry_model="opus"
            extra_args+=("--max-iterations" "30" "--compound-cycles" "5")
            daemon_log INFO "Escalation: template=full, compound_cycles=5"
          fi

          # Increase restarts on context exhaustion
          if [[ "$failure_class" == "context_exhaustion" ]]; then
            local boosted_restarts=$(( ${MAX_RESTARTS_CFG:-3} + retry_count ))
            if [[ "$boosted_restarts" -gt 5 ]]; then
              boosted_restarts=5
            fi
            extra_args+=("--max-restarts" "$boosted_restarts")
            daemon_log INFO "Boosting max-restarts to $boosted_restarts (context exhaustion)"
          fi

          # Exponential backoff (per-class base); cap at 1h.
          # The exponent is clamped first: base * 2^7 already exceeds the cap,
          # and an unbounded shift would overflow for large retry limits.
          local base_secs=30
          if [[ "$failure_class" == "api_error" ]]; then
            base_secs=300
          fi
          local backoff_exp=$(( retry_count - 1 ))
          if (( backoff_exp > 7 )); then
            backoff_exp=7
          fi
          local backoff_secs=$(( base_secs * (1 << backoff_exp) ))
          if (( backoff_secs > 3600 )); then
            backoff_secs=3600
          fi
          if [[ "$failure_class" == "api_error" ]]; then
            daemon_log INFO "API error — exponential backoff ${backoff_secs}s"
          fi

          if [[ "$NO_GITHUB" != "true" ]]; then
            # The denominator is the per-class limit actually enforced above.
            gh issue comment "$issue_num" --body "## 🔄 Auto-Retry #${retry_count}

Pipeline failed (${failure_class}) — retrying with escalated strategy.

| Field | Value |
|-------|-------|
| Retry | ${retry_count} / ${effective_max} |
| Failure | \`${failure_class}\` |
| Template | \`${retry_template}\` |
| Model | \`${retry_model}\` |
| Started | $(now_iso) |

_Escalation: $(if [[ "$retry_count" -eq 1 ]]; then echo "upgraded model + increased iterations"; else echo "full template + compound quality"; fi)_" 2>/dev/null || true
          fi

          daemon_log INFO "Waiting ${backoff_secs}s before retry #${retry_count}"
          sleep "$backoff_secs"

          # Merge checkpoint args + extra args for passthrough
          local all_extra_args=()
          if [[ ${#checkpoint_args[@]} -gt 0 ]]; then
            all_extra_args+=("${checkpoint_args[@]}")
          fi
          if [[ ${#extra_args[@]} -gt 0 ]]; then
            all_extra_args+=("${extra_args[@]}")
          fi

          # Re-spawn with escalated strategy; the globals are overridden only
          # for the duration of the spawn call and then restored.
          local orig_template="$PIPELINE_TEMPLATE"
          local orig_model="${MODEL:-}"
          PIPELINE_TEMPLATE="$retry_template"
          MODEL="$retry_model"
          daemon_spawn_pipeline "$issue_num" "retry-${retry_count}" "" "${all_extra_args[@]}"
          _retry_spawned_for="$issue_num"
          PIPELINE_TEMPLATE="$orig_template"
          MODEL="$orig_model"
          return
        fi

        daemon_log WARN "Max retries (${effective_max}) exhausted for issue #${issue_num}"
        emit_event "daemon.retry_exhausted" "issue=$issue_num" "retries=$retry_count"
        ;;
    esac
  fi

  # ── No retry — report final failure ──
  # PM agent: record failure for learning (only when we're done with this issue)
  if [[ -x "$SCRIPT_DIR/sw-pm.sh" ]]; then
    bash "$SCRIPT_DIR/sw-pm.sh" learn "$issue_num" failure 2>/dev/null || true
  fi

  if [[ "$NO_GITHUB" != "true" ]]; then
    # Add failure label and remove watch label (prevent re-processing)
    gh issue edit "$issue_num" \
      --add-label "$ON_FAILURE_ADD_LABEL" \
      --remove-label "$WATCH_LABEL" 2>/dev/null || true

    # Close any draft PR created for this issue (cleanup abandoned work).
    # `gh pr list --head` takes a single branch, so each candidate branch is
    # queried separately and the first draft found is used.
    local draft_pr="" head_branch
    for head_branch in "daemon/issue-${issue_num}" "pipeline/pipeline-issue-${issue_num}"; do
      draft_pr=$(gh pr list --head "$head_branch" \
        --json number,isDraft --jq '.[] | select(.isDraft == true) | .number' 2>/dev/null | head -1 || true)
      if [[ -n "$draft_pr" ]]; then
        break
      fi
    done
    if [[ -n "$draft_pr" ]]; then
      gh pr close "$draft_pr" --delete-branch 2>/dev/null || true
      daemon_log INFO "Closed draft PR #${draft_pr} for failed issue #${issue_num}"
    fi

    # Comment with log tail
    local log_tail=""
    local log_path="$LOG_DIR/issue-${issue_num}.log"
    if [[ -f "$log_path" ]]; then
      log_tail=$(tail -n "$ON_FAILURE_LOG_LINES" "$log_path" 2>/dev/null || true)
    fi

    local retry_info=""
    if [[ "${RETRY_ESCALATION:-true}" == "true" ]]; then
      local final_count final_max
      final_count=$(jq -r --arg num "$issue_num" \
        '.retry_counts[$num] // 0' "$STATE_FILE" 2>/dev/null || echo "0")
      final_max=$(get_max_retries_for_class "$failure_class")
      retry_info="| Retries | ${final_count} / ${final_max} (exhausted) |"
    fi

    gh issue comment "$issue_num" --body "## ❌ Pipeline Failed

The autonomous pipeline encountered an error.

| Field | Value |
|-------|-------|
| Exit Code | ${exit_code} |
| Duration | ${duration:-unknown} |
| Failed At | $(now_iso) |
${retry_info}

<details>
<summary>Last ${ON_FAILURE_LOG_LINES} lines of log</summary>

\`\`\`
${log_tail}
\`\`\`

</details>

_Re-add the \`${WATCH_LABEL}\` label to retry._" 2>/dev/null || true
  fi

  notify "Pipeline Failed — Issue #${issue_num}" \
    "Exit code: ${exit_code}, Duration: ${duration:-unknown}" "error"
  "$SCRIPT_DIR/sw-tracker.sh" notify "failed" "$issue_num" "Exit code: ${exit_code}, Duration: ${duration:-unknown}" 2>/dev/null || true
}
382
+
383
+ # ─── Intelligent Triage ──────────────────────────────────────────────────────
384
+
385
+ # Score an issue from 0-100 based on multiple signals for intelligent prioritization.
386
+ # Combines priority labels, age, complexity, dependencies, type, and memory signals.
387
+ # When intelligence engine is enabled, uses semantic AI analysis for richer scoring.