shipwright-cli 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +22 -8
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +8 -6
- package/dashboard/public/styles.css +176 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +117 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +12 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/metrics.ts +69 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +14 -2
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +25 -4
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +119 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +180 -5
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +101 -3
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +104 -1138
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -711
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +161 -2901
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -8
- package/scripts/sw-adaptive.sh +8 -7
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +15 -6
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +45 -20
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +107 -5
- package/scripts/sw-daemon.sh +71 -11
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +71 -20
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +378 -5
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +12 -7
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +13 -4
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +9 -4
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +18 -4
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +10 -3
- package/scripts/sw-incident.sh +273 -5
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +44 -7
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +436 -1076
- package/scripts/sw-memory.sh +357 -3
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +483 -27
- package/scripts/sw-otel.sh +15 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +7 -1
- package/scripts/sw-pipeline-vitals.sh +12 -6
- package/scripts/sw-pipeline.sh +54 -2653
- package/scripts/sw-pm.sh +16 -8
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +17 -5
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +17 -4
- package/scripts/sw-quality.sh +14 -6
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +14 -5
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +173 -6
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +14 -6
- package/scripts/sw-stream.sh +13 -4
- package/scripts/sw-swarm.sh +20 -7
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +7 -31
- package/scripts/sw-testgen.sh +17 -6
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +37 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +3 -2
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +5 -2
- package/scripts/sw-widgets.sh +9 -4
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -3,594 +3,31 @@
|
|
|
3
3
|
[[ -n "${_DAEMON_POLL_LOADED:-}" ]] && return 0
|
|
4
4
|
_DAEMON_POLL_LOADED=1
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
return
|
|
32
|
-
fi
|
|
33
|
-
fi
|
|
34
|
-
|
|
35
|
-
# Circuit breaker: skip poll if in backoff window
|
|
36
|
-
if gh_rate_limited; then
|
|
37
|
-
daemon_log INFO "Polling skipped (rate-limit backoff until $(epoch_to_iso "$GH_BACKOFF_UNTIL"))"
|
|
38
|
-
return
|
|
39
|
-
fi
|
|
40
|
-
|
|
41
|
-
local issues_json
|
|
42
|
-
|
|
43
|
-
# Select gh command wrapper: gh_retry for critical poll calls when enabled
|
|
44
|
-
local gh_cmd="gh"
|
|
45
|
-
if [[ "${GH_RETRY_ENABLED:-true}" == "true" ]]; then
|
|
46
|
-
gh_cmd="gh_retry gh"
|
|
47
|
-
fi
|
|
48
|
-
|
|
49
|
-
if [[ "$WATCH_MODE" == "org" && -n "$ORG" ]]; then
|
|
50
|
-
# Org-wide mode: search issues across all org repos
|
|
51
|
-
issues_json=$($gh_cmd search issues \
|
|
52
|
-
--label "$WATCH_LABEL" \
|
|
53
|
-
--owner "$ORG" \
|
|
54
|
-
--state open \
|
|
55
|
-
--json repository,number,title,labels,body,createdAt \
|
|
56
|
-
--limit "${ISSUE_LIMIT:-100}" 2>/dev/null) || {
|
|
57
|
-
# Handle rate limiting with exponential backoff
|
|
58
|
-
if [[ $BACKOFF_SECS -eq 0 ]]; then
|
|
59
|
-
BACKOFF_SECS=30
|
|
60
|
-
elif [[ $BACKOFF_SECS -lt 300 ]]; then
|
|
61
|
-
BACKOFF_SECS=$((BACKOFF_SECS * 2))
|
|
62
|
-
if [[ $BACKOFF_SECS -gt 300 ]]; then
|
|
63
|
-
BACKOFF_SECS=300
|
|
64
|
-
fi
|
|
65
|
-
fi
|
|
66
|
-
daemon_log WARN "GitHub API error (org search) — backing off ${BACKOFF_SECS}s"
|
|
67
|
-
gh_record_failure
|
|
68
|
-
sleep "$BACKOFF_SECS"
|
|
69
|
-
return
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
# Filter by repo_filter regex if set
|
|
73
|
-
if [[ -n "$REPO_FILTER" ]]; then
|
|
74
|
-
issues_json=$(echo "$issues_json" | jq -c --arg filter "$REPO_FILTER" \
|
|
75
|
-
'[.[] | select(.repository.nameWithOwner | test($filter))]')
|
|
76
|
-
fi
|
|
77
|
-
else
|
|
78
|
-
# Standard single-repo mode
|
|
79
|
-
issues_json=$($gh_cmd issue list \
|
|
80
|
-
--label "$WATCH_LABEL" \
|
|
81
|
-
--state open \
|
|
82
|
-
--json number,title,labels,body,createdAt \
|
|
83
|
-
--limit 100 2>/dev/null) || {
|
|
84
|
-
# Handle rate limiting with exponential backoff
|
|
85
|
-
if [[ $BACKOFF_SECS -eq 0 ]]; then
|
|
86
|
-
BACKOFF_SECS=30
|
|
87
|
-
elif [[ $BACKOFF_SECS -lt 300 ]]; then
|
|
88
|
-
BACKOFF_SECS=$((BACKOFF_SECS * 2))
|
|
89
|
-
if [[ $BACKOFF_SECS -gt 300 ]]; then
|
|
90
|
-
BACKOFF_SECS=300
|
|
91
|
-
fi
|
|
92
|
-
fi
|
|
93
|
-
daemon_log WARN "GitHub API error — backing off ${BACKOFF_SECS}s"
|
|
94
|
-
gh_record_failure
|
|
95
|
-
sleep "$BACKOFF_SECS"
|
|
96
|
-
return
|
|
97
|
-
}
|
|
98
|
-
fi
|
|
99
|
-
|
|
100
|
-
# Reset backoff on success
|
|
101
|
-
BACKOFF_SECS=0
|
|
102
|
-
gh_record_success
|
|
103
|
-
|
|
104
|
-
local issue_count
|
|
105
|
-
issue_count=$(echo "$issues_json" | jq 'length' 2>/dev/null || echo 0)
|
|
106
|
-
|
|
107
|
-
if [[ "$issue_count" -eq 0 ]]; then
|
|
108
|
-
return
|
|
109
|
-
fi
|
|
110
|
-
|
|
111
|
-
local mode_label="repo"
|
|
112
|
-
[[ "$WATCH_MODE" == "org" ]] && mode_label="org:${ORG}"
|
|
113
|
-
daemon_log INFO "Found ${issue_count} issue(s) with label '${WATCH_LABEL}' (${mode_label})"
|
|
114
|
-
emit_event "daemon.poll" "issues_found=$issue_count" "active=$(get_active_count)" "mode=$WATCH_MODE"
|
|
115
|
-
|
|
116
|
-
# Score each issue using intelligent triage and sort by descending score
|
|
117
|
-
local scored_issues=()
|
|
118
|
-
local dep_graph="" # "issue:dep1,dep2" entries for dependency ordering
|
|
119
|
-
while IFS= read -r issue; do
|
|
120
|
-
local num score
|
|
121
|
-
num=$(echo "$issue" | jq -r '.number')
|
|
122
|
-
score=$(triage_score_issue "$issue" 2>/dev/null | tail -1)
|
|
123
|
-
score=$(printf '%s' "$score" | tr -cd '[:digit:]')
|
|
124
|
-
[[ -z "$score" ]] && score=50
|
|
125
|
-
# For org mode, include repo name in the scored entry
|
|
126
|
-
local repo_name=""
|
|
127
|
-
if [[ "$WATCH_MODE" == "org" ]]; then
|
|
128
|
-
repo_name=$(echo "$issue" | jq -r '.repository.nameWithOwner // ""')
|
|
129
|
-
fi
|
|
130
|
-
scored_issues+=("${score}|${num}|${repo_name}")
|
|
131
|
-
|
|
132
|
-
# Issue dependency detection (adaptive: extract "depends on #X", "blocked by #X")
|
|
133
|
-
if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]]; then
|
|
134
|
-
local issue_text
|
|
135
|
-
issue_text=$(echo "$issue" | jq -r '(.title // "") + " " + (.body // "")')
|
|
136
|
-
local deps
|
|
137
|
-
deps=$(extract_issue_dependencies "$issue_text")
|
|
138
|
-
if [[ -n "$deps" ]]; then
|
|
139
|
-
local dep_nums
|
|
140
|
-
dep_nums=$(echo "$deps" | tr -d '#' | tr '\n' ',' | sed 's/,$//')
|
|
141
|
-
dep_graph="${dep_graph}${num}:${dep_nums}\n"
|
|
142
|
-
daemon_log INFO "Issue #${num} depends on: ${deps//$'\n'/, }"
|
|
143
|
-
fi
|
|
144
|
-
fi
|
|
145
|
-
done < <(echo "$issues_json" | jq -c '.[]')
|
|
146
|
-
|
|
147
|
-
# Sort by score — strategy determines ascending vs descending
|
|
148
|
-
local sorted_order
|
|
149
|
-
if [[ "${PRIORITY_STRATEGY:-quick-wins-first}" == "complex-first" ]]; then
|
|
150
|
-
# Complex-first: lower score (more complex) first
|
|
151
|
-
sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1,1 -n -k2,2 -n)
|
|
152
|
-
else
|
|
153
|
-
# Quick-wins-first (default): higher score (simpler) first, lowest issue# first on ties
|
|
154
|
-
sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1,1 -rn -k2,2 -n)
|
|
155
|
-
fi
|
|
156
|
-
|
|
157
|
-
# Dependency-aware reordering: move dependencies before dependents
|
|
158
|
-
if [[ -n "$dep_graph" && "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]]; then
|
|
159
|
-
local reordered=""
|
|
160
|
-
local scheduled=""
|
|
161
|
-
# Multiple passes to resolve transitive dependencies (max 3)
|
|
162
|
-
local pass=0
|
|
163
|
-
while [[ $pass -lt 3 ]]; do
|
|
164
|
-
local changed=false
|
|
165
|
-
local new_order=""
|
|
166
|
-
while IFS='|' read -r s_score s_num s_repo; do
|
|
167
|
-
[[ -z "$s_num" ]] && continue
|
|
168
|
-
# Check if this issue has unscheduled dependencies
|
|
169
|
-
local issue_deps
|
|
170
|
-
issue_deps=$(echo -e "$dep_graph" | grep "^${s_num}:" | head -1 | cut -d: -f2 || true)
|
|
171
|
-
if [[ -n "$issue_deps" ]]; then
|
|
172
|
-
# Check if all deps are scheduled (or not in our issue set)
|
|
173
|
-
local all_deps_ready=true
|
|
174
|
-
local IFS_SAVE="$IFS"
|
|
175
|
-
IFS=','
|
|
176
|
-
for dep in $issue_deps; do
|
|
177
|
-
dep="${dep## }"
|
|
178
|
-
dep="${dep%% }"
|
|
179
|
-
# Is this dep in our scored set and not yet scheduled?
|
|
180
|
-
if echo "$sorted_order" | grep -q "|${dep}|" && ! echo "$scheduled" | grep -q "|${dep}|"; then
|
|
181
|
-
all_deps_ready=false
|
|
182
|
-
break
|
|
183
|
-
fi
|
|
184
|
-
done
|
|
185
|
-
IFS="$IFS_SAVE"
|
|
186
|
-
if [[ "$all_deps_ready" == "false" ]]; then
|
|
187
|
-
# Defer this issue — append at end
|
|
188
|
-
new_order="${new_order}${s_score}|${s_num}|${s_repo}\n"
|
|
189
|
-
changed=true
|
|
190
|
-
continue
|
|
191
|
-
fi
|
|
192
|
-
fi
|
|
193
|
-
reordered="${reordered}${s_score}|${s_num}|${s_repo}\n"
|
|
194
|
-
scheduled="${scheduled}|${s_num}|"
|
|
195
|
-
done <<< "$sorted_order"
|
|
196
|
-
# Append deferred issues
|
|
197
|
-
reordered="${reordered}${new_order}"
|
|
198
|
-
sorted_order=$(echo -e "$reordered" | grep -v '^$')
|
|
199
|
-
reordered=""
|
|
200
|
-
scheduled=""
|
|
201
|
-
if [[ "$changed" == "false" ]]; then
|
|
202
|
-
break
|
|
203
|
-
fi
|
|
204
|
-
pass=$((pass + 1))
|
|
205
|
-
done
|
|
206
|
-
fi
|
|
207
|
-
|
|
208
|
-
local active_count
|
|
209
|
-
active_count=$(locked_get_active_count)
|
|
210
|
-
|
|
211
|
-
# Process each issue in triage order (process substitution keeps state in current shell)
|
|
212
|
-
while IFS='|' read -r score issue_num repo_name; do
|
|
213
|
-
[[ -z "$issue_num" ]] && continue
|
|
214
|
-
|
|
215
|
-
local issue_key
|
|
216
|
-
issue_key="$issue_num"
|
|
217
|
-
[[ -n "$repo_name" ]] && issue_key="${repo_name}:${issue_num}"
|
|
218
|
-
|
|
219
|
-
local issue_title labels_csv
|
|
220
|
-
issue_title=$(echo "$issues_json" | jq -r --argjson n "$issue_num" --arg repo "$repo_name" '.[] | select(.number == $n) | select($repo == "" or (.repository.nameWithOwner // "") == $repo) | .title')
|
|
221
|
-
labels_csv=$(echo "$issues_json" | jq -r --argjson n "$issue_num" --arg repo "$repo_name" '.[] | select(.number == $n) | select($repo == "" or (.repository.nameWithOwner // "") == $repo) | [.labels[].name] | join(",")')
|
|
222
|
-
|
|
223
|
-
# Cache title in state for dashboard visibility (use issue_key for org mode)
|
|
224
|
-
if [[ -n "$issue_title" ]]; then
|
|
225
|
-
locked_state_update --arg num "$issue_key" --arg title "$issue_title" \
|
|
226
|
-
'.titles[$num] = $title'
|
|
227
|
-
fi
|
|
228
|
-
|
|
229
|
-
# Skip if already inflight
|
|
230
|
-
if daemon_is_inflight "$issue_key"; then
|
|
231
|
-
continue
|
|
232
|
-
fi
|
|
233
|
-
|
|
234
|
-
# Distributed claim (skip if no machines registered)
|
|
235
|
-
if [[ -f "$HOME/.shipwright/machines.json" ]]; then
|
|
236
|
-
local machine_name
|
|
237
|
-
machine_name=$(jq -r '.machines[] | select(.role == "primary") | .name' "$HOME/.shipwright/machines.json" 2>/dev/null || hostname -s)
|
|
238
|
-
if ! claim_issue "$issue_num" "$machine_name"; then
|
|
239
|
-
daemon_log INFO "Issue #${issue_num} claimed by another machine — skipping"
|
|
240
|
-
continue
|
|
241
|
-
fi
|
|
242
|
-
fi
|
|
243
|
-
|
|
244
|
-
# Priority lane: bypass queue for critical issues
|
|
245
|
-
if [[ "$PRIORITY_LANE" == "true" ]]; then
|
|
246
|
-
local priority_active
|
|
247
|
-
priority_active=$(get_priority_active_count)
|
|
248
|
-
if is_priority_issue "$labels_csv" && [[ "$priority_active" -lt "$PRIORITY_LANE_MAX" ]]; then
|
|
249
|
-
daemon_log WARN "PRIORITY LANE: issue #${issue_num} bypassing queue (${labels_csv})"
|
|
250
|
-
emit_event "daemon.priority_lane" "issue=$issue_num" "score=$score"
|
|
251
|
-
|
|
252
|
-
local template
|
|
253
|
-
template=$(select_pipeline_template "$labels_csv" "$score" 2>/dev/null | tail -1)
|
|
254
|
-
template=$(printf '%s' "$template" | sed $'s/\x1b\\[[0-9;]*m//g' | tr -cd '[:alnum:]-_')
|
|
255
|
-
[[ -z "$template" ]] && template="$PIPELINE_TEMPLATE"
|
|
256
|
-
daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template} [PRIORITY]"
|
|
257
|
-
|
|
258
|
-
local orig_template="$PIPELINE_TEMPLATE"
|
|
259
|
-
PIPELINE_TEMPLATE="$template"
|
|
260
|
-
daemon_spawn_pipeline "$issue_num" "$issue_title" "$repo_name"
|
|
261
|
-
PIPELINE_TEMPLATE="$orig_template"
|
|
262
|
-
track_priority_job "$issue_num"
|
|
263
|
-
continue
|
|
264
|
-
fi
|
|
265
|
-
fi
|
|
266
|
-
|
|
267
|
-
# Check capacity
|
|
268
|
-
active_count=$(locked_get_active_count)
|
|
269
|
-
if [[ "$active_count" -ge "$MAX_PARALLEL" ]]; then
|
|
270
|
-
enqueue_issue "$issue_key"
|
|
271
|
-
continue
|
|
272
|
-
fi
|
|
273
|
-
|
|
274
|
-
# Auto-select pipeline template: PM recommendation (if available) else labels + triage score
|
|
275
|
-
local template
|
|
276
|
-
if [[ "$NO_GITHUB" != "true" ]] && [[ -x "$SCRIPT_DIR/sw-pm.sh" ]]; then
|
|
277
|
-
local pm_rec
|
|
278
|
-
pm_rec=$(bash "$SCRIPT_DIR/sw-pm.sh" recommend --json "$issue_num" 2>/dev/null) || true
|
|
279
|
-
if [[ -n "$pm_rec" ]]; then
|
|
280
|
-
template=$(echo "$pm_rec" | jq -r '.team_composition.template // empty' 2>/dev/null) || true
|
|
281
|
-
# Capability self-assessment: low confidence → upgrade to full template
|
|
282
|
-
local confidence
|
|
283
|
-
confidence=$(echo "$pm_rec" | jq -r '.team_composition.confidence_percent // 100' 2>/dev/null) || true
|
|
284
|
-
if [[ -n "$confidence" && "$confidence" != "null" && "$confidence" -lt 60 ]]; then
|
|
285
|
-
daemon_log INFO "Low PM confidence (${confidence}%) — upgrading to full template"
|
|
286
|
-
template="full"
|
|
287
|
-
fi
|
|
288
|
-
fi
|
|
289
|
-
fi
|
|
290
|
-
if [[ -z "$template" ]]; then
|
|
291
|
-
template=$(select_pipeline_template "$labels_csv" "$score" 2>/dev/null | tail -1)
|
|
292
|
-
fi
|
|
293
|
-
template=$(printf '%s' "$template" | sed $'s/\x1b\\[[0-9;]*m//g' | tr -cd '[:alnum:]-_')
|
|
294
|
-
[[ -z "$template" ]] && template="$PIPELINE_TEMPLATE"
|
|
295
|
-
daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template}"
|
|
296
|
-
|
|
297
|
-
# Spawn pipeline (template selection applied via PIPELINE_TEMPLATE override)
|
|
298
|
-
local orig_template="$PIPELINE_TEMPLATE"
|
|
299
|
-
PIPELINE_TEMPLATE="$template"
|
|
300
|
-
daemon_spawn_pipeline "$issue_num" "$issue_title" "$repo_name"
|
|
301
|
-
PIPELINE_TEMPLATE="$orig_template"
|
|
302
|
-
|
|
303
|
-
# Stagger delay between spawns to avoid API contention
|
|
304
|
-
local stagger_delay="${SPAWN_STAGGER_SECONDS:-15}"
|
|
305
|
-
if [[ "$stagger_delay" -gt 0 ]]; then
|
|
306
|
-
sleep "$stagger_delay"
|
|
307
|
-
fi
|
|
308
|
-
done <<< "$sorted_order"
|
|
309
|
-
|
|
310
|
-
# ── Drain queue if we have capacity (prevents deadlock when queue is
|
|
311
|
-
# populated but no active jobs exist to trigger dequeue) ──
|
|
312
|
-
local drain_active
|
|
313
|
-
drain_active=$(locked_get_active_count)
|
|
314
|
-
while [[ "$drain_active" -lt "$MAX_PARALLEL" ]]; do
|
|
315
|
-
local drain_issue_key
|
|
316
|
-
drain_issue_key=$(dequeue_next)
|
|
317
|
-
[[ -z "$drain_issue_key" ]] && break
|
|
318
|
-
local drain_issue_num="$drain_issue_key" drain_repo=""
|
|
319
|
-
[[ "$drain_issue_key" == *:* ]] && drain_repo="${drain_issue_key%%:*}" && drain_issue_num="${drain_issue_key##*:}"
|
|
320
|
-
local drain_title
|
|
321
|
-
drain_title=$(jq -r --arg n "$drain_issue_key" '.titles[$n] // ""' "$STATE_FILE" 2>/dev/null || true)
|
|
322
|
-
|
|
323
|
-
local drain_labels drain_score drain_template
|
|
324
|
-
drain_labels=$(echo "$issues_json" | jq -r --argjson n "$drain_issue_num" --arg repo "$drain_repo" \
|
|
325
|
-
'.[] | select(.number == $n) | select($repo == "" or (.repository.nameWithOwner // "") == $repo) | [.labels[].name] | join(",")' 2>/dev/null || echo "")
|
|
326
|
-
drain_score=$(echo "$sorted_order" | grep "|${drain_issue_num}|" | cut -d'|' -f1 || echo "50")
|
|
327
|
-
drain_template=$(select_pipeline_template "$drain_labels" "${drain_score:-50}" 2>/dev/null | tail -1)
|
|
328
|
-
drain_template=$(printf '%s' "$drain_template" | sed $'s/\x1b\\[[0-9;]*m//g' | tr -cd '[:alnum:]-_')
|
|
329
|
-
[[ -z "$drain_template" ]] && drain_template="$PIPELINE_TEMPLATE"
|
|
330
|
-
|
|
331
|
-
daemon_log INFO "Draining queue: issue #${drain_issue_num}${drain_repo:+, repo=${drain_repo}}, template=${drain_template}"
|
|
332
|
-
local orig_template="$PIPELINE_TEMPLATE"
|
|
333
|
-
PIPELINE_TEMPLATE="$drain_template"
|
|
334
|
-
daemon_spawn_pipeline "$drain_issue_num" "$drain_title" "$drain_repo"
|
|
335
|
-
PIPELINE_TEMPLATE="$orig_template"
|
|
336
|
-
drain_active=$(locked_get_active_count)
|
|
337
|
-
done
|
|
338
|
-
|
|
339
|
-
# Update last poll
|
|
340
|
-
update_state_field "last_poll" "$(now_iso)"
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
# ─── Health Check ─────────────────────────────────────────────────────────────
|
|
344
|
-
|
|
345
|
-
daemon_health_check() {
|
|
346
|
-
local findings=0
|
|
347
|
-
local now_e
|
|
348
|
-
now_e=$(now_epoch)
|
|
349
|
-
|
|
350
|
-
if [[ -f "$STATE_FILE" ]]; then
|
|
351
|
-
# ── Intelligent Health Monitoring ──
|
|
352
|
-
# Instead of killing after a countdown, sense what the agent is doing.
|
|
353
|
-
# Agents think for long stretches — that's normal and expected.
|
|
354
|
-
# Strategy: sense → understand → be patient → nudge → only kill as last resort.
|
|
355
|
-
|
|
356
|
-
local hard_limit="${PROGRESS_HARD_LIMIT_S:-0}"
|
|
357
|
-
local use_progress="${PROGRESS_MONITORING:-true}"
|
|
358
|
-
local nudge_enabled="${NUDGE_ENABLED:-true}"
|
|
359
|
-
local nudge_after="${NUDGE_AFTER_CHECKS:-40}"
|
|
360
|
-
|
|
361
|
-
while IFS= read -r job; do
|
|
362
|
-
local pid started_at issue_num worktree
|
|
363
|
-
pid=$(echo "$job" | jq -r '.pid')
|
|
364
|
-
started_at=$(echo "$job" | jq -r '.started_at // empty')
|
|
365
|
-
issue_num=$(echo "$job" | jq -r '.issue')
|
|
366
|
-
worktree=$(echo "$job" | jq -r '.worktree // ""')
|
|
367
|
-
|
|
368
|
-
# Skip dead processes
|
|
369
|
-
if ! kill -0 "$pid" 2>/dev/null; then
|
|
370
|
-
continue
|
|
371
|
-
fi
|
|
372
|
-
|
|
373
|
-
local elapsed=0
|
|
374
|
-
if [[ -n "$started_at" ]]; then
|
|
375
|
-
local start_e
|
|
376
|
-
start_e=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started_at" +%s 2>/dev/null || date -d "$started_at" +%s 2>/dev/null || echo "0")
|
|
377
|
-
elapsed=$(( now_e - start_e ))
|
|
378
|
-
fi
|
|
379
|
-
|
|
380
|
-
# Hard wall-clock limit — disabled by default (0 = off)
|
|
381
|
-
if [[ "$hard_limit" -gt 0 && "$elapsed" -gt "$hard_limit" ]]; then
|
|
382
|
-
daemon_log WARN "Hard limit exceeded: issue #${issue_num} (${elapsed}s > ${hard_limit}s, PID $pid) — killing"
|
|
383
|
-
emit_event "daemon.hard_limit" "issue=$issue_num" "elapsed_s=$elapsed" "limit_s=$hard_limit" "pid=$pid"
|
|
384
|
-
kill "$pid" 2>/dev/null || true
|
|
385
|
-
daemon_clear_progress "$issue_num"
|
|
386
|
-
findings=$((findings + 1))
|
|
387
|
-
continue
|
|
388
|
-
fi
|
|
389
|
-
|
|
390
|
-
# ── Intelligent Progress Sensing ──
|
|
391
|
-
if [[ "$use_progress" == "true" && -n "$worktree" ]]; then
|
|
392
|
-
local snapshot verdict
|
|
393
|
-
snapshot=$(daemon_collect_snapshot "$issue_num" "$worktree" "$pid" 2>/dev/null || echo '{}')
|
|
394
|
-
|
|
395
|
-
if [[ "$snapshot" != "{}" ]]; then
|
|
396
|
-
verdict=$(daemon_assess_progress "$issue_num" "$snapshot" 2>/dev/null || echo "healthy")
|
|
397
|
-
|
|
398
|
-
local no_progress_count=0
|
|
399
|
-
no_progress_count=$(jq -r '.no_progress_count // 0' "$PROGRESS_DIR/issue-${issue_num}.json" 2>/dev/null || echo 0)
|
|
400
|
-
local cur_stage
|
|
401
|
-
cur_stage=$(echo "$snapshot" | jq -r '.stage // "unknown"')
|
|
402
|
-
|
|
403
|
-
case "$verdict" in
|
|
404
|
-
healthy)
|
|
405
|
-
# All good — agent is making progress
|
|
406
|
-
;;
|
|
407
|
-
slowing)
|
|
408
|
-
daemon_log INFO "Issue #${issue_num} slowing (no visible changes for ${no_progress_count} checks, ${elapsed}s elapsed, stage=${cur_stage})"
|
|
409
|
-
;;
|
|
410
|
-
stalled)
|
|
411
|
-
# Check if agent subprocess is alive and consuming CPU
|
|
412
|
-
local agent_alive=false
|
|
413
|
-
local child_cpu=0
|
|
414
|
-
child_cpu=$(pgrep -P "$pid" 2>/dev/null | xargs -I{} ps -o pcpu= -p {} 2>/dev/null | awk '{sum+=$1} END{printf "%d", sum+0}' || echo "0")
|
|
415
|
-
if [[ "${child_cpu:-0}" -gt 0 ]]; then
|
|
416
|
-
agent_alive=true
|
|
417
|
-
fi
|
|
418
|
-
|
|
419
|
-
if [[ "$agent_alive" == "true" ]]; then
|
|
420
|
-
daemon_log INFO "Issue #${issue_num} no visible progress (${no_progress_count} checks) but agent is alive (CPU: ${child_cpu}%, stage=${cur_stage}, ${elapsed}s) — being patient"
|
|
421
|
-
else
|
|
422
|
-
daemon_log WARN "Issue #${issue_num} stalled: no progress for ${no_progress_count} checks, no CPU activity (${elapsed}s elapsed, PID $pid)"
|
|
423
|
-
emit_event "daemon.stalled" "issue=$issue_num" "no_progress=$no_progress_count" "elapsed_s=$elapsed" "pid=$pid"
|
|
424
|
-
fi
|
|
425
|
-
;;
|
|
426
|
-
stuck)
|
|
427
|
-
local repeated_errors
|
|
428
|
-
repeated_errors=$(jq -r '.repeated_error_count // 0' "$PROGRESS_DIR/issue-${issue_num}.json" 2>/dev/null || echo 0)
|
|
429
|
-
|
|
430
|
-
# Even "stuck" — check if the process tree is alive first
|
|
431
|
-
local agent_alive=false
|
|
432
|
-
local child_cpu=0
|
|
433
|
-
child_cpu=$(pgrep -P "$pid" 2>/dev/null | xargs -I{} ps -o pcpu= -p {} 2>/dev/null | awk '{sum+=$1} END{printf "%d", sum+0}' || echo "0")
|
|
434
|
-
if [[ "${child_cpu:-0}" -gt 0 ]]; then
|
|
435
|
-
agent_alive=true
|
|
436
|
-
fi
|
|
437
|
-
|
|
438
|
-
if [[ "$agent_alive" == "true" && "$repeated_errors" -lt 3 ]]; then
|
|
439
|
-
# Agent is alive — nudge instead of kill
|
|
440
|
-
if [[ "$nudge_enabled" == "true" && "$no_progress_count" -ge "$nudge_after" ]]; then
|
|
441
|
-
local nudge_file="${worktree}/.claude/nudge.md"
|
|
442
|
-
if [[ ! -f "$nudge_file" ]]; then
|
|
443
|
-
cat > "$nudge_file" <<NUDGE_EOF
|
|
444
|
-
# Nudge from Daemon Health Monitor
|
|
445
|
-
|
|
446
|
-
The daemon has noticed no visible progress for $(( no_progress_count * 30 / 60 )) minutes.
|
|
447
|
-
Current stage: ${cur_stage}
|
|
448
|
-
|
|
449
|
-
If you're stuck, consider:
|
|
450
|
-
- Breaking the task into smaller steps
|
|
451
|
-
- Committing partial progress
|
|
452
|
-
- Running tests to validate current state
|
|
453
|
-
|
|
454
|
-
This is just a gentle check-in — take your time if you're working through a complex problem.
|
|
455
|
-
NUDGE_EOF
|
|
456
|
-
daemon_log INFO "Issue #${issue_num} nudged (${no_progress_count} checks, stage=${cur_stage}, CPU=${child_cpu}%) — file written to worktree"
|
|
457
|
-
emit_event "daemon.nudge" "issue=$issue_num" "no_progress=$no_progress_count" "stage=$cur_stage" "elapsed_s=$elapsed"
|
|
458
|
-
fi
|
|
459
|
-
else
|
|
460
|
-
daemon_log INFO "Issue #${issue_num} no visible progress (${no_progress_count} checks) but agent is alive (CPU: ${child_cpu}%, stage=${cur_stage}) — waiting"
|
|
461
|
-
fi
|
|
462
|
-
elif [[ "$repeated_errors" -ge 5 ]]; then
|
|
463
|
-
# Truly stuck in an error loop — kill as last resort
|
|
464
|
-
daemon_log WARN "Issue #${issue_num} in error loop: ${repeated_errors} repeated errors (stage=${cur_stage}, ${elapsed}s, PID $pid) — killing"
|
|
465
|
-
emit_event "daemon.stuck_kill" "issue=$issue_num" "no_progress=$no_progress_count" "repeated_errors=$repeated_errors" "stage=$cur_stage" "elapsed_s=$elapsed" "pid=$pid" "reason=error_loop"
|
|
466
|
-
kill "$pid" 2>/dev/null || true
|
|
467
|
-
daemon_clear_progress "$issue_num"
|
|
468
|
-
findings=$((findings + 1))
|
|
469
|
-
elif [[ "$agent_alive" != "true" && "$no_progress_count" -ge "$((PROGRESS_CHECKS_BEFORE_KILL * 2))" ]]; then
|
|
470
|
-
# Process tree is dead AND no progress for very long time
|
|
471
|
-
daemon_log WARN "Issue #${issue_num} appears dead: no CPU, no progress for ${no_progress_count} checks (${elapsed}s, PID $pid) — killing"
|
|
472
|
-
emit_event "daemon.stuck_kill" "issue=$issue_num" "no_progress=$no_progress_count" "repeated_errors=$repeated_errors" "stage=$cur_stage" "elapsed_s=$elapsed" "pid=$pid" "reason=dead_process"
|
|
473
|
-
kill "$pid" 2>/dev/null || true
|
|
474
|
-
daemon_clear_progress "$issue_num"
|
|
475
|
-
findings=$((findings + 1))
|
|
476
|
-
else
|
|
477
|
-
daemon_log WARN "Issue #${issue_num} struggling (${no_progress_count} checks, ${repeated_errors} errors, CPU=${child_cpu}%, stage=${cur_stage}) — monitoring"
|
|
478
|
-
fi
|
|
479
|
-
;;
|
|
480
|
-
esac
|
|
481
|
-
fi
|
|
482
|
-
else
|
|
483
|
-
# Fallback: legacy time-based detection when progress monitoring is off
|
|
484
|
-
local stale_timeout
|
|
485
|
-
stale_timeout=$(get_adaptive_stale_timeout "$PIPELINE_TEMPLATE")
|
|
486
|
-
if [[ "$elapsed" -gt "$stale_timeout" ]]; then
|
|
487
|
-
# Check if process is still alive
|
|
488
|
-
if kill -0 "$pid" 2>/dev/null; then
|
|
489
|
-
# Kill at 2x stale timeout — the process is truly hung
|
|
490
|
-
local kill_threshold=$(( stale_timeout * 2 ))
|
|
491
|
-
if [[ "$elapsed" -gt "$kill_threshold" ]]; then
|
|
492
|
-
daemon_log WARN "Killing stale job (legacy): issue #${issue_num} (${elapsed}s > ${kill_threshold}s kill threshold, PID $pid)"
|
|
493
|
-
emit_event "daemon.stale_kill" "issue=$issue_num" "elapsed_s=$elapsed" "pid=$pid"
|
|
494
|
-
kill "$pid" 2>/dev/null || true
|
|
495
|
-
sleep 2
|
|
496
|
-
kill -9 "$pid" 2>/dev/null || true
|
|
497
|
-
else
|
|
498
|
-
daemon_log WARN "Stale job (legacy): issue #${issue_num} (${elapsed}s > ${stale_timeout}s, PID $pid) — will kill at ${kill_threshold}s"
|
|
499
|
-
emit_event "daemon.stale_warning" "issue=$issue_num" "elapsed_s=$elapsed" "pid=$pid"
|
|
500
|
-
fi
|
|
501
|
-
else
|
|
502
|
-
daemon_log WARN "Stale job with dead process: issue #${issue_num} (PID $pid no longer exists)"
|
|
503
|
-
emit_event "daemon.stale_dead" "issue=$issue_num" "pid=$pid"
|
|
504
|
-
fi
|
|
505
|
-
findings=$((findings + 1))
|
|
506
|
-
fi
|
|
507
|
-
fi
|
|
508
|
-
done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)
|
|
509
|
-
fi
|
|
510
|
-
|
|
511
|
-
# Disk space warning (check both repo dir and ~/.shipwright)
|
|
512
|
-
local free_kb
|
|
513
|
-
free_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
|
|
514
|
-
if [[ -n "$free_kb" ]] && [[ "$free_kb" -lt 1048576 ]] 2>/dev/null; then
|
|
515
|
-
daemon_log WARN "Low disk space: $(( free_kb / 1024 ))MB free"
|
|
516
|
-
findings=$((findings + 1))
|
|
517
|
-
fi
|
|
518
|
-
|
|
519
|
-
# Critical disk space on ~/.shipwright — pause spawning
|
|
520
|
-
local sw_free_kb
|
|
521
|
-
sw_free_kb=$(df -k "$HOME/.shipwright" 2>/dev/null | tail -1 | awk '{print $4}')
|
|
522
|
-
if [[ -n "$sw_free_kb" ]] && [[ "$sw_free_kb" -lt 512000 ]] 2>/dev/null; then
|
|
523
|
-
daemon_log WARN "Critical disk space on ~/.shipwright: $(( sw_free_kb / 1024 ))MB — pausing spawns"
|
|
524
|
-
emit_event "daemon.disk_low" "free_mb=$(( sw_free_kb / 1024 ))"
|
|
525
|
-
mkdir -p "$HOME/.shipwright"
|
|
526
|
-
echo '{"paused":true,"reason":"disk_low"}' > "$HOME/.shipwright/daemon-pause.flag"
|
|
527
|
-
findings=$((findings + 1))
|
|
528
|
-
fi
|
|
529
|
-
|
|
530
|
-
# Events file size warning
|
|
531
|
-
if [[ -f "$EVENTS_FILE" ]]; then
|
|
532
|
-
local events_size
|
|
533
|
-
events_size=$(wc -c < "$EVENTS_FILE" 2>/dev/null || echo 0)
|
|
534
|
-
if [[ "$events_size" -gt 104857600 ]]; then # 100MB
|
|
535
|
-
daemon_log WARN "Events file large ($(( events_size / 1048576 ))MB) — consider rotating"
|
|
536
|
-
findings=$((findings + 1))
|
|
537
|
-
fi
|
|
538
|
-
fi
|
|
539
|
-
|
|
540
|
-
if [[ "$findings" -gt 0 ]]; then
|
|
541
|
-
emit_event "daemon.health" "findings=$findings"
|
|
542
|
-
fi
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
# ─── Degradation Alerting ─────────────────────────────────────────────────────

# Examines the most recent pipeline.completed events and raises an alert when
# the change-failure rate (CFR) rises above, or the success rate falls below,
# the configured thresholds.
# Globals:   EVENTS_FILE (read), DEGRADATION_WINDOW / DEGRADATION_CFR_THRESHOLD /
#            DEGRADATION_SUCCESS_THRESHOLD (read), SLACK_WEBHOOK (read)
# Outputs:   WARN lines via daemon_log; a daemon.alert event; optional Slack notify
# Returns:   0 always (returns early when there is not enough data yet)
daemon_check_degradation() {
  if [[ ! -f "$EVENTS_FILE" ]]; then return; fi

  local window="${DEGRADATION_WINDOW:-5}"
  local cfr_threshold="${DEGRADATION_CFR_THRESHOLD:-30}"
  local success_threshold="${DEGRADATION_SUCCESS_THRESHOLD:-50}"

  # Get last N pipeline completions from the tail of the event log.
  # Pass the window via --argjson rather than interpolating it into the jq
  # program text, so a non-numeric env value cannot inject jq code.
  local recent
  recent=$(tail -200 "$EVENTS_FILE" \
    | jq -s --argjson w "$window" \
        '[.[] | select(.type == "pipeline.completed")] | .[-$w:]' 2>/dev/null)

  local count
  count=$(jq 'length' <<<"$recent" 2>/dev/null || echo 0)

  # Not enough samples yet — stay quiet instead of alerting on a short history.
  if [[ "${count:-0}" -lt "$window" ]]; then return; fi

  local failures successes
  failures=$(jq '[.[] | select(.result == "failure")] | length' <<<"$recent")
  successes=$(jq '[.[] | select(.result == "success")] | length' <<<"$recent")

  local cfr_pct=0 success_pct=0
  if [[ "${count:-0}" -gt 0 ]]; then
    cfr_pct=$(( failures * 100 / count ))
    success_pct=$(( successes * 100 / count ))
  fi

  local alerts=""
  if [[ "$cfr_pct" -gt "$cfr_threshold" ]]; then
    alerts="CFR ${cfr_pct}% exceeds threshold ${cfr_threshold}%"
    daemon_log WARN "DEGRADATION: $alerts"
  fi
  if [[ "$success_pct" -lt "$success_threshold" ]]; then
    local msg="Success rate ${success_pct}% below threshold ${success_threshold}%"
    # Append to any existing CFR alert so a single event carries both findings.
    if [[ -n "$alerts" ]]; then
      alerts="$alerts; $msg"
    else
      alerts="$msg"
    fi
    daemon_log WARN "DEGRADATION: $msg"
  fi

  if [[ -n "$alerts" ]]; then
    emit_event "daemon.alert" "alerts=$alerts" "cfr_pct=$cfr_pct" "success_pct=$success_pct"

    # Slack notification
    if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
      notify "Pipeline Degradation Alert" "$alerts" "warn"
    fi
  fi
}
|
|
591
|
-
|
|
592
|
-
# ─── Auto-Scaling ─────────────────────────────────────────────────────────
|
|
593
|
-
# Dynamically adjusts MAX_PARALLEL based on CPU, memory, budget, and queue depth
|
|
6
|
+
# Defaults for variables normally set by sw-daemon.sh (safe under set -u).
# `: "${VAR:=default}"` assigns only when the variable is unset or empty,
# so values exported by the parent daemon always win.
: "${DAEMON_DIR:=${HOME}/.shipwright}"
: "${STATE_FILE:=${DAEMON_DIR}/daemon-state.json}"
: "${PAUSE_FLAG:=${DAEMON_DIR}/daemon-pause.flag}"
: "${SHUTDOWN_FLAG:=${DAEMON_DIR}/daemon.shutdown}"
: "${EVENTS_FILE:=${DAEMON_DIR}/events.jsonl}"
# Resolve the parent directory of this script lazily (only if not preset).
: "${SCRIPT_DIR:=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
: "${NO_GITHUB:=false}"
: "${POLL_INTERVAL:=60}"
: "${MAX_PARALLEL:=4}"
: "${WATCH_LABEL:=shipwright}"
: "${WATCH_MODE:=repo}"
: "${PIPELINE_TEMPLATE:=autonomous}"
: "${ISSUE_LIMIT:=100}"
: "${SLACK_WEBHOOK:=}"
: "${BACKOFF_SECS:=0}"
: "${POLL_CYCLE_COUNT:=0}"

# Source sub-modules when they exist alongside this script.
if [[ -f "${SCRIPT_DIR}/lib/daemon-poll-github.sh" ]]; then
  source "${SCRIPT_DIR}/lib/daemon-poll-github.sh"
fi
if [[ -f "${SCRIPT_DIR}/lib/daemon-poll-health.sh" ]]; then
  source "${SCRIPT_DIR}/lib/daemon-poll-health.sh"
fi
|
|
594
31
|
|
|
595
32
|
daemon_auto_scale() {
|
|
596
33
|
if [[ "${AUTO_SCALE:-false}" != "true" ]]; then
|
|
@@ -785,29 +222,6 @@ daemon_auto_scale() {
|
|
|
785
222
|
# ─── Fleet Config Reload ──────────────────────────────────────────────────
|
|
786
223
|
# Checks for fleet-reload.flag and reloads MAX_PARALLEL from fleet-managed config
|
|
787
224
|
|
|
788
|
-
# Checks for fleet-reload.flag and reloads MAX_PARALLEL from the fleet-managed
# config. The flag is consumed (removed) whether or not the config was usable.
# Globals:   MAX_PARALLEL (written), FLEET_MAX_PARALLEL (written)
# Returns:   0 always
daemon_reload_config() {
  local reload_flag="$HOME/.shipwright/fleet-reload.flag"
  if [[ ! -f "$reload_flag" ]]; then
    return
  fi

  local fleet_config=".claude/.fleet-daemon-config.json"
  if [[ -f "$fleet_config" ]]; then
    local new_max
    new_max=$(jq -r '.max_parallel // empty' "$fleet_config" 2>/dev/null || true)
    # Only accept a positive integer — MAX_PARALLEL is used in arithmetic
    # comparisons, so a malformed config value must not leak through.
    if [[ "$new_max" =~ ^[0-9]+$ && "$new_max" -gt 0 ]]; then
      local prev="$MAX_PARALLEL"
      FLEET_MAX_PARALLEL="$new_max"
      MAX_PARALLEL="$new_max"
      daemon_log INFO "Fleet reload: max_parallel ${prev} → ${MAX_PARALLEL} (fleet ceiling: ${FLEET_MAX_PARALLEL})"
      emit_event "daemon.fleet_reload" "from=$prev" "to=$MAX_PARALLEL"
    fi
  fi

  rm -f "$reload_flag"
}
|
|
809
|
-
|
|
810
|
-
# ─── Self-Optimizing Metrics Loop ──────────────────────────────────────────
|
|
811
225
|
|
|
812
226
|
daemon_self_optimize() {
|
|
813
227
|
if [[ "${SELF_OPTIMIZE:-false}" != "true" ]]; then
|
|
@@ -961,6 +375,7 @@ daemon_self_optimize() {
|
|
|
961
375
|
# Cleans old worktrees, pipeline artifacts, and completed state entries.
|
|
962
376
|
# Called every N poll cycles (configurable via stale_reaper_interval).
|
|
963
377
|
|
|
378
|
+
|
|
964
379
|
daemon_cleanup_stale() {
|
|
965
380
|
if [[ "${STALE_REAPER_ENABLED:-true}" != "true" ]]; then
|
|
966
381
|
return
|
|
@@ -1131,6 +546,7 @@ daemon_cleanup_stale() {
|
|
|
1131
546
|
|
|
1132
547
|
POLL_CYCLE_COUNT=0
|
|
1133
548
|
|
|
549
|
+
|
|
1134
550
|
daemon_poll_loop() {
|
|
1135
551
|
daemon_log INFO "Entering poll loop (interval: ${POLL_INTERVAL}s, max_parallel: ${MAX_PARALLEL})"
|
|
1136
552
|
daemon_log INFO "Watching for label: ${CYAN}${WATCH_LABEL}${RESET}"
|