npm - shipwright-cli - Versions diffs - 1.9.0 → 1.10.0 - Mend

shipwright-cli 1.9.0 → 1.10.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (53)

package/.claude/hooks/post-tool-use.sh +12 -5
package/package.json +2 -2
package/scripts/sw +9 -1
package/scripts/sw-adversarial.sh +1 -1
package/scripts/sw-architecture-enforcer.sh +1 -1
package/scripts/sw-checkpoint.sh +79 -1
package/scripts/sw-cleanup.sh +192 -7
package/scripts/sw-connect.sh +1 -1
package/scripts/sw-cost.sh +1 -1
package/scripts/sw-daemon.sh +409 -37
package/scripts/sw-dashboard.sh +1 -1
package/scripts/sw-developer-simulation.sh +1 -1
package/scripts/sw-docs.sh +1 -1
package/scripts/sw-doctor.sh +1 -1
package/scripts/sw-fix.sh +1 -1
package/scripts/sw-fleet.sh +1 -1
package/scripts/sw-github-checks.sh +1 -1
package/scripts/sw-github-deploy.sh +1 -1
package/scripts/sw-github-graphql.sh +1 -1
package/scripts/sw-heartbeat.sh +1 -1
package/scripts/sw-init.sh +1 -1
package/scripts/sw-intelligence.sh +1 -1
package/scripts/sw-jira.sh +1 -1
package/scripts/sw-launchd.sh +4 -4
package/scripts/sw-linear.sh +1 -1
package/scripts/sw-logs.sh +1 -1
package/scripts/sw-loop.sh +444 -49
package/scripts/sw-memory.sh +198 -3
package/scripts/sw-pipeline-composer.sh +8 -8
package/scripts/sw-pipeline-vitals.sh +1096 -0
package/scripts/sw-pipeline.sh +1692 -84
package/scripts/sw-predictive.sh +1 -1
package/scripts/sw-prep.sh +1 -1
package/scripts/sw-ps.sh +4 -3
package/scripts/sw-reaper.sh +5 -3
package/scripts/sw-remote.sh +1 -1
package/scripts/sw-self-optimize.sh +109 -8
package/scripts/sw-session.sh +31 -9
package/scripts/sw-setup.sh +1 -1
package/scripts/sw-status.sh +192 -1
package/scripts/sw-templates.sh +1 -1
package/scripts/sw-tmux.sh +1 -1
package/scripts/sw-tracker.sh +1 -1
package/scripts/sw-upgrade.sh +1 -1
package/scripts/sw-worktree.sh +1 -1
package/templates/pipelines/autonomous.json +8 -1
package/templates/pipelines/cost-aware.json +21 -0
package/templates/pipelines/deployed.json +40 -6
package/templates/pipelines/enterprise.json +16 -2
package/templates/pipelines/fast.json +19 -0
package/templates/pipelines/full.json +16 -2
package/templates/pipelines/hotfix.json +19 -0
package/templates/pipelines/standard.json +19 -0

package/scripts/sw-daemon.sh CHANGED

@@ -6,7 +6,7 @@
 set -euo pipefail
 trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
-VERSION="1.9.0"
+VERSION="1.10.0"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
@@ -34,6 +34,8 @@ RESET='\033[0m'
 [[ -f "$SCRIPT_DIR/sw-self-optimize.sh" ]] && source "$SCRIPT_DIR/sw-self-optimize.sh"
 # shellcheck source=sw-predictive.sh
 [[ -f "$SCRIPT_DIR/sw-predictive.sh" ]] && source "$SCRIPT_DIR/sw-predictive.sh"
+# shellcheck source=sw-pipeline-vitals.sh
+[[ -f "$SCRIPT_DIR/sw-pipeline-vitals.sh" ]] && source "$SCRIPT_DIR/sw-pipeline-vitals.sh"
 # ─── GitHub API Modules (optional) ────────────────────────────────────────
 # shellcheck source=sw-github-graphql.sh
@@ -125,7 +127,6 @@ rotate_event_log() {
 }
 # ─── GitHub Context (loaded once at startup) ──────────────────────────────
-DAEMON_GITHUB_CONTEXT=""
 daemon_github_context() {
     # Skip if no GitHub
@@ -141,8 +142,6 @@ daemon_github_context() {
     context=$(gh_repo_context "$owner" "$repo" 2>/dev/null || echo "{}")
     if [[ -n "$context" && "$context" != "{}" ]]; then
         daemon_log INFO "GitHub context loaded: $(echo "$context" | jq -r '.contributor_count // 0') contributors, $(echo "$context" | jq -r '.security_alert_count // 0') security alerts"
-        DAEMON_GITHUB_CONTEXT="$context"
-        export DAEMON_GITHUB_CONTEXT
     fi
 }
@@ -166,9 +165,9 @@ gh_retry() {
         # Check for rate-limit or server error indicators
         if echo "$output" | grep -qiE "rate limit|403|429|502|503"; then
-            daemon_log WARN "gh_retry: rate limit / server error on attempt ${attempt}/${max_retries} — backoff ${backoff}s"
+            daemon_log WARN "gh_retry: rate limit / server error on attempt ${attempt}/${max_retries} — backoff ${backoff}s" >&2
         else
-            daemon_log WARN "gh_retry: transient error on attempt ${attempt}/${max_retries} (exit ${exit_code}) — backoff ${backoff}s"
+            daemon_log WARN "gh_retry: transient error on attempt ${attempt}/${max_retries} (exit ${exit_code}) — backoff ${backoff}s" >&2
         fi
         if [[ $attempt -lt $max_retries ]]; then
@@ -421,6 +420,14 @@ load_config() {
     MAX_RETRIES=$(jq -r '.max_retries // 2' "$config_file")
     RETRY_ESCALATION=$(jq -r '.retry_escalation // true' "$config_file")
+    # session restart + fast test passthrough
+    MAX_RESTARTS_CFG=$(jq -r '.max_restarts // 3' "$config_file" 2>/dev/null || echo "3")
+    if ! [[ "$MAX_RESTARTS_CFG" =~ ^[0-9]+$ ]]; then
+        daemon_log WARN "Invalid max_restarts in config: $MAX_RESTARTS_CFG (using default: 3)"
+        MAX_RESTARTS_CFG="3"
+    fi
+    FAST_TEST_CMD_CFG=$(jq -r '.fast_test_cmd // ""' "$config_file" 2>/dev/null || echo "")
     # self-optimization
     SELF_OPTIMIZE=$(jq -r '.self_optimize // false' "$config_file")
     OPTIMIZE_INTERVAL=$(jq -r '.optimize_interval // 10' "$config_file")
@@ -482,6 +489,12 @@ load_config() {
         DASHBOARD_URL="$cfg_dashboard_url"
     fi
+    # Auto-enable self_optimize when auto_template is on
+    if [[ "${AUTO_TEMPLATE:-false}" == "true" && "${SELF_OPTIMIZE:-false}" == "false" ]]; then
+        SELF_OPTIMIZE="true"
+        daemon_log INFO "Auto-enabling self_optimize (auto_template is true)"
+    fi
     success "Config loaded"
 }
@@ -855,7 +868,56 @@ daemon_assess_progress() {
         if $npc == 0 then .last_progress_at = $ts else . end
         ' "$progress_file" > "$tmp_progress" 2>/dev/null && mv "$tmp_progress" "$progress_file"
-    # Determine verdict
+    # ── Vitals-based verdict (preferred over static thresholds) ──
+    if type pipeline_compute_vitals &>/dev/null 2>&1 && type pipeline_health_verdict &>/dev/null 2>&1; then
+        # Compute vitals using the worktree's pipeline state if available
+        local _worktree_state=""
+        local _worktree_artifacts=""
+        local _worktree_dir
+        _worktree_dir=$(jq -r --arg i "$issue_num" '.active_jobs[] | select(.issue == ($i | tonumber)) | .worktree // ""' "$STATE_FILE" 2>/dev/null || echo "")
+        if [[ -n "$_worktree_dir" && -d "$_worktree_dir/.claude" ]]; then
+            _worktree_state="$_worktree_dir/.claude/pipeline-state.md"
+            _worktree_artifacts="$_worktree_dir/.claude/pipeline-artifacts"
+        fi
+        local _vitals_json
+        _vitals_json=$(pipeline_compute_vitals "$_worktree_state" "$_worktree_artifacts" "$issue_num" 2>/dev/null) || true
+        if [[ -n "$_vitals_json" && "$_vitals_json" != "{}" ]]; then
+            local _health_verdict _health_score
+            _health_verdict=$(echo "$_vitals_json" | jq -r '.verdict // "continue"' 2>/dev/null || echo "continue")
+            _health_score=$(echo "$_vitals_json" | jq -r '.health_score // 50' 2>/dev/null || echo "50")
+            emit_event "pipeline.vitals_check" \
+                "issue=$issue_num" \
+                "health_score=$_health_score" \
+                "verdict=$_health_verdict" \
+                "no_progress=$no_progress_count" \
+                "repeated_errors=$repeated_errors"
+            # Map vitals verdict to daemon verdict
+            case "$_health_verdict" in
+                continue)
+                    echo "healthy"
+                    return
+                    ;;
+                warn)
+                    # Sluggish but not dead — equivalent to slowing
+                    echo "slowing"
+                    return
+                    ;;
+                intervene)
+                    echo "stalled"
+                    return
+                    ;;
+                abort)
+                    echo "stuck"
+                    return
+                    ;;
+            esac
+        fi
+    fi
+    # ── Fallback: static threshold verdict ──
     local warn_threshold="${PROGRESS_CHECKS_BEFORE_WARN:-3}"
     local kill_threshold="${PROGRESS_CHECKS_BEFORE_KILL:-6}"
@@ -1039,6 +1101,7 @@ extract_issue_dependencies() {
 }
 # ─── Logging ─────────────────────────────────────────────────────────────────
+DAEMON_LOG_WRITE_COUNT=0
 daemon_log() {
     local level="$1"
@@ -1048,8 +1111,9 @@ daemon_log() {
     ts=$(now_iso)
     echo "[$ts] [$level] $msg" >> "$LOG_FILE"
-    # Rotate daemon.log if over 20MB (checked every ~100 writes)
-    if [[ $(( RANDOM % 100 )) -eq 0 ]] && [[ -f "$LOG_FILE" ]]; then
+    # Rotate daemon.log if over 20MB (checked every 100 writes)
+    DAEMON_LOG_WRITE_COUNT=$(( DAEMON_LOG_WRITE_COUNT + 1 ))
+    if [[ $(( DAEMON_LOG_WRITE_COUNT % 100 )) -eq 0 ]] && [[ -f "$LOG_FILE" ]]; then
         local log_size
         log_size=$(wc -c < "$LOG_FILE" 2>/dev/null || echo 0)
         if [[ "$log_size" -gt 20971520 ]]; then
@@ -1060,11 +1124,14 @@ daemon_log() {
         fi
     fi
-    # Also print to stdout
+    # Print to stderr (NOT stdout) to avoid corrupting command substitution captures.
+    # This is critical: functions like select_pipeline_template(), triage_score_issue(),
+    # gh_retry(), and locked_get_active_count() return values via echo/stdout and are
+    # called via $(). If daemon_log writes to stdout, the log text corrupts return values.
     case "$level" in
-        INFO)    info "$msg" ;;
-        SUCCESS) success "$msg" ;;
-        WARN)    warn "$msg" ;;
+        INFO)    info "$msg" >&2 ;;
+        SUCCESS) success "$msg" >&2 ;;
+        WARN)    warn "$msg" >&2 ;;
         ERROR)   error "$msg" ;;
     esac
 }
@@ -1130,7 +1197,10 @@ gh_record_failure() {
     GH_CONSECUTIVE_FAILURES=$((GH_CONSECUTIVE_FAILURES + 1))
     if [[ "$GH_CONSECUTIVE_FAILURES" -ge 3 ]]; then
         # Exponential backoff: 30s, 60s, 120s, 240s (capped at 5min)
-        local backoff_secs=$((30 * (1 << (GH_CONSECUTIVE_FAILURES - 3))))
+        # Cap shift to avoid integer overflow for large failure counts
+        local shift_amt=$(( GH_CONSECUTIVE_FAILURES - 3 ))
+        [[ "$shift_amt" -gt 4 ]] && shift_amt=4
+        local backoff_secs=$((30 * (1 << shift_amt)))
         [[ "$backoff_secs" -gt 300 ]] && backoff_secs=300
         GH_BACKOFF_UNTIL=$(( $(now_epoch) + backoff_secs ))
         daemon_log WARN "GitHub rate-limit circuit breaker: backing off ${backoff_secs}s after ${GH_CONSECUTIVE_FAILURES} failures"
@@ -1380,7 +1450,7 @@ locked_get_active_count() {
         (
             if command -v flock &>/dev/null; then
                 flock -w 5 200 2>/dev/null || {
-                    daemon_log WARN "locked_get_active_count: lock timeout — returning MAX_PARALLEL as safe default"
+                    daemon_log WARN "locked_get_active_count: lock timeout — returning MAX_PARALLEL as safe default" >&2
                     echo "$MAX_PARALLEL"
                     exit 0
                 }
@@ -1626,6 +1696,17 @@ daemon_spawn_pipeline() {
         daemon_log INFO "Worktree created at ${work_dir}"
     fi
+    # If template is "composed", copy the composed spec into the worktree
+    if [[ "$PIPELINE_TEMPLATE" == "composed" ]]; then
+        local _src_composed="${REPO_DIR:-.}/.claude/pipeline-artifacts/composed-pipeline.json"
+        if [[ -f "$_src_composed" ]]; then
+            local _dst_artifacts="${work_dir}/.claude/pipeline-artifacts"
+            mkdir -p "$_dst_artifacts"
+            cp "$_src_composed" "$_dst_artifacts/composed-pipeline.json" 2>/dev/null || true
+            daemon_log INFO "Copied composed pipeline spec to worktree"
+        fi
+    fi
     # Build pipeline args
     local pipeline_args=("start" "--issue" "$issue_num" "--pipeline" "$PIPELINE_TEMPLATE")
     if [[ "$SKIP_GATES" == "true" ]]; then
@@ -1637,6 +1718,14 @@ daemon_spawn_pipeline() {
     if [[ "$NO_GITHUB" == "true" ]]; then
         pipeline_args+=("--no-github")
     fi
+    # Pass session restart config
+    if [[ "${MAX_RESTARTS_CFG:-0}" -gt 0 ]]; then
+        pipeline_args+=("--max-restarts" "$MAX_RESTARTS_CFG")
+    fi
+    # Pass fast test command
+    if [[ -n "${FAST_TEST_CMD_CFG:-}" ]]; then
+        pipeline_args+=("--fast-test-cmd" "$FAST_TEST_CMD_CFG")
+    fi
     # Run pipeline in work directory (background)
     echo -e "\n\n===== Pipeline run $(date -u +%Y-%m-%dT%H:%M:%SZ) =====" >> "$LOG_DIR/issue-${issue_num}.log" 2>/dev/null || true
@@ -1770,6 +1859,41 @@ daemon_reap_completed() {
             daemon_on_success "$issue_num" "$duration_str"
         else
             daemon_on_failure "$issue_num" "$exit_code" "$duration_str"
+            # Cancel any lingering in_progress GitHub Check Runs for failed job
+            if [[ "${NO_GITHUB:-false}" != "true" && -n "$worktree" ]]; then
+                local check_ids_file="${worktree}/.claude/pipeline-artifacts/check-run-ids.json"
+                if [[ -f "$check_ids_file" ]]; then
+                    daemon_log INFO "Cancelling in-progress check runs for issue #${issue_num}"
+                    local _stage
+                    while IFS= read -r _stage; do
+                        [[ -z "$_stage" ]] && continue
+                        # Direct API call since we're in daemon context
+                        local _run_id
+                        _run_id=$(jq -r --arg s "$_stage" '.[$s] // empty' "$check_ids_file" 2>/dev/null || true)
+                        if [[ -n "$_run_id" && "$_run_id" != "null" ]]; then
+                            local _detected
+                            _detected=$(git remote get-url origin 2>/dev/null | sed 's|.*github.com[:/]\(.*\)\.git$|\1|' || true)
+                            if [[ -n "$_detected" ]]; then
+                                local _owner="${_detected%%/*}" _repo="${_detected##*/}"
+                                gh api "repos/${_owner}/${_repo}/check-runs/${_run_id}" \
+                                    --method PATCH \
+                                    --field status=completed \
+                                    --field conclusion=cancelled \
+                                    --silent 2>/dev/null || true
+                            fi
+                        fi
+                    done < <(jq -r 'keys[]' "$check_ids_file" 2>/dev/null || true)
+                fi
+            fi
+        fi
+        # Finalize memory (capture failure patterns for future runs)
+        if type memory_finalize_pipeline &>/dev/null 2>&1; then
+            local _job_state _job_artifacts
+            _job_state="${worktree:-.}/.claude/pipeline-state.md"
+            _job_artifacts="${worktree:-.}/.claude/pipeline-artifacts"
+            memory_finalize_pipeline "$_job_state" "$_job_artifacts" 2>/dev/null || true
         fi
         # Clean up progress tracking for this job
@@ -1958,6 +2082,25 @@ daemon_on_failure() {
                 fi
             fi
+            # Detect context exhaustion from progress file
+            local failure_reason="unknown"
+            local issue_worktree_path="${WORKTREE_DIR:-${REPO_DIR}/.worktrees}/daemon-issue-${issue_num}"
+            local progress_file="${issue_worktree_path}/.claude/loop-logs/progress.md"
+            if [[ -f "$progress_file" ]]; then
+                local progress_iter
+                progress_iter=$(grep -oE 'Iteration: [0-9]+' "$progress_file" 2>/dev/null | tail -1 | grep -oE '[0-9]+' || echo "0")
+                if ! [[ "${progress_iter:-0}" =~ ^[0-9]+$ ]]; then
+                    progress_iter="0"
+                fi
+                local progress_tests
+                progress_tests=$(grep -oE 'Tests passing: (true|false)' "$progress_file" 2>/dev/null | awk '{print $NF}' || echo "unknown")
+                if [[ "${progress_iter:-0}" -gt 0 ]] && { [[ "$progress_tests" == "false" ]] || [[ "$progress_tests" == "unknown" ]]; }; then
+                    failure_reason="context_exhaustion"
+                    emit_event "daemon.context_exhaustion" "issue=$issue_num" "iterations=$progress_iter"
+                    daemon_log WARN "Context exhaustion detected for issue #${issue_num} (iterations: ${progress_iter})"
+                fi
+            fi
             # Build escalated pipeline args
             local retry_template="$PIPELINE_TEMPLATE"
             local retry_model="${MODEL:-opus}"
@@ -1976,6 +2119,17 @@ daemon_on_failure() {
                 daemon_log INFO "Escalation: template=full, compound_cycles=5"
             fi
+            # Increase restarts on context exhaustion
+            if [[ "$failure_reason" == "context_exhaustion" ]]; then
+                local boosted_restarts=$(( ${MAX_RESTARTS_CFG:-3} + retry_count ))
+                # Cap at sw-loop's hard limit of 5
+                if [[ "$boosted_restarts" -gt 5 ]]; then
+                    boosted_restarts=5
+                fi
+                extra_args+=("--max-restarts" "$boosted_restarts")
+                daemon_log INFO "Boosting max-restarts to $boosted_restarts (context exhaustion)"
+            fi
             if [[ "$NO_GITHUB" != "true" ]]; then
                 gh issue comment "$issue_num" --body "## 🔄 Auto-Retry #${retry_count}
@@ -2014,9 +2168,19 @@ _Escalation: $(if [[ "$retry_count" -eq 1 ]]; then echo "upgraded model + increa
     # ── No retry — report final failure ──
     if [[ "$NO_GITHUB" != "true" ]]; then
-        # Add failure label
+        # Add failure label and remove watch label (prevent re-processing)
         gh issue edit "$issue_num" \
-            --add-label "$ON_FAILURE_ADD_LABEL" 2>/dev/null || true
+            --add-label "$ON_FAILURE_ADD_LABEL" \
+            --remove-label "$WATCH_LABEL" 2>/dev/null || true
+        # Close any draft PR created for this issue (cleanup abandoned work)
+        local draft_pr
+        draft_pr=$(gh pr list --head "daemon/issue-${issue_num}" --head "pipeline/pipeline-issue-${issue_num}" \
+            --json number,isDraft --jq '.[] | select(.isDraft == true) | .number' 2>/dev/null | head -1 || true)
+        if [[ -n "$draft_pr" ]]; then
+            gh pr close "$draft_pr" --delete-branch 2>/dev/null || true
+            daemon_log INFO "Closed draft PR #${draft_pr} for failed issue #${issue_num}"
+        fi
         # Comment with log tail
         local log_tail=""
@@ -2075,7 +2239,7 @@ triage_score_issue() {
     # ── Intelligence-powered triage (if enabled) ──
     if [[ "${INTELLIGENCE_ENABLED:-false}" == "true" ]] && type intelligence_analyze_issue &>/dev/null 2>&1; then
-        daemon_log INFO "Intelligence: using AI triage (intelligence enabled)"
+        daemon_log INFO "Intelligence: using AI triage (intelligence enabled)" >&2
         local analysis
         analysis=$(intelligence_analyze_issue "$issue_json" 2>/dev/null || echo "")
         if [[ -n "$analysis" && "$analysis" != "{}" && "$analysis" != "null" ]]; then
@@ -2114,9 +2278,9 @@ triage_score_issue() {
             return
         fi
         # Fall through to heuristic scoring if intelligence call failed
-        daemon_log INFO "Intelligence: AI triage failed, falling back to heuristic scoring"
+        daemon_log INFO "Intelligence: AI triage failed, falling back to heuristic scoring" >&2
     else
-        daemon_log INFO "Intelligence: using heuristic triage (intelligence disabled, enable with intelligence.enabled=true)"
+        daemon_log INFO "Intelligence: using heuristic triage (intelligence disabled, enable with intelligence.enabled=true)" >&2
     fi
     labels_csv=$(echo "$issue_json" | jq -r '[.labels[].name] | join(",")')
     created_at=$(echo "$issue_json" | jq -r '.createdAt // ""')
@@ -2256,6 +2420,7 @@ triage_score_issue() {
 select_pipeline_template() {
     local labels="$1"
     local score="${2:-50}"
+    local _selected_template=""
     # When auto_template is disabled, use default pipeline template
     if [[ "${AUTO_TEMPLATE:-false}" != "true" ]]; then
@@ -2265,7 +2430,7 @@ select_pipeline_template() {
     # ── Intelligence-composed pipeline (if enabled) ──
     if [[ "${COMPOSER_ENABLED:-false}" == "true" ]] && type composer_create_pipeline &>/dev/null 2>&1; then
-        daemon_log INFO "Intelligence: using AI pipeline composition (composer enabled)"
+        daemon_log INFO "Intelligence: using AI pipeline composition (composer enabled)" >&2
         local analysis="${INTELLIGENCE_ANALYSIS:-{}}"
         local repo_context=""
         if [[ -f "${REPO_DIR:-}/.claude/pipeline-state.md" ]]; then
@@ -2287,9 +2452,69 @@ select_pipeline_template() {
             return
         fi
         # Fall through to static selection if composition failed
-        daemon_log INFO "Intelligence: AI pipeline composition failed, falling back to static template selection"
+        daemon_log INFO "Intelligence: AI pipeline composition failed, falling back to static template selection" >&2
     else
-        daemon_log INFO "Intelligence: using static template selection (composer disabled, enable with intelligence.composer_enabled=true)"
+        daemon_log INFO "Intelligence: using static template selection (composer disabled, enable with intelligence.composer_enabled=true)" >&2
+    fi
+    # ── DORA-driven template escalation ──
+    if [[ -f "${EVENTS_FILE:-$HOME/.shipwright/events.jsonl}" ]]; then
+        local _dora_events _dora_total _dora_failures _dora_cfr
+        _dora_events=$(tail -500 "${EVENTS_FILE:-$HOME/.shipwright/events.jsonl}" \
+            | grep '"type":"pipeline.completed"' 2>/dev/null \
+            | tail -5 || true)
+        _dora_total=$(echo "$_dora_events" | grep -c '.' 2>/dev/null || echo "0")
+        _dora_total="${_dora_total:-0}"
+        if [[ "$_dora_total" -ge 3 ]]; then
+            _dora_failures=$(echo "$_dora_events" | grep -c '"result":"failure"' 2>/dev/null || true)
+            _dora_failures="${_dora_failures:-0}"
+            _dora_cfr=$(( _dora_failures * 100 / _dora_total ))
+            if [[ "$_dora_cfr" -gt 40 ]]; then
+                daemon_log INFO "DORA escalation: CFR ${_dora_cfr}% > 40% — forcing enterprise template" >&2
+                emit_event "daemon.dora_escalation" \
+                    "cfr=$_dora_cfr" \
+                    "total=$_dora_total" \
+                    "failures=$_dora_failures" \
+                    "template=enterprise"
+                echo "enterprise"
+                return
+            fi
+            if [[ "$_dora_cfr" -lt 10 && "$score" -ge 60 ]]; then
+                daemon_log INFO "DORA: CFR ${_dora_cfr}% < 10% — fast template eligible" >&2
+                # Fall through to allow other factors to also vote for fast
+            fi
+            # ── DORA multi-factor ──
+            # Cycle time: if median > 120min, prefer faster templates
+            local _dora_cycle_time=0
+            _dora_cycle_time=$(echo "$_dora_events" | jq -r 'select(.duration_s) | .duration_s' 2>/dev/null \
+                | sort -n | awk '{ a[NR]=$1 } END { if (NR>0) print int(a[int(NR/2)+1]/60); else print 0 }' 2>/dev/null) || _dora_cycle_time=0
+            _dora_cycle_time="${_dora_cycle_time:-0}"
+            if [[ "${_dora_cycle_time:-0}" -gt 120 ]]; then
+                daemon_log INFO "DORA: cycle time ${_dora_cycle_time}min > 120 — preferring fast template" >&2
+                if [[ "${score:-0}" -ge 60 ]]; then
+                    echo "fast"
+                    return
+                fi
+            fi
+            # Deploy frequency: if < 1/week, use cost-aware
+            local _dora_deploy_freq=0
+            local _dora_first_epoch _dora_last_epoch _dora_span_days
+            _dora_first_epoch=$(echo "$_dora_events" | head -1 | jq -r '.timestamp // empty' 2>/dev/null | xargs -I{} date -j -f "%Y-%m-%dT%H:%M:%SZ" {} +%s 2>/dev/null || echo "0")
+            _dora_last_epoch=$(echo "$_dora_events" | tail -1 | jq -r '.timestamp // empty' 2>/dev/null | xargs -I{} date -j -f "%Y-%m-%dT%H:%M:%SZ" {} +%s 2>/dev/null || echo "0")
+            if [[ "${_dora_first_epoch:-0}" -gt 0 && "${_dora_last_epoch:-0}" -gt 0 ]]; then
+                _dora_span_days=$(( (_dora_last_epoch - _dora_first_epoch) / 86400 ))
+                if [[ "${_dora_span_days:-0}" -gt 0 ]]; then
+                    _dora_deploy_freq=$(awk -v t="$_dora_total" -v d="$_dora_span_days" 'BEGIN { printf "%.1f", t * 7 / d }' 2>/dev/null) || _dora_deploy_freq=0
+                fi
+            fi
+            if [[ -n "${_dora_deploy_freq:-}" ]] && awk -v f="${_dora_deploy_freq:-0}" 'BEGIN{exit !(f > 0 && f < 1)}' 2>/dev/null; then
+                daemon_log INFO "DORA: deploy freq ${_dora_deploy_freq}/week — using cost-aware" >&2
+                echo "cost-aware"
+                return
+            fi
+        fi
     fi
     # ── Branch protection escalation (highest priority) ──
@@ -2306,7 +2531,7 @@ select_pipeline_template() {
             local required_reviews
             required_reviews=$(echo "$protection" | jq -r '.required_pull_request_reviews.required_approving_review_count // 0' 2>/dev/null || echo "0")
             if [[ "$strict_protection" == "true" ]] || [[ "${required_reviews:-0}" -gt 1 ]]; then
-                daemon_log INFO "Branch has strict protection — escalating to enterprise template"
+                daemon_log INFO "Branch has strict protection — escalating to enterprise template" >&2
                 echo "enterprise"
                 return
             fi
@@ -2340,6 +2565,62 @@ select_pipeline_template() {
         fi
     fi
+    # ── Quality memory-driven selection ──
+    local quality_scores_file="${HOME}/.shipwright/optimization/quality-scores.jsonl"
+    if [[ -f "$quality_scores_file" ]]; then
+        local repo_hash
+        repo_hash=$(cd "${REPO_DIR:-.}" && git rev-parse --show-toplevel 2>/dev/null | shasum -a 256 | cut -c1-16 || echo "unknown")
+        # Get last 5 quality scores for this repo
+        local recent_scores avg_quality has_critical
+        recent_scores=$(grep "\"repo\":\"$repo_hash\"" "$quality_scores_file" 2>/dev/null | tail -5 || true)
+        if [[ -n "$recent_scores" ]]; then
+            avg_quality=$(echo "$recent_scores" | jq -r '.quality_score // 70' 2>/dev/null | awk '{ sum += $1; count++ } END { if (count > 0) printf "%.0f", sum/count; else print 70 }')
+            has_critical=$(echo "$recent_scores" | jq -r '.findings.critical // 0' 2>/dev/null | awk '{ sum += $1 } END { print (sum > 0) ? "yes" : "no" }')
+            # Critical findings in recent history → force enterprise
+            if [[ "$has_critical" == "yes" ]]; then
+                daemon_log INFO "Quality memory: critical findings in recent runs — using enterprise template" >&2
+                echo "enterprise"
+                return
+            fi
+            # Poor quality history → use full template
+            if [[ "${avg_quality:-70}" -lt 60 ]]; then
+                daemon_log INFO "Quality memory: avg score ${avg_quality}/100 in recent runs — using full template" >&2
+                echo "full"
+                return
+            fi
+            # Excellent quality history → allow faster template
+            if [[ "${avg_quality:-70}" -gt 80 ]]; then
+                daemon_log INFO "Quality memory: avg score ${avg_quality}/100 in recent runs — eligible for fast template" >&2
+                # Only upgrade if score also suggests fast
+                if [[ "$score" -ge 60 ]]; then
+                    echo "fast"
+                    return
+                fi
+            fi
+        fi
+    fi
+    # ── Learned template weights ──
+    local _tw_file="${HOME}/.shipwright/optimization/template-weights.json"
+    if [[ -f "$_tw_file" ]]; then
+        local _best_template _best_rate
+        _best_template=$(jq -r '
+            .weights // {} | to_entries
+            | map(select(.value.sample_size >= 3))
+            | sort_by(-.value.success_rate)
+            | .[0].key // ""
+        ' "$_tw_file" 2>/dev/null) || true
+        if [[ -n "${_best_template:-}" && "${_best_template:-}" != "null" && "${_best_template:-}" != "" ]]; then
+            _best_rate=$(jq -r --arg t "$_best_template" '.weights[$t].success_rate // 0' "$_tw_file" 2>/dev/null) || _best_rate=0
+            daemon_log INFO "Template weights: ${_best_template} (${_best_rate} success rate)" >&2
+            echo "$_best_template"
+            return
+        fi
+    fi
     # ── Score-based selection ──
     if [[ "$score" -ge 70 ]]; then
         echo "fast"
@@ -2388,8 +2669,12 @@ daemon_triage_show() {
         num=$(echo "$issue" | jq -r '.number')
         title=$(echo "$issue" | jq -r '.title // "—"')
         labels_csv=$(echo "$issue" | jq -r '[.labels[].name] | join(", ")')
-        score=$(triage_score_issue "$issue")
-        template=$(select_pipeline_template "$labels_csv" "$score")
+        score=$(triage_score_issue "$issue" 2>/dev/null | tail -1)
+        score=$(printf '%s' "$score" | tr -cd '[:digit:]')
+        [[ -z "$score" ]] && score=50
+        template=$(select_pipeline_template "$labels_csv" "$score" 2>/dev/null | tail -1)
+        template=$(printf '%s' "$template" | sed $'s/\x1b\\[[0-9;]*m//g' | tr -cd '[:alnum:]-_')
+        [[ -z "$template" ]] && template="$PIPELINE_TEMPLATE"
         scored_lines+=("${score}|${num}|${title}|${labels_csv}|${template}")
     done < <(echo "$issues_json" | jq -c '.[]')
@@ -3221,11 +3506,12 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
             if [[ ! -f "$scripts_dir/sw-${name}-test.sh" ]]; then
                 # Count usage across other scripts
                 local usage_count
-                usage_count=$(grep -rl "sw-${name}" "$scripts_dir"/sw-*.sh 2>/dev/null | grep -cv "$basename" || true)
+                usage_count=$(grep -rl "sw-${name}" "$scripts_dir"/sw-*.sh 2>/dev/null | grep -cv "$basename" 2>/dev/null || echo "0")
                 usage_count=${usage_count:-0}
                 local line_count
-                line_count=$(wc -l < "$script" 2>/dev/null | tr -d ' ')
+                line_count=$(wc -l < "$script" 2>/dev/null | tr -d ' ' || echo "0")
+                line_count=${line_count:-0}
                 untested_entries="${untested_entries}${usage_count}|${basename}|${line_count}\n"
                 findings=$((findings + 1))
@@ -3602,7 +3888,9 @@ daemon_poll_issues() {
     while IFS= read -r issue; do
         local num score
         num=$(echo "$issue" | jq -r '.number')
-        score=$(triage_score_issue "$issue")
+        score=$(triage_score_issue "$issue" 2>/dev/null | tail -1)
+        score=$(printf '%s' "$score" | tr -cd '[:digit:]')
+        [[ -z "$score" ]] && score=50
         # For org mode, include repo name in the scored entry
         local repo_name=""
         if [[ "$WATCH_MODE" == "org" ]]; then
@@ -3629,10 +3917,10 @@ daemon_poll_issues() {
     local sorted_order
     if [[ "${PRIORITY_STRATEGY:-quick-wins-first}" == "complex-first" ]]; then
         # Complex-first: lower score (more complex) first
-        sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1 -n)
+        sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1,1 -n -k2,2 -n)
     else
-        # Quick-wins-first (default): higher score (simpler) first
-        sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1 -rn)
+        # Quick-wins-first (default): higher score (simpler) first, lowest issue# first on ties
+        sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1,1 -rn -k2,2 -n)
     fi
     # Dependency-aware reordering: move dependencies before dependents
@@ -3727,7 +4015,9 @@ daemon_poll_issues() {
                 emit_event "daemon.priority_lane" "issue=$issue_num" "score=$score"
                 local template
-                template=$(select_pipeline_template "$labels_csv" "$score")
+                template=$(select_pipeline_template "$labels_csv" "$score" 2>/dev/null | tail -1)
+                template=$(printf '%s' "$template" | sed $'s/\x1b\\[[0-9;]*m//g' | tr -cd '[:alnum:]-_')
+                [[ -z "$template" ]] && template="$PIPELINE_TEMPLATE"
                 daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template} [PRIORITY]"
                 local orig_template="$PIPELINE_TEMPLATE"
@@ -3748,7 +4038,9 @@ daemon_poll_issues() {
         # Auto-select pipeline template based on labels + triage score
         local template
-        template=$(select_pipeline_template "$labels_csv" "$score")
+        template=$(select_pipeline_template "$labels_csv" "$score" 2>/dev/null | tail -1)
+        template=$(printf '%s' "$template" | sed $'s/\x1b\\[[0-9;]*m//g' | tr -cd '[:alnum:]-_')
+        [[ -z "$template" ]] && template="$PIPELINE_TEMPLATE"
         daemon_log INFO "Triage: issue #${issue_num} scored ${score}, template=${template}"
         # Spawn pipeline (template selection applied via PIPELINE_TEMPLATE override)
@@ -3756,8 +4048,41 @@ daemon_poll_issues() {
         PIPELINE_TEMPLATE="$template"
         daemon_spawn_pipeline "$issue_num" "$issue_title" "$repo_name"
         PIPELINE_TEMPLATE="$orig_template"
+        # Stagger delay between spawns to avoid API contention
+        local stagger_delay="${SPAWN_STAGGER_SECONDS:-15}"
+        if [[ "$stagger_delay" -gt 0 ]]; then
+            sleep "$stagger_delay"
+        fi
     done <<< "$sorted_order"
+    # ── Drain queue if we have capacity (prevents deadlock when queue is
+    #    populated but no active jobs exist to trigger dequeue) ──
+    local drain_active
+    drain_active=$(locked_get_active_count)
+    while [[ "$drain_active" -lt "$MAX_PARALLEL" ]]; do
+        local drain_issue
+        drain_issue=$(dequeue_next)
+        [[ -z "$drain_issue" ]] && break
+        local drain_title
+        drain_title=$(jq -r --arg n "$drain_issue" '.titles[$n] // ""' "$STATE_FILE" 2>/dev/null || true)
+        local drain_labels drain_score drain_template
+        drain_labels=$(echo "$issues_json" | jq -r --argjson n "$drain_issue" \
+            '.[] | select(.number == $n) | [.labels[].name] | join(",")' 2>/dev/null || echo "")
+        drain_score=$(echo "$sorted_order" | grep "|${drain_issue}|" | cut -d'|' -f1 || echo "50")
+        drain_template=$(select_pipeline_template "$drain_labels" "${drain_score:-50}" 2>/dev/null | tail -1)
+        drain_template=$(printf '%s' "$drain_template" | sed $'s/\x1b\\[[0-9;]*m//g' | tr -cd '[:alnum:]-_')
+        [[ -z "$drain_template" ]] && drain_template="$PIPELINE_TEMPLATE"
+        daemon_log INFO "Draining queue: issue #${drain_issue}, template=${drain_template}"
+        local orig_template="$PIPELINE_TEMPLATE"
+        PIPELINE_TEMPLATE="$drain_template"
+        daemon_spawn_pipeline "$drain_issue" "$drain_title"
+        PIPELINE_TEMPLATE="$orig_template"
+        drain_active=$(locked_get_active_count)
+    done
     # Update last poll
     update_state_field "last_poll" "$(now_iso)"
 }
@@ -3908,8 +4233,11 @@ daemon_check_degradation() {
     local failures successes
     failures=$(echo "$recent" | jq '[.[] | select(.result == "failure")] | length')
     successes=$(echo "$recent" | jq '[.[] | select(.result == "success")] | length')
-    local cfr_pct=$(( failures * 100 / count ))
-    local success_pct=$(( successes * 100 / count ))
+    local cfr_pct=0 success_pct=0
+    if [[ "${count:-0}" -gt 0 ]]; then
+        cfr_pct=$(( failures * 100 / count ))
+        success_pct=$(( successes * 100 / count ))
+    fi
     local alerts=""
     if [[ "$cfr_pct" -gt "$cfr_threshold" ]]; then
@@ -4039,11 +4367,43 @@ daemon_auto_scale() {
     local max_by_queue=$(( queue_depth + active_count ))
     [[ "$max_by_queue" -lt 1 ]] && max_by_queue=1
+    # ── Vitals-driven scaling factor ──
+    local max_by_vitals="$MAX_WORKERS"
+    if type pipeline_compute_vitals &>/dev/null 2>&1 && [[ -f "$STATE_FILE" ]]; then
+        local _total_health=0 _health_count=0
+        while IFS= read -r _job; do
+            local _job_issue _job_worktree
+            _job_issue=$(echo "$_job" | jq -r '.issue // 0')
+            _job_worktree=$(echo "$_job" | jq -r '.worktree // ""')
+            if [[ -n "$_job_worktree" && -d "$_job_worktree/.claude" ]]; then
+                local _job_vitals _job_health
+                _job_vitals=$(pipeline_compute_vitals "$_job_worktree/.claude/pipeline-state.md" "$_job_worktree/.claude/pipeline-artifacts" "$_job_issue" 2>/dev/null) || true
+                if [[ -n "$_job_vitals" && "$_job_vitals" != "{}" ]]; then
+                    _job_health=$(echo "$_job_vitals" | jq -r '.health_score // 50' 2>/dev/null || echo "50")
+                    _total_health=$((_total_health + _job_health))
+                    _health_count=$((_health_count + 1))
+                fi
+            fi
+        done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)
+        if [[ "$_health_count" -gt 0 ]]; then
+            local _avg_health=$((_total_health / _health_count))
+            if [[ "$_avg_health" -lt 50 ]]; then
+                # Pipelines struggling — reduce workers to give each more resources
+                max_by_vitals=$(( MAX_WORKERS * _avg_health / 100 ))
+                [[ "$max_by_vitals" -lt "$MIN_WORKERS" ]] && max_by_vitals="$MIN_WORKERS"
+                daemon_log INFO "Auto-scale: vitals avg health ${_avg_health}% — capping at ${max_by_vitals} workers"
+            fi
+            # avg_health >= 50: no reduction (full capacity available)
+        fi
+    fi
     # ── Compute final value ──
     local computed="$max_by_cpu"
     [[ "$max_by_mem" -lt "$computed" ]] && computed="$max_by_mem"
     [[ "$max_by_budget" -lt "$computed" ]] && computed="$max_by_budget"
     [[ "$max_by_queue" -lt "$computed" ]] && computed="$max_by_queue"
+    [[ "$max_by_vitals" -lt "$computed" ]] && computed="$max_by_vitals"
     [[ "$MAX_WORKERS" -lt "$computed" ]] && computed="$MAX_WORKERS"
     # Respect fleet-assigned ceiling if set
@@ -4302,7 +4662,19 @@ daemon_cleanup_stale() {
         done < <(git worktree list --porcelain 2>/dev/null | grep '^worktree ' | sed 's/^worktree //')
     fi
-    # ── 2. Clean old pipeline artifacts ──
+    # ── 2. Expire old checkpoints ──
+    if [[ -x "$SCRIPT_DIR/sw-checkpoint.sh" ]]; then
+        local expired_output
+        expired_output=$(bash "$SCRIPT_DIR/sw-checkpoint.sh" expire --hours "$((age_days * 24))" 2>/dev/null || true)
+        if [[ -n "$expired_output" ]] && echo "$expired_output" | grep -q "Expired"; then
+            local expired_count
+            expired_count=$(echo "$expired_output" | grep -c "Expired" || true)
+            cleaned=$((cleaned + ${expired_count:-0}))
+            daemon_log INFO "Expired ${expired_count:-0} old checkpoint(s)"
+        fi
+    fi
+    # ── 3. Clean old pipeline artifacts (subdirectories only) ──
     local artifacts_dir=".claude/pipeline-artifacts"
     if [[ -d "$artifacts_dir" ]]; then
         while IFS= read -r artifact_dir; do