npm - shipwright-cli - Versions diffs - 1.7.0 → 1.9.0 - Mend

shipwright-cli 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

package/.claude/agents/code-reviewer.md +90 -0
package/.claude/agents/devops-engineer.md +142 -0
package/.claude/agents/pipeline-agent.md +80 -0
package/.claude/agents/shell-script-specialist.md +150 -0
package/.claude/agents/test-specialist.md +196 -0
package/.claude/hooks/post-tool-use.sh +38 -0
package/.claude/hooks/pre-tool-use.sh +25 -0
package/.claude/hooks/session-started.sh +37 -0
package/README.md +212 -814
package/claude-code/CLAUDE.md.shipwright +54 -0
package/claude-code/hooks/notify-idle.sh +2 -2
package/claude-code/hooks/session-start.sh +24 -0
package/claude-code/hooks/task-completed.sh +6 -2
package/claude-code/settings.json.template +12 -0
package/dashboard/public/app.js +4422 -0
package/dashboard/public/index.html +816 -0
package/dashboard/public/styles.css +4755 -0
package/dashboard/server.ts +4315 -0
package/docs/KNOWN-ISSUES.md +18 -10
package/docs/TIPS.md +38 -26
package/docs/patterns/README.md +33 -23
package/package.json +9 -5
package/scripts/adapters/iterm2-adapter.sh +1 -1
package/scripts/adapters/tmux-adapter.sh +52 -23
package/scripts/adapters/wezterm-adapter.sh +26 -14
package/scripts/lib/compat.sh +200 -0
package/scripts/lib/helpers.sh +72 -0
package/scripts/postinstall.mjs +72 -13
package/scripts/{cct → sw} +109 -21
package/scripts/sw-adversarial.sh +274 -0
package/scripts/sw-architecture-enforcer.sh +330 -0
package/scripts/sw-checkpoint.sh +390 -0
package/scripts/{cct-cleanup.sh → sw-cleanup.sh} +3 -1
package/scripts/sw-connect.sh +619 -0
package/scripts/{cct-cost.sh → sw-cost.sh} +368 -34
package/scripts/{cct-daemon.sh → sw-daemon.sh} +2217 -204
package/scripts/sw-dashboard.sh +477 -0
package/scripts/sw-developer-simulation.sh +252 -0
package/scripts/sw-docs.sh +635 -0
package/scripts/sw-doctor.sh +907 -0
package/scripts/{cct-fix.sh → sw-fix.sh} +10 -6
package/scripts/{cct-fleet.sh → sw-fleet.sh} +498 -22
package/scripts/sw-github-checks.sh +521 -0
package/scripts/sw-github-deploy.sh +533 -0
package/scripts/sw-github-graphql.sh +972 -0
package/scripts/sw-heartbeat.sh +293 -0
package/scripts/sw-init.sh +522 -0
package/scripts/sw-intelligence.sh +1196 -0
package/scripts/sw-jira.sh +643 -0
package/scripts/sw-launchd.sh +364 -0
package/scripts/sw-linear.sh +648 -0
package/scripts/{cct-logs.sh → sw-logs.sh} +72 -2
package/scripts/{cct-loop.sh → sw-loop.sh} +534 -44
package/scripts/{cct-memory.sh → sw-memory.sh} +321 -38
package/scripts/sw-patrol-meta.sh +417 -0
package/scripts/sw-pipeline-composer.sh +455 -0
package/scripts/{cct-pipeline.sh → sw-pipeline.sh} +2319 -178
package/scripts/sw-predictive.sh +820 -0
package/scripts/{cct-prep.sh → sw-prep.sh} +339 -49
package/scripts/{cct-ps.sh → sw-ps.sh} +6 -4
package/scripts/{cct-reaper.sh → sw-reaper.sh} +6 -4
package/scripts/sw-remote.sh +687 -0
package/scripts/sw-self-optimize.sh +947 -0
package/scripts/sw-session.sh +519 -0
package/scripts/sw-setup.sh +234 -0
package/scripts/sw-status.sh +605 -0
package/scripts/{cct-templates.sh → sw-templates.sh} +9 -4
package/scripts/sw-tmux.sh +591 -0
package/scripts/sw-tracker-jira.sh +277 -0
package/scripts/sw-tracker-linear.sh +292 -0
package/scripts/sw-tracker.sh +409 -0
package/scripts/{cct-upgrade.sh → sw-upgrade.sh} +103 -46
package/scripts/{cct-worktree.sh → sw-worktree.sh} +3 -0
package/templates/pipelines/autonomous.json +27 -5
package/templates/pipelines/full.json +12 -0
package/templates/pipelines/standard.json +12 -0
package/tmux/{claude-teams-overlay.conf → shipwright-overlay.conf} +27 -9
package/tmux/templates/accessibility.json +34 -0
package/tmux/templates/api-design.json +35 -0
package/tmux/templates/architecture.json +1 -0
package/tmux/templates/bug-fix.json +9 -0
package/tmux/templates/code-review.json +1 -0
package/tmux/templates/compliance.json +36 -0
package/tmux/templates/data-pipeline.json +36 -0
package/tmux/templates/debt-paydown.json +34 -0
package/tmux/templates/devops.json +1 -0
package/tmux/templates/documentation.json +1 -0
package/tmux/templates/exploration.json +1 -0
package/tmux/templates/feature-dev.json +1 -0
package/tmux/templates/full-stack.json +8 -0
package/tmux/templates/i18n.json +34 -0
package/tmux/templates/incident-response.json +36 -0
package/tmux/templates/migration.json +1 -0
package/tmux/templates/observability.json +35 -0
package/tmux/templates/onboarding.json +33 -0
package/tmux/templates/performance.json +35 -0
package/tmux/templates/refactor.json +1 -0
package/tmux/templates/release.json +35 -0
package/tmux/templates/security-audit.json +8 -0
package/tmux/templates/spike.json +34 -0
package/tmux/templates/testing.json +1 -0
package/tmux/tmux.conf +98 -9
package/scripts/cct-doctor.sh +0 -328
package/scripts/cct-init.sh +0 -282
package/scripts/cct-session.sh +0 -284
package/scripts/cct-status.sh +0 -169

package/scripts/{cct-daemon.sh → sw-daemon.sh} RENAMED Viewed

@@ -4,8 +4,9 @@
 # ║  Polls for labeled issues · Spawns pipelines · Manages worktrees      ║
 # ╚═══════════════════════════════════════════════════════════════════════════╝
 set -euo pipefail
+trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
-VERSION="1.7.0"
+VERSION="1.9.0"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
@@ -20,6 +21,28 @@ DIM='\033[2m'
 BOLD='\033[1m'
 RESET='\033[0m'
+# ─── Cross-platform compatibility ──────────────────────────────────────────
+# shellcheck source=lib/compat.sh
+[[ -f "$SCRIPT_DIR/lib/compat.sh" ]] && source "$SCRIPT_DIR/lib/compat.sh"
+# ─── Intelligence Engine (optional) ──────────────────────────────────────────
+# shellcheck source=sw-intelligence.sh
+[[ -f "$SCRIPT_DIR/sw-intelligence.sh" ]] && source "$SCRIPT_DIR/sw-intelligence.sh"
+# shellcheck source=sw-pipeline-composer.sh
+[[ -f "$SCRIPT_DIR/sw-pipeline-composer.sh" ]] && source "$SCRIPT_DIR/sw-pipeline-composer.sh"
+# shellcheck source=sw-self-optimize.sh
+[[ -f "$SCRIPT_DIR/sw-self-optimize.sh" ]] && source "$SCRIPT_DIR/sw-self-optimize.sh"
+# shellcheck source=sw-predictive.sh
+[[ -f "$SCRIPT_DIR/sw-predictive.sh" ]] && source "$SCRIPT_DIR/sw-predictive.sh"
+# ─── GitHub API Modules (optional) ────────────────────────────────────────
+# shellcheck source=sw-github-graphql.sh
+[[ -f "$SCRIPT_DIR/sw-github-graphql.sh" ]] && source "$SCRIPT_DIR/sw-github-graphql.sh"
+# shellcheck source=sw-github-checks.sh
+[[ -f "$SCRIPT_DIR/sw-github-checks.sh" ]] && source "$SCRIPT_DIR/sw-github-checks.sh"
+# shellcheck source=sw-github-deploy.sh
+[[ -f "$SCRIPT_DIR/sw-github-deploy.sh" ]] && source "$SCRIPT_DIR/sw-github-deploy.sh"
 # ─── Output Helpers ─────────────────────────────────────────────────────────
 info()    { echo -e "${CYAN}${BOLD}▸${RESET} $*"; }
 success() { echo -e "${GREEN}${BOLD}✓${RESET} $*"; }
@@ -49,7 +72,7 @@ format_duration() {
 }
 # ─── Structured Event Log ──────────────────────────────────────────────────
-EVENTS_FILE="${HOME}/.claude-teams/events.jsonl"
+EVENTS_FILE="${HOME}/.shipwright/events.jsonl"
 emit_event() {
     local event_type="$1"
@@ -61,14 +84,68 @@ emit_event() {
         if [[ "$val" =~ ^-?[0-9]+\.?[0-9]*$ ]]; then
             json_fields="${json_fields},\"${key}\":${val}"
         else
-            val="${val//\"/\\\"}"
-            json_fields="${json_fields},\"${key}\":\"${val}\""
+            local escaped_val
+            escaped_val=$(printf '%s' "$val" | jq -Rs '.' 2>/dev/null || printf '"%s"' "${val//\"/\\\"}")
+            json_fields="${json_fields},\"${key}\":${escaped_val}"
         fi
     done
-    mkdir -p "${HOME}/.claude-teams"
+    mkdir -p "${HOME}/.shipwright"
     echo "{\"ts\":\"$(now_iso)\",\"ts_epoch\":$(now_epoch),\"type\":\"${event_type}\"${json_fields}}" >> "$EVENTS_FILE"
 }
+# ─── Event Log Rotation ─────────────────────────────────────────────────────
+# Rotate the structured event log and prune stale heartbeat files.
+# Globals:  EVENTS_FILE (read), HOME (read)
+# Effects:  when events.jsonl is larger than 50MB it becomes events.jsonl.1,
+#           older copies shift up (at most 3 are kept), a fresh empty log is
+#           created and a daemon.log_rotated event is emitted. Heartbeat
+#           *.json files not modified for more than 24h are deleted.
+rotate_event_log() {
+    local max_size=$((50 * 1024 * 1024))  # 50MB
+    local max_rotations=3
+    # Rotate events.jsonl if too large
+    if [[ -f "$EVENTS_FILE" ]]; then
+        local size
+        size=$(wc -c < "$EVENTS_FILE" 2>/dev/null || echo 0)
+        # BSD/macOS wc pads the count with leading spaces. Strip them so the
+        # value is a clean integer; otherwise emit_event's numeric check fails
+        # and previous_size is logged as a quoted string instead of a number.
+        size="${size//[[:space:]]/}"
+        [[ "$size" =~ ^[0-9]+$ ]] || size=0
+        if [[ "$size" -gt "$max_size" ]]; then
+            # Shift rotations: .3 → overwritten, .2 → .3, .1 → .2, current → .1
+            local i=$max_rotations
+            while [[ $i -gt 1 ]]; do
+                local prev=$((i - 1))
+                [[ -f "${EVENTS_FILE}.${prev}" ]] && mv "${EVENTS_FILE}.${prev}" "${EVENTS_FILE}.${i}"
+                i=$((i - 1))
+            done
+            mv "$EVENTS_FILE" "${EVENTS_FILE}.1"
+            touch "$EVENTS_FILE"
+            emit_event "daemon.log_rotated" "previous_size=$size"
+            info "Rotated events.jsonl (was $(( size / 1048576 ))MB)"
+        fi
+    fi
+    # Clean old heartbeat files (> 24h); best-effort, failures are ignored
+    local heartbeat_dir="$HOME/.shipwright/heartbeats"
+    if [[ -d "$heartbeat_dir" ]]; then
+        find "$heartbeat_dir" -name "*.json" -mmin +1440 -delete 2>/dev/null || true
+    fi
+}
+# ─── GitHub Context (loaded once at startup) ──────────────────────────────
+DAEMON_GITHUB_CONTEXT=""
+# Populate and export DAEMON_GITHUB_CONTEXT from gh_repo_context.
+# Returns 0 without doing anything when GitHub is disabled, when the helper
+# functions are not defined, when repo detection fails, or when GH_OWNER /
+# GH_REPO are empty.
+daemon_github_context() {
+    # GitHub integration switched off
+    if [[ "${NO_GITHUB:-false}" == "true" ]]; then
+        return 0
+    fi
+    # Both helpers must be defined before we try to use them
+    local helper
+    for helper in gh_repo_context _gh_detect_repo; do
+        type "$helper" &>/dev/null 2>&1 || return 0
+    done
+    _gh_detect_repo 2>/dev/null || return 0
+    local owner repo
+    owner="${GH_OWNER:-}"
+    repo="${GH_REPO:-}"
+    if [[ -z "$owner" ]] || [[ -z "$repo" ]]; then
+        return 0
+    fi
+    local context
+    context=$(gh_repo_context "$owner" "$repo" 2>/dev/null || echo "{}")
+    # Nothing useful came back — leave DAEMON_GITHUB_CONTEXT untouched
+    if [[ -z "$context" || "$context" == "{}" ]]; then
+        return 0
+    fi
+    daemon_log INFO "GitHub context loaded: $(echo "$context" | jq -r '.contributor_count // 0') contributors, $(echo "$context" | jq -r '.security_alert_count // 0') security alerts"
+    DAEMON_GITHUB_CONTEXT="$context"
+    export DAEMON_GITHUB_CONTEXT
+}
 # ─── GitHub API Retry with Backoff ────────────────────────────────────────
 # Retries gh commands up to 3 times with exponential backoff (1s, 3s, 9s).
 # Detects rate-limit (403/429) and transient errors. Returns the gh exit code.
@@ -106,7 +183,7 @@ gh_retry() {
 }
 # ─── Defaults ───────────────────────────────────────────────────────────────
-DAEMON_DIR="$HOME/.claude-teams"
+DAEMON_DIR="$HOME/.shipwright"
 PID_FILE="$DAEMON_DIR/daemon.pid"
 SHUTDOWN_FLAG="$DAEMON_DIR/daemon.shutdown"
 STATE_FILE=""
@@ -153,8 +230,17 @@ PATROL_INTERVAL="${PATROL_INTERVAL:-3600}"
 PATROL_MAX_ISSUES="${PATROL_MAX_ISSUES:-5}"
 PATROL_LABEL="${PATROL_LABEL:-auto-patrol}"
 PATROL_DRY_RUN=false
+PATROL_AUTO_WATCH=false
+PATROL_FAILURES_THRESHOLD=3
+PATROL_DORA_ENABLED=true
+PATROL_UNTESTED_ENABLED=true
+PATROL_RETRY_ENABLED=true
+PATROL_RETRY_THRESHOLD=2
 LAST_PATROL_EPOCH=0
+# Team dashboard coordination
+DASHBOARD_URL="${DASHBOARD_URL:-http://localhost:8767}"
 # Runtime
 NO_GITHUB=false
 CONFIG_PATH=""
@@ -320,6 +406,12 @@ load_config() {
     PATROL_INTERVAL=$(jq -r '.patrol.interval // 3600' "$config_file")
     PATROL_MAX_ISSUES=$(jq -r '.patrol.max_issues // 5' "$config_file")
     PATROL_LABEL=$(jq -r '.patrol.label // "auto-patrol"' "$config_file")
+    PATROL_AUTO_WATCH=$(jq -r '.patrol.auto_watch // false' "$config_file")
+    PATROL_FAILURES_THRESHOLD=$(jq -r '.patrol.checks.recurring_failures.threshold // 3' "$config_file")
+    PATROL_DORA_ENABLED=$(jq -r '.patrol.checks.dora_degradation.enabled // true' "$config_file")
+    PATROL_UNTESTED_ENABLED=$(jq -r '.patrol.checks.untested_scripts.enabled // true' "$config_file")
+    PATROL_RETRY_ENABLED=$(jq -r '.patrol.checks.retry_exhaustion.enabled // true' "$config_file")
+    PATROL_RETRY_THRESHOLD=$(jq -r '.patrol.checks.retry_exhaustion.threshold // 2' "$config_file")
     # adaptive template selection
     AUTO_TEMPLATE=$(jq -r '.auto_template // false' "$config_file")
@@ -333,6 +425,18 @@ load_config() {
     SELF_OPTIMIZE=$(jq -r '.self_optimize // false' "$config_file")
     OPTIMIZE_INTERVAL=$(jq -r '.optimize_interval // 10' "$config_file")
+    # intelligence engine settings
+    INTELLIGENCE_ENABLED=$(jq -r '.intelligence.enabled // false' "$config_file")
+    INTELLIGENCE_CACHE_TTL=$(jq -r '.intelligence.cache_ttl_seconds // 3600' "$config_file")
+    COMPOSER_ENABLED=$(jq -r '.intelligence.composer_enabled // false' "$config_file")
+    OPTIMIZATION_ENABLED=$(jq -r '.intelligence.optimization_enabled // false' "$config_file")
+    PREDICTION_ENABLED=$(jq -r '.intelligence.prediction_enabled // false' "$config_file")
+    ANOMALY_THRESHOLD=$(jq -r '.intelligence.anomaly_threshold // 3.0' "$config_file")
+    # adaptive thresholds (intelligence-driven operational tuning)
+    ADAPTIVE_THRESHOLDS_ENABLED=$(jq -r '.intelligence.adaptive_enabled // false' "$config_file")
+    PRIORITY_STRATEGY=$(jq -r '.intelligence.priority_strategy // "quick-wins-first"' "$config_file")
     # gh_retry: enable retry wrapper on critical GitHub API calls
     GH_RETRY_ENABLED=$(jq -r '.gh_retry // true' "$config_file")
@@ -361,6 +465,23 @@ load_config() {
     WORKER_MEM_GB=$(jq -r '.worker_mem_gb // 4' "$config_file")
     EST_COST_PER_JOB=$(jq -r '.estimated_cost_per_job_usd // 5.0' "$config_file")
+    # heartbeat + checkpoint recovery
+    HEALTH_HEARTBEAT_TIMEOUT=$(jq -r '.health.heartbeat_timeout_s // 120' "$config_file")
+    CHECKPOINT_ENABLED=$(jq -r '.health.checkpoint_enabled // true' "$config_file")
+    # progress-based health monitoring (replaces static timeouts)
+    PROGRESS_MONITORING=$(jq -r '.health.progress_based // true' "$config_file")
+    PROGRESS_CHECKS_BEFORE_WARN=$(jq -r '.health.stale_checks_before_warn // 3' "$config_file")
+    PROGRESS_CHECKS_BEFORE_KILL=$(jq -r '.health.stale_checks_before_kill // 6' "$config_file")
+    PROGRESS_HARD_LIMIT_S=$(jq -r '.health.hard_limit_s // 10800' "$config_file")  # 3hr absolute max
+    # team dashboard URL (for coordinated claiming)
+    local cfg_dashboard_url
+    cfg_dashboard_url=$(jq -r '.dashboard_url // ""' "$config_file")
+    if [[ -n "$cfg_dashboard_url" && "$cfg_dashboard_url" != "null" ]]; then
+        DASHBOARD_URL="$cfg_dashboard_url"
+    fi
     success "Config loaded"
 }
@@ -375,6 +496,546 @@ setup_dirs() {
     WORKTREE_DIR=".worktrees"
     mkdir -p "$LOG_DIR"
+    mkdir -p "$HOME/.shipwright/progress"
+}
+# ─── Adaptive Threshold Helpers ──────────────────────────────────────────────
+# When intelligence.adaptive_enabled=true, operational thresholds are learned
+# from historical data instead of using fixed defaults.
+# Every function falls back to the current hardcoded value when no data exists.
+ADAPTIVE_THRESHOLDS_ENABLED="${ADAPTIVE_THRESHOLDS_ENABLED:-false}"
+PRIORITY_STRATEGY="${PRIORITY_STRATEGY:-quick-wins-first}"
+EMPTY_QUEUE_CYCLES=0
+# Adapt poll interval based on queue state.
+# Arguments: $1 - queue depth, $2 - active job count
+# Globals:   ADAPTIVE_THRESHOLDS_ENABLED, POLL_INTERVAL (read),
+#            EMPTY_QUEUE_CYCLES (read/written)
+# Outputs:   interval in seconds on stdout
+#   adaptive thresholds disabled             → $POLL_INTERVAL
+#   queue and workers idle for 5+ calls      → 120
+#   queue has items                          → 30
+#   otherwise (jobs running, or short idle)  → 60
+get_adaptive_poll_interval() {
+    local queue_depth="$1"
+    local active_count="$2"
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
+        echo "$POLL_INTERVAL"
+        return
+    fi
+    local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
+    # The result is returned on stdout, so callers presumably capture it with
+    # $(...). That runs this function in a subshell and discards its update to
+    # EMPTY_QUEUE_CYCLES, which would keep the idle streak from ever reaching
+    # the 5-cycle threshold. Re-seed the counter from the value persisted
+    # below so the streak survives between calls.
+    if [[ -f "$tuning_file" ]]; then
+        local persisted_cycles
+        persisted_cycles=$(jq -r '.empty_queue_cycles // 0' "$tuning_file" 2>/dev/null || echo "")
+        if [[ "$persisted_cycles" =~ ^[0-9]+$ ]]; then
+            EMPTY_QUEUE_CYCLES="$persisted_cycles"
+        fi
+    fi
+    if [[ "$queue_depth" -eq 0 && "$active_count" -eq 0 ]]; then
+        EMPTY_QUEUE_CYCLES=$((EMPTY_QUEUE_CYCLES + 1))
+    else
+        EMPTY_QUEUE_CYCLES=0
+    fi
+    local interval
+    if [[ "$EMPTY_QUEUE_CYCLES" -ge 5 ]]; then
+        interval=120
+    elif [[ "$queue_depth" -gt 0 ]]; then
+        interval=30
+    else
+        interval=60
+    fi
+    # Persist current setting for dashboard visibility (and for the re-seed above)
+    mkdir -p "$HOME/.shipwright/optimization"
+    local tmp_tuning="${tuning_file}.tmp.$$"
+    local wrote=false
+    if [[ -f "$tuning_file" ]]; then
+        if jq --argjson pi "$interval" --argjson eqc "$EMPTY_QUEUE_CYCLES" \
+            '.poll_interval = $pi | .empty_queue_cycles = $eqc' \
+            "$tuning_file" > "$tmp_tuning" 2>/dev/null; then
+            wrote=true
+        fi
+    else
+        if jq -n --argjson pi "$interval" --argjson eqc "$EMPTY_QUEUE_CYCLES" \
+            '{poll_interval: $pi, empty_queue_cycles: $eqc}' > "$tmp_tuning"; then
+            wrote=true
+        fi
+    fi
+    # Persisting is best-effort: never leave a stray temp file behind
+    if [[ "$wrote" == "true" ]]; then
+        mv "$tmp_tuning" "$tuning_file" || rm -f "$tmp_tuning"
+    else
+        rm -f "$tmp_tuning"
+    fi
+    echo "$interval"
+}
+# Estimated cost (USD) of one job for a template, printed on stdout.
+# Uses the mean total_cost_usd of the last 10 matching sessions in costs.json.
+# Falls back to $EST_COST_PER_JOB when adaptive thresholds are off, the costs
+# file is missing, or no non-zero average can be computed.
+get_adaptive_cost_estimate() {
+    local template="${1:-autonomous}"
+    local costs_file="$HOME/.shipwright/costs.json"
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]] || [[ ! -f "$costs_file" ]]; then
+        echo "$EST_COST_PER_JOB"
+        return
+    fi
+    local avg_cost
+    avg_cost=$(jq -r --arg tpl "$template" '
+        [.sessions // [] | .[] | select(.template == $tpl) | .total_cost_usd // 0] |
+        .[-10:] | if length > 0 then (add / length) else null end
+    ' "$costs_file" 2>/dev/null || echo "")
+    # Empty, null and zero averages all mean "no usable history"
+    case "$avg_cost" in
+        ""|"null"|"0")
+            echo "$EST_COST_PER_JOB"
+            ;;
+        *)
+            echo "$avg_cost"
+            ;;
+    esac
+}
+# Per-stage heartbeat timeout in seconds, printed on stdout.
+# Arguments: $1 - pipeline stage name (default: unknown)
+# With adaptive thresholds off: $HEALTH_HEARTBEAT_TIMEOUT (default 120).
+# Otherwise: 150% of the learned p90 stage duration (floor 60s) when
+# stage-durations.json has data for the stage, else a stage-specific default.
+get_adaptive_heartbeat_timeout() {
+    local stage="${1:-unknown}"
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
+        echo "${HEALTH_HEARTBEAT_TIMEOUT:-120}"
+        return
+    fi
+    # Stage-specific defaults (used when no learned data)
+    local default_timeout="${HEALTH_HEARTBEAT_TIMEOUT:-120}"
+    case "$stage" in
+        build)  default_timeout=300 ;;
+        test)   default_timeout=180 ;;
+        review|compound_quality) default_timeout=180 ;;
+        lint|format|intake|plan|design) default_timeout=60 ;;
+    esac
+    local durations_file="$HOME/.shipwright/optimization/stage-durations.json"
+    if [[ ! -f "$durations_file" ]]; then
+        echo "$default_timeout"
+        return
+    fi
+    local learned_duration
+    learned_duration=$(jq -r --arg s "$stage" \
+        '.stages[$s].p90_duration_s // 0' "$durations_file" 2>/dev/null || echo "0")
+    # Bash arithmetic is integer-only: a fractional p90 such as 123.5 would
+    # make the comparison below error out and discard the learned value.
+    # Keep the integer part and treat anything non-numeric as "no data".
+    learned_duration="${learned_duration%%.*}"
+    [[ "$learned_duration" =~ ^[0-9]+$ ]] || learned_duration=0
+    if [[ "$learned_duration" -gt 0 ]]; then
+        # 150% of p90 duration, floor of 60s
+        local adaptive_timeout=$(( (learned_duration * 3) / 2 ))
+        [[ "$adaptive_timeout" -lt 60 ]] && adaptive_timeout=60
+        echo "$adaptive_timeout"
+    else
+        echo "$default_timeout"
+    fi
+}
+# Stale-pipeline timeout in seconds for a template, printed on stdout.
+# Arguments: $1 - template name (default: autonomous)
+# Uses 1.5x the recorded p95 pipeline duration, clamped to 600s–7200s.
+# Falls back to $HEALTH_STALE_TIMEOUT (default 1800) when adaptive thresholds
+# are off, pipeline-durations.json is missing, or no p95 has been recorded.
+get_adaptive_stale_timeout() {
+    local template="${1:-autonomous}"
+    local fallback_timeout="${HEALTH_STALE_TIMEOUT:-1800}"
+    local durations_file="$HOME/.shipwright/optimization/pipeline-durations.json"
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]] || [[ ! -f "$durations_file" ]]; then
+        echo "$fallback_timeout"
+        return
+    fi
+    local p95_duration
+    p95_duration=$(jq -r --arg tpl "$template" \
+        '.templates[$tpl].p95_duration_s // 0' "$durations_file" 2>/dev/null || echo "0")
+    # No learned percentile yet → static timeout
+    if ! [[ "$p95_duration" -gt 0 ]]; then
+        echo "$fallback_timeout"
+        return
+    fi
+    # 1.5x safety margin, clamped 600s-7200s
+    local adaptive_timeout=$(( (p95_duration * 3) / 2 ))
+    if [[ "$adaptive_timeout" -lt 600 ]]; then
+        adaptive_timeout=600
+    elif [[ "$adaptive_timeout" -gt 7200 ]]; then
+        adaptive_timeout=7200
+    fi
+    echo "$adaptive_timeout"
+}
+# Record a finished pipeline's duration for future threshold learning.
+# Arguments: $1 - template name, $2 - duration in whole seconds, $3 - result
+# Effects:   appends to .templates[<template>].durations in
+#            pipeline-durations.json (last 50 kept) and recomputes
+#            p95_duration_s. No-op when adaptive thresholds are off or the
+#            duration is not a non-negative integer.
+# Returns:   always 0 — recording is best-effort and must not abort a caller
+#            running under `set -e`.
+record_pipeline_duration() {
+    local template="$1" duration_s="$2" result="$3"
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
+        return 0
+    fi
+    [[ "$duration_s" =~ ^[0-9]+$ ]] || return 0
+    local durations_file="$HOME/.shipwright/optimization/pipeline-durations.json"
+    mkdir -p "$HOME/.shipwright/optimization"
+    if [[ ! -f "$durations_file" ]]; then
+        echo '{"templates":{}}' > "$durations_file"
+    fi
+    local tmp_dur="${durations_file}.tmp.$$"
+    # Write to a temp file first so a failed jq run never truncates the store
+    if jq --arg tpl "$template" --argjson dur "$duration_s" --arg res "$result" --arg ts "$(now_iso)" '
+        .templates[$tpl] = (
+            (.templates[$tpl] // {durations: [], p95_duration_s: 0}) |
+            .durations = ((.durations + [{duration_s: $dur, result: $res, ts: $ts}]) | .[-50:]) |
+            .p95_duration_s = (
+                [.durations[].duration_s] | sort |
+                if length > 0 then .[((length * 95 / 100) | floor)] else 0 end
+            )
+        )
+    ' "$durations_file" > "$tmp_dur" 2>/dev/null; then
+        mv "$tmp_dur" "$durations_file" || rm -f "$tmp_dur"
+    else
+        # e.g. corrupt durations file: drop the partial output, keep the old data
+        rm -f "$tmp_dur"
+    fi
+    return 0
+}
+# ─── Progress-Based Health Monitoring ─────────────────────────────────────────
+# Instead of killing jobs after a static timeout, we check for forward progress.
+# Progress signals: stage transitions, iteration advances, git diff growth, new files.
+# Graduated response: healthy → slowing → stalled → stuck → kill.
+PROGRESS_DIR="$HOME/.shipwright/progress"
+# Collect a progress snapshot for an active job.
+# Arguments: $1 - issue number (accepted for interface symmetry; not used here)
+#            $2 - worktree path
+#            $3 - PID of the pipeline process
+# Outputs:   JSON object on stdout:
+#            {stage, iteration, diff_lines, files_changed, last_error, ts}
+daemon_collect_snapshot() {
+    local issue_num="$1" worktree="$2" pid="$3"
+    local stage="" iteration=0 diff_lines=0 files_changed=0 last_error=""
+    # Get stage and iteration from heartbeat (fastest source)
+    local heartbeat_dir="$HOME/.shipwright/heartbeats"
+    if [[ -d "$heartbeat_dir" ]]; then
+        local hb_file
+        for hb_file in "$heartbeat_dir"/*.json; do
+            # Unmatched glob stays literal; skip anything that is not a file
+            [[ ! -f "$hb_file" ]] && continue
+            local hb_pid
+            hb_pid=$(jq -r '.pid // 0' "$hb_file" 2>/dev/null || echo 0)
+            if [[ "$hb_pid" == "$pid" ]]; then
+                stage=$(jq -r '.stage // "unknown"' "$hb_file" 2>/dev/null || echo "unknown")
+                iteration=$(jq -r '.iteration // 0' "$hb_file" 2>/dev/null || echo 0)
+                break
+            fi
+        done
+    fi
+    # --argjson below needs a valid JSON number; treat anything else as 0
+    [[ "$iteration" =~ ^[0-9]+$ ]] || iteration=0
+    # Fallback: read stage from pipeline-state.md in worktree
+    if [[ -z "$stage" || "$stage" == "unknown" ]] && [[ -d "$worktree" ]]; then
+        local state_file="$worktree/.claude/pipeline-state.md"
+        if [[ -f "$state_file" ]]; then
+            stage=$(grep -m1 '^current_stage:' "$state_file" 2>/dev/null | sed 's/^current_stage: *//' || echo "unknown")
+        fi
+    fi
+    # Get git diff stats from worktree (how much code has been written)
+    if [[ -d "$worktree/.git" ]] || [[ -f "$worktree/.git" ]]; then
+        # `|| true` rather than `|| echo 0`: under pipefail a failing git makes
+        # the pipeline fail even though wc already printed "0", and a second
+        # "0" would corrupt the value. Each count is validated afterwards.
+        diff_lines=$(cd "$worktree" && git diff --stat 2>/dev/null | tail -1 | grep -o '[0-9]* insertion' | grep -o '[0-9]*' || true)
+        files_changed=$(cd "$worktree" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ' || true)
+        # Also count untracked files the agent has created
+        local untracked
+        untracked=$(cd "$worktree" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ' || true)
+        [[ "$diff_lines" =~ ^[0-9]+$ ]] || diff_lines=0
+        [[ "$files_changed" =~ ^[0-9]+$ ]] || files_changed=0
+        [[ "$untracked" =~ ^[0-9]+$ ]] || untracked=0
+        files_changed=$((files_changed + untracked))
+    fi
+    # Check last error from error log
+    if [[ -d "$worktree" ]]; then
+        local error_log="$worktree/.claude/pipeline-artifacts/error-log.jsonl"
+        if [[ -f "$error_log" ]]; then
+            last_error=$(tail -1 "$error_log" 2>/dev/null | jq -r '.signature // ""' 2>/dev/null || echo "")
+        fi
+    fi
+    # Output JSON snapshot
+    jq -n \
+        --arg stage "$stage" \
+        --argjson iteration "${iteration:-0}" \
+        --argjson diff_lines "${diff_lines:-0}" \
+        --argjson files_changed "${files_changed:-0}" \
+        --arg last_error "$last_error" \
+        --arg ts "$(now_iso)" \
+        '{
+            stage: $stage,
+            iteration: $iteration,
+            diff_lines: $diff_lines,
+            files_changed: $files_changed,
+            last_error: $last_error,
+            ts: $ts
+        }'
+}
+# Assess job progress by comparing the current snapshot to the previous one.
+# Arguments:
+#   $1 - issue number (selects $PROGRESS_DIR/issue-<n>.json)
+#   $2 - current snapshot as a JSON string with the fields stage, iteration,
+#        diff_lines, files_changed, last_error and ts
+# Outputs: one verdict on stdout: healthy | slowing | stalled | stuck
+# Side effects: creates/updates the per-issue progress file, which holds the
+#   last 10 snapshots plus no_progress_count, repeated_error_count and
+#   last_progress_at. May log stage/iteration transitions via daemon_log.
+daemon_assess_progress() {
+    local issue_num="$1" current_snapshot="$2"
+    mkdir -p "$PROGRESS_DIR"
+    local progress_file="$PROGRESS_DIR/issue-${issue_num}.json"
+    # If no previous snapshot, store this one and return healthy
+    if [[ ! -f "$progress_file" ]]; then
+        jq -n \
+            --argjson snap "$current_snapshot" \
+            --arg issue "$issue_num" \
+            '{
+                issue: $issue,
+                snapshots: [$snap],
+                no_progress_count: 0,
+                last_progress_at: $snap.ts,
+                repeated_error_count: 0
+            }' > "$progress_file"
+        echo "healthy"
+        return
+    fi
+    local prev_data
+    prev_data=$(cat "$progress_file")
+    # Get previous snapshot values (the most recent stored snapshot and counters)
+    local prev_stage prev_iteration prev_diff_lines prev_files prev_error prev_no_progress
+    prev_stage=$(echo "$prev_data" | jq -r '.snapshots[-1].stage // "unknown"')
+    prev_iteration=$(echo "$prev_data" | jq -r '.snapshots[-1].iteration // 0')
+    prev_diff_lines=$(echo "$prev_data" | jq -r '.snapshots[-1].diff_lines // 0')
+    prev_files=$(echo "$prev_data" | jq -r '.snapshots[-1].files_changed // 0')
+    prev_error=$(echo "$prev_data" | jq -r '.snapshots[-1].last_error // ""')
+    prev_no_progress=$(echo "$prev_data" | jq -r '.no_progress_count // 0')
+    local prev_repeated_errors
+    prev_repeated_errors=$(echo "$prev_data" | jq -r '.repeated_error_count // 0')
+    # Get current values
+    local cur_stage cur_iteration cur_diff cur_files cur_error
+    cur_stage=$(echo "$current_snapshot" | jq -r '.stage')
+    cur_iteration=$(echo "$current_snapshot" | jq -r '.iteration')
+    cur_diff=$(echo "$current_snapshot" | jq -r '.diff_lines')
+    cur_files=$(echo "$current_snapshot" | jq -r '.files_changed')
+    cur_error=$(echo "$current_snapshot" | jq -r '.last_error')
+    # Detect progress: any one of the four signals below counts
+    local has_progress=false
+    # Stage changed to a known stage → clear progress
+    if [[ "$cur_stage" != "$prev_stage" && "$cur_stage" != "unknown" ]]; then
+        has_progress=true
+        daemon_log INFO "Progress: issue #${issue_num} stage ${prev_stage} → ${cur_stage}"
+    fi
+    # Iteration increased → clear progress (agent is looping but advancing)
+    if [[ "$cur_iteration" -gt "$prev_iteration" ]]; then
+        has_progress=true
+        daemon_log INFO "Progress: issue #${issue_num} iteration ${prev_iteration} → ${cur_iteration}"
+    fi
+    # Diff lines grew (agent is writing code)
+    if [[ "$cur_diff" -gt "$prev_diff_lines" ]]; then
+        has_progress=true
+    fi
+    # More files touched
+    if [[ "$cur_files" -gt "$prev_files" ]]; then
+        has_progress=true
+    fi
+    # Detect repeated errors (same error signature hitting again).
+    # An empty current error leaves the counter unchanged.
+    local repeated_errors="$prev_repeated_errors"
+    if [[ -n "$cur_error" && "$cur_error" == "$prev_error" ]]; then
+        repeated_errors=$((repeated_errors + 1))
+    elif [[ -n "$cur_error" && "$cur_error" != "$prev_error" ]]; then
+        # Different error — reset counter (agent is making different mistakes, that's progress)
+        repeated_errors=0
+    fi
+    # Update no_progress counter; any progress also clears the error streak
+    local no_progress_count
+    if [[ "$has_progress" == "true" ]]; then
+        no_progress_count=0
+        repeated_errors=0
+    else
+        no_progress_count=$((prev_no_progress + 1))
+    fi
+    # Update progress file (keep last 10 snapshots). Written via a temp file
+    # and renamed only if jq succeeds; last_progress_at moves only when the
+    # no-progress counter is back at 0.
+    local tmp_progress="${progress_file}.tmp.$$"
+    jq \
+        --argjson snap "$current_snapshot" \
+        --argjson npc "$no_progress_count" \
+        --argjson rec "$repeated_errors" \
+        --arg ts "$(now_iso)" \
+        '
+        .snapshots = ((.snapshots + [$snap]) | .[-10:]) |
+        .no_progress_count = $npc |
+        .repeated_error_count = $rec |
+        if $npc == 0 then .last_progress_at = $ts else . end
+        ' "$progress_file" > "$tmp_progress" 2>/dev/null && mv "$tmp_progress" "$progress_file"
+    # Determine verdict. Thresholds count consecutive checks without progress
+    # and default to 3 (warn) and 6 (kill) when not configured.
+    local warn_threshold="${PROGRESS_CHECKS_BEFORE_WARN:-3}"
+    local kill_threshold="${PROGRESS_CHECKS_BEFORE_KILL:-6}"
+    # Stuck in same error loop — accelerate to kill
+    if [[ "$repeated_errors" -ge 3 ]]; then
+        echo "stuck"
+        return
+    fi
+    if [[ "$no_progress_count" -ge "$kill_threshold" ]]; then
+        echo "stuck"
+    elif [[ "$no_progress_count" -ge "$warn_threshold" ]]; then
+        echo "stalled"
+    elif [[ "$no_progress_count" -ge 1 ]]; then
+        echo "slowing"
+    else
+        echo "healthy"
+    fi
+}
+# Drop the stored progress history for an issue once its job has finished.
+# Arguments: $1 - issue number
+daemon_clear_progress() {
+    local issue_num="$1"
+    local progress_file="$PROGRESS_DIR/issue-${issue_num}.json"
+    rm -f "$progress_file"
+}
+# Learn worker memory needs from the RSS of currently running pipeline processes.
+# Globals: ADAPTIVE_THRESHOLDS_ENABLED, STATE_FILE (read); WORKER_MEM_GB (written)
+# Samples `ps -o rss=` (KB) for every live PID listed in .active_jobs of the
+# state file, averages the samples, applies 125% headroom, rounds up to whole
+# GB and clamps to 1–16GB. The result is stored in daemon-tuning.json and
+# assigned to WORKER_MEM_GB. No-op when adaptive thresholds are off, the state
+# file is missing, or no live process reports a positive RSS.
+learn_worker_memory() {
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
+        return
+    fi
+    if [[ ! -f "$STATE_FILE" ]]; then
+        return
+    fi
+    local total_rss=0
+    local process_count=0
+    local job
+    while IFS= read -r job; do
+        local pid
+        pid=$(echo "$job" | jq -r '.pid // empty')
+        [[ -z "$pid" || ! "$pid" =~ ^[0-9]+$ ]] && continue
+        if kill -0 "$pid" 2>/dev/null; then
+            local rss_kb
+            rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null | tr -d ' ' || echo "0")
+            [[ ! "$rss_kb" =~ ^[0-9]+$ ]] && rss_kb=0
+            if [[ "$rss_kb" -gt 0 ]]; then
+                total_rss=$((total_rss + rss_kb))
+                process_count=$((process_count + 1))
+            fi
+        fi
+    done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)
+    if [[ "$process_count" -gt 0 ]]; then
+        # Keep the average in KB. Truncating to whole GB before applying the
+        # headroom loses up to 1GB per worker (3.9GB would be treated as 3GB).
+        local avg_rss_kb=$(( total_rss / process_count ))
+        local kb_per_gb=1048576
+        # 125% of the average, rounded up to whole GB; minimum 1GB, max 16GB
+        local learned_mem_gb=$(( (avg_rss_kb * 5 + 4 * kb_per_gb - 1) / (4 * kb_per_gb) ))
+        [[ "$learned_mem_gb" -lt 1 ]] && learned_mem_gb=1
+        [[ "$learned_mem_gb" -gt 16 ]] && learned_mem_gb=16
+        local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
+        mkdir -p "$HOME/.shipwright/optimization"
+        local tmp_tuning="${tuning_file}.tmp.$$"
+        local wrote=false
+        if [[ -f "$tuning_file" ]]; then
+            if jq --argjson mem "$learned_mem_gb" --argjson rss "$total_rss" --argjson cnt "$process_count" \
+                '.learned_worker_mem_gb = $mem | .last_rss_total_kb = $rss | .last_rss_process_count = $cnt' \
+                "$tuning_file" > "$tmp_tuning" 2>/dev/null; then
+                wrote=true
+            fi
+        else
+            # Same fields as the update branch so the file schema does not
+            # depend on which branch created it
+            if jq -n --argjson mem "$learned_mem_gb" --argjson rss "$total_rss" --argjson cnt "$process_count" \
+                '{learned_worker_mem_gb: $mem, last_rss_total_kb: $rss, last_rss_process_count: $cnt}' \
+                > "$tmp_tuning"; then
+                wrote=true
+            fi
+        fi
+        # Persisting is best-effort: never leave a stray temp file behind
+        if [[ "$wrote" == "true" ]]; then
+            mv "$tmp_tuning" "$tuning_file" || rm -f "$tmp_tuning"
+        else
+            rm -f "$tmp_tuning"
+        fi
+        WORKER_MEM_GB="$learned_mem_gb"
+    fi
+}
+# Record a job outcome at a given parallelism level (for learning optimal scaling).
+# Arguments: $1 - parallelism (number), $2 - result string (e.g. "success")
+# Effects:   appends {parallelism, result, ts} to .scaling_history in
+#            daemon-tuning.json, keeping the last 50 entries.
+# Returns:   always 0 — recording is best-effort and must not abort a caller
+#            running under `set -e`.
+record_scaling_outcome() {
+    local parallelism="$1" result="$2"
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
+        return 0
+    fi
+    local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
+    mkdir -p "$HOME/.shipwright/optimization"
+    local tmp_tuning="${tuning_file}.tmp.$$"
+    # Write to a temp file first; on any jq failure discard it and keep the old file
+    if [[ -f "$tuning_file" ]]; then
+        jq --argjson p "$parallelism" --arg r "$result" --arg ts "$(now_iso)" '
+            .scaling_history = ((.scaling_history // []) + [{parallelism: $p, result: $r, ts: $ts}]) |
+            .scaling_history |= .[-50:]
+        ' "$tuning_file" > "$tmp_tuning" 2>/dev/null || { rm -f "$tmp_tuning"; return 0; }
+    else
+        jq -n --argjson p "$parallelism" --arg r "$result" --arg ts "$(now_iso)" '
+            {scaling_history: [{parallelism: $p, result: $r, ts: $ts}]}
+        ' > "$tmp_tuning" || { rm -f "$tmp_tuning"; return 0; }
+    fi
+    mv "$tmp_tuning" "$tuning_file" || rm -f "$tmp_tuning"
+    return 0
+}
+# Success rate (integer percent) observed at a given parallelism level.
+# Arguments: $1 - parallelism level to look up
+# Outputs:   percentage on stdout; 100 when there is no tuning file, no
+#            history at that level, or the lookup fails.
+get_success_rate_at_parallelism() {
+    local target_parallelism="$1"
+    local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
+    # Optimistic default until history says otherwise
+    local rate="100"
+    if [[ -f "$tuning_file" ]]; then
+        rate=$(jq -r --argjson p "$target_parallelism" '
+        [.scaling_history // [] | .[] | select(.parallelism == $p)] |
+        if length > 0 then
+            ([.[] | select(.result == "success")] | length) * 100 / length | floor
+        else 100 end
+    ' "$tuning_file" 2>/dev/null || echo "100")
+    fi
+    echo "${rate:-100}"
+}
+# Adapt the patrol issue limit based on how many findings the last patrol produced.
+# Arguments: $1 - number of findings, $2 - current max issues
+# Effects:   stores patrol_max_issues, last_patrol_findings and
+#            patrol_adapted_at in daemon-tuning.json.
+#   findings >= max → max + 2 (capped at 20)
+#   findings == 0   → max - 1 (never below 3)
+#   otherwise       → unchanged
+# Returns:   always 0 — persisting is best-effort and must not abort a caller
+#            running under `set -e`.
+adapt_patrol_limits() {
+    local findings="$1" max_issues="$2"
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" != "true" ]]; then
+        return 0
+    fi
+    local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
+    mkdir -p "$HOME/.shipwright/optimization"
+    local new_max="$max_issues"
+    if [[ "$findings" -ge "$max_issues" ]]; then
+        # Hit the limit — increase
+        new_max=$((max_issues + 2))
+        [[ "$new_max" -gt 20 ]] && new_max=20
+    elif [[ "$findings" -eq 0 ]]; then
+        # Finds nothing — reduce
+        if [[ "$max_issues" -gt 3 ]]; then
+            new_max=$((max_issues - 1))
+        else
+            new_max=3
+        fi
+    fi
+    local tmp_tuning="${tuning_file}.tmp.$$"
+    # Write to a temp file first; on any jq failure discard it and keep the old file
+    if [[ -f "$tuning_file" ]]; then
+        jq --argjson pm "$new_max" --argjson lf "$findings" --arg ts "$(now_iso)" \
+            '.patrol_max_issues = $pm | .last_patrol_findings = $lf | .patrol_adapted_at = $ts' \
+            "$tuning_file" > "$tmp_tuning" 2>/dev/null || { rm -f "$tmp_tuning"; return 0; }
+    else
+        jq -n --argjson pm "$new_max" --argjson lf "$findings" --arg ts "$(now_iso)" \
+            '{patrol_max_issues: $pm, last_patrol_findings: $lf, patrol_adapted_at: $ts}' \
+            > "$tmp_tuning" || { rm -f "$tmp_tuning"; return 0; }
+    fi
+    mv "$tmp_tuning" "$tuning_file" || rm -f "$tmp_tuning"
+    return 0
+}
+# Override PATROL_MAX_ISSUES with the learned value from daemon-tuning.json.
+# Leaves PATROL_MAX_ISSUES untouched when adaptive thresholds are off, the
+# tuning file is missing, or no positive patrol_max_issues is stored.
+load_adaptive_patrol_limits() {
+    [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]] || return 0
+    local tuning_file="$HOME/.shipwright/optimization/daemon-tuning.json"
+    [[ -f "$tuning_file" ]] || return 0
+    local learned_max
+    learned_max=$(jq -r '.patrol_max_issues // 0' "$tuning_file" 2>/dev/null || echo "0")
+    if [[ "$learned_max" -gt 0 ]]; then
+        PATROL_MAX_ISSUES="$learned_max"
+    fi
+}
+# Extract dependency issue references from free-form issue text.
+# Arguments: $1 - issue text
+# Outputs:   unique "#<number>" tokens, one per line, for the phrases
+#            "depends on #N", "blocked by #N" and "after #N" in any letter
+#            case. Prints nothing (and still returns 0) when there are none.
+extract_issue_dependencies() {
+    local text="$1"
+    # -i: these phrases are often capitalized ("Depends on #12"), which a
+    # case-sensitive match would miss. printf instead of echo so text that
+    # starts with "-n" or "-e" is not interpreted as an echo option.
+    printf '%s\n' "$text" | grep -oiE '(depends on|blocked by|after) #[0-9]+' | grep -oE '#[0-9]+' | sort -u || true
 }
 # ─── Logging ─────────────────────────────────────────────────────────────────
@@ -387,6 +1048,18 @@ daemon_log() {
     ts=$(now_iso)
     echo "[$ts] [$level] $msg" >> "$LOG_FILE"
+    # Rotate daemon.log if over 20MB (checked every ~100 writes)
+    if [[ $(( RANDOM % 100 )) -eq 0 ]] && [[ -f "$LOG_FILE" ]]; then
+        local log_size
+        log_size=$(wc -c < "$LOG_FILE" 2>/dev/null || echo 0)
+        if [[ "$log_size" -gt 20971520 ]]; then
+            [[ -f "${LOG_FILE}.2" ]] && mv "${LOG_FILE}.2" "${LOG_FILE}.3"
+            [[ -f "${LOG_FILE}.1" ]] && mv "${LOG_FILE}.1" "${LOG_FILE}.2"
+            mv "$LOG_FILE" "${LOG_FILE}.1"
+            touch "$LOG_FILE"
+        fi
+    fi
     # Also print to stdout
     case "$level" in
         INFO)    info "$msg" ;;
@@ -431,6 +1104,40 @@ notify() {
     fi
 }
+# ─── GitHub Rate-Limit Circuit Breaker ─────────────────────────────────────
+# Tracks consecutive GitHub API failures. If we hit too many failures in a row,
+# we back off exponentially to avoid hammering a rate-limited API.
+GH_CONSECUTIVE_FAILURES=0
+GH_BACKOFF_UNTIL=0  # epoch seconds — skip gh calls until this time
+gh_rate_limited() {
+    # Returns 0 (true) if we should skip GitHub API calls
+    local now_e
+    now_e=$(now_epoch)
+    if [[ "$GH_BACKOFF_UNTIL" -gt "$now_e" ]]; then
+        return 0
+    fi
+    return 1
+}
+# Reset the circuit breaker: clear the backoff deadline and zero the
+# consecutive-failure counter.
+gh_record_success() {
+    GH_BACKOFF_UNTIL=0
+    GH_CONSECUTIVE_FAILURES=0
+}
+gh_record_failure() {
+    GH_CONSECUTIVE_FAILURES=$((GH_CONSECUTIVE_FAILURES + 1))
+    if [[ "$GH_CONSECUTIVE_FAILURES" -ge 3 ]]; then
+        # Exponential backoff: 30s, 60s, 120s, 240s (capped at 5min)
+        local backoff_secs=$((30 * (1 << (GH_CONSECUTIVE_FAILURES - 3))))
+        [[ "$backoff_secs" -gt 300 ]] && backoff_secs=300
+        GH_BACKOFF_UNTIL=$(( $(now_epoch) + backoff_secs ))
+        daemon_log WARN "GitHub rate-limit circuit breaker: backing off ${backoff_secs}s after ${GH_CONSECUTIVE_FAILURES} failures"
+        emit_event "daemon.rate_limit" "failures=$GH_CONSECUTIVE_FAILURES" "backoff_s=$backoff_secs"
+    fi
+}
 # ─── Pre-flight Checks ──────────────────────────────────────────────────────
 preflight_checks() {
@@ -490,10 +1197,10 @@ preflight_checks() {
     fi
     # 4. Pipeline script
-    if [[ -x "$SCRIPT_DIR/cct-pipeline.sh" ]]; then
-        echo -e "  ${GREEN}✓${RESET} cct-pipeline.sh available"
+    if [[ -x "$SCRIPT_DIR/sw-pipeline.sh" ]]; then
+        echo -e "  ${GREEN}✓${RESET} sw-pipeline.sh available"
     else
-        echo -e "  ${RED}✗${RESET} cct-pipeline.sh not found at $SCRIPT_DIR"
+        echo -e "  ${RED}✗${RESET} sw-pipeline.sh not found at $SCRIPT_DIR"
         errors=$((errors + 1))
     fi
@@ -518,17 +1225,59 @@ preflight_checks() {
 # ─── State Management ───────────────────────────────────────────────────────
+# State file lock FD, intended for serialized read-modify-write of the state file.
+# NOTE(review): the locking helpers visible in this section (locked_state_update,
+# init_state, locked_get_active_count) hard-code FD 200 rather than reading this
+# variable — confirm whether STATE_LOCK_FD is still used anywhere.
+STATE_LOCK_FD=7
 # Atomic write: write to tmp file, then mv (prevents corruption on crash)
+# Arguments: $1 - full new contents of the state file
+# Returns:   0 on success; 1 if the temp file cannot be created, written, or
+#            moved into place (each failure is reported through daemon_log)
 atomic_write_state() {
     local content="$1"
-    local tmp_file="${STATE_FILE}.tmp.$$"
-    echo "$content" > "$tmp_file"
-    mv "$tmp_file" "$STATE_FILE"
+    local tmp_file
+    # The temp file shares STATE_FILE's path prefix and gets a random suffix
+    # from mktemp instead of the old PID-based name.
+    tmp_file=$(mktemp "${STATE_FILE}.tmp.XXXXXX") || {
+        daemon_log ERROR "Failed to create temp file for state write"
+        return 1
+    }
+    echo "$content" > "$tmp_file" || {
+        daemon_log ERROR "Failed to write state to temp file"
+        rm -f "$tmp_file"
+        return 1
+    }
+    # On failure the temp file is removed so stale *.tmp.* files do not pile up.
+    mv "$tmp_file" "$STATE_FILE" || {
+        daemon_log ERROR "Failed to move temp state file into place"
+        rm -f "$tmp_file"
+        return 1
+    }
+}
+# Locked read-modify-write: prevents TOCTOU race on state file.
+# Usage: locked_state_update '.queued += [42]'
+# The jq expression is applied to the current state file atomically.
+locked_state_update() {
+    local jq_expr="$1"
+    shift
+    local lock_file="${STATE_FILE}.lock"
+    (
+        if command -v flock &>/dev/null; then
+            flock -w 5 200 2>/dev/null || {
+                daemon_log ERROR "locked_state_update: lock acquisition timed out — aborting"
+                return 1
+            }
+        fi
+        local tmp
+        tmp=$(jq "$jq_expr" "$@" "$STATE_FILE" 2>&1) || {
+            daemon_log ERROR "locked_state_update: jq failed — $(echo "$tmp" | head -1)"
+            return 1
+        }
+        atomic_write_state "$tmp" || {
+            daemon_log ERROR "locked_state_update: atomic_write_state failed"
+            return 1
+        }
+    ) 200>"$lock_file"
 }
 init_state() {
     if [[ ! -f "$STATE_FILE" ]]; then
-        jq -n \
+        local init_json
+        init_json=$(jq -n \
             --arg pid "$$" \
             --arg started "$(now_iso)" \
             --argjson interval "$POLL_INTERVAL" \
@@ -550,25 +1299,32 @@ init_state() {
                 queued: [],
                 completed: [],
                 retry_counts: {},
-                priority_lane_active: []
-            }' > "$STATE_FILE"
+                priority_lane_active: [],
+                titles: {}
+            }')
+        local lock_file="${STATE_FILE}.lock"
+        (
+            if command -v flock &>/dev/null; then
+                flock -w 5 200 2>/dev/null || {
+                    daemon_log ERROR "init_state: lock acquisition timed out"
+                    return 1
+                }
+            fi
+            atomic_write_state "$init_json"
+        ) 200>"$lock_file"
     else
         # Update PID and start time in existing state
-        local tmp
-        tmp=$(jq \
+        locked_state_update \
             --arg pid "$$" \
             --arg started "$(now_iso)" \
-            '.pid = ($pid | tonumber) | .started_at = $started' \
-            "$STATE_FILE")
-        atomic_write_state "$tmp"
+            '.pid = ($pid | tonumber) | .started_at = $started'
     fi
 }
 update_state_field() {
     local field="$1" value="$2"
-    local tmp
-    tmp=$(jq --arg val "$value" ".${field} = \$val" "$STATE_FILE")
-    atomic_write_state "$tmp"
+    locked_state_update --arg field "$field" --arg val "$value" \
+        '.[$field] = $val'
 }
 # ─── Inflight Check ─────────────────────────────────────────────────────────
@@ -611,15 +1367,36 @@ get_active_count() {
     jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo 0
 }
+# Race-safe active count: acquires state lock before reading.
+# Returns MAX_PARALLEL on lock timeout (safe fail — prevents over-spawning).
+# Outputs: a single integer on stdout — 0 when the state file does not exist,
+#          MAX_PARALLEL when the lock or the jq read fails or yields something
+#          that is not a non-negative integer, otherwise the length of
+#          .active_jobs.
+locked_get_active_count() {
+    if [[ ! -f "$STATE_FILE" ]]; then
+        echo 0
+        return
+    fi
+    local lock_file="${STATE_FILE}.lock"
+    local count
+    count=$(
+        (
+            if command -v flock &>/dev/null; then
+                flock -w 5 200 2>/dev/null || {
+                    # Log to stderr: stdout of this subshell IS the return
+                    # value, and daemon_log also prints to the terminal.
+                    daemon_log WARN "locked_get_active_count: lock timeout — returning MAX_PARALLEL as safe default" >&2
+                    echo "$MAX_PARALLEL"
+                    exit 0
+                }
+            fi
+            jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo "$MAX_PARALLEL"
+        ) 200>"$lock_file"
+    )
+    # Callers compare the result numerically; anything that is not a plain
+    # integer falls back to the same safe default as a lock timeout.
+    if [[ -n "$count" && ! "$count" =~ ^[0-9]+$ ]]; then
+        count="$MAX_PARALLEL"
+    fi
+    echo "${count:-0}"
+}
 # ─── Queue Management ───────────────────────────────────────────────────────
 enqueue_issue() {
     local issue_num="$1"
-    local tmp
-    tmp=$(jq --argjson num "$issue_num" \
-        '.queued += [$num] | .queued |= unique' \
-        "$STATE_FILE")
-    atomic_write_state "$tmp"
+    locked_state_update --argjson num "$issue_num" \
+        '.queued += [$num] | .queued |= unique'
     daemon_log INFO "Queued issue #${issue_num} (at capacity)"
 }
@@ -631,10 +1408,8 @@ dequeue_next() {
     local next
     next=$(jq -r '.queued[0] // empty' "$STATE_FILE" 2>/dev/null || true)
     if [[ -n "$next" ]]; then
-        # Remove from queue
-        local tmp
-        tmp=$(jq '.queued = .queued[1:]' "$STATE_FILE")
-        atomic_write_state "$tmp"
+        # Remove from queue (locked to prevent race with enqueue)
+        locked_state_update '.queued = .queued[1:]'
         echo "$next"
     fi
 }
@@ -667,11 +1442,8 @@ get_priority_active_count() {
 track_priority_job() {
     local issue_num="$1"
-    local tmp
-    tmp=$(jq --argjson num "$issue_num" \
-        '.priority_lane_active = ((.priority_lane_active // []) + [$num] | unique)' \
-        "$STATE_FILE")
-    atomic_write_state "$tmp"
+    locked_state_update --argjson num "$issue_num" \
+        '.priority_lane_active = ((.priority_lane_active // []) + [$num] | unique)'
 }
 untrack_priority_job() {
@@ -679,11 +1451,63 @@ untrack_priority_job() {
     if [[ ! -f "$STATE_FILE" ]]; then
         return
     fi
-    local tmp
-    tmp=$(jq --argjson num "$issue_num" \
-        '.priority_lane_active = [(.priority_lane_active // [])[] | select(. != $num)]' \
-        "$STATE_FILE")
-    atomic_write_state "$tmp"
+    locked_state_update --argjson num "$issue_num" \
+        '.priority_lane_active = [(.priority_lane_active // [])[] | select(. != $num)]'
+}
+# ─── Distributed Issue Claiming ───────────────────────────────────────────
+claim_issue() {
+    local issue_num="$1"
+    local machine_name="$2"
+    [[ "$NO_GITHUB" == "true" ]] && return 0  # No claiming in no-github mode
+    # Try dashboard-coordinated claim first (atomic label-based)
+    local resp
+    resp=$(curl -s --max-time 5 -X POST "${DASHBOARD_URL}/api/claim" \
+        -H "Content-Type: application/json" \
+        -d "$(jq -n --argjson issue "$issue_num" --arg machine "$machine_name" \
+            '{issue: $issue, machine: $machine}')" 2>/dev/null || echo "")
+    if [[ -n "$resp" ]] && echo "$resp" | jq -e '.approved == true' &>/dev/null; then
+        return 0
+    elif [[ -n "$resp" ]] && echo "$resp" | jq -e '.approved == false' &>/dev/null; then
+        local claimed_by
+        claimed_by=$(echo "$resp" | jq -r '.claimed_by // "another machine"')
+        daemon_log INFO "Issue #${issue_num} claimed by ${claimed_by} (via dashboard)"
+        return 1
+    fi
+    # Fallback: direct GitHub label check (dashboard unreachable)
+    daemon_log WARN "Dashboard unreachable — falling back to direct GitHub label claim"
+    local existing_claim
+    existing_claim=$(gh issue view "$issue_num" --json labels --jq \
+        '[.labels[].name | select(startswith("claimed:"))] | .[0] // ""' 2>/dev/null || true)
+    if [[ -n "$existing_claim" ]]; then
+        daemon_log INFO "Issue #${issue_num} already claimed: ${existing_claim}"
+        return 1
+    fi
+    gh issue edit "$issue_num" --add-label "claimed:${machine_name}" 2>/dev/null || return 1
+    return 0
+}
+release_claim() {
+    local issue_num="$1"
+    local machine_name="$2"
+    [[ "$NO_GITHUB" == "true" ]] && return 0
+    # Try dashboard-coordinated release first
+    curl -s --max-time 5 -X POST "${DASHBOARD_URL}/api/claim/release" \
+        -H "Content-Type: application/json" \
+        -d "$(jq -n --argjson issue "$issue_num" --arg machine "$machine_name" \
+            '{issue: $issue, machine: $machine}')" 2>/dev/null || true
+    # Also remove label directly as backup (idempotent)
+    gh issue edit "$issue_num" --remove-label "claimed:${machine_name}" 2>/dev/null || true
 }
 # ─── Org-Wide Repo Management ─────────────────────────────────────────────
@@ -718,6 +1542,38 @@ daemon_spawn_pipeline() {
     daemon_log INFO "Spawning pipeline for issue #${issue_num}: ${issue_title}"
+    # Extract goal text from issue (title + first line of body)
+    local issue_goal="$issue_title"
+    if [[ "$NO_GITHUB" != "true" ]]; then
+        local issue_body_first
+        issue_body_first=$(gh issue view "$issue_num" --json body --jq '.body' 2>/dev/null | head -3 | tr '\n' ' ' | cut -c1-200 || true)
+        if [[ -n "$issue_body_first" ]]; then
+            issue_goal="${issue_title}: ${issue_body_first}"
+        fi
+    fi
+    # ── Predictive risk assessment (if enabled) ──
+    if [[ "${PREDICTION_ENABLED:-false}" == "true" ]] && type predict_pipeline_risk &>/dev/null 2>&1; then
+        local issue_json_for_pred=""
+        if [[ "$NO_GITHUB" != "true" ]]; then
+            issue_json_for_pred=$(gh issue view "$issue_num" --json number,title,body,labels 2>/dev/null || echo "")
+        fi
+        if [[ -n "$issue_json_for_pred" ]]; then
+            local risk_result
+            risk_result=$(predict_pipeline_risk "$issue_json_for_pred" "" 2>/dev/null || echo "")
+            if [[ -n "$risk_result" ]]; then
+                local overall_risk
+                overall_risk=$(echo "$risk_result" | jq -r '.overall_risk // 50' 2>/dev/null || echo "50")
+                if [[ "$overall_risk" -gt 80 ]]; then
+                    daemon_log WARN "HIGH RISK (${overall_risk}%) predicted for issue #${issue_num} — upgrading model"
+                    export CLAUDE_MODEL="opus"
+                elif [[ "$overall_risk" -lt 30 ]]; then
+                    daemon_log INFO "LOW RISK (${overall_risk}%) predicted for issue #${issue_num}"
+                fi
+            fi
+        fi
+    fi
     # Check disk space before spawning
     local free_space_kb
     free_space_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
@@ -747,13 +1603,23 @@ daemon_spawn_pipeline() {
         # Standard mode: use git worktree
         work_dir="${WORKTREE_DIR}/daemon-issue-${issue_num}"
-        # Clean up stale worktree if it exists
-        if [[ -d "$work_dir" ]]; then
-            git worktree remove "$work_dir" --force 2>/dev/null || true
-        fi
-        git branch -D "$branch_name" 2>/dev/null || true
+        # Serialize worktree operations with a lock file (run in subshell to auto-close FD)
+        mkdir -p "$WORKTREE_DIR"
+        local wt_ok=0
+        (
+            flock -w 30 200 2>/dev/null || true
+            # Clean up stale worktree if it exists
+            if [[ -d "$work_dir" ]]; then
+                git worktree remove "$work_dir" --force 2>/dev/null || true
+            fi
+            git branch -D "$branch_name" 2>/dev/null || true
+            git worktree add "$work_dir" -b "$branch_name" "$BASE_BRANCH" 2>/dev/null
+        ) 200>"${WORKTREE_DIR}/.worktree.lock"
+        wt_ok=$?
-        if ! git worktree add "$work_dir" -b "$branch_name" "$BASE_BRANCH" 2>/dev/null; then
+        if [[ $wt_ok -ne 0 ]]; then
             daemon_log ERROR "Failed to create worktree for issue #${issue_num}"
             return 1
         fi
@@ -773,17 +1639,19 @@ daemon_spawn_pipeline() {
     fi
     # Run pipeline in work directory (background)
+    echo -e "\n\n===== Pipeline run $(date -u +%Y-%m-%dT%H:%M:%SZ) =====" >> "$LOG_DIR/issue-${issue_num}.log" 2>/dev/null || true
     (
         cd "$work_dir"
-        "$SCRIPT_DIR/cct-pipeline.sh" "${pipeline_args[@]}"
-    ) > "$LOG_DIR/issue-${issue_num}.log" 2>&1 &
+        "$SCRIPT_DIR/sw-pipeline.sh" "${pipeline_args[@]}"
+    ) >> "$LOG_DIR/issue-${issue_num}.log" 2>&1 200>&- &
     local pid=$!
     daemon_log INFO "Pipeline started for issue #${issue_num} (PID: ${pid})"
-    # Track the job (include repo for org mode)
-    daemon_track_job "$issue_num" "$pid" "$work_dir" "$issue_title" "$repo_full_name"
+    # Track the job (include repo and goal for org mode)
+    daemon_track_job "$issue_num" "$pid" "$work_dir" "$issue_title" "$repo_full_name" "$issue_goal"
     emit_event "daemon.spawn" "issue=$issue_num" "pid=$pid" "repo=${repo_full_name:-local}"
+    "$SCRIPT_DIR/sw-tracker.sh" notify "spawn" "$issue_num" 2>/dev/null || true
     # Comment on the issue
     if [[ "$NO_GITHUB" != "true" ]]; then
@@ -791,9 +1659,9 @@ daemon_spawn_pipeline() {
         if [[ -n "$repo_full_name" ]]; then
             gh_args+=("--repo" "$repo_full_name")
         fi
-        gh issue comment "$issue_num" "${gh_args[@]}" --body "## 🤖 Pipeline Started
+        gh issue comment "$issue_num" ${gh_args[@]+"${gh_args[@]}"} --body "## 🤖 Pipeline Started
-**Daemon** picked up this issue and started an autonomous pipeline.
+**Delivering:** ${issue_title}
 | Field | Value |
 |-------|-------|
@@ -802,32 +1670,31 @@ daemon_spawn_pipeline() {
 | Repo | \`${repo_full_name:-local}\` |
 | Started | $(now_iso) |
-_Progress updates will be posted as the pipeline advances._" 2>/dev/null || true
+_Progress updates will appear below as the pipeline advances through each stage._" 2>/dev/null || true
     fi
 }
 # ─── Track Job ───────────────────────────────────────────────────────────────
+# Append a job record to .active_jobs in the state file.
+# Arguments: $1 issue number, $2 pipeline PID, $3 worktree path,
+#            $4 title (optional), $5 repo (optional), $6 goal (optional)
+# issue and pid are passed with --argjson, so they must be valid JSON (plain
+# numbers); the remaining fields are stored as strings. started_at is taken
+# from now_iso at call time.
 daemon_track_job() {
-    local issue_num="$1" pid="$2" worktree="$3" title="${4:-}" repo="${5:-}"
-    local tmp
-    tmp=$(jq \
+    local issue_num="$1" pid="$2" worktree="$3" title="${4:-}" repo="${5:-}" goal="${6:-}"
+    locked_state_update \
         --argjson num "$issue_num" \
         --argjson pid "$pid" \
         --arg wt "$worktree" \
         --arg title "$title" \
         --arg started "$(now_iso)" \
         --arg repo "$repo" \
+        --arg goal "$goal" \
         '.active_jobs += [{
             issue: $num,
             pid: $pid,
             worktree: $wt,
             title: $title,
             started_at: $started,
-            repo: $repo
-        }]' \
-        "$STATE_FILE")
-    atomic_write_state "$tmp"
+            repo: $repo,
+            goal: $goal
+        }]'
 }
 # ─── Reap Completed Jobs ────────────────────────────────────────────────────
@@ -843,11 +1710,17 @@ daemon_reap_completed() {
         return
     fi
+    local _retry_spawned_for=""
     while IFS= read -r job; do
         local issue_num pid worktree
-        issue_num=$(echo "$job" | jq -r '.issue')
-        pid=$(echo "$job" | jq -r '.pid')
-        worktree=$(echo "$job" | jq -r '.worktree')
+        issue_num=$(echo "$job" | jq -r '.issue // empty')
+        pid=$(echo "$job" | jq -r '.pid // empty')
+        worktree=$(echo "$job" | jq -r '.worktree // empty')
+        # Skip malformed entries (corrupted state file)
+        [[ -z "$issue_num" || ! "$issue_num" =~ ^[0-9]+$ ]] && continue
+        [[ -z "$pid" || ! "$pid" =~ ^[0-9]+$ ]] && continue
         # Check if process is still running
         if kill -0 "$pid" 2>/dev/null; then
@@ -855,13 +1728,30 @@ daemon_reap_completed() {
         fi
         # Process is dead — determine exit code
+        # Note: wait returns 127 if process was already reaped (e.g., by init)
+        # In that case, check pipeline log for success/failure indicators
         local exit_code=0
         wait "$pid" 2>/dev/null || exit_code=$?
+        if [[ "$exit_code" -eq 127 ]]; then
+            # Process already reaped — check log file for real outcome
+            local issue_log="$LOG_DIR/issue-${issue_num}.log"
+            if [[ -f "$issue_log" ]]; then
+                if grep -q "Pipeline completed successfully" "$issue_log" 2>/dev/null; then
+                    exit_code=0
+                elif grep -q "Pipeline failed\|ERROR.*stage.*failed\|exited with status" "$issue_log" 2>/dev/null; then
+                    exit_code=1
+                else
+                    daemon_log WARN "Could not determine exit code for issue #${issue_num} (PID ${pid} already reaped) — marking as failure"
+                    exit_code=1
+                fi
+            else
+                exit_code=1
+            fi
+        fi
-        local started_at duration_str=""
+        local started_at duration_str="" start_epoch=0 end_epoch=0
         started_at=$(echo "$job" | jq -r '.started_at // empty')
         if [[ -n "$started_at" ]]; then
-            local start_epoch end_epoch
             # macOS date -j for parsing ISO dates (TZ=UTC to parse Z-suffix correctly)
             start_epoch=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started_at" +%s 2>/dev/null || date -d "$started_at" +%s 2>/dev/null || echo "0")
             end_epoch=$(now_epoch)
@@ -882,31 +1772,52 @@ daemon_reap_completed() {
             daemon_on_failure "$issue_num" "$exit_code" "$duration_str"
         fi
-        # Remove from active_jobs and priority lane tracking
-        local tmp
-        tmp=$(jq --argjson num "$issue_num" \
-            '.active_jobs = [.active_jobs[] | select(.issue != $num)]' \
-            "$STATE_FILE")
-        atomic_write_state "$tmp"
-        untrack_priority_job "$issue_num"
-        # Clean up worktree (skip for org-mode clones — they persist)
-        local job_repo
-        job_repo=$(echo "$job" | jq -r '.repo // ""')
-        if [[ -z "$job_repo" ]] && [[ -d "$worktree" ]]; then
-            git worktree remove "$worktree" --force 2>/dev/null || true
-            daemon_log INFO "Cleaned worktree: $worktree"
-            git branch -D "daemon/issue-${issue_num}" 2>/dev/null || true
-        elif [[ -n "$job_repo" ]]; then
-            daemon_log INFO "Org-mode: preserving clone for ${job_repo}"
-        fi
-        # Dequeue next issue if available
-        local next_issue
-        next_issue=$(dequeue_next)
-        if [[ -n "$next_issue" ]]; then
-            daemon_log INFO "Dequeuing issue #${next_issue}"
-            daemon_spawn_pipeline "$next_issue"
+        # Clean up progress tracking for this job
+        daemon_clear_progress "$issue_num"
+        # Release claim lock (label-based coordination)
+        local reap_machine_name
+        reap_machine_name=$(jq -r '.machines[] | select(.role == "primary") | .name' "$HOME/.shipwright/machines.json" 2>/dev/null || hostname -s)
+        release_claim "$issue_num" "$reap_machine_name"
+        # Skip cleanup if a retry was just spawned for this issue
+        if [[ "$_retry_spawned_for" == "$issue_num" ]]; then
+            daemon_log INFO "Retry spawned for issue #${issue_num} — skipping worktree cleanup"
+        else
+            # Remove from active_jobs and priority lane tracking (locked)
+            locked_state_update --argjson num "$issue_num" \
+                '.active_jobs = [.active_jobs[] | select(.issue != $num)]'
+            untrack_priority_job "$issue_num"
+            # Clean up worktree (skip for org-mode clones — they persist)
+            local job_repo
+            job_repo=$(echo "$job" | jq -r '.repo // ""')
+            if [[ -z "$job_repo" ]] && [[ -d "$worktree" ]]; then
+                git worktree remove "$worktree" --force 2>/dev/null || true
+                daemon_log INFO "Cleaned worktree: $worktree"
+                git branch -D "daemon/issue-${issue_num}" 2>/dev/null || true
+            elif [[ -n "$job_repo" ]]; then
+                daemon_log INFO "Org-mode: preserving clone for ${job_repo}"
+            fi
+        fi
+        # Dequeue next issue if available AND we have capacity
+        # NOTE: locked_get_active_count prevents TOCTOU race with the
+        # active_jobs removal above.  A tiny window remains between
+        # the count read and dequeue_next's own lock acquisition, but
+        # dequeue_next is itself locked, so the worst case is a
+        # missed dequeue that the next poll cycle will pick up.
+        local current_active
+        current_active=$(locked_get_active_count)
+        if [[ "$current_active" -lt "$MAX_PARALLEL" ]]; then
+            local next_issue
+            next_issue=$(dequeue_next)
+            if [[ -n "$next_issue" ]]; then
+                local next_title
+                next_title=$(jq -r --arg n "$next_issue" '.titles[$n] // ""' "$STATE_FILE" 2>/dev/null || true)
+                daemon_log INFO "Dequeuing issue #${next_issue}: ${next_title}"
+                daemon_spawn_pipeline "$next_issue" "$next_title"
+            fi
         fi
     done <<< "$jobs"
 }
@@ -918,9 +1829,23 @@ daemon_on_success() {
     daemon_log SUCCESS "Pipeline completed for issue #${issue_num} (${duration:-unknown})"
-    # Record in completed list
-    local tmp
-    tmp=$(jq \
+    # Record pipeline duration for adaptive threshold learning
+    if [[ -n "$duration" && "$duration" != "unknown" ]]; then
+        # Parse duration string back to seconds (e.g. "5m 30s" → 330)
+        local dur_secs=0
+        local _h _m _s
+        _h=$(echo "$duration" | grep -oE '[0-9]+h' | grep -oE '[0-9]+' || true)
+        _m=$(echo "$duration" | grep -oE '[0-9]+m' | grep -oE '[0-9]+' || true)
+        _s=$(echo "$duration" | grep -oE '[0-9]+s' | grep -oE '[0-9]+' || true)
+        dur_secs=$(( ${_h:-0} * 3600 + ${_m:-0} * 60 + ${_s:-0} ))
+        if [[ "$dur_secs" -gt 0 ]]; then
+            record_pipeline_duration "$PIPELINE_TEMPLATE" "$dur_secs" "success"
+            record_scaling_outcome "$MAX_PARALLEL" "success"
+        fi
+    fi
+    # Record in completed list + clear retry count for this issue
+    locked_state_update \
         --argjson num "$issue_num" \
         --arg result "success" \
         --arg dur "${duration:-unknown}" \
@@ -930,9 +1855,8 @@ daemon_on_success() {
             result: $result,
             duration: $dur,
             completed_at: $completed_at
-        }]' \
-        "$STATE_FILE")
-    atomic_write_state "$tmp"
+        }] | .completed = .completed[-500:]
+        | del(.retry_counts[($num | tostring)])'
     if [[ "$NO_GITHUB" != "true" ]]; then
         # Remove watch label, add success label
@@ -960,6 +1884,7 @@ Check the associated PR for the implementation." 2>/dev/null || true
     notify "Pipeline Complete — Issue #${issue_num}" \
         "Duration: ${duration:-unknown}" "success"
+    "$SCRIPT_DIR/sw-tracker.sh" notify "completed" "$issue_num" 2>/dev/null || true
 }
 # ─── Failure Handler ────────────────────────────────────────────────────────
@@ -969,9 +1894,22 @@ daemon_on_failure() {
     daemon_log ERROR "Pipeline failed for issue #${issue_num} (exit: ${exit_code}, ${duration:-unknown})"
+    # Record pipeline duration for adaptive threshold learning
+    if [[ -n "$duration" && "$duration" != "unknown" ]]; then
+        local dur_secs=0
+        local _h _m _s
+        _h=$(echo "$duration" | grep -oE '[0-9]+h' | grep -oE '[0-9]+' || true)
+        _m=$(echo "$duration" | grep -oE '[0-9]+m' | grep -oE '[0-9]+' || true)
+        _s=$(echo "$duration" | grep -oE '[0-9]+s' | grep -oE '[0-9]+' || true)
+        dur_secs=$(( ${_h:-0} * 3600 + ${_m:-0} * 60 + ${_s:-0} ))
+        if [[ "$dur_secs" -gt 0 ]]; then
+            record_pipeline_duration "$PIPELINE_TEMPLATE" "$dur_secs" "failure"
+            record_scaling_outcome "$MAX_PARALLEL" "failure"
+        fi
+    fi
     # Record in completed list
-    local tmp
-    tmp=$(jq \
+    locked_state_update \
         --argjson num "$issue_num" \
         --arg result "failed" \
         --argjson code "$exit_code" \
@@ -983,9 +1921,7 @@ daemon_on_failure() {
             exit_code: $code,
             duration: $dur,
             completed_at: $completed_at
-        }]' \
-        "$STATE_FILE")
-    atomic_write_state "$tmp"
+        }] | .completed = .completed[-500:]'
     # ── Auto-retry with strategy escalation ──
     if [[ "${RETRY_ESCALATION:-true}" == "true" ]]; then
@@ -996,15 +1932,32 @@ daemon_on_failure() {
         if [[ "$retry_count" -lt "${MAX_RETRIES:-2}" ]]; then
             retry_count=$((retry_count + 1))
-            # Update retry count in state
-            local tmp_state
-            tmp_state=$(jq --arg num "$issue_num" --argjson count "$retry_count" \
-                '.retry_counts[$num] = $count' "$STATE_FILE")
-            atomic_write_state "$tmp_state"
+            # Update retry count in state (locked to prevent race)
+            locked_state_update \
+                --arg num "$issue_num" --argjson count "$retry_count" \
+                '.retry_counts[$num] = $count'
             daemon_log WARN "Auto-retry #${retry_count}/${MAX_RETRIES:-2} for issue #${issue_num}"
             emit_event "daemon.retry" "issue=$issue_num" "retry=$retry_count" "max=${MAX_RETRIES:-2}"
+            # Check for checkpoint to enable resume-from-checkpoint
+            local checkpoint_args=()
+            if [[ "${CHECKPOINT_ENABLED:-true}" == "true" ]]; then
+                # Try to find worktree for this issue to check for checkpoints
+                local issue_worktree="${REPO_DIR}/.worktrees/daemon-issue-${issue_num}"
+                if [[ -d "$issue_worktree/.claude/pipeline-artifacts/checkpoints" ]]; then
+                    local latest_checkpoint=""
+                    for cp_file in "$issue_worktree/.claude/pipeline-artifacts/checkpoints"/*-checkpoint.json; do
+                        [[ -f "$cp_file" ]] && latest_checkpoint="$cp_file"
+                    done
+                    if [[ -n "$latest_checkpoint" ]]; then
+                        daemon_log INFO "Found checkpoint: $latest_checkpoint"
+                        emit_event "daemon.recovery" "issue=$issue_num" "checkpoint=$latest_checkpoint"
+                        checkpoint_args+=("--resume")
+                    fi
+                fi
+            fi
             # Build escalated pipeline args
             local retry_template="$PIPELINE_TEMPLATE"
             local retry_model="${MODEL:-opus}"
@@ -1038,12 +1991,18 @@ Pipeline failed — retrying with escalated strategy.
 _Escalation: $(if [[ "$retry_count" -eq 1 ]]; then echo "upgraded model + increased iterations"; else echo "full template + compound quality"; fi)_" 2>/dev/null || true
             fi
+            # Backoff before retry: 30s * retry_count (30s, 60s, ...)
+            local backoff_secs=$((30 * retry_count))
+            daemon_log INFO "Waiting ${backoff_secs}s before retry #${retry_count}"
+            sleep "$backoff_secs"
             # Re-spawn with escalated strategy
             local orig_template="$PIPELINE_TEMPLATE"
             local orig_model="$MODEL"
             PIPELINE_TEMPLATE="$retry_template"
             MODEL="$retry_model"
             daemon_spawn_pipeline "$issue_num" "retry-${retry_count}"
+            _retry_spawned_for="$issue_num"
             PIPELINE_TEMPLATE="$orig_template"
             MODEL="$orig_model"
             return
@@ -1099,18 +2058,66 @@ _Re-add the \`${WATCH_LABEL}\` label to retry._" 2>/dev/null || true
     notify "Pipeline Failed — Issue #${issue_num}" \
         "Exit code: ${exit_code}, Duration: ${duration:-unknown}" "error"
+    "$SCRIPT_DIR/sw-tracker.sh" notify "failed" "$issue_num" "Exit code: ${exit_code}, Duration: ${duration:-unknown}" 2>/dev/null || true
 }
 # ─── Intelligent Triage ──────────────────────────────────────────────────────
 # Score an issue from 0-100 based on multiple signals for intelligent prioritization.
 # Combines priority labels, age, complexity, dependencies, type, and memory signals.
+# When intelligence engine is enabled, uses semantic AI analysis for richer scoring.
 triage_score_issue() {
     local issue_json="$1"
     local issue_num issue_title issue_body labels_csv created_at
     issue_num=$(echo "$issue_json" | jq -r '.number')
     issue_title=$(echo "$issue_json" | jq -r '.title // ""')
     issue_body=$(echo "$issue_json" | jq -r '.body // ""')
+    # ── Intelligence-powered triage (if enabled) ──
+    if [[ "${INTELLIGENCE_ENABLED:-false}" == "true" ]] && type intelligence_analyze_issue &>/dev/null 2>&1; then
+        daemon_log INFO "Intelligence: using AI triage (intelligence enabled)"
+        local analysis
+        analysis=$(intelligence_analyze_issue "$issue_json" 2>/dev/null || echo "")
+        if [[ -n "$analysis" && "$analysis" != "{}" && "$analysis" != "null" ]]; then
+            # Extract complexity (1-10) and convert to score (0-100)
+            local ai_complexity ai_risk ai_success_prob
+            ai_complexity=$(echo "$analysis" | jq -r '.complexity // 0' 2>/dev/null || echo "0")
+            ai_risk=$(echo "$analysis" | jq -r '.risk_level // "medium"' 2>/dev/null || echo "medium")
+            ai_success_prob=$(echo "$analysis" | jq -r '.success_probability // 50' 2>/dev/null || echo "50")
+            # Store analysis for downstream use (composer, predictions)
+            export INTELLIGENCE_ANALYSIS="$analysis"
+            export INTELLIGENCE_COMPLEXITY="$ai_complexity"
+            # Convert AI analysis to triage score:
+            # Higher success probability + lower complexity = higher score (process sooner)
+            local ai_score
+            ai_score=$(( ai_success_prob - (ai_complexity * 3) ))
+            # Risk adjustment
+            case "$ai_risk" in
+                critical) ai_score=$((ai_score + 15)) ;;  # Critical = process urgently
+                high)     ai_score=$((ai_score + 10)) ;;
+                low)      ai_score=$((ai_score - 5)) ;;
+            esac
+            # Clamp
+            [[ "$ai_score" -lt 0 ]] && ai_score=0
+            [[ "$ai_score" -gt 100 ]] && ai_score=100
+            emit_event "intelligence.triage" \
+                "issue=$issue_num" \
+                "complexity=$ai_complexity" \
+                "risk=$ai_risk" \
+                "success_prob=$ai_success_prob" \
+                "score=$ai_score"
+            echo "$ai_score"
+            return
+        fi
+        # Fall through to heuristic scoring if intelligence call failed
+        daemon_log INFO "Intelligence: AI triage failed, falling back to heuristic scoring"
+    else
+        daemon_log INFO "Intelligence: using heuristic triage (intelligence disabled, enable with intelligence.enabled=true)"
+    fi
     labels_csv=$(echo "$issue_json" | jq -r '[.labels[].name] | join(",")')
     created_at=$(echo "$issue_json" | jq -r '.createdAt // ""')
@@ -1211,9 +2218,9 @@ triage_score_issue() {
     # ── 6. Memory bonus (0-10 points / -5 for prior failures) ──
     local memory_score=0
-    if [[ -x "$SCRIPT_DIR/cct-memory.sh" ]]; then
+    if [[ -x "$SCRIPT_DIR/sw-memory.sh" ]]; then
         local memory_result
-        memory_result=$("$SCRIPT_DIR/cct-memory.sh" search --issue "$issue_num" --json 2>/dev/null || true)
+        memory_result=$("$SCRIPT_DIR/sw-memory.sh" search --issue "$issue_num" --json 2>/dev/null || true)
         if [[ -n "$memory_result" ]]; then
             local prior_result
             prior_result=$(echo "$memory_result" | jq -r '.last_result // ""' 2>/dev/null || true)
@@ -1245,6 +2252,7 @@ triage_score_issue() {
 }
 # Auto-select pipeline template based on issue labels
+# When intelligence/composer is enabled, composes a custom pipeline instead of static selection.
 select_pipeline_template() {
     local labels="$1"
     local score="${2:-50}"
@@ -1255,7 +2263,57 @@ select_pipeline_template() {
         return
     fi
-    # ── Label-based overrides (highest priority) ──
+    # ── Intelligence-composed pipeline (if enabled) ──
+    if [[ "${COMPOSER_ENABLED:-false}" == "true" ]] && type composer_create_pipeline &>/dev/null 2>&1; then
+        daemon_log INFO "Intelligence: using AI pipeline composition (composer enabled)"
+        local analysis="${INTELLIGENCE_ANALYSIS:-{}}"
+        local repo_context=""
+        if [[ -f "${REPO_DIR:-}/.claude/pipeline-state.md" ]]; then
+            repo_context="has_pipeline_state"
+        fi
+        local budget_json="{}"
+        if [[ -x "$SCRIPT_DIR/sw-cost.sh" ]]; then
+            local remaining
+            remaining=$(bash "$SCRIPT_DIR/sw-cost.sh" remaining-budget 2>/dev/null || echo "")
+            if [[ -n "$remaining" ]]; then
+                budget_json="{\"remaining_usd\": $remaining}"
+            fi
+        fi
+        local composed_path
+        composed_path=$(composer_create_pipeline "$analysis" "$repo_context" "$budget_json" 2>/dev/null || echo "")
+        if [[ -n "$composed_path" && -f "$composed_path" ]]; then
+            emit_event "daemon.composed_pipeline" "labels=$labels" "score=$score"
+            echo "composed"
+            return
+        fi
+        # Fall through to static selection if composition failed
+        daemon_log INFO "Intelligence: AI pipeline composition failed, falling back to static template selection"
+    else
+        daemon_log INFO "Intelligence: using static template selection (composer disabled, enable with intelligence.composer_enabled=true)"
+    fi
+    # ── Branch protection escalation (highest priority) ──
+    if type gh_branch_protection &>/dev/null 2>&1 && [[ "${NO_GITHUB:-false}" != "true" ]]; then
+        if type _gh_detect_repo &>/dev/null 2>&1; then
+            _gh_detect_repo 2>/dev/null || true
+        fi
+        local gh_owner="${GH_OWNER:-}" gh_repo="${GH_REPO:-}"
+        if [[ -n "$gh_owner" && -n "$gh_repo" ]]; then
+            local protection
+            protection=$(gh_branch_protection "$gh_owner" "$gh_repo" "${BASE_BRANCH:-main}" 2>/dev/null || echo '{"protected": false}')
+            local strict_protection
+            strict_protection=$(echo "$protection" | jq -r '.enforce_admins.enabled // false' 2>/dev/null || echo "false")
+            local required_reviews
+            required_reviews=$(echo "$protection" | jq -r '.required_pull_request_reviews.required_approving_review_count // 0' 2>/dev/null || echo "0")
+            if [[ "$strict_protection" == "true" ]] || [[ "${required_reviews:-0}" -gt 1 ]]; then
+                daemon_log INFO "Branch has strict protection — escalating to enterprise template"
+                echo "enterprise"
+                return
+            fi
+        fi
+    fi
+    # ── Label-based overrides ──
     if echo "$labels" | grep -qi "hotfix\|incident"; then
         echo "hotfix"
         return
@@ -1363,6 +2421,16 @@ daemon_triage_show() {
     echo ""
 }
+# ─── Patrol Self-Labeling ─────────────────────────────────────────────────
+patrol_build_labels() {
+    local check_label="$1"
+    local labels="${PATROL_LABEL},${check_label}"
+    if [[ "$PATROL_AUTO_WATCH" == "true" && -n "${WATCH_LABEL:-}" ]]; then
+        labels="${labels},${WATCH_LABEL}"
+    fi
+    echo "$labels"
+}
 # ─── Proactive Patrol Mode ───────────────────────────────────────────────────
 daemon_patrol() {
@@ -1413,7 +2481,7 @@ daemon_patrol() {
                     fi
                     findings=$((findings + 1))
-                    emit_event "patrol.finding" "type=security" "severity=$severity" "package=$name"
+                    emit_event "patrol.finding" "check=security" "severity=$severity" "package=$name"
                     # Check if issue already exists
                     if [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
@@ -1434,9 +2502,9 @@ daemon_patrol() {
 | Date | $(now_iso) |
 Auto-detected by \`shipwright daemon patrol\`." \
-                                --label "security" --label "$PATROL_LABEL" 2>/dev/null || true
+                                --label "$(patrol_build_labels "security")" 2>/dev/null || true
                             issues_created=$((issues_created + 1))
-                            emit_event "patrol.issue_created" "type=security" "package=$name"
+                            emit_event "patrol.issue_created" "check=security" "package=$name"
                         fi
                     else
                         echo -e "    ${RED}●${RESET} ${BOLD}${severity}${RESET}: ${title} in ${CYAN}${name}${RESET}"
@@ -1467,6 +2535,39 @@ Auto-detected by \`shipwright daemon patrol\`." \
             fi
         fi
+        # Enrich with GitHub security alerts
+        if type gh_security_alerts &>/dev/null 2>&1 && [[ "${NO_GITHUB:-false}" != "true" ]]; then
+            if type _gh_detect_repo &>/dev/null 2>&1; then
+                _gh_detect_repo 2>/dev/null || true
+            fi
+            local gh_owner="${GH_OWNER:-}" gh_repo="${GH_REPO:-}"
+            if [[ -n "$gh_owner" && -n "$gh_repo" ]]; then
+                local gh_alerts
+                gh_alerts=$(gh_security_alerts "$gh_owner" "$gh_repo" 2>/dev/null || echo "[]")
+                local gh_alert_count
+                gh_alert_count=$(echo "$gh_alerts" | jq 'length' 2>/dev/null || echo "0")
+                if [[ "${gh_alert_count:-0}" -gt 0 ]]; then
+                    daemon_log WARN "Patrol: $gh_alert_count GitHub security alert(s) found"
+                    findings=$((findings + gh_alert_count))
+                fi
+            fi
+        fi
+        # Enrich with GitHub Dependabot alerts
+        if type gh_dependabot_alerts &>/dev/null 2>&1 && [[ "${NO_GITHUB:-false}" != "true" ]]; then
+            local gh_owner="${GH_OWNER:-}" gh_repo="${GH_REPO:-}"
+            if [[ -n "$gh_owner" && -n "$gh_repo" ]]; then
+                local dep_alerts
+                dep_alerts=$(gh_dependabot_alerts "$gh_owner" "$gh_repo" 2>/dev/null || echo "[]")
+                local dep_alert_count
+                dep_alert_count=$(echo "$dep_alerts" | jq 'length' 2>/dev/null || echo "0")
+                if [[ "${dep_alert_count:-0}" -gt 0 ]]; then
+                    daemon_log WARN "Patrol: $dep_alert_count Dependabot alert(s) found"
+                    findings=$((findings + dep_alert_count))
+                fi
+            fi
+        fi
         total_findings=$((total_findings + findings))
         if [[ "$findings" -gt 0 ]]; then
             daemon_log INFO "Patrol: found ${findings} security vulnerability(ies)"
@@ -1499,7 +2600,7 @@ Auto-detected by \`shipwright daemon patrol\`." \
                         if [[ "$diff" -ge 2 ]]; then
                             findings=$((findings + 1))
                             stale_packages="${stale_packages}\n- \`${name}\`: ${current} → ${latest} (${diff} major versions behind)"
-                            emit_event "patrol.finding" "type=stale_dependency" "package=$name" "current=$current" "latest=$latest"
+                            emit_event "patrol.finding" "check=stale_dependency" "package=$name" "current=$current" "latest=$latest"
                             if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
                                 echo -e "    ${YELLOW}●${RESET} ${CYAN}${name}${RESET}: ${current} → ${latest} (${diff} major versions behind)"
@@ -1522,9 +2623,9 @@ The following packages are 2+ major versions behind:
 $(echo -e "$stale_packages")
 Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
-                            --label "dependencies" --label "$PATROL_LABEL" 2>/dev/null || true
+                            --label "$(patrol_build_labels "dependencies")" 2>/dev/null || true
                         issues_created=$((issues_created + 1))
-                        emit_event "patrol.issue_created" "type=stale_dependency" "count=$findings"
+                        emit_event "patrol.issue_created" "check=stale_dependency" "count=$findings"
                     fi
                 fi
             fi
@@ -1586,9 +2687,9 @@ $(echo -e "$dead_files")
 > **Note:** Some files may be entry points or dynamically loaded. Verify before removing.
 Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
-                    --label "tech-debt" --label "$PATROL_LABEL" 2>/dev/null || true
+                    --label "$(patrol_build_labels "tech-debt")" 2>/dev/null || true
                 issues_created=$((issues_created + 1))
-                emit_event "patrol.issue_created" "type=dead_code" "count=$findings"
+                emit_event "patrol.issue_created" "check=dead_code" "count=$findings"
             fi
         fi
@@ -1649,9 +2750,9 @@ These files have < 50% line coverage:
 $(echo -e "$low_cov_files")
 Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
-                    --label "testing" --label "$PATROL_LABEL" 2>/dev/null || true
+                    --label "$(patrol_build_labels "testing")" 2>/dev/null || true
                 issues_created=$((issues_created + 1))
-                emit_event "patrol.issue_created" "type=coverage" "count=$findings"
+                emit_event "patrol.issue_created" "check=coverage" "count=$findings"
             fi
         fi
@@ -1694,9 +2795,49 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
                 tag_epoch=$(git log -1 --format=%ct "$latest_tag" 2>/dev/null || echo "0")
                 if [[ "$tag_epoch" -gt "$changelog_epoch" ]] && [[ "$changelog_epoch" -gt 0 ]]; then
                     findings=$((findings + 1))
-                    stale_docs="${stale_docs}\n- \`CHANGELOG.md\`: not updated since tag \`${latest_tag}\`"
+                    stale_docs="${stale_docs}\n- \`CHANGELOG.md\`: not updated since tag \`${latest_tag}\`"
+                    if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
+                        echo -e "    ${YELLOW}●${RESET} CHANGELOG.md not updated since ${latest_tag}"
+                    fi
+                fi
+            fi
+        fi
+        # Check CLAUDE.md staleness (same pattern as README)
+        if [[ -f ".claude/CLAUDE.md" ]]; then
+            local claudemd_epoch claudemd_src_epoch
+            claudemd_src_epoch=$(git log -1 --format=%ct -- "*.ts" "*.js" "*.py" "*.go" "*.rs" "*.sh" 2>/dev/null || echo "0")
+            claudemd_epoch=$(git log -1 --format=%ct -- ".claude/CLAUDE.md" 2>/dev/null || echo "0")
+            if [[ "$claudemd_src_epoch" -gt 0 ]] && [[ "$claudemd_epoch" -gt 0 ]]; then
+                local claude_drift=$((claudemd_src_epoch - claudemd_epoch))
+                if [[ "$claude_drift" -gt 2592000 ]]; then
+                    findings=$((findings + 1))
+                    local claude_days_behind=$((claude_drift / 86400))
+                    stale_docs="${stale_docs}\n- \`.claude/CLAUDE.md\`: ${claude_days_behind} days behind source code"
                     if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
-                        echo -e "    ${YELLOW}●${RESET} CHANGELOG.md not updated since ${latest_tag}"
+                        echo -e "    ${YELLOW}●${RESET} CLAUDE.md is ${claude_days_behind} days behind source code"
+                    fi
+                fi
+            fi
+        fi
+        # Check AUTO section freshness (if sw-docs.sh available)
+        if [[ -x "$SCRIPT_DIR/sw-docs.sh" ]]; then
+            local docs_stale=false
+            bash "$SCRIPT_DIR/sw-docs.sh" check >/dev/null 2>&1 || docs_stale=true
+            if [[ "$docs_stale" == "true" ]]; then
+                findings=$((findings + 1))
+                stale_docs="${stale_docs}\n- AUTO sections: some documentation sections are stale"
+                if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
+                    echo -e "    ${YELLOW}●${RESET} AUTO documentation sections are stale"
+                fi
+                # Auto-sync if not dry run
+                if [[ "$dry_run" != "true" ]] && [[ "$NO_GITHUB" != "true" ]]; then
+                    daemon_log INFO "Auto-syncing stale documentation sections"
+                    bash "$SCRIPT_DIR/sw-docs.sh" sync 2>/dev/null || true
+                    if ! git diff --quiet -- '*.md' 2>/dev/null; then
+                        git add -A '*.md' 2>/dev/null || true
+                        git commit -m "docs: auto-sync stale documentation sections" 2>/dev/null || true
                     fi
                 fi
             fi
@@ -1715,9 +2856,9 @@ The following docs may need updating:
 $(echo -e "$stale_docs")
 Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
-                    --label "documentation" --label "$PATROL_LABEL" 2>/dev/null || true
+                    --label "$(patrol_build_labels "documentation")" 2>/dev/null || true
                 issues_created=$((issues_created + 1))
-                emit_event "patrol.issue_created" "type=documentation" "count=$findings"
+                emit_event "patrol.issue_created" "check=documentation" "count=$findings"
             fi
         fi
@@ -1754,7 +2895,7 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
                 if [[ "$recent_test_dur" -gt "$threshold" ]]; then
                     total_findings=$((total_findings + 1))
                     local pct_slower=$(( (recent_test_dur - baseline_dur) * 100 / baseline_dur ))
-                    emit_event "patrol.finding" "type=performance" "baseline=${baseline_dur}s" "current=${recent_test_dur}s" "regression=${pct_slower}%"
+                    emit_event "patrol.finding" "check=performance" "baseline=${baseline_dur}s" "current=${recent_test_dur}s" "regression=${pct_slower}%"
                     if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
                         echo -e "    ${RED}●${RESET} Test suite ${pct_slower}% slower than baseline (${baseline_dur}s → ${recent_test_dur}s)"
@@ -1774,9 +2915,9 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
 | Regression | ${pct_slower}% |
 Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
-                                --label "performance" --label "$PATROL_LABEL" 2>/dev/null || true
+                                --label "$(patrol_build_labels "performance")" 2>/dev/null || true
                             issues_created=$((issues_created + 1))
-                            emit_event "patrol.issue_created" "type=performance"
+                            emit_event "patrol.issue_created" "check=performance"
                         fi
                     fi
@@ -1792,31 +2933,557 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
         daemon_log INFO "Patrol: performance baseline updated (${recent_test_dur}s)"
     }
-    # ── Run all patrol checks ──
+    # ── 7. Recurring Failure Patterns ──
+    # Reports the failure patterns returned by memory_get_actionable_failures
+    # (loaded from sw-memory.sh, called with PATROL_FAILURES_THRESHOLD).
+    # For each pattern it emits a patrol.finding event and then either files a
+    # GitHub issue (deduplicated by label + title search, capped by
+    # PATROL_MAX_ISSUES) or, in dry-run / NO_GITHUB mode, prints a summary line.
+    # Reads dry_run and reads/updates issues_created and total_findings, none of
+    # which are declared in this function (they come from the enclosing scope).
+    patrol_recurring_failures() {
+        # A threshold of 0 or less turns this check off.
+        if [[ "$PATROL_FAILURES_THRESHOLD" -le 0 ]]; then return; fi
+        daemon_log INFO "Patrol: checking recurring failure patterns"
+        local findings=0
+        # Source memory functions if available
+        local memory_script="$SCRIPT_DIR/sw-memory.sh"
+        if [[ ! -f "$memory_script" ]]; then
+            daemon_log INFO "Patrol: memory script not found — skipping recurring failures"
+            return
+        fi
+        # Get actionable failures from memory
+        # Note: sw-memory.sh runs its CLI router on source, so we must redirect
+        # the source's stdout to /dev/null and only capture the function's output
+        # The inner ( ... ) is a subshell, so whatever the sourced script defines
+        # or changes does not persist in this shell.
+        local failures_json
+        failures_json=$(
+            (
+                source "$memory_script" > /dev/null 2>&1 || true
+                if command -v memory_get_actionable_failures &>/dev/null; then
+                    memory_get_actionable_failures "$PATROL_FAILURES_THRESHOLD"
+                else
+                    # Function unavailable: behave as if there were no failures.
+                    echo "[]"
+                fi
+            )
+        )
+        # Number of entries in the returned JSON array; falls back to 0 when jq
+        # fails or prints nothing.
+        local count
+        count=$(echo "$failures_json" | jq 'length' 2>/dev/null || echo "0")
+        if [[ "${count:-0}" -eq 0 ]]; then
+            daemon_log INFO "Patrol: no recurring failures above threshold ($PATROL_FAILURES_THRESHOLD)"
+            return
+        fi
+        # One iteration per array element (jq -c '.[]' prints each on its own line).
+        while IFS= read -r failure; do
+            local pattern stage seen_count last_seen root_cause
+            pattern=$(echo "$failure" | jq -r '.pattern // "unknown"')
+            stage=$(echo "$failure" | jq -r '.stage // "unknown"')
+            seen_count=$(echo "$failure" | jq -r '.seen_count // 0')
+            last_seen=$(echo "$failure" | jq -r '.last_seen // "unknown"')
+            root_cause=$(echo "$failure" | jq -r '.root_cause // "Not yet identified"')
+            # Truncate pattern for title (first 60 chars)
+            local short_pattern
+            short_pattern=$(echo "$pattern" | cut -c1-60)
+            findings=$((findings + 1))
+            emit_event "patrol.finding" "check=recurring_failure" "pattern=$short_pattern" "seen_count=$seen_count"
+            if [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
+                # Deduplicate
+                # `existing` is the number of issues carrying both labels that match
+                # the title search; a failed gh call is treated as 0 matches.
+                local existing
+                existing=$(gh issue list --label "$PATROL_LABEL" --label "recurring-failure" \
+                    --search "Fix recurring: ${short_pattern}" --json number -q 'length' 2>/dev/null || echo "0")
+                # Create only when nothing matched and the issue budget is not used up.
+                if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
+                    gh issue create \
+                        --title "Fix recurring: ${short_pattern}" \
+                        --body "## Recurring Failure Pattern
+| Field | Value |
+|-------|-------|
+| Stage | \`${stage}\` |
+| Pattern | \`${pattern}\` |
+| Seen count | **${seen_count}** |
+| Last seen | ${last_seen} |
+| Root cause | ${root_cause} |
+| Found by | Shipwright patrol |
+| Date | $(now_iso) |
+### Suggested Actions
+- Investigate the root cause in the \`${stage}\` stage
+- Check if recent changes introduced the failure
+- Add a targeted test to prevent regression
+Auto-detected by \`shipwright daemon patrol\`." \
+                        --label "$(patrol_build_labels "recurring-failure")" 2>/dev/null || true
+                    # NOTE(review): because of the `|| true` above, the counter and the
+                    # event below also fire when `gh issue create` fails.
+                    issues_created=$((issues_created + 1))
+                    emit_event "patrol.issue_created" "check=recurring_failure" "pattern=$short_pattern"
+                fi
+            else
+                echo -e "    ${RED}●${RESET} ${BOLD}recurring${RESET}: ${short_pattern} (${seen_count}x in ${CYAN}${stage}${RESET})"
+            fi
+        # Process substitution (not a pipe) keeps the loop in the current shell, so
+        # the findings / issues_created updates survive the loop.
+        done < <(echo "$failures_json" | jq -c '.[]' 2>/dev/null)
+        total_findings=$((total_findings + findings))
+        daemon_log INFO "Patrol: found ${findings} recurring failure pattern(s)"
+    }
+    # ── 8. DORA Metric Degradation ──
+    # Compares metrics derived from "pipeline.completed" events in EVENTS_FILE
+    # for the most recent 7 days against the 7 days before that. Each metric
+    # (deploy_freq, cfr, cycle_time) is graded Elite/High/Medium/Low; a metric
+    # counts as degraded when its grade rank is lower in the current window.
+    # Any degradation is recorded as a single finding and either filed as one
+    # GitHub issue or, in dry-run / NO_GITHUB mode, printed.
+    # Skipped when PATROL_DORA_ENABLED is not "true", when EVENTS_FILE is
+    # missing, or when either window has fewer than 3 completed pipelines.
+    # Reads dry_run and reads/updates issues_created and total_findings, none of
+    # which are declared in this function (they come from the enclosing scope).
+    patrol_dora_degradation() {
+        if [[ "$PATROL_DORA_ENABLED" != "true" ]]; then return; fi
+        daemon_log INFO "Patrol: checking DORA metric degradation"
+        if [[ ! -f "$EVENTS_FILE" ]]; then
+            daemon_log INFO "Patrol: no events file — skipping DORA check"
+            return
+        fi
+        local now_e
+        now_e=$(now_epoch)
+        # Current 7-day window
+        # 604800 s = 7 days
+        local current_start=$((now_e - 604800))
+        # Previous 7-day window
+        # 1209600 s = 14 days; the previous window ends where the current one starts
+        local prev_start=$((now_e - 1209600))
+        local prev_end=$current_start
+        # Get events for both windows
+        # jq -s slurps every event in the file into one array before filtering on
+        # .ts_epoch; a jq failure yields an empty array.
+        local current_events prev_events
+        current_events=$(jq -s --argjson start "$current_start" \
+            '[.[] | select(.ts_epoch >= $start)]' "$EVENTS_FILE" 2>/dev/null || echo "[]")
+        prev_events=$(jq -s --argjson start "$prev_start" --argjson end "$prev_end" \
+            '[.[] | select(.ts_epoch >= $start and .ts_epoch < $end)]' "$EVENTS_FILE" 2>/dev/null || echo "[]")
+        # Helper: calculate DORA metrics from an event set
+        # $1 is a JSON array of events; prints a JSON object with the keys
+        # deploy_freq, cfr, cycle_time and total.
+        calc_dora() {
+            local events="$1"
+            local total successes failures
+            total=$(echo "$events" | jq '[.[] | select(.type == "pipeline.completed")] | length' 2>/dev/null || echo "0")
+            successes=$(echo "$events" | jq '[.[] | select(.type == "pipeline.completed" and .result == "success")] | length' 2>/dev/null || echo "0")
+            failures=$(echo "$events" | jq '[.[] | select(.type == "pipeline.completed" and .result == "failure")] | length' 2>/dev/null || echo "0")
+            # The divisor is 7/7 = 1, so deploy_freq equals the number of successful
+            # pipelines in the window (one decimal place).
+            local deploy_freq="0"
+            [[ "$total" -gt 0 ]] && deploy_freq=$(echo "$successes 7" | awk '{printf "%.1f", $1 / ($2 / 7)}')
+            # Change failure rate: failures as a percentage of completed pipelines.
+            local cfr="0"
+            [[ "$total" -gt 0 ]] && cfr=$(echo "$failures $total" | awk '{printf "%.1f", ($1 / $2) * 100}')
+            # Median-style pick: the element at index floor(length/2) of the sorted
+            # .duration_s values of successful pipelines, or 0 when there are none.
+            # NOTE(review): the result is later compared with integer `-lt` tests in
+            # local_dora_grade — assumes .duration_s is an integer; confirm.
+            local cycle_time="0"
+            cycle_time=$(echo "$events" | jq '[.[] | select(.type == "pipeline.completed" and .result == "success") | .duration_s] | sort | if length > 0 then .[length/2 | floor] else 0 end' 2>/dev/null || echo "0")
+            echo "{\"deploy_freq\":$deploy_freq,\"cfr\":$cfr,\"cycle_time\":$cycle_time,\"total\":$total}"
+        }
+        local current_metrics prev_metrics
+        current_metrics=$(calc_dora "$current_events")
+        prev_metrics=$(calc_dora "$prev_events")
+        local prev_total
+        prev_total=$(echo "$prev_metrics" | jq '.total' 2>/dev/null || echo "0")
+        local current_total
+        current_total=$(echo "$current_metrics" | jq '.total' 2>/dev/null || echo "0")
+        # Need data in both windows to compare
+        if [[ "${prev_total:-0}" -lt 3 ]] || [[ "${current_total:-0}" -lt 3 ]]; then
+            daemon_log INFO "Patrol: insufficient data for DORA comparison (prev=$prev_total, current=$current_total)"
+            return
+        fi
+        # Grade each metric using dora_grade (defined in daemon_metrics, redefined here inline)
+        # $1 = metric name, $2 = value; prints Elite, High, Medium or Low.
+        # `awk "BEGIN{exit !(cond)}"` exits 0 when cond holds, so each `if` branch is
+        # taken when the (possibly fractional) comparison is true.
+        local_dora_grade() {
+            local metric="$1" value="$2"
+            case "$metric" in
+                deploy_freq)
+                    # Higher is better: >= 7 Elite, >= 1 High, >= 0.25 Medium.
+                    if awk "BEGIN{exit !($value >= 7)}" 2>/dev/null; then echo "Elite"; return; fi
+                    if awk "BEGIN{exit !($value >= 1)}" 2>/dev/null; then echo "High"; return; fi
+                    if awk "BEGIN{exit !($value >= 0.25)}" 2>/dev/null; then echo "Medium"; return; fi
+                    echo "Low" ;;
+                cfr)
+                    # Lower is better (percent): < 5 Elite, < 10 High, < 15 Medium.
+                    if awk "BEGIN{exit !($value < 5)}" 2>/dev/null; then echo "Elite"; return; fi
+                    if awk "BEGIN{exit !($value < 10)}" 2>/dev/null; then echo "High"; return; fi
+                    if awk "BEGIN{exit !($value < 15)}" 2>/dev/null; then echo "Medium"; return; fi
+                    echo "Low" ;;
+                cycle_time)
+                    # Lower is better (seconds): < 3600 (1 h) Elite, < 86400 (1 day) High,
+                    # < 604800 (7 days) Medium.
+                    [[ "$value" -lt 3600 ]] && echo "Elite" && return
+                    [[ "$value" -lt 86400 ]] && echo "High" && return
+                    [[ "$value" -lt 604800 ]] && echo "Medium" && return
+                    echo "Low" ;;
+            esac
+        }
+        # Maps a grade name to a number so grades can be compared with -lt;
+        # anything unrecognised ranks 0.
+        grade_rank() {
+            case "$1" in
+                Elite) echo 4 ;; High) echo 3 ;; Medium) echo 2 ;; Low) echo 1 ;; *) echo 0 ;;
+            esac
+        }
+        # degraded_metrics: space-separated metric names (each followed by a space).
+        # degradation_details: markdown table rows, each prefixed with a literal \n
+        # that is expanded later by `echo -e`.
+        local degraded_metrics=""
+        local degradation_details=""
+        # Check deploy frequency
+        local prev_df curr_df
+        prev_df=$(echo "$prev_metrics" | jq -r '.deploy_freq')
+        curr_df=$(echo "$current_metrics" | jq -r '.deploy_freq')
+        local prev_df_grade curr_df_grade
+        prev_df_grade=$(local_dora_grade deploy_freq "$prev_df")
+        curr_df_grade=$(local_dora_grade deploy_freq "$curr_df")
+        if [[ "$(grade_rank "$curr_df_grade")" -lt "$(grade_rank "$prev_df_grade")" ]]; then
+            degraded_metrics="${degraded_metrics}deploy_freq "
+            degradation_details="${degradation_details}\n| Deploy Frequency | ${prev_df_grade} (${prev_df}/wk) | ${curr_df_grade} (${curr_df}/wk) | Check for blocked PRs, increase automation |"
+        fi
+        # Check CFR
+        local prev_cfr curr_cfr
+        prev_cfr=$(echo "$prev_metrics" | jq -r '.cfr')
+        curr_cfr=$(echo "$current_metrics" | jq -r '.cfr')
+        local prev_cfr_grade curr_cfr_grade
+        prev_cfr_grade=$(local_dora_grade cfr "$prev_cfr")
+        curr_cfr_grade=$(local_dora_grade cfr "$curr_cfr")
+        if [[ "$(grade_rank "$curr_cfr_grade")" -lt "$(grade_rank "$prev_cfr_grade")" ]]; then
+            degraded_metrics="${degraded_metrics}cfr "
+            degradation_details="${degradation_details}\n| Change Failure Rate | ${prev_cfr_grade} (${prev_cfr}%) | ${curr_cfr_grade} (${curr_cfr}%) | Investigate recent failures, improve test coverage |"
+        fi
+        # Check Cycle Time
+        local prev_ct curr_ct
+        prev_ct=$(echo "$prev_metrics" | jq -r '.cycle_time')
+        curr_ct=$(echo "$current_metrics" | jq -r '.cycle_time')
+        local prev_ct_grade curr_ct_grade
+        prev_ct_grade=$(local_dora_grade cycle_time "$prev_ct")
+        curr_ct_grade=$(local_dora_grade cycle_time "$curr_ct")
+        if [[ "$(grade_rank "$curr_ct_grade")" -lt "$(grade_rank "$prev_ct_grade")" ]]; then
+            degraded_metrics="${degraded_metrics}cycle_time "
+            degradation_details="${degradation_details}\n| Cycle Time | ${prev_ct_grade} (${prev_ct}s) | ${curr_ct_grade} (${curr_ct}s) | Profile slow stages, check for new slow tests |"
+        fi
+        if [[ -z "$degraded_metrics" ]]; then
+            daemon_log INFO "Patrol: no DORA degradation detected"
+            return
+        fi
+        # Counted as exactly one finding, however many metrics degraded.
+        local findings=0
+        findings=1
+        total_findings=$((total_findings + findings))
+        emit_event "patrol.finding" "check=dora_regression" "metrics=$degraded_metrics"
+        if [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
+            # Drop the trailing space, then turn the remaining separators into commas.
+            local trimmed
+            trimmed=$(echo "$degraded_metrics" | sed 's/ *$//' | tr ' ' ',')
+            # Number of issues carrying both labels that match the search; a failed
+            # gh call is treated as 0 matches.
+            local existing
+            existing=$(gh issue list --label "$PATROL_LABEL" --label "dora-regression" \
+                --search "DORA regression" --json number -q 'length' 2>/dev/null || echo "0")
+            if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
+                gh issue create \
+                    --title "DORA regression: ${trimmed}" \
+                    --body "## DORA Metric Degradation
+| Metric | Previous (7d) | Current (7d) | Suggested Action |
+|--------|---------------|--------------|------------------|$(echo -e "$degradation_details")
+> Compared: previous 7-day window vs current 7-day window.
+Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
+                    --label "$(patrol_build_labels "dora-regression")" 2>/dev/null || true
+                # NOTE(review): because of the `|| true` above, the counter and the
+                # event below also fire when `gh issue create` fails.
+                issues_created=$((issues_created + 1))
+                emit_event "patrol.issue_created" "check=dora_regression" "metrics=$trimmed"
+            fi
+        else
+            # Dry-run / NO_GITHUB: print the space-separated metric names instead.
+            local trimmed
+            trimmed=$(echo "$degraded_metrics" | sed 's/ *$//')
+            echo -e "    ${RED}●${RESET} ${BOLD}DORA regression${RESET}: ${trimmed}"
+        fi
+        daemon_log INFO "Patrol: DORA degradation detected in: ${degraded_metrics}"
+    }
+    # ── 9. Untested Scripts ──
+    # Looks at the top level of SCRIPT_DIR for sw-*.sh scripts that have no
+    # sibling sw-<name>-test.sh. Untested scripts are ranked by how many other
+    # sw-*.sh files mention them; the ten most-referenced are emitted as
+    # patrol.finding events and listed in a single GitHub issue (or printed in
+    # dry-run / NO_GITHUB mode).
+    # Skipped when PATROL_UNTESTED_ENABLED is not "true".
+    # Reads dry_run and reads/updates issues_created and total_findings, none of
+    # which are declared in this function (they come from the enclosing scope).
+    patrol_untested_scripts() {
+        if [[ "$PATROL_UNTESTED_ENABLED" != "true" ]]; then return; fi
+        daemon_log INFO "Patrol: checking for untested scripts"
+        local findings=0
+        local untested_list=""
+        local scripts_dir="$SCRIPT_DIR"
+        if [[ ! -d "$scripts_dir" ]]; then
+            daemon_log INFO "Patrol: scripts directory not found — skipping"
+            return
+        fi
+        # Collect untested scripts with usage counts
+        # Entries are "usage|basename|lines", joined by a literal \n that is
+        # expanded later by `echo -e`.
+        local untested_entries=""
+        while IFS= read -r script; do
+            # The variable named `basename` does not affect the `basename` command
+            # invoked on the next line.
+            local basename
+            basename=$(basename "$script")
+            # Skip test scripts themselves
+            [[ "$basename" == *-test.sh ]] && continue
+            # Skip the main CLI router
+            # (the find filter below only yields sw-*.sh names, so this is a
+            # defensive guard)
+            [[ "$basename" == "sw" ]] && continue
+            # Extract the name part (sw-NAME.sh -> NAME)
+            local name
+            name=$(echo "$basename" | sed 's/^sw-//' | sed 's/\.sh$//')
+            # Check if a test file exists
+            if [[ ! -f "$scripts_dir/sw-${name}-test.sh" ]]; then
+                # Count usage across other scripts
+                # grep -rl lists the sw-*.sh files containing "sw-<name>"; grep -cv
+                # counts the listed paths that do not match this script's own name.
+                # `|| true` absorbs the pipeline's non-zero status when that count is 0.
+                local usage_count
+                usage_count=$(grep -rl "sw-${name}" "$scripts_dir"/sw-*.sh 2>/dev/null | grep -cv "$basename" || true)
+                usage_count=${usage_count:-0}
+                local line_count
+                line_count=$(wc -l < "$script" 2>/dev/null | tr -d ' ')
+                untested_entries="${untested_entries}${usage_count}|${basename}|${line_count}\n"
+                findings=$((findings + 1))
+            fi
+        # Process substitution keeps the loop in the current shell so `findings`
+        # and `untested_entries` survive it.
+        done < <(find "$scripts_dir" -maxdepth 1 -name "sw-*.sh" -type f 2>/dev/null | sort)
+        if [[ "$findings" -eq 0 ]]; then
+            daemon_log INFO "Patrol: all scripts have test files"
+            return
+        fi
+        # Sort by usage count descending
+        # Only the first 10 entries are kept for reporting.
+        local sorted_entries
+        sorted_entries=$(echo -e "$untested_entries" | sort -t'|' -k1 -rn | head -10)
+        while IFS='|' read -r usage_count basename line_count; do
+            # Skip the blank line produced by the trailing \n in untested_entries.
+            [[ -z "$basename" ]] && continue
+            untested_list="${untested_list}\n- \`${basename}\` (${line_count} lines, referenced by ${usage_count} scripts)"
+            emit_event "patrol.finding" "check=untested_script" "script=$basename" "lines=$line_count" "usage=$usage_count"
+            if [[ "$dry_run" == "true" ]] || [[ "$NO_GITHUB" == "true" ]]; then
+                echo -e "    ${YELLOW}●${RESET} ${CYAN}${basename}${RESET} (${line_count} lines, ${usage_count} refs)"
+            fi
+        done <<< "$sorted_entries"
+        # `findings` counts every untested script, not just the (up to) 10 listed,
+        # so the total and the issue title can exceed the number of list entries.
+        total_findings=$((total_findings + findings))
+        if [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
+            # Number of issues carrying both labels that match the search; a failed
+            # gh call is treated as 0 matches.
+            local existing
+            existing=$(gh issue list --label "$PATROL_LABEL" --label "test-coverage" \
+                --search "Add tests for untested scripts" --json number -q 'length' 2>/dev/null || echo "0")
+            if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
+                gh issue create \
+                    --title "Add tests for ${findings} untested script(s)" \
+                    --body "## Untested Scripts
+The following scripts have no corresponding test file (\`sw-*-test.sh\`):
+$(echo -e "$untested_list")
+### How to Add Tests
+Each test file should follow the pattern in existing test scripts (e.g., \`sw-daemon-test.sh\`):
+- Mock environment with TEMP_DIR
+- PASS/FAIL counters
+- \`run_test\` harness
+- Register in \`package.json\` test script
+Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
+                    --label "$(patrol_build_labels "test-coverage")" 2>/dev/null || true
+                # NOTE(review): because of the `|| true` above, the counter and the
+                # event below also fire when `gh issue create` fails.
+                issues_created=$((issues_created + 1))
+                # NOTE(review): this event uses check=untested_scripts (plural) while the
+                # per-script finding above uses check=untested_script — confirm that
+                # event consumers expect both spellings.
+                emit_event "patrol.issue_created" "check=untested_scripts" "count=$findings"
+            fi
+        fi
+        daemon_log INFO "Patrol: found ${findings} untested script(s)"
+    }
+    # ── 10. Retry Exhaustion Patterns ──
+    # Reports a reliability finding when the number of "daemon.retry_exhausted"
+    # events recorded in $EVENTS_FILE during the last 7 days reaches
+    # $PATROL_RETRY_THRESHOLD.
+    #
+    # Reads:   PATROL_RETRY_ENABLED, PATROL_RETRY_THRESHOLD, EVENTS_FILE,
+    #          NO_GITHUB, PATROL_LABEL, PATROL_MAX_ISSUES, and dry_run
+    #          (dry_run is not declared here; it comes from the calling scope).
+    # Updates: total_findings and issues_created (also from the calling scope).
+    # Outputs: log lines via daemon_log, events via emit_event, and either a
+    #          GitHub issue (through gh) or a console line when GitHub access
+    #          is disabled or this is a dry run.
+    patrol_retry_exhaustion() {
+        if [[ "$PATROL_RETRY_ENABLED" != "true" ]]; then return; fi
+        daemon_log INFO "Patrol: checking retry exhaustion patterns"
+        local findings=0
+        if [[ ! -f "$EVENTS_FILE" ]]; then
+            daemon_log INFO "Patrol: no events file — skipping retry check"
+            return
+        fi
+        local seven_days_ago
+        seven_days_ago=$(($(now_epoch) - 604800))  # 604800 s = 7 days
+        # Find retry_exhausted events in last 7 days (events file is slurped as a JSON stream)
+        local exhausted_events
+        exhausted_events=$(jq -s --argjson since "$seven_days_ago" \
+            '[.[] | select(.type == "daemon.retry_exhausted" and (.ts_epoch // 0) >= $since)]' \
+            "$EVENTS_FILE" 2>/dev/null || echo "[]")
+        local exhausted_count
+        exhausted_count=$(echo "$exhausted_events" | jq 'length' 2>/dev/null || echo "0")
+        # jq can emit partial or multi-line output before failing; anything that is
+        # not a plain non-negative integer would break the numeric test below.
+        [[ "$exhausted_count" =~ ^[0-9]+$ ]] || exhausted_count=0
+        if [[ "$exhausted_count" -lt "$PATROL_RETRY_THRESHOLD" ]]; then
+            daemon_log INFO "Patrol: retry exhaustions ($exhausted_count) below threshold ($PATROL_RETRY_THRESHOLD)"
+            return
+        fi
+        # The whole pattern counts as a single finding, however many events matched
+        findings=1
+        total_findings=$((total_findings + findings))
+        # Get unique issue patterns
+        local issue_list
+        issue_list=$(echo "$exhausted_events" | jq -r '[.[] | .issue // "unknown"] | unique | join(", ")' 2>/dev/null || echo "unknown")
+        local first_ts last_ts
+        first_ts=$(echo "$exhausted_events" | jq -r '[.[] | .ts] | sort | first // "unknown"' 2>/dev/null || echo "unknown")
+        last_ts=$(echo "$exhausted_events" | jq -r '[.[] | .ts] | sort | last // "unknown"' 2>/dev/null || echo "unknown")
+        emit_event "patrol.finding" "check=retry_exhaustion" "count=$exhausted_count" "issues=$issue_list"
+        if [[ "$NO_GITHUB" != "true" ]] && [[ "$dry_run" != "true" ]]; then
+            # Skip creation when a matching patrol issue is already listed
+            local existing
+            existing=$(gh issue list --label "$PATROL_LABEL" --label "reliability" \
+                --search "Retry exhaustion pattern" --json number -q 'length' 2>/dev/null || echo "0")
+            if [[ "${existing:-0}" -eq 0 ]] && [[ "$issues_created" -lt "$PATROL_MAX_ISSUES" ]]; then
+                # Count the issue and emit the event only when gh really created it;
+                # a failed call must not use up the PATROL_MAX_ISSUES budget.
+                if gh issue create \
+                    --title "Retry exhaustion pattern (${exhausted_count} in 7 days)" \
+                    --body "## Retry Exhaustion Pattern
+| Field | Value |
+|-------|-------|
+| Exhaustions (7d) | **${exhausted_count}** |
+| Threshold | ${PATROL_RETRY_THRESHOLD} |
+| Affected issues | ${issue_list} |
+| First occurrence | ${first_ts} |
+| Latest occurrence | ${last_ts} |
+### Investigation Steps
+1. Check the affected issues for common patterns
+2. Review pipeline logs for root cause
+3. Consider if max_retries needs adjustment
+4. Investigate if an external dependency is flaky
+Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
+                    --label "$(patrol_build_labels "reliability")" 2>/dev/null; then
+                    issues_created=$((issues_created + 1))
+                    emit_event "patrol.issue_created" "check=retry_exhaustion" "count=$exhausted_count"
+                else
+                    # Best-effort: a GitHub failure is logged but never aborts the patrol
+                    daemon_log WARN "Patrol: failed to create retry exhaustion issue"
+                fi
+            fi
+        else
+            echo -e "    ${RED}●${RESET} ${BOLD}retry exhaustion${RESET}: ${exhausted_count} exhaustions in 7 days (issues: ${issue_list})"
+        fi
+        daemon_log INFO "Patrol: found retry exhaustion pattern (${exhausted_count} in 7 days)"
+    }
+    # ── Stage 1: Run all grep-based patrol checks (fast pre-filter) ──
+    local patrol_findings_summary=""
+    local pre_check_findings=0
     echo -e "  ${BOLD}Security Audit${RESET}"
+    pre_check_findings=$total_findings
     patrol_security_audit
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}security: $((total_findings - pre_check_findings)) finding(s); "
+    fi
     echo ""
     echo -e "  ${BOLD}Stale Dependencies${RESET}"
+    pre_check_findings=$total_findings
     patrol_stale_dependencies
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}stale_deps: $((total_findings - pre_check_findings)) finding(s); "
+    fi
     echo ""
     echo -e "  ${BOLD}Dead Code Detection${RESET}"
+    pre_check_findings=$total_findings
     patrol_dead_code
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}dead_code: $((total_findings - pre_check_findings)) finding(s); "
+    fi
     echo ""
     echo -e "  ${BOLD}Test Coverage Gaps${RESET}"
+    pre_check_findings=$total_findings
     patrol_coverage_gaps
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}coverage: $((total_findings - pre_check_findings)) finding(s); "
+    fi
     echo ""
     echo -e "  ${BOLD}Documentation Staleness${RESET}"
+    pre_check_findings=$total_findings
     patrol_doc_staleness
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}docs: $((total_findings - pre_check_findings)) finding(s); "
+    fi
     echo ""
     echo -e "  ${BOLD}Performance Baseline${RESET}"
+    pre_check_findings=$total_findings
     patrol_performance_baseline
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}performance: $((total_findings - pre_check_findings)) finding(s); "
+    fi
+    echo ""
+    echo -e "  ${BOLD}Recurring Failures${RESET}"
+    pre_check_findings=$total_findings
+    patrol_recurring_failures
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}recurring_failures: $((total_findings - pre_check_findings)) finding(s); "
+    fi
+    echo ""
+    echo -e "  ${BOLD}DORA Degradation${RESET}"
+    pre_check_findings=$total_findings
+    patrol_dora_degradation
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}dora: $((total_findings - pre_check_findings)) finding(s); "
+    fi
+    echo ""
+    echo -e "  ${BOLD}Untested Scripts${RESET}"
+    pre_check_findings=$total_findings
+    patrol_untested_scripts
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}untested: $((total_findings - pre_check_findings)) finding(s); "
+    fi
+    echo ""
+    echo -e "  ${BOLD}Retry Exhaustion${RESET}"
+    pre_check_findings=$total_findings
+    patrol_retry_exhaustion
+    if [[ "$total_findings" -gt "$pre_check_findings" ]]; then
+        patrol_findings_summary="${patrol_findings_summary}retry_exhaustion: $((total_findings - pre_check_findings)) finding(s); "
+    fi
     echo ""
+    # ── Stage 2: AI-Powered Confirmation (if enabled) ──
+    if [[ "${PREDICTION_ENABLED:-false}" == "true" ]] && type patrol_ai_analyze &>/dev/null 2>&1; then
+        daemon_log INFO "Intelligence: using AI patrol analysis (prediction enabled)"
+        echo -e "  ${BOLD}AI Deep Analysis${RESET}"
+        # Sample recent source files for AI analysis
+        local sample_files=""
+        local git_log_recent=""
+        sample_files=$(git diff --name-only HEAD~5 2>/dev/null | head -10 | tr '\n' ',' || echo "")
+        git_log_recent=$(git log --oneline -10 2>/dev/null || echo "")
+        # Include grep-based findings summary as context for AI confirmation
+        if [[ -n "$patrol_findings_summary" ]]; then
+            git_log_recent="${git_log_recent}
+Patrol pre-filter findings to confirm: ${patrol_findings_summary}"
+            daemon_log INFO "Patrol: passing ${total_findings} grep findings to AI for confirmation"
+        fi
+        if [[ -n "$sample_files" ]]; then
+            local ai_findings
+            ai_findings=$(patrol_ai_analyze "$sample_files" "$git_log_recent" 2>/dev/null || echo "[]")
+            if [[ -n "$ai_findings" && "$ai_findings" != "[]" ]]; then
+                local ai_count
+                ai_count=$(echo "$ai_findings" | jq 'length' 2>/dev/null || echo "0")
+                ai_count=${ai_count:-0}
+                total_findings=$((total_findings + ai_count))
+                echo -e "    ${CYAN}●${RESET} AI confirmed findings + found ${ai_count} additional issue(s)"
+                emit_event "patrol.ai_analysis" "findings=$ai_count" "grep_findings=${patrol_findings_summary:-none}"
+            else
+                echo -e "    ${GREEN}●${RESET} AI analysis: grep findings confirmed, no additional issues"
+            fi
+        fi
+        echo ""
+    else
+        daemon_log INFO "Intelligence: using grep-only patrol (prediction disabled, enable with intelligence.prediction_enabled=true)"
+    fi
+    # ── Meta Self-Improvement Patrol ──
+    if [[ -f "$SCRIPT_DIR/sw-patrol-meta.sh" ]]; then
+        # shellcheck source=sw-patrol-meta.sh
+        source "$SCRIPT_DIR/sw-patrol-meta.sh"
+        patrol_meta_run
+    fi
     # ── Summary ──
     emit_event "patrol.completed" "findings=$total_findings" "issues_created=$issues_created" "dry_run=$dry_run"
@@ -1829,6 +3496,9 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
     echo ""
     daemon_log INFO "Patrol complete: ${total_findings} findings, ${issues_created} issues created"
+    # Adapt patrol limits based on hit rate
+    adapt_patrol_limits "$total_findings" "$PATROL_MAX_ISSUES"
 }
 # ─── Poll Issues ─────────────────────────────────────────────────────────────
@@ -1839,6 +3509,18 @@ daemon_poll_issues() {
         return
     fi
+    # Check for pause flag (set by dashboard or disk_low alert)
+    if [[ -f "$HOME/.shipwright/daemon-pause.flag" ]]; then
+        daemon_log INFO "Daemon paused — skipping poll"
+        return
+    fi
+    # Circuit breaker: skip poll if in backoff window
+    if gh_rate_limited; then
+        daemon_log INFO "Polling skipped (rate-limit backoff until $(epoch_to_iso "$GH_BACKOFF_UNTIL"))"
+        return
+    fi
     local issues_json
     # Select gh command wrapper: gh_retry for critical poll calls when enabled
@@ -1865,6 +3547,7 @@ daemon_poll_issues() {
                 fi
             fi
             daemon_log WARN "GitHub API error (org search) — backing off ${BACKOFF_SECS}s"
+            gh_record_failure
             sleep "$BACKOFF_SECS"
             return
         }
@@ -1891,6 +3574,7 @@ daemon_poll_issues() {
                 fi
             fi
             daemon_log WARN "GitHub API error — backing off ${BACKOFF_SECS}s"
+            gh_record_failure
             sleep "$BACKOFF_SECS"
             return
         }
@@ -1898,6 +3582,7 @@ daemon_poll_issues() {
     # Reset backoff on success
     BACKOFF_SECS=0
+    gh_record_success
     local issue_count
     issue_count=$(echo "$issues_json" | jq 'length' 2>/dev/null || echo 0)
@@ -1913,6 +3598,7 @@ daemon_poll_issues() {
     # Score each issue using intelligent triage and sort by descending score
     local scored_issues=()
+    local dep_graph=""  # "issue:dep1,dep2" entries for dependency ordering
     while IFS= read -r issue; do
         local num score
         num=$(echo "$issue" | jq -r '.number')
@@ -1923,14 +3609,85 @@ daemon_poll_issues() {
             repo_name=$(echo "$issue" | jq -r '.repository.nameWithOwner // ""')
         fi
         scored_issues+=("${score}|${num}|${repo_name}")
+        # Issue dependency detection (adaptive: extract "depends on #X", "blocked by #X")
+        if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]]; then
+            local issue_text
+            issue_text=$(echo "$issue" | jq -r '(.title // "") + " " + (.body // "")')
+            local deps
+            deps=$(extract_issue_dependencies "$issue_text")
+            if [[ -n "$deps" ]]; then
+                local dep_nums
+                dep_nums=$(echo "$deps" | tr -d '#' | tr '\n' ',' | sed 's/,$//')
+                dep_graph="${dep_graph}${num}:${dep_nums}\n"
+                daemon_log INFO "Issue #${num} depends on: ${deps//$'\n'/, }"
+            fi
+        fi
     done < <(echo "$issues_json" | jq -c '.[]')
-    # Sort by score descending
+    # Sort by score — strategy determines ascending vs descending
     local sorted_order
-    sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1 -rn)
+    if [[ "${PRIORITY_STRATEGY:-quick-wins-first}" == "complex-first" ]]; then
+        # Complex-first: lower score (more complex) first
+        sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1 -n)
+    else
+        # Quick-wins-first (default): higher score (simpler) first
+        sorted_order=$(printf '%s\n' "${scored_issues[@]}" | sort -t'|' -k1 -rn)
+    fi
+    # Dependency-aware reordering: move dependencies before dependents
+    if [[ -n "$dep_graph" && "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]]; then
+        local reordered=""
+        local scheduled=""
+        # Multiple passes to resolve transitive dependencies (max 3)
+        local pass=0
+        while [[ $pass -lt 3 ]]; do
+            local changed=false
+            local new_order=""
+            while IFS='|' read -r s_score s_num s_repo; do
+                [[ -z "$s_num" ]] && continue
+                # Check if this issue has unscheduled dependencies
+                local issue_deps
+                issue_deps=$(echo -e "$dep_graph" | grep "^${s_num}:" | head -1 | cut -d: -f2 || true)
+                if [[ -n "$issue_deps" ]]; then
+                    # Check if all deps are scheduled (or not in our issue set)
+                    local all_deps_ready=true
+                    local IFS_SAVE="$IFS"
+                    IFS=','
+                    for dep in $issue_deps; do
+                        dep="${dep## }"
+                        dep="${dep%% }"
+                        # Is this dep in our scored set and not yet scheduled?
+                        if echo "$sorted_order" | grep -q "|${dep}|" && ! echo "$scheduled" | grep -q "|${dep}|"; then
+                            all_deps_ready=false
+                            break
+                        fi
+                    done
+                    IFS="$IFS_SAVE"
+                    if [[ "$all_deps_ready" == "false" ]]; then
+                        # Defer this issue — append at end
+                        new_order="${new_order}${s_score}|${s_num}|${s_repo}\n"
+                        changed=true
+                        continue
+                    fi
+                fi
+                reordered="${reordered}${s_score}|${s_num}|${s_repo}\n"
+                scheduled="${scheduled}|${s_num}|"
+            done <<< "$sorted_order"
+            # Append deferred issues
+            reordered="${reordered}${new_order}"
+            sorted_order=$(echo -e "$reordered" | grep -v '^$')
+            reordered=""
+            scheduled=""
+            if [[ "$changed" == "false" ]]; then
+                break
+            fi
+            pass=$((pass + 1))
+        done
+    fi
     local active_count
-    active_count=$(get_active_count)
+    active_count=$(locked_get_active_count)
     # Process each issue in triage order (process substitution keeps state in current shell)
     while IFS='|' read -r score issue_num repo_name; do
@@ -1940,11 +3697,27 @@ daemon_poll_issues() {
         issue_title=$(echo "$issues_json" | jq -r --argjson n "$issue_num" '.[] | select(.number == $n) | .title')
         labels_csv=$(echo "$issues_json" | jq -r --argjson n "$issue_num" '.[] | select(.number == $n) | [.labels[].name] | join(",")')
+        # Cache title in state for dashboard visibility
+        if [[ -n "$issue_title" ]]; then
+            locked_state_update --arg num "$issue_num" --arg title "$issue_title" \
+                '.titles[$num] = $title'
+        fi
         # Skip if already inflight
         if daemon_is_inflight "$issue_num"; then
             continue
         fi
+        # Distributed claim (skip if no machines registered)
+        if [[ -f "$HOME/.shipwright/machines.json" ]]; then
+            local machine_name
+            machine_name=$(jq -r '.machines[] | select(.role == "primary") | .name' "$HOME/.shipwright/machines.json" 2>/dev/null || hostname -s)
+            if ! claim_issue "$issue_num" "$machine_name"; then
+                daemon_log INFO "Issue #${issue_num} claimed by another machine — skipping"
+                continue
+            fi
+        fi
         # Priority lane: bypass queue for critical issues
         if [[ "$PRIORITY_LANE" == "true" ]]; then
             local priority_active
@@ -1967,7 +3740,7 @@ daemon_poll_issues() {
         fi
         # Check capacity
-        active_count=$(get_active_count)
+        active_count=$(locked_get_active_count)
         if [[ "$active_count" -ge "$MAX_PARALLEL" ]]; then
             enqueue_issue "$issue_num"
             continue
@@ -1993,33 +3766,95 @@ daemon_poll_issues() {
 daemon_health_check() {
     local findings=0
-    # Stale jobs: kill processes running > timeout
-    local stale_timeout="${HEALTH_STALE_TIMEOUT:-1800}"  # default 30min
     local now_e
     now_e=$(now_epoch)
     if [[ -f "$STATE_FILE" ]]; then
+        # ── Progress-Based Health Monitoring ──
+        # Instead of killing after a static timeout, check for forward progress.
+        # Only kill when the agent is truly stuck (no stage change, no new code,
+        # same error repeating). A hard wall-clock limit remains as absolute safety net.
+        local hard_limit="${PROGRESS_HARD_LIMIT_S:-10800}"
+        local use_progress="${PROGRESS_MONITORING:-true}"
         while IFS= read -r job; do
-            local pid started_at issue_num
+            local pid started_at issue_num worktree
             pid=$(echo "$job" | jq -r '.pid')
             started_at=$(echo "$job" | jq -r '.started_at // empty')
             issue_num=$(echo "$job" | jq -r '.issue')
+            worktree=$(echo "$job" | jq -r '.worktree // ""')
+            # Skip dead processes
+            if ! kill -0 "$pid" 2>/dev/null; then
+                continue
+            fi
+            local elapsed=0
             if [[ -n "$started_at" ]]; then
                 local start_e
                 start_e=$(TZ=UTC date -j -f "%Y-%m-%dT%H:%M:%SZ" "$started_at" +%s 2>/dev/null || date -d "$started_at" +%s 2>/dev/null || echo "0")
-                local elapsed=$(( now_e - start_e ))
-                if [[ "$elapsed" -gt "$stale_timeout" ]] && kill -0 "$pid" 2>/dev/null; then
-                    daemon_log WARN "Stale job detected: issue #${issue_num} (${elapsed}s, PID $pid) — killing"
+                elapsed=$(( now_e - start_e ))
+            fi
+            # Hard wall-clock limit — absolute safety net (default 3h)
+            if [[ "$elapsed" -gt "$hard_limit" ]]; then
+                daemon_log WARN "Hard limit exceeded: issue #${issue_num} (${elapsed}s > ${hard_limit}s, PID $pid) — killing"
+                emit_event "daemon.hard_limit" "issue=$issue_num" "elapsed_s=$elapsed" "limit_s=$hard_limit" "pid=$pid"
+                kill "$pid" 2>/dev/null || true
+                daemon_clear_progress "$issue_num"
+                findings=$((findings + 1))
+                continue
+            fi
+            # Progress-based detection (when enabled)
+            if [[ "$use_progress" == "true" && -n "$worktree" ]]; then
+                local snapshot verdict
+                snapshot=$(daemon_collect_snapshot "$issue_num" "$worktree" "$pid" 2>/dev/null || echo '{}')
+                if [[ "$snapshot" != "{}" ]]; then
+                    verdict=$(daemon_assess_progress "$issue_num" "$snapshot" 2>/dev/null || echo "healthy")
+                    case "$verdict" in
+                        healthy)
+                            # All good — agent is making progress
+                            ;;
+                        slowing)
+                            daemon_log INFO "Issue #${issue_num} slowing (no progress for 1-2 checks, ${elapsed}s elapsed)"
+                            ;;
+                        stalled)
+                            local no_progress_count
+                            no_progress_count=$(jq -r '.no_progress_count // 0' "$PROGRESS_DIR/issue-${issue_num}.json" 2>/dev/null || echo 0)
+                            daemon_log WARN "Issue #${issue_num} stalled: no progress for ${no_progress_count} checks (${elapsed}s elapsed, PID $pid)"
+                            emit_event "daemon.stalled" "issue=$issue_num" "no_progress=$no_progress_count" "elapsed_s=$elapsed" "pid=$pid"
+                            ;;
+                        stuck)
+                            local no_progress_count repeated_errors cur_stage
+                            no_progress_count=$(jq -r '.no_progress_count // 0' "$PROGRESS_DIR/issue-${issue_num}.json" 2>/dev/null || echo 0)
+                            repeated_errors=$(jq -r '.repeated_error_count // 0' "$PROGRESS_DIR/issue-${issue_num}.json" 2>/dev/null || echo 0)
+                            cur_stage=$(echo "$snapshot" | jq -r '.stage // "unknown"')
+                            daemon_log WARN "Issue #${issue_num} STUCK: no progress for ${no_progress_count} checks, ${repeated_errors} repeated errors (stage=${cur_stage}, ${elapsed}s, PID $pid) — killing"
+                            emit_event "daemon.stuck_kill" "issue=$issue_num" "no_progress=$no_progress_count" "repeated_errors=$repeated_errors" "stage=$cur_stage" "elapsed_s=$elapsed" "pid=$pid"
+                            kill "$pid" 2>/dev/null || true
+                            daemon_clear_progress "$issue_num"
+                            findings=$((findings + 1))
+                            ;;
+                    esac
+                fi
+            else
+                # Fallback: legacy time-based detection when progress monitoring is off
+                local stale_timeout
+                stale_timeout=$(get_adaptive_stale_timeout "$PIPELINE_TEMPLATE")
+                if [[ "$elapsed" -gt "$stale_timeout" ]]; then
+                    daemon_log WARN "Stale job (legacy): issue #${issue_num} (${elapsed}s > ${stale_timeout}s, PID $pid) — killing"
                     kill "$pid" 2>/dev/null || true
                     findings=$((findings + 1))
                 fi
             fi
-        done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null)
+        done < <(jq -c '.active_jobs[]' "$STATE_FILE" 2>/dev/null || true)
     fi
-    # Disk space warning
+    # Disk space warning (check both repo dir and ~/.shipwright)
     local free_kb
     free_kb=$(df -k "." 2>/dev/null | tail -1 | awk '{print $4}')
     if [[ -n "$free_kb" ]] && [[ "$free_kb" -lt 1048576 ]] 2>/dev/null; then
@@ -2027,6 +3862,17 @@ daemon_health_check() {
         findings=$((findings + 1))
     fi
+    # Critical disk space on ~/.shipwright — pause spawning
+    local sw_free_kb
+    sw_free_kb=$(df -k "$HOME/.shipwright" 2>/dev/null | tail -1 | awk '{print $4}')
+    if [[ -n "$sw_free_kb" ]] && [[ "$sw_free_kb" -lt 512000 ]] 2>/dev/null; then
+        daemon_log WARN "Critical disk space on ~/.shipwright: $(( sw_free_kb / 1024 ))MB — pausing spawns"
+        emit_event "daemon.disk_low" "free_mb=$(( sw_free_kb / 1024 ))"
+        mkdir -p "$HOME/.shipwright"
+        echo '{"paused":true,"reason":"disk_low"}' > "$HOME/.shipwright/daemon-pause.flag"
+        findings=$((findings + 1))
+    fi
     # Events file size warning
     if [[ -f "$EVENTS_FILE" ]]; then
         local events_size
@@ -2096,6 +3942,13 @@ daemon_auto_scale() {
     local prev_max="$MAX_PARALLEL"
+    # ── Learn worker memory from actual RSS (adaptive) ──
+    learn_worker_memory
+    # ── Adaptive cost estimate per template ──
+    local effective_cost_per_job
+    effective_cost_per_job=$(get_adaptive_cost_estimate "$PIPELINE_TEMPLATE")
     # ── CPU cores ──
     local cpu_cores=2
     if [[ "$(uname -s)" == "Darwin" ]]; then
@@ -2106,10 +3959,9 @@ daemon_auto_scale() {
     local max_by_cpu=$(( (cpu_cores * 3) / 4 ))  # 75% utilization cap
     [[ "$max_by_cpu" -lt 1 ]] && max_by_cpu=1
-    # ── Load average check (back off if system is stressed) ──
+    # ── Load average check — gradual scaling curve (replaces 90% cliff) ──
     local load_avg
     load_avg=$(uptime | awk -F'load averages?: ' '{print $2}' | awk -F'[, ]+' '{print $1}' 2>/dev/null || echo "0")
-    # Validate numeric
     if [[ ! "$load_avg" =~ ^[0-9]+\.?[0-9]*$ ]]; then
         load_avg="0"
     fi
@@ -2117,17 +3969,28 @@ daemon_auto_scale() {
     if [[ "$cpu_cores" -gt 0 ]]; then
         load_ratio=$(awk -v load="$load_avg" -v cores="$cpu_cores" 'BEGIN { printf "%.0f", (load / cores) * 100 }')
     fi
-    if [[ "$load_ratio" -gt 90 ]]; then
-        # System under heavy load — scale down to min
+    # Gradual load scaling curve (replaces binary 90% cliff)
+    if [[ "$load_ratio" -gt 95 ]]; then
+        # 95%+: minimum workers only
         max_by_cpu="$MIN_WORKERS"
-        daemon_log WARN "Auto-scale: high load (${load_avg}/${cpu_cores} cores) — constraining to ${max_by_cpu}"
+        daemon_log WARN "Auto-scale: critical load (${load_ratio}%) — minimum workers only"
+    elif [[ "$load_ratio" -gt 85 ]]; then
+        # 85-95%: reduce by 50%
+        max_by_cpu=$(( max_by_cpu / 2 ))
+        [[ "$max_by_cpu" -lt "$MIN_WORKERS" ]] && max_by_cpu="$MIN_WORKERS"
+        daemon_log WARN "Auto-scale: high load (${load_ratio}%) — reducing capacity 50%"
+    elif [[ "$load_ratio" -gt 70 ]]; then
+        # 70-85%: reduce by 25%
+        max_by_cpu=$(( (max_by_cpu * 3) / 4 ))
+        [[ "$max_by_cpu" -lt "$MIN_WORKERS" ]] && max_by_cpu="$MIN_WORKERS"
+        daemon_log INFO "Auto-scale: moderate load (${load_ratio}%) — reducing capacity 25%"
     fi
+    # 0-70%: full capacity (no change)
     # ── Available memory ──
     local avail_mem_gb=8
     if [[ "$(uname -s)" == "Darwin" ]]; then
         local page_size free_pages inactive_pages purgeable_pages speculative_pages
-        # Page size is in format: "(page size of 16384 bytes)"
         page_size=$(vm_stat | awk '/page size of/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+$/) print $i}')
         page_size="${page_size:-16384}"
         free_pages=$(vm_stat | awk '/^Pages free:/ {gsub(/\./, "", $NF); print $NF}')
@@ -2138,7 +4001,6 @@ daemon_auto_scale() {
         inactive_pages="${inactive_pages:-0}"
         purgeable_pages=$(vm_stat | awk '/^Pages purgeable:/ {gsub(/\./, "", $NF); print $NF}')
         purgeable_pages="${purgeable_pages:-0}"
-        # Available ≈ free + speculative + inactive + purgeable
         local avail_pages=$(( free_pages + speculative_pages + inactive_pages + purgeable_pages ))
         if [[ "$avail_pages" -gt 0 && "$page_size" -gt 0 ]]; then
             local free_bytes=$(( avail_pages * page_size ))
@@ -2153,13 +4015,13 @@ daemon_auto_scale() {
     local max_by_mem=$(( avail_mem_gb / WORKER_MEM_GB ))
     [[ "$max_by_mem" -lt 1 ]] && max_by_mem=1
-    # ── Budget remaining ──
+    # ── Budget remaining (adaptive cost estimate) ──
     local max_by_budget="$MAX_WORKERS"
     local remaining_usd
-    remaining_usd=$("$SCRIPT_DIR/cct-cost.sh" remaining-budget 2>/dev/null || echo "unlimited")
+    remaining_usd=$("$SCRIPT_DIR/sw-cost.sh" remaining-budget 2>/dev/null || echo "unlimited")
     if [[ "$remaining_usd" != "unlimited" && -n "$remaining_usd" ]]; then
-        if awk -v r="$remaining_usd" -v c="$EST_COST_PER_JOB" 'BEGIN { exit !(r > 0 && c > 0) }'; then
-            max_by_budget=$(awk -v r="$remaining_usd" -v c="$EST_COST_PER_JOB" 'BEGIN { printf "%.0f", r / c }')
+        if awk -v r="$remaining_usd" -v c="$effective_cost_per_job" 'BEGIN { exit !(r > 0 && c > 0) }'; then
+            max_by_budget=$(awk -v r="$remaining_usd" -v c="$effective_cost_per_job" 'BEGIN { printf "%.0f", r / c }')
             [[ "$max_by_budget" -lt 0 ]] && max_by_budget=0
         else
             max_by_budget=0
@@ -2192,10 +4054,31 @@ daemon_auto_scale() {
     # Clamp to min_workers
     [[ "$computed" -lt "$MIN_WORKERS" ]] && computed="$MIN_WORKERS"
+    # ── Gradual scaling: change by at most 1 at a time (adaptive) ──
+    if [[ "${ADAPTIVE_THRESHOLDS_ENABLED:-false}" == "true" ]]; then
+        if [[ "$computed" -gt "$prev_max" ]]; then
+            # Check success rate at target parallelism before scaling up
+            local target_rate
+            target_rate=$(get_success_rate_at_parallelism "$((prev_max + 1))")
+            if [[ "$target_rate" -lt 50 ]]; then
+                # Poor success rate at higher parallelism — hold steady
+                computed="$prev_max"
+                daemon_log INFO "Auto-scale: holding at ${prev_max} (success rate ${target_rate}% at $((prev_max + 1)))"
+            else
+                # Scale up by 1, not jump to target
+                computed=$((prev_max + 1))
+            fi
+        elif [[ "$computed" -lt "$prev_max" ]]; then
+            # Scale down by 1, not drop to minimum
+            computed=$((prev_max - 1))
+            [[ "$computed" -lt "$MIN_WORKERS" ]] && computed="$MIN_WORKERS"
+        fi
+    fi
     MAX_PARALLEL="$computed"
     if [[ "$MAX_PARALLEL" -ne "$prev_max" ]]; then
-        daemon_log INFO "Auto-scale: ${prev_max} → ${MAX_PARALLEL} (cpu=${max_by_cpu} mem=${max_by_mem} budget=${max_by_budget} queue=${max_by_queue})"
+        daemon_log INFO "Auto-scale: ${prev_max} → ${MAX_PARALLEL} (cpu=${max_by_cpu} mem=${max_by_mem} budget=${max_by_budget} queue=${max_by_queue} load=${load_ratio}%)"
         emit_event "daemon.scale" \
             "from=$prev_max" \
             "to=$MAX_PARALLEL" \
@@ -2205,7 +4088,8 @@ daemon_auto_scale() {
             "max_by_queue=$max_by_queue" \
             "cpu_cores=$cpu_cores" \
             "avail_mem_gb=$avail_mem_gb" \
-            "remaining_usd=$remaining_usd"
+            "remaining_usd=$remaining_usd" \
+            "load_ratio=$load_ratio"
     fi
 }
@@ -2213,7 +4097,7 @@ daemon_auto_scale() {
 # Checks for fleet-reload.flag and reloads MAX_PARALLEL from fleet-managed config
 daemon_reload_config() {
-    local reload_flag="$HOME/.claude-teams/fleet-reload.flag"
+    local reload_flag="$HOME/.shipwright/fleet-reload.flag"
     if [[ ! -f "$reload_flag" ]]; then
         return
     fi
@@ -2245,6 +4129,15 @@ daemon_self_optimize() {
         return
     fi
+    # ── Intelligence-powered optimization (if enabled) ──
+    if [[ "${OPTIMIZATION_ENABLED:-false}" == "true" ]] && type optimize_full_analysis &>/dev/null 2>&1; then
+        daemon_log INFO "Running intelligence-powered optimization"
+        optimize_full_analysis 2>/dev/null || {
+            daemon_log WARN "Intelligence optimization failed — falling back to DORA-based tuning"
+        }
+        # Still run DORA-based tuning below as a complement
+    fi
     daemon_log INFO "Running self-optimization check"
     # Read DORA metrics from recent events (last 7 days)
@@ -2339,13 +4232,10 @@ daemon_self_optimize() {
         local adj_str
         adj_str=$(printf '%s; ' "${adjustments[@]}")
-        local tmp_state
-        tmp_state=$(jq \
+        locked_state_update \
             --arg adj "$adj_str" \
             --arg ts "$(now_iso)" \
-            '.last_optimization = {timestamp: $ts, adjustments: $adj}' \
-            "$STATE_FILE")
-        atomic_write_state "$tmp_state"
+            '.last_optimization = {timestamp: $ts, adjustments: $adj}'
         # ── Persist adjustments to daemon-config.json (survives restart) ──
         local config_file="${CONFIG_PATH:-.claude/daemon-config.json}"
@@ -2427,24 +4317,59 @@ daemon_cleanup_stale() {
         done < <(find "$artifacts_dir" -mindepth 1 -maxdepth 1 -type d 2>/dev/null)
     fi
-    # ── 3. Prune completed/failed state entries older than age_days ──
+    # ── 3. Clean orphaned daemon/* branches (no matching worktree or active job) ──
+    if command -v git &>/dev/null; then
+        while IFS= read -r branch; do
+            [[ -z "$branch" ]] && continue
+            branch="${branch## }"  # trim leading spaces
+            # Only clean daemon-created branches
+            [[ "$branch" == daemon/issue-* ]] || continue
+            # Extract issue number
+            local branch_issue_num="${branch#daemon/issue-}"
+            # Skip if there's an active job for this issue
+            if daemon_is_inflight "$branch_issue_num" 2>/dev/null; then
+                continue
+            fi
+            daemon_log INFO "Removing orphaned branch: ${branch}"
+            git branch -D "$branch" 2>/dev/null || true
+            cleaned=$((cleaned + 1))
+        done < <(git branch --list 'daemon/issue-*' 2>/dev/null)
+    fi
+    # ── 4. Prune completed/failed state entries older than age_days ──
     if [[ -f "$STATE_FILE" ]]; then
         local cutoff_iso
         cutoff_iso=$(epoch_to_iso $((now_e - age_secs)))
-        local before_count after_count
+        local before_count
         before_count=$(jq '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)
-        local tmp_state
-        tmp_state=$(jq --arg cutoff "$cutoff_iso" \
-            '.completed = [.completed[] | select(.completed_at > $cutoff)]' \
-            "$STATE_FILE" 2>/dev/null) || true
-        if [[ -n "$tmp_state" ]]; then
-            atomic_write_state "$tmp_state"
-            after_count=$(jq '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)
-            local pruned=$((before_count - after_count))
-            if [[ "$pruned" -gt 0 ]]; then
-                daemon_log INFO "Pruned ${pruned} old completed state entries"
-                cleaned=$((cleaned + pruned))
+        locked_state_update --arg cutoff "$cutoff_iso" \
+            '.completed = [.completed[] | select(.completed_at > $cutoff)]' 2>/dev/null || true
+        local after_count
+        after_count=$(jq '.completed | length' "$STATE_FILE" 2>/dev/null || echo 0)
+        local pruned=$((before_count - after_count))
+        if [[ "$pruned" -gt 0 ]]; then
+            daemon_log INFO "Pruned ${pruned} old completed state entries"
+            cleaned=$((cleaned + pruned))
+        fi
+    fi
+    # ── 5. Prune stale retry_counts (issues no longer in flight or queued) ──
+    if [[ -f "$STATE_FILE" ]]; then
+        local retry_keys
+        retry_keys=$(jq -r '.retry_counts // {} | keys[]' "$STATE_FILE" 2>/dev/null || true)
+        local stale_keys=()
+        while IFS= read -r key; do
+            [[ -z "$key" ]] && continue
+            if ! daemon_is_inflight "$key" 2>/dev/null; then
+                stale_keys+=("$key")
             fi
+        done <<< "$retry_keys"
+        if [[ ${#stale_keys[@]} -gt 0 ]]; then
+            for sk in "${stale_keys[@]}"; do
+                locked_state_update --arg k "$sk" 'del(.retry_counts[$k])' 2>/dev/null || continue
+            done
+            daemon_log INFO "Pruned ${#stale_keys[@]} stale retry count(s)"
+            cleaned=$((cleaned + ${#stale_keys[@]}))
         fi
     fi
@@ -2465,56 +4390,69 @@ daemon_poll_loop() {
     daemon_log INFO "Watching for label: ${CYAN}${WATCH_LABEL}${RESET}"
     while [[ ! -f "$SHUTDOWN_FLAG" ]]; do
-        daemon_poll_issues
-        daemon_reap_completed
-        daemon_health_check
+        # All poll loop calls are error-guarded to prevent set -e from killing the daemon.
+        # The || operator disables set -e for the entire call chain, so transient failures
+        # (GitHub API timeouts, jq errors, intelligence failures) are logged and skipped.
+        daemon_poll_issues || daemon_log WARN "daemon_poll_issues failed — continuing"
+        daemon_reap_completed || daemon_log WARN "daemon_reap_completed failed — continuing"
+        daemon_health_check || daemon_log WARN "daemon_health_check failed — continuing"
         # Increment cycle counter (must be before all modulo checks)
         POLL_CYCLE_COUNT=$((POLL_CYCLE_COUNT + 1))
         # Fleet config reload every 3 cycles
         if [[ $((POLL_CYCLE_COUNT % 3)) -eq 0 ]]; then
-            daemon_reload_config
+            daemon_reload_config || daemon_log WARN "daemon_reload_config failed — continuing"
         fi
         # Check degradation every 5 poll cycles
         if [[ $((POLL_CYCLE_COUNT % 5)) -eq 0 ]]; then
-            daemon_check_degradation
+            daemon_check_degradation || daemon_log WARN "daemon_check_degradation failed — continuing"
         fi
         # Auto-scale every N cycles (default: 5)
         if [[ $((POLL_CYCLE_COUNT % ${AUTO_SCALE_INTERVAL:-5})) -eq 0 ]]; then
-            daemon_auto_scale
+            daemon_auto_scale || daemon_log WARN "daemon_auto_scale failed — continuing"
         fi
         # Self-optimize every N cycles (default: 10)
         if [[ $((POLL_CYCLE_COUNT % ${OPTIMIZE_INTERVAL:-10})) -eq 0 ]]; then
-            daemon_self_optimize
+            daemon_self_optimize || daemon_log WARN "daemon_self_optimize failed — continuing"
         fi
         # Stale state reaper every N cycles (default: 10)
         if [[ $((POLL_CYCLE_COUNT % ${STALE_REAPER_INTERVAL:-10})) -eq 0 ]]; then
-            daemon_cleanup_stale
+            daemon_cleanup_stale || daemon_log WARN "daemon_cleanup_stale failed — continuing"
         fi
-        # Proactive patrol during quiet periods
+        # Rotate event log every 10 cycles (~10 min with 60s interval)
+        if [[ $((POLL_CYCLE_COUNT % 10)) -eq 0 ]]; then
+            rotate_event_log || true
+        fi
+        # Proactive patrol during quiet periods (with adaptive limits)
         local issue_count_now active_count_now
         issue_count_now=$(jq -r '.queued | length' "$STATE_FILE" 2>/dev/null || echo 0)
-        active_count_now=$(get_active_count)
+        active_count_now=$(get_active_count || echo 0)
         if [[ "$issue_count_now" -eq 0 ]] && [[ "$active_count_now" -eq 0 ]]; then
             local now_e
-            now_e=$(now_epoch)
+            now_e=$(now_epoch || date +%s)
             if [[ $((now_e - LAST_PATROL_EPOCH)) -ge "$PATROL_INTERVAL" ]]; then
+                load_adaptive_patrol_limits || true
                 daemon_log INFO "No active work — running patrol"
-                daemon_patrol --once
+                daemon_patrol --once || daemon_log WARN "daemon_patrol failed — continuing"
                 LAST_PATROL_EPOCH=$now_e
             fi
         fi
+        # ── Adaptive poll interval: adjust sleep based on queue state ──
+        local effective_interval
+        effective_interval=$(get_adaptive_poll_interval "$issue_count_now" "$active_count_now" || echo "${POLL_INTERVAL:-30}")
         # Sleep in 1s intervals so we can catch shutdown quickly
         local i=0
-        while [[ $i -lt $POLL_INTERVAL ]] && [[ ! -f "$SHUTDOWN_FLAG" ]]; do
-            sleep 1
+        while [[ $i -lt $effective_interval ]] && [[ ! -f "$SHUTDOWN_FLAG" ]]; do
+            sleep 1 || true  # Guard against signal interruption under set -e
             i=$((i + 1))
         done
     done
@@ -2525,7 +4463,39 @@ daemon_poll_loop() {
 # ─── Graceful Shutdown Handler ───────────────────────────────────────────────
 cleanup_on_exit() {
-    daemon_log INFO "Cleaning up..."
+    local exit_code=$?
+    local last_cmd="${BASH_COMMAND:-unknown}"
+    daemon_log INFO "Cleaning up... (exit_code=${exit_code}, last_command=${last_cmd})"
+    # Kill all active pipeline child processes
+    if [[ -f "$STATE_FILE" ]]; then
+        local child_pids
+        child_pids=$(jq -r '.active_jobs[].pid // empty' "$STATE_FILE" 2>/dev/null || true)
+        if [[ -n "$child_pids" ]]; then
+            local killed=0
+            while IFS= read -r cpid; do
+                [[ -z "$cpid" ]] && continue
+                if kill -0 "$cpid" 2>/dev/null; then
+                    daemon_log INFO "Killing pipeline process PID ${cpid}"
+                    kill "$cpid" 2>/dev/null || true
+                    killed=$((killed + 1))
+                fi
+            done <<< "$child_pids"
+            if [[ $killed -gt 0 ]]; then
+                daemon_log INFO "Sent SIGTERM to ${killed} pipeline process(es) — waiting 5s"
+                sleep 5
+                # Force-kill any that didn't exit
+                while IFS= read -r cpid; do
+                    [[ -z "$cpid" ]] && continue
+                    if kill -0 "$cpid" 2>/dev/null; then
+                        daemon_log WARN "Force-killing pipeline PID ${cpid}"
+                        kill -9 "$cpid" 2>/dev/null || true
+                    fi
+                done <<< "$child_pids"
+            fi
+        fi
+    fi
     rm -f "$PID_FILE" "$SHUTDOWN_FLAG"
     daemon_log INFO "Daemon stopped"
     emit_event "daemon.stopped" "pid=$$"
@@ -2544,15 +4514,19 @@ daemon_start() {
         local existing_pid
         existing_pid=$(cat "$PID_FILE" 2>/dev/null || true)
         if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
+            exec 9>&-  # Release FD before exiting
             error "Daemon already running (PID: ${existing_pid})"
             info "Use ${CYAN}shipwright daemon stop${RESET} to stop it first"
             exit 1
         else
             warn "Stale PID file found — removing"
             rm -f "$PID_FILE"
+            exec 9>&-  # Release old FD
             exec 9>"$PID_FILE"
         fi
     fi
+    # Release FD 9 — we only needed it for the startup race check
+    exec 9>&-
     # Load config
     load_config
@@ -2569,10 +4543,10 @@ daemon_start() {
             exit 1
         fi
-        info "Starting daemon in detached tmux session: ${CYAN}cct-daemon${RESET}"
+        info "Starting daemon in detached tmux session: ${CYAN}sw-daemon${RESET}"
         # Build the command to run in tmux
-        local cmd_args=("$SCRIPT_DIR/cct-daemon.sh" "start")
+        local cmd_args=("$SCRIPT_DIR/sw-daemon.sh" "start")
         if [[ -n "$CONFIG_PATH" ]]; then
             cmd_args+=("--config" "$CONFIG_PATH")
         fi
@@ -2580,14 +4554,16 @@ daemon_start() {
             cmd_args+=("--no-github")
         fi
-        tmux new-session -d -s "cct-daemon" "${cmd_args[*]}" 2>/dev/null || {
+        # Export current PATH so detached session finds claude, gh, etc.
+        local tmux_cmd="export PATH='${PATH}'; ${cmd_args[*]}"
+        tmux new-session -d -s "sw-daemon" "$tmux_cmd" 2>/dev/null || {
             # Session may already exist — try killing and recreating
-            tmux kill-session -t "cct-daemon" 2>/dev/null || true
-            tmux new-session -d -s "cct-daemon" "${cmd_args[*]}"
+            tmux kill-session -t "sw-daemon" 2>/dev/null || true
+            tmux new-session -d -s "sw-daemon" "$tmux_cmd"
         }
-        success "Daemon started in tmux session ${CYAN}cct-daemon${RESET}"
-        info "Attach with: ${DIM}tmux attach -t cct-daemon${RESET}"
+        success "Daemon started in tmux session ${CYAN}sw-daemon${RESET}"
+        info "Attach with: ${DIM}tmux attach -t sw-daemon${RESET}"
         info "View logs:   ${DIM}shipwright daemon logs --follow${RESET}"
         return 0
     fi
@@ -2595,8 +4571,10 @@ daemon_start() {
     # Foreground mode
     info "Starting daemon (PID: $$)"
-    # Write PID file
-    echo "$$" > "$PID_FILE"
+    # Write PID file atomically
+    local pid_tmp="${PID_FILE}.tmp.$$"
+    echo "$$" > "$pid_tmp"
+    mv "$pid_tmp" "$PID_FILE"
     # Remove stale shutdown flag
     rm -f "$SHUTDOWN_FLAG"
@@ -2606,10 +4584,26 @@ daemon_start() {
     # Trap signals for graceful shutdown
     trap cleanup_on_exit EXIT
-    trap 'touch "$SHUTDOWN_FLAG"' SIGINT SIGTERM
+    trap '{ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] [WARN] SIGINT/SIGTERM received — initiating shutdown" >> "$LOG_FILE" 2>/dev/null; } || true; touch "$SHUTDOWN_FLAG"' SIGINT SIGTERM
+    # Ignore SIGHUP — tmux sends this on attach/detach and we must survive it
+    trap '' SIGHUP
+    # Ignore SIGPIPE — broken pipes in command substitutions must not kill the daemon
+    trap '' SIGPIPE
+    # Override global ERR trap to log to daemon log file (not stderr, which is lost when tmux dies)
+    trap '{ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] [ERROR] ERR trap: line=$LINENO exit=$? cmd=$BASH_COMMAND" >> "$LOG_FILE" 2>/dev/null; } || true' ERR
     # Reap any orphaned jobs from previous runs
-    daemon_reap_completed
+    daemon_reap_completed || daemon_log WARN "Failed to reap orphaned jobs — continuing"
+    # Clean up stale temp files from previous crashes
+    find "$(dirname "$STATE_FILE")" -name "*.tmp.*" -mmin +5 -delete 2>/dev/null || true
+    # Rotate event log on startup
+    rotate_event_log || true
+    # Load GitHub context (repo metadata, security alerts, etc.)
+    daemon_github_context || daemon_log WARN "Failed to load GitHub context — continuing without it"
     daemon_log INFO "Daemon started successfully"
     daemon_log INFO "Config: poll_interval=${POLL_INTERVAL}s, max_parallel=${MAX_PARALLEL}, label=${WATCH_LABEL}"
@@ -2673,7 +4667,7 @@ daemon_stop() {
     rm -f "$PID_FILE" "$SHUTDOWN_FLAG"
     # Also kill tmux session if it exists
-    tmux kill-session -t "cct-daemon" 2>/dev/null || true
+    tmux kill-session -t "sw-daemon" 2>/dev/null || true
     success "Daemon stopped"
 }
@@ -2819,7 +4813,14 @@ daemon_init() {
   "patrol": {
     "interval": 3600,
     "max_issues": 5,
-    "label": "auto-patrol"
+    "label": "auto-patrol",
+    "auto_watch": false,
+    "checks": {
+      "recurring_failures": { "enabled": true, "threshold": 3 },
+      "dora_degradation": { "enabled": true },
+      "untested_scripts": { "enabled": true },
+      "retry_exhaustion": { "enabled": true, "threshold": 2 }
+    }
   },
   "auto_template": false,
   "template_map": {
@@ -2841,7 +4842,19 @@ daemon_init() {
   "max_workers": 8,
   "min_workers": 1,
   "worker_mem_gb": 4,
-  "estimated_cost_per_job_usd": 5.0
+  "estimated_cost_per_job_usd": 5.0,
+  "intelligence": {
+    "enabled": true,
+    "cache_ttl_seconds": 3600,
+    "composer_enabled": true,
+    "optimization_enabled": true,
+    "prediction_enabled": true,
+    "adversarial_enabled": false,
+    "simulation_enabled": false,
+    "architecture_enabled": false,
+    "ab_test_ratio": 0.2,
+    "anomaly_threshold": 3.0
+  }
 }
 CONFIGEOF
@@ -3175,7 +5188,7 @@ case "$SUBCOMMAND" in
         daemon_patrol "$@"
         ;;
     test)
-        exec "$SCRIPT_DIR/cct-daemon-test.sh" "$@"
+        exec "$SCRIPT_DIR/sw-daemon-test.sh" "$@"
         ;;
     help|--help|-h)
         show_help