npm - shipwright-cli - Versions diffs - 2.4.0 → 3.0.0 - Mend

shipwright-cli 2.4.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161) hide show

package/README.md +16 -11
package/completions/_shipwright +1 -1
package/completions/shipwright.bash +3 -8
package/completions/shipwright.fish +1 -1
package/config/defaults.json +111 -0
package/config/event-schema.json +81 -0
package/config/policy.json +13 -18
package/dashboard/coverage/coverage-summary.json +14 -0
package/dashboard/public/index.html +1 -1
package/dashboard/server.ts +306 -17
package/dashboard/src/components/charts/bar.test.ts +79 -0
package/dashboard/src/components/charts/donut.test.ts +68 -0
package/dashboard/src/components/charts/pipeline-rail.test.ts +117 -0
package/dashboard/src/components/charts/sparkline.test.ts +125 -0
package/dashboard/src/core/api.test.ts +309 -0
package/dashboard/src/core/helpers.test.ts +301 -0
package/dashboard/src/core/router.test.ts +307 -0
package/dashboard/src/core/router.ts +7 -0
package/dashboard/src/core/sse.test.ts +144 -0
package/dashboard/src/views/metrics.test.ts +186 -0
package/dashboard/src/views/overview.test.ts +173 -0
package/dashboard/src/views/pipelines.test.ts +183 -0
package/dashboard/src/views/team.test.ts +253 -0
package/dashboard/vitest.config.ts +14 -5
package/docs/TIPS.md +1 -1
package/docs/patterns/README.md +1 -1
package/package.json +5 -7
package/scripts/adapters/docker-deploy.sh +1 -1
package/scripts/adapters/tmux-adapter.sh +11 -1
package/scripts/adapters/wezterm-adapter.sh +1 -1
package/scripts/check-version-consistency.sh +1 -1
package/scripts/lib/architecture.sh +126 -0
package/scripts/lib/bootstrap.sh +75 -0
package/scripts/lib/compat.sh +89 -6
package/scripts/lib/config.sh +91 -0
package/scripts/lib/daemon-adaptive.sh +3 -3
package/scripts/lib/daemon-dispatch.sh +39 -16
package/scripts/lib/daemon-health.sh +1 -1
package/scripts/lib/daemon-patrol.sh +24 -12
package/scripts/lib/daemon-poll.sh +37 -25
package/scripts/lib/daemon-state.sh +115 -23
package/scripts/lib/daemon-triage.sh +30 -8
package/scripts/lib/fleet-failover.sh +63 -0
package/scripts/lib/helpers.sh +30 -6
package/scripts/lib/pipeline-detection.sh +2 -2
package/scripts/lib/pipeline-github.sh +9 -9
package/scripts/lib/pipeline-intelligence.sh +85 -35
package/scripts/lib/pipeline-quality-checks.sh +16 -16
package/scripts/lib/pipeline-quality.sh +1 -1
package/scripts/lib/pipeline-stages.sh +242 -28
package/scripts/lib/pipeline-state.sh +40 -4
package/scripts/lib/test-helpers.sh +247 -0
package/scripts/postinstall.mjs +3 -11
package/scripts/sw +10 -4
package/scripts/sw-activity.sh +1 -11
package/scripts/sw-adaptive.sh +109 -85
package/scripts/sw-adversarial.sh +4 -14
package/scripts/sw-architecture-enforcer.sh +1 -11
package/scripts/sw-auth.sh +8 -17
package/scripts/sw-autonomous.sh +111 -49
package/scripts/sw-changelog.sh +1 -11
package/scripts/sw-checkpoint.sh +144 -20
package/scripts/sw-ci.sh +2 -12
package/scripts/sw-cleanup.sh +13 -17
package/scripts/sw-code-review.sh +16 -36
package/scripts/sw-connect.sh +5 -12
package/scripts/sw-context.sh +9 -26
package/scripts/sw-cost.sh +6 -16
package/scripts/sw-daemon.sh +75 -70
package/scripts/sw-dashboard.sh +57 -17
package/scripts/sw-db.sh +506 -15
package/scripts/sw-decompose.sh +1 -11
package/scripts/sw-deps.sh +15 -25
package/scripts/sw-developer-simulation.sh +1 -11
package/scripts/sw-discovery.sh +112 -30
package/scripts/sw-doc-fleet.sh +7 -17
package/scripts/sw-docs-agent.sh +6 -16
package/scripts/sw-docs.sh +4 -12
package/scripts/sw-doctor.sh +134 -43
package/scripts/sw-dora.sh +11 -19
package/scripts/sw-durable.sh +35 -52
package/scripts/sw-e2e-orchestrator.sh +11 -27
package/scripts/sw-eventbus.sh +115 -115
package/scripts/sw-evidence.sh +114 -30
package/scripts/sw-feedback.sh +3 -13
package/scripts/sw-fix.sh +2 -20
package/scripts/sw-fleet-discover.sh +1 -11
package/scripts/sw-fleet-viz.sh +10 -18
package/scripts/sw-fleet.sh +13 -17
package/scripts/sw-github-app.sh +6 -16
package/scripts/sw-github-checks.sh +1 -11
package/scripts/sw-github-deploy.sh +1 -11
package/scripts/sw-github-graphql.sh +2 -12
package/scripts/sw-guild.sh +1 -11
package/scripts/sw-heartbeat.sh +49 -12
package/scripts/sw-hygiene.sh +45 -43
package/scripts/sw-incident.sh +48 -74
package/scripts/sw-init.sh +35 -37
package/scripts/sw-instrument.sh +1 -11
package/scripts/sw-intelligence.sh +362 -51
package/scripts/sw-jira.sh +5 -14
package/scripts/sw-launchd.sh +2 -12
package/scripts/sw-linear.sh +8 -17
package/scripts/sw-logs.sh +4 -12
package/scripts/sw-loop.sh +641 -90
package/scripts/sw-memory.sh +243 -17
package/scripts/sw-mission-control.sh +2 -12
package/scripts/sw-model-router.sh +73 -34
package/scripts/sw-otel.sh +11 -21
package/scripts/sw-oversight.sh +1 -11
package/scripts/sw-patrol-meta.sh +5 -11
package/scripts/sw-pipeline-composer.sh +7 -17
package/scripts/sw-pipeline-vitals.sh +1 -11
package/scripts/sw-pipeline.sh +478 -122
package/scripts/sw-pm.sh +2 -12
package/scripts/sw-pr-lifecycle.sh +27 -25
package/scripts/sw-predictive.sh +16 -22
package/scripts/sw-prep.sh +6 -16
package/scripts/sw-ps.sh +1 -11
package/scripts/sw-public-dashboard.sh +2 -12
package/scripts/sw-quality.sh +77 -10
package/scripts/sw-reaper.sh +1 -11
package/scripts/sw-recruit.sh +15 -25
package/scripts/sw-regression.sh +11 -21
package/scripts/sw-release-manager.sh +19 -28
package/scripts/sw-release.sh +8 -16
package/scripts/sw-remote.sh +1 -11
package/scripts/sw-replay.sh +48 -44
package/scripts/sw-retro.sh +70 -92
package/scripts/sw-review-rerun.sh +1 -1
package/scripts/sw-scale.sh +109 -32
package/scripts/sw-security-audit.sh +12 -22
package/scripts/sw-self-optimize.sh +239 -23
package/scripts/sw-session.sh +3 -13
package/scripts/sw-setup.sh +8 -18
package/scripts/sw-standup.sh +5 -15
package/scripts/sw-status.sh +32 -23
package/scripts/sw-strategic.sh +129 -13
package/scripts/sw-stream.sh +1 -11
package/scripts/sw-swarm.sh +76 -36
package/scripts/sw-team-stages.sh +10 -20
package/scripts/sw-templates.sh +4 -14
package/scripts/sw-testgen.sh +3 -13
package/scripts/sw-tmux-pipeline.sh +1 -19
package/scripts/sw-tmux-role-color.sh +0 -10
package/scripts/sw-tmux-status.sh +3 -11
package/scripts/sw-tmux.sh +2 -20
package/scripts/sw-trace.sh +1 -19
package/scripts/sw-tracker-github.sh +0 -10
package/scripts/sw-tracker-jira.sh +1 -11
package/scripts/sw-tracker-linear.sh +1 -11
package/scripts/sw-tracker.sh +7 -24
package/scripts/sw-triage.sh +24 -34
package/scripts/sw-upgrade.sh +5 -23
package/scripts/sw-ux.sh +1 -19
package/scripts/sw-webhook.sh +18 -32
package/scripts/sw-widgets.sh +3 -21
package/scripts/sw-worktree.sh +11 -27
package/scripts/update-homebrew-sha.sh +67 -0
package/templates/pipelines/tdd.json +72 -0
package/scripts/sw-pipeline.sh.mock +0 -7

package/scripts/sw-pipeline.sh CHANGED Viewed

@@ -11,7 +11,7 @@ unset CLAUDECODE 2>/dev/null || true
 # Ignore SIGHUP so tmux attach/detach doesn't kill long-running plan/design/review stages
 trap '' HUP
-VERSION="2.4.0"
+VERSION="3.0.0"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
@@ -21,6 +21,7 @@ REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 # Canonical helpers (colors, output, events)
 # shellcheck source=lib/helpers.sh
 [[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
+[[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
 # Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
 [[ "$(type -t info 2>/dev/null)" == "function" ]]    || info()    { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
 [[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
@@ -30,23 +31,6 @@ if [[ "$(type -t now_iso 2>/dev/null)" != "function" ]]; then
   now_iso()   { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
   now_epoch() { date +%s; }
 fi
-if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
-  emit_event() {
-    local event_type="$1"; shift; mkdir -p "${HOME}/.shipwright"
-    local payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
-    while [[ $# -gt 0 ]]; do local key="${1%%=*}" val="${1#*=}"; payload="${payload},\"${key}\":\"${val}\""; shift; done
-    echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
-  }
-fi
-CYAN="${CYAN:-\033[38;2;0;212;255m}"
-PURPLE="${PURPLE:-\033[38;2;124;58;237m}"
-BLUE="${BLUE:-\033[38;2;0;102;255m}"
-GREEN="${GREEN:-\033[38;2;74;222;128m}"
-YELLOW="${YELLOW:-\033[38;2;250;204;21m}"
-RED="${RED:-\033[38;2;248;113;113m}"
-DIM="${DIM:-\033[2m}"
-BOLD="${BOLD:-\033[1m}"
-RESET="${RESET:-\033[0m}"
 # Policy + pipeline quality thresholds (config/policy.json via lib/pipeline-quality.sh)
 [[ -f "$SCRIPT_DIR/lib/pipeline-quality.sh" ]] && source "$SCRIPT_DIR/lib/pipeline-quality.sh"
 # shellcheck source=lib/pipeline-state.sh
@@ -107,6 +91,8 @@ fi
 if [[ -f "$SCRIPT_DIR/sw-durable.sh" ]]; then
     source "$SCRIPT_DIR/sw-durable.sh"
 fi
+# shellcheck source=sw-db.sh — for db_save_checkpoint/db_load_checkpoint (durable workflows)
+[[ -f "$SCRIPT_DIR/sw-db.sh" ]] && source "$SCRIPT_DIR/sw-db.sh"
 # ─── GitHub API Modules (optional) ─────────────────────────────────────────
 # shellcheck source=sw-github-graphql.sh
@@ -151,6 +137,21 @@ format_duration() {
     fi
 }
+# Rotate event log if needed (standalone mode — daemon has its own rotation in poll loop)
+rotate_event_log_if_needed() {
+    local events_file="${EVENTS_FILE:-$HOME/.shipwright/events.jsonl}"
+    local max_lines=10000
+    [[ ! -f "$events_file" ]] && return
+    local lines
+    lines=$(wc -l < "$events_file" 2>/dev/null || echo "0")
+    if [[ "$lines" -gt "$max_lines" ]]; then
+        local tmp="${events_file}.rotating"
+        if tail -5000 "$events_file" > "$tmp" 2>/dev/null && mv "$tmp" "$events_file" 2>/dev/null; then
+            info "Rotated events.jsonl: ${lines} -> 5000 lines"
+        fi
+    fi
+}
 _pipeline_compact_goal() {
     local goal="$1"
     local plan_file="${2:-}"
@@ -199,33 +200,6 @@ load_composed_pipeline() {
     return 0
 }
-# ─── Structured Event Log ──────────────────────────────────────────────────
-# Appends JSON events to ~/.shipwright/events.jsonl for metrics/traceability
-EVENTS_DIR="${HOME}/.shipwright"
-EVENTS_FILE="${EVENTS_DIR}/events.jsonl"
-emit_event() {
-    local event_type="$1"
-    shift
-    # Remaining args are key=value pairs
-    local json_fields=""
-    for kv in "$@"; do
-        local key="${kv%%=*}"
-        local val="${kv#*=}"
-        # Numbers: don't quote; strings: quote
-        if [[ "$val" =~ ^-?[0-9]+\.?[0-9]*$ ]]; then
-            json_fields="${json_fields},\"${key}\":${val}"
-        else
-            # Escape quotes in value
-            val="${val//\"/\\\"}"
-            json_fields="${json_fields},\"${key}\":\"${val}\""
-        fi
-    done
-    mkdir -p "$EVENTS_DIR"
-    echo "{\"ts\":\"$(now_iso)\",\"ts_epoch\":$(now_epoch),\"type\":\"${event_type}\"${json_fields}}" >> "$EVENTS_FILE"
-}
 # ─── Token / Cost Parsing ─────────────────────────────────────────────────
 parse_claude_tokens() {
     local log_file="$1"
@@ -237,6 +211,36 @@ parse_claude_tokens() {
     TOTAL_OUTPUT_TOKENS=$(( TOTAL_OUTPUT_TOKENS + ${output_tok:-0} ))
 }
+# Estimate pipeline cost using historical averages from completed pipelines.
+# Falls back to per-stage estimates when no history exists.
+estimate_pipeline_cost() {
+    local stages="$1"
+    local stage_count
+    stage_count=$(echo "$stages" | jq 'length' 2>/dev/null || echo "6")
+    [[ ! "$stage_count" =~ ^[0-9]+$ ]] && stage_count=6
+    local events_file="${EVENTS_FILE:-$HOME/.shipwright/events.jsonl}"
+    local avg_input=0 avg_output=0
+    if [[ -f "$events_file" ]]; then
+        local hist
+        hist=$(grep '"type":"pipeline.completed"' "$events_file" 2>/dev/null | tail -10)
+        if [[ -n "$hist" ]]; then
+            avg_input=$(echo "$hist" | jq -s -r '[.[] | .input_tokens // 0 | tonumber] | if length > 0 then (add / length | floor | tostring) else "0" end' 2>/dev/null | head -1)
+            avg_output=$(echo "$hist" | jq -s -r '[.[] | .output_tokens // 0 | tonumber] | if length > 0 then (add / length | floor | tostring) else "0" end' 2>/dev/null | head -1)
+        fi
+    fi
+    [[ ! "$avg_input" =~ ^[0-9]+$ ]] && avg_input=0
+    [[ ! "$avg_output" =~ ^[0-9]+$ ]] && avg_output=0
+    # Fall back to reasonable per-stage estimates only if no history
+    if [[ "$avg_input" -eq 0 ]]; then
+        avg_input=$(( stage_count * 8000 ))   # More realistic: ~8K input per stage
+        avg_output=$(( stage_count * 4000 ))  # ~4K output per stage
+    fi
+    echo "{\"input_tokens\":${avg_input},\"output_tokens\":${avg_output}}"
+}
 # ─── Defaults ───────────────────────────────────────────────────────────────
 GOAL=""
 ISSUE_NUMBER=""
@@ -260,6 +264,7 @@ CI_MODE=false
 DRY_RUN=false
 IGNORE_BUDGET=false
 COMPLETED_STAGES=""
+RESUME_FROM_CHECKPOINT=false
 MAX_ITERATIONS_OVERRIDE=""
 MAX_RESTARTS_OVERRIDE=""
 FAST_TEST_CMD_OVERRIDE=""
@@ -285,6 +290,10 @@ GH_AVAILABLE=false
 # Timing
 PIPELINE_START_EPOCH=""
 STAGE_TIMINGS=""
+PIPELINE_STAGES_PASSED=""
+PIPELINE_SLOWEST_STAGE=""
+LAST_STAGE_ERROR_CLASS=""
+LAST_STAGE_ERROR=""
 PROJECT_ROOT=""
 STATE_DIR=""
@@ -333,6 +342,7 @@ show_help() {
     echo -e "  ${DIM}--max-iterations <n>${RESET}       Override max build loop iterations"
     echo -e "  ${DIM}--max-restarts <n>${RESET}         Max session restarts in build loop"
     echo -e "  ${DIM}--fast-test-cmd <cmd>${RESET}      Fast/subset test for build loop"
+    echo -e "  ${DIM}--tdd${RESET}                     Test-first: generate tests before implementation"
     echo -e "  ${DIM}--completed-stages \"a,b\"${RESET}   Skip these stages (CI resume)"
     echo ""
     echo -e "${BOLD}STAGES${RESET}  ${DIM}(configurable per pipeline template)${RESET}"
@@ -413,6 +423,7 @@ parse_args() {
             --ignore-budget) IGNORE_BUDGET=true; shift ;;
             --max-iterations) MAX_ITERATIONS_OVERRIDE="$2"; shift 2 ;;
             --completed-stages) COMPLETED_STAGES="$2"; shift 2 ;;
+            --resume) RESUME_FROM_CHECKPOINT=true; shift ;;
             --worktree=*) AUTO_WORKTREE=true; WORKTREE_NAME="${1#--worktree=}"; WORKTREE_NAME="${WORKTREE_NAME//[^a-zA-Z0-9_-]/}"; if [[ -z "$WORKTREE_NAME" ]]; then error "Invalid worktree name (alphanumeric, hyphens, underscores only)"; exit 1; fi; shift ;;
             --worktree)   AUTO_WORKTREE=true; shift ;;
             --dry-run)     DRY_RUN=true; shift ;;
@@ -427,6 +438,7 @@ parse_args() {
                 shift 2 ;;
             --fast-test-cmd) FAST_TEST_CMD_OVERRIDE="$2"; shift 2 ;;
+            --tdd)         TDD_ENABLED=true; shift ;;
             --help|-h)     show_help; exit 0 ;;
             *)
                 if [[ -z "$PIPELINE_NAME_ARG" ]]; then
@@ -487,11 +499,11 @@ find_pipeline_config() {
 load_pipeline_config() {
     # Check for intelligence-composed pipeline first
     local composed_pipeline="${ARTIFACTS_DIR}/composed-pipeline.json"
-    if [[ -f "$composed_pipeline" ]] && type composer_validate_pipeline &>/dev/null; then
+    if [[ -f "$composed_pipeline" ]] && type composer_validate_pipeline >/dev/null 2>&1; then
         # Use composed pipeline if fresh (< 1 hour old)
         local composed_age=99999
         local composed_mtime
-        composed_mtime=$(stat -f %m "$composed_pipeline" 2>/dev/null || stat -c %Y "$composed_pipeline" 2>/dev/null || echo "0")
+        composed_mtime=$(file_mtime "$composed_pipeline")
         if [[ "$composed_mtime" -gt 0 ]]; then
             composed_age=$(( $(now_epoch) - composed_mtime ))
         fi
@@ -513,6 +525,9 @@ load_pipeline_config() {
         exit 1
     }
     info "Pipeline: ${BOLD}$PIPELINE_NAME${RESET} ${DIM}($PIPELINE_CONFIG)${RESET}"
+    # TDD from template (overridable by --tdd)
+    [[ "$(jq -r '.tdd // false' "$PIPELINE_CONFIG" 2>/dev/null)" == "true" ]] && PIPELINE_TDD=true
+    return 0
 }
 CURRENT_STAGE_ID=""
@@ -522,7 +537,7 @@ SLACK_WEBHOOK=""
 NOTIFICATION_ENABLED=false
 # Self-healing
-BUILD_TEST_RETRIES=2
+BUILD_TEST_RETRIES=$(_config_get_int "pipeline.build_test_retries" 3 2>/dev/null || echo 3)
 STASHED_CHANGES=false
 SELF_HEAL_COUNT=0
@@ -544,7 +559,7 @@ start_heartbeat() {
                 --stage "${CURRENT_STAGE_ID:-unknown}" \
                 --iteration "0" \
                 --activity "$(get_stage_description "${CURRENT_STAGE_ID:-}" 2>/dev/null || echo "Running pipeline")" 2>/dev/null || true
-            sleep 30
+            sleep "$(_config_get_int "pipeline.heartbeat_interval" 30 2>/dev/null || echo 30)"
         done
     ) >/dev/null 2>&1 &
     HEARTBEAT_PID=$!
@@ -574,7 +589,10 @@ ci_push_partial_work() {
     fi
     # Push branch (create if needed, force to overwrite previous WIP)
-    git push origin "HEAD:refs/heads/$branch" --force 2>/dev/null || true
+    if ! git push origin "HEAD:refs/heads/$branch" --force 2>/dev/null; then
+        warn "git push failed for $branch — remote may be out of sync"
+        emit_event "pipeline.push_failed" "branch=$branch"
+    fi
 }
 ci_post_stage_event() {
@@ -584,7 +602,7 @@ ci_post_stage_event() {
     local stage="$1" status="$2" elapsed="${3:-0s}"
     local comment="<!-- SHIPWRIGHT-STAGE: ${stage}:${status}:${elapsed} -->"
-    gh issue comment "$ISSUE_NUMBER" --body "$comment" 2>/dev/null || true
+    _timeout "$(_config_get_int "network.gh_timeout" 30 2>/dev/null || echo 30)" gh issue comment "$ISSUE_NUMBER" --body "$comment" 2>/dev/null || true
 }
 # ─── Signal Handling ───────────────────────────────────────────────────────
@@ -620,7 +638,10 @@ cleanup_on_exit() {
     # Update GitHub
     if [[ -n "${ISSUE_NUMBER:-}" && "${GH_AVAILABLE:-false}" == "true" ]]; then
-        gh_comment_issue "$ISSUE_NUMBER" "⏸️ **Pipeline interrupted** at stage: ${CURRENT_STAGE_ID:-unknown}" 2>/dev/null || true
+        if ! _timeout "$(_config_get_int "network.gh_timeout" 30 2>/dev/null || echo 30)" gh issue comment "$ISSUE_NUMBER" --body "⏸️ **Pipeline interrupted** at stage: ${CURRENT_STAGE_ID:-unknown}" 2>/dev/null; then
+            warn "gh issue comment failed — status update may not have been posted"
+            emit_event "pipeline.comment_failed" "issue=$ISSUE_NUMBER"
+        fi
     fi
     exit "$exit_code"
@@ -641,7 +662,7 @@ preflight_checks() {
     local optional_tools=("gh" "claude" "bc" "curl")
     for tool in "${required_tools[@]}"; do
-        if command -v "$tool" &>/dev/null; then
+        if command -v "$tool" >/dev/null 2>&1; then
             echo -e "  ${GREEN}✓${RESET} $tool"
         else
             echo -e "  ${RED}✗${RESET} $tool ${RED}(required)${RESET}"
@@ -650,7 +671,7 @@ preflight_checks() {
     done
     for tool in "${optional_tools[@]}"; do
-        if command -v "$tool" &>/dev/null; then
+        if command -v "$tool" >/dev/null 2>&1; then
             echo -e "  ${GREEN}✓${RESET} $tool"
         else
             echo -e "  ${DIM}○${RESET} $tool ${DIM}(optional — some features disabled)${RESET}"
@@ -659,7 +680,7 @@ preflight_checks() {
     # 2. Git state
     echo ""
-    if git rev-parse --is-inside-work-tree &>/dev/null; then
+    if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
         echo -e "  ${GREEN}✓${RESET} Inside git repo"
     else
         echo -e "  ${RED}✗${RESET} Not inside a git repository"
@@ -685,7 +706,7 @@ preflight_checks() {
     fi
     # Check if base branch exists
-    if git rev-parse --verify "$BASE_BRANCH" &>/dev/null; then
+    if git rev-parse --verify "$BASE_BRANCH" >/dev/null 2>&1; then
         echo -e "  ${GREEN}✓${RESET} Base branch: $BASE_BRANCH"
     else
         echo -e "  ${RED}✗${RESET} Base branch not found: $BASE_BRANCH"
@@ -693,8 +714,8 @@ preflight_checks() {
     fi
     # 3. GitHub auth (if gh available and not disabled)
-    if [[ "$NO_GITHUB" != "true" ]] && command -v gh &>/dev/null; then
-        if gh auth status &>/dev/null 2>&1; then
+    if [[ "$NO_GITHUB" != "true" ]] && command -v gh >/dev/null 2>&1; then
+        if gh auth status >/dev/null 2>&1; then
             echo -e "  ${GREEN}✓${RESET} GitHub authenticated"
         else
             echo -e "  ${YELLOW}⚠${RESET} GitHub not authenticated (features disabled)"
@@ -702,7 +723,7 @@ preflight_checks() {
     fi
     # 4. Claude CLI
-    if command -v claude &>/dev/null; then
+    if command -v claude >/dev/null 2>&1; then
         echo -e "  ${GREEN}✓${RESET} Claude CLI available"
     else
         echo -e "  ${RED}✗${RESET} Claude CLI not found — plan/build stages will fail"
@@ -754,12 +775,12 @@ notify() {
         payload=$(jq -n \
             --arg text "${emoji} *${title}*\n${message}" \
             '{text: $text}')
-        curl -sf -X POST -H 'Content-Type: application/json' \
+        curl -sf --connect-timeout "$(_config_get_int "network.connect_timeout" 10 2>/dev/null || echo 10)" --max-time "$(_config_get_int "network.max_time" 60 2>/dev/null || echo 60)" -X POST -H 'Content-Type: application/json' \
             -d "$payload" "$SLACK_WEBHOOK" >/dev/null 2>&1 || true
     fi
-    # Custom webhook (env var SHIPWRIGHT_WEBHOOK_URL, with CCT_WEBHOOK_URL fallback)
-    local _webhook_url="${SHIPWRIGHT_WEBHOOK_URL:-${CCT_WEBHOOK_URL:-}}"
+    # Custom webhook (env var SHIPWRIGHT_WEBHOOK_URL)
+    local _webhook_url="${SHIPWRIGHT_WEBHOOK_URL:-}"
     if [[ -n "$_webhook_url" ]]; then
         local payload
         payload=$(jq -n \
@@ -767,7 +788,7 @@ notify() {
             --arg level "$level" --arg pipeline "${PIPELINE_NAME:-}" \
             --arg goal "${GOAL:-}" --arg stage "${CURRENT_STAGE_ID:-}" \
             '{title:$title, message:$message, level:$level, pipeline:$pipeline, goal:$goal, stage:$stage}')
-        curl -sf -X POST -H 'Content-Type: application/json' \
+        curl -sf --connect-timeout 10 --max-time 30 -X POST -H 'Content-Type: application/json' \
             -d "$payload" "$_webhook_url" >/dev/null 2>&1 || true
     fi
 }
@@ -815,7 +836,7 @@ classify_error() {
     elif echo "$log_tail" | grep -qiE 'error\[E[0-9]+\]|error: aborting|FAILED.*compile|build failed|tsc.*error|eslint.*error'; then
         classification="logic"
     # Intelligence fallback: Claude classification for unknown errors
-    elif [[ "$classification" == "unknown" ]] && type intelligence_search_memory &>/dev/null 2>&1 && command -v claude &>/dev/null; then
+    elif [[ "$classification" == "unknown" ]] && type intelligence_search_memory >/dev/null 2>&1 && command -v claude >/dev/null 2>&1; then
         local ai_class
         ai_class=$(claude --print --output-format text -p "Classify this error as exactly one of: infrastructure, configuration, logic, unknown.
@@ -882,14 +903,23 @@ run_stage_with_retry() {
             return 0
         fi
+        # Capture error_class and error snippet for stage.failed / pipeline.completed events
+        local error_class
+        error_class=$(classify_error "$stage_id")
+        LAST_STAGE_ERROR_CLASS="$error_class"
+        LAST_STAGE_ERROR=""
+        local _log_file="${ARTIFACTS_DIR}/${stage_id}-results.log"
+        [[ ! -f "$_log_file" ]] && _log_file="${ARTIFACTS_DIR}/test-results.log"
+        if [[ -f "$_log_file" ]]; then
+            LAST_STAGE_ERROR=$(tail -20 "$_log_file" 2>/dev/null | grep -iE 'error|fail|exception|fatal' 2>/dev/null | head -1 | cut -c1-200 || true)
+        fi
         attempt=$((attempt + 1))
         if [[ "$attempt" -gt "$max_retries" ]]; then
             return 1
         fi
-        # Classify the error to decide whether retry makes sense
-        local error_class
-        error_class=$(classify_error "$stage_id")
+        # Classify done above; decide whether retry makes sense
         emit_event "retry.classified" \
             "issue=${ISSUE_NUMBER:-0}" \
@@ -926,6 +956,15 @@ run_stage_with_retry() {
         esac
         prev_error_class="$error_class"
+        if type db_save_reasoning_trace >/dev/null 2>&1; then
+            local job_id="${SHIPWRIGHT_PIPELINE_ID:-$$}"
+            local error_msg="${LAST_STAGE_ERROR:-$error_class}"
+            db_save_reasoning_trace "$job_id" "retry_reasoning" \
+                "stage=$stage_id error=$error_msg" \
+                "Stage failed, analyzing error pattern before retry" \
+                "retry_strategy=self_heal" 0.6 2>/dev/null || true
+        fi
         warn "Stage $stage_id failed (attempt $attempt/$((max_retries + 1)), class: $error_class) — retrying..."
         # Exponential backoff with jitter to avoid thundering herd
         local backoff=$((2 ** attempt))
@@ -951,9 +990,9 @@ self_healing_build_test() {
     local prev_fail_count=0 zero_convergence_streak=0
     # Vitals-driven adaptive limit (preferred over static BUILD_TEST_RETRIES)
-    if type pipeline_adaptive_limit &>/dev/null 2>&1; then
+    if type pipeline_adaptive_limit >/dev/null 2>&1; then
         local _vitals_json=""
-        if type pipeline_compute_vitals &>/dev/null 2>&1; then
+        if type pipeline_compute_vitals >/dev/null 2>&1; then
             _vitals_json=$(pipeline_compute_vitals "$STATE_FILE" "$ARTIFACTS_DIR" "${ISSUE_NUMBER:-}" 2>/dev/null) || true
         fi
         local vitals_limit
@@ -968,7 +1007,7 @@ self_healing_build_test() {
                 "vitals_limit=$vitals_limit"
         fi
     # Fallback: intelligence-based adaptive limits
-    elif type composer_estimate_iterations &>/dev/null 2>&1; then
+    elif type composer_estimate_iterations >/dev/null 2>&1; then
         local estimated
         estimated=$(composer_estimate_iterations \
             "${INTELLIGENCE_ANALYSIS:-{}}" \
@@ -1022,7 +1061,7 @@ self_healing_build_test() {
         if [[ "$cycle" -gt 1 && -n "$last_test_error" ]]; then
             # Query memory for known fixes
             local _memory_fix=""
-            if type memory_closed_loop_inject &>/dev/null 2>&1; then
+            if type memory_closed_loop_inject >/dev/null 2>&1; then
                 local _error_sig_short
                 _error_sig_short=$(echo "$last_test_error" | head -3 || echo "")
                 _memory_fix=$(memory_closed_loop_inject "$_error_sig_short" 2>/dev/null) || true
@@ -1053,7 +1092,7 @@ Focus on fixing the failing tests while keeping all passing tests working."
                 local timing
                 timing=$(get_stage_timing "build")
                 success "Stage ${BOLD}build${RESET} complete ${DIM}(${timing})${RESET}"
-                if type pipeline_emit_progress_snapshot &>/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
+                if type pipeline_emit_progress_snapshot >/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
                     local _diff_count
                     _diff_count=$(git diff --stat HEAD~1 2>/dev/null | tail -1 | grep -oE '[0-9]+' | head -1) || true
                     local _snap_files _snap_error
@@ -1078,7 +1117,7 @@ Focus on fixing the failing tests while keeping all passing tests working."
                 local timing
                 timing=$(get_stage_timing "build")
                 success "Stage ${BOLD}build${RESET} complete ${DIM}(${timing})${RESET}"
-                if type pipeline_emit_progress_snapshot &>/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
+                if type pipeline_emit_progress_snapshot >/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
                     local _diff_count
                     _diff_count=$(git diff --stat HEAD~1 2>/dev/null | tail -1 | grep -oE '[0-9]+' | head -1) || true
                     local _snap_files _snap_error
@@ -1109,7 +1148,7 @@ Focus on fixing the failing tests while keeping all passing tests working."
             emit_event "convergence.tests_passed" \
                 "issue=${ISSUE_NUMBER:-0}" \
                 "cycle=$cycle"
-            if type pipeline_emit_progress_snapshot &>/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
+            if type pipeline_emit_progress_snapshot >/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
                 local _diff_count
                 _diff_count=$(git diff --stat HEAD~1 2>/dev/null | tail -1 | grep -oE '[0-9]+' | head -1) || true
                 local _snap_files _snap_error
@@ -1235,6 +1274,9 @@ auto_rebase() {
 }
 run_pipeline() {
+    # Rotate event log if needed (standalone mode)
+    rotate_event_log_if_needed
     local stages
     stages=$(jq -c '.stages[]' "$PIPELINE_CONFIG")
@@ -1329,6 +1371,10 @@ run_pipeline() {
         # Self-healing build→test loop: when we hit build, run both together
         if [[ "$id" == "build" && "$use_self_healing" == "true" ]]; then
+            # TDD: generate tests before build when enabled
+            if [[ "${TDD_ENABLED:-false}" == "true" || "${PIPELINE_TDD:-}" == "true" ]]; then
+                stage_test_first || true
+            fi
             # Gate check for build
             local build_gate
             build_gate=$(echo "$stage" | jq -r '.gate')
@@ -1362,6 +1408,11 @@ run_pipeline() {
             continue
         fi
+        # TDD: generate tests before build when enabled (non-self-healing path)
+        if [[ "$id" == "build" && "$use_self_healing" != "true" ]] && [[ "${TDD_ENABLED:-false}" == "true" || "${PIPELINE_TDD:-}" == "true" ]]; then
+            stage_test_first || true
+        fi
         # Skip test if already handled by self-healing loop
         if [[ "$id" == "test" && "$use_self_healing" == "true" ]]; then
             stage_status=$(get_stage_status "test")
@@ -1401,52 +1452,59 @@ run_pipeline() {
             fi
         fi
-        # Intelligence: per-stage model routing with A/B testing
-        if type intelligence_recommend_model &>/dev/null 2>&1; then
+        # Intelligence: per-stage model routing (UCB1 when DB has data, else A/B testing)
+        local recommended_model="" from_ucb1=false
+        if type ucb1_select_model >/dev/null 2>&1; then
+            recommended_model=$(ucb1_select_model "$id" 2>/dev/null || echo "")
+            [[ -n "$recommended_model" ]] && from_ucb1=true
+        fi
+        if [[ -z "$recommended_model" ]] && type intelligence_recommend_model >/dev/null 2>&1; then
             local stage_complexity="${INTELLIGENCE_COMPLEXITY:-5}"
             local budget_remaining=""
             if [[ -x "$SCRIPT_DIR/sw-cost.sh" ]]; then
                 budget_remaining=$(bash "$SCRIPT_DIR/sw-cost.sh" remaining-budget 2>/dev/null || echo "")
             fi
-            local recommended_model
-            recommended_model=$(intelligence_recommend_model "$id" "$stage_complexity" "$budget_remaining" 2>/dev/null || echo "")
-            if [[ -n "$recommended_model" && "$recommended_model" != "null" ]]; then
-                # A/B testing: decide whether to use the recommended model
-                local ab_ratio=20  # default 20% use recommended model
+            local recommended_json
+            recommended_json=$(intelligence_recommend_model "$id" "$stage_complexity" "$budget_remaining" 2>/dev/null || echo "")
+            recommended_model=$(echo "$recommended_json" | jq -r '.model // empty' 2>/dev/null || echo "")
+        fi
+        if [[ -n "$recommended_model" && "$recommended_model" != "null" ]]; then
+            if [[ "$from_ucb1" == "true" ]]; then
+                # UCB1 already balances exploration/exploitation — use directly
+                export CLAUDE_MODEL="$recommended_model"
+                emit_event "intelligence.model_ucb1" \
+                    "issue=${ISSUE_NUMBER:-0}" \
+                    "stage=$id" \
+                    "model=$recommended_model"
+            else
+                # A/B testing for intelligence recommendation
+                local ab_ratio=20
                 local daemon_cfg="${PROJECT_ROOT}/.claude/daemon-config.json"
                 if [[ -f "$daemon_cfg" ]]; then
                     local cfg_ratio
                     cfg_ratio=$(jq -r '.intelligence.ab_test_ratio // 0.2' "$daemon_cfg" 2>/dev/null || echo "0.2")
-                    # Convert ratio (0.0-1.0) to percentage (0-100)
                     ab_ratio=$(awk -v r="$cfg_ratio" 'BEGIN{printf "%d", r * 100}' 2>/dev/null || echo "20")
                 fi
-                # Check if we have enough data points to graduate from A/B testing
                 local routing_file="${HOME}/.shipwright/optimization/model-routing.json"
                 local use_recommended=false
                 local ab_group="control"
                 if [[ -f "$routing_file" ]]; then
-                    local stage_samples
-                    stage_samples=$(jq -r --arg s "$id" '.[$s].sonnet_samples // 0' "$routing_file" 2>/dev/null || echo "0")
-                    local total_samples
-                    total_samples=$(jq -r --arg s "$id" '((.[$s].sonnet_samples // 0) + (.[$s].opus_samples // 0))' "$routing_file" 2>/dev/null || echo "0")
-                    if [[ "$total_samples" -ge 50 ]]; then
-                        # Enough data — use optimizer's recommendation as default
+                    local stage_samples total_samples
+                    stage_samples=$(jq -r --arg s "$id" '.routes[$s].sonnet_samples // .[$s].sonnet_samples // 0' "$routing_file" 2>/dev/null || echo "0")
+                    total_samples=$(jq -r --arg s "$id" '((.routes[$s].sonnet_samples // .[$s].sonnet_samples // 0) + (.routes[$s].opus_samples // .[$s].opus_samples // 0))' "$routing_file" 2>/dev/null || echo "0")
+                    if [[ "${total_samples:-0}" -ge 50 ]]; then
                         use_recommended=true
                         ab_group="graduated"
                     fi
                 fi
                 if [[ "$use_recommended" != "true" ]]; then
-                    # A/B test: RANDOM % 100 < ab_ratio → use recommended
                     local roll=$((RANDOM % 100))
                     if [[ "$roll" -lt "$ab_ratio" ]]; then
                         use_recommended=true
                         ab_group="experiment"
-                    else
-                        ab_group="control"
                     fi
                 fi
@@ -1475,7 +1533,7 @@ run_pipeline() {
         emit_event "stage.started" "issue=${ISSUE_NUMBER:-0}" "stage=$id"
         # Mark GitHub Check Run as in-progress
-        if [[ "${NO_GITHUB:-false}" != "true" ]] && type gh_checks_stage_update &>/dev/null 2>&1; then
+        if [[ "${NO_GITHUB:-false}" != "true" ]] && type gh_checks_stage_update >/dev/null 2>&1; then
             gh_checks_stage_update "$id" "in_progress" "" "Stage $id started" 2>/dev/null || true
         fi
@@ -1491,7 +1549,9 @@ run_pipeline() {
             timing=$(get_stage_timing "$id")
             stage_dur_s=$(( $(now_epoch) - stage_start_epoch ))
             success "Stage ${BOLD}$id${RESET} complete ${DIM}(${timing})${RESET}"
-            emit_event "stage.completed" "issue=${ISSUE_NUMBER:-0}" "stage=$id" "duration_s=$stage_dur_s"
+            emit_event "stage.completed" "issue=${ISSUE_NUMBER:-0}" "stage=$id" "duration_s=$stage_dur_s" "result=success"
+            # Record model outcome for UCB1 learning
+            type record_model_outcome >/dev/null 2>&1 && record_model_outcome "$stage_model_used" "$id" 1 "$stage_dur_s" 0 2>/dev/null || true
             # Broadcast discovery for cross-pipeline learning
             if [[ -x "$SCRIPT_DIR/sw-discovery.sh" ]]; then
                 local _disc_cat _disc_patterns _disc_text
@@ -1514,9 +1574,16 @@ run_pipeline() {
             stage_dur_s=$(( $(now_epoch) - stage_start_epoch ))
             error "Pipeline failed at stage: ${BOLD}$id${RESET}"
             update_status "failed" "$id"
-            emit_event "stage.failed" "issue=${ISSUE_NUMBER:-0}" "stage=$id" "duration_s=$stage_dur_s"
+            emit_event "stage.failed" \
+                "issue=${ISSUE_NUMBER:-0}" \
+                "stage=$id" \
+                "duration_s=$stage_dur_s" \
+                "error=${LAST_STAGE_ERROR:-unknown}" \
+                "error_class=${LAST_STAGE_ERROR_CLASS:-unknown}"
             # Log model used for prediction feedback
             echo "${id}|${stage_model_used}|false" >> "${ARTIFACTS_DIR}/model-routing.log"
+            # Record model outcome for UCB1 learning
+            type record_model_outcome >/dev/null 2>&1 && record_model_outcome "$stage_model_used" "$id" 0 "$stage_dur_s" 0 2>/dev/null || true
             # Cancel any remaining in_progress check runs
             pipeline_cancel_check_runs 2>/dev/null || true
             return 1
@@ -1525,6 +1592,11 @@ run_pipeline() {
     # Pipeline complete!
     update_status "complete" ""
+    PIPELINE_STAGES_PASSED="$completed"
+    PIPELINE_SLOWEST_STAGE=""
+    if type get_slowest_stage >/dev/null 2>&1; then
+        PIPELINE_SLOWEST_STAGE=$(get_slowest_stage 2>/dev/null || true)
+    fi
     local total_dur=""
     if [[ -n "$PIPELINE_START_EPOCH" ]]; then
         total_dur=$(format_duration $(( $(now_epoch) - PIPELINE_START_EPOCH )))
@@ -1568,7 +1640,7 @@ run_pipeline() {
 pipeline_post_completion_cleanup() {
     local cleaned=0
-    # 1. Clear checkpoints (they only matter for resume; pipeline is done)
+    # 1. Clear checkpoints and context files (they only matter for resume; pipeline is done)
     if [[ -d "${ARTIFACTS_DIR}/checkpoints" ]]; then
         local cp_count=0
         local cp_file
@@ -1577,6 +1649,11 @@ pipeline_post_completion_cleanup() {
             rm -f "$cp_file"
             cp_count=$((cp_count + 1))
         done
+        for cp_file in "${ARTIFACTS_DIR}/checkpoints"/*-claude-context.json; do
+            [[ -f "$cp_file" ]] || continue
+            rm -f "$cp_file"
+            cp_count=$((cp_count + 1))
+        done
         if [[ "$cp_count" -gt 0 ]]; then
             cleaned=$((cleaned + cp_count))
         fi
@@ -1621,7 +1698,7 @@ pipeline_cancel_check_runs() {
         return
     fi
-    if ! type gh_checks_stage_update &>/dev/null 2>&1; then
+    if ! type gh_checks_stage_update >/dev/null 2>&1; then
         return
     fi
@@ -1673,7 +1750,7 @@ pipeline_setup_worktree() {
     # Store original dir for cleanup, then cd into worktree
     ORIGINAL_REPO_DIR="$(pwd)"
-    cd "$worktree_path"
+    cd "$worktree_path" || { error "Failed to cd into worktree: $worktree_path"; return 1; }
     CLEANUP_WORKTREE=true
     success "Worktree ready: ${CYAN}${worktree_path}${RESET} (branch: ${branch_name})"
@@ -1807,7 +1884,7 @@ run_dry_run() {
     local optional_tools=("gh" "claude" "bc")
     for tool in "${required_tools[@]}"; do
-        if command -v "$tool" &>/dev/null; then
+        if command -v "$tool" >/dev/null 2>&1; then
             echo -e "  ${GREEN}✓${RESET} $tool"
         else
             echo -e "  ${RED}✗${RESET} $tool ${RED}(required)${RESET}"
@@ -1816,7 +1893,7 @@ run_dry_run() {
     done
     for tool in "${optional_tools[@]}"; do
-        if command -v "$tool" &>/dev/null; then
+        if command -v "$tool" >/dev/null 2>&1; then
             echo -e "  ${GREEN}✓${RESET} $tool"
         else
             echo -e "  ${DIM}○${RESET} $tool"
@@ -1825,15 +1902,17 @@ run_dry_run() {
     echo ""
-    # Cost estimation (rough approximation)
+    # Cost estimation: use historical averages from past pipelines when available
     echo -e "${BLUE}${BOLD}━━━ Estimated Resource Usage ━━━${RESET}"
     echo ""
-    # Very rough cost estimation: ~2000 input tokens per stage, ~3000 output tokens
-    # Adjust based on pipeline complexity
+    local stages_json
+    stages_json=$(jq '[.stages[] | select(.enabled == true)]' "$PIPELINE_CONFIG" 2>/dev/null || echo "[]")
+    local est
+    est=$(estimate_pipeline_cost "$stages_json")
     local input_tokens_estimate output_tokens_estimate
-    input_tokens_estimate=$(( enabled_stages * 2000 ))
-    output_tokens_estimate=$(( enabled_stages * 3000 ))
+    input_tokens_estimate=$(echo "$est" | jq -r '.input_tokens // 0')
+    output_tokens_estimate=$(echo "$est" | jq -r '.output_tokens // 0')
     # Calculate cost based on selected model
     local input_rate output_rate input_cost output_cost total_cost
@@ -1848,11 +1927,11 @@ run_dry_run() {
     echo -e "  ${BOLD}Estimated Input Tokens:${RESET}  ~$input_tokens_estimate"
     echo -e "  ${BOLD}Estimated Output Tokens:${RESET} ~$output_tokens_estimate"
     echo -e "  ${BOLD}Model Cost Rate:${RESET}        $stage_model"
-    echo -e "  ${BOLD}Estimated Cost:${RESET}         \$$total_cost USD (rough estimate)"
+    echo -e "  ${BOLD}Estimated Cost:${RESET}         \$$total_cost USD"
     echo ""
     # Validate composed pipeline if intelligence is enabled
-    if [[ -f "$ARTIFACTS_DIR/composed-pipeline.json" ]] && type composer_validate_pipeline &>/dev/null; then
+    if [[ -f "$ARTIFACTS_DIR/composed-pipeline.json" ]] && type composer_validate_pipeline >/dev/null 2>&1; then
         echo -e "${BLUE}${BOLD}━━━ Intelligence-Composed Pipeline ━━━${RESET}"
         echo ""
@@ -1877,6 +1956,100 @@ run_dry_run() {
     return 0
 }
+# ─── Reasoning Trace Generation ──────────────────────────────────────────────
+# Multi-step autonomous reasoning traces for pipeline start (before stages run)
+#
+# generate_reasoning_trace
+#   Globals read:    SHIPWRIGHT_PIPELINE_ID, ISSUE_NUMBER, GOAL, PIPELINE_TEMPLATE
+#   Globals written: PIPELINE_TEMPLATE (exported, only when it was empty/unset)
+#   Optional hooks:  intelligence_analyze_issue, memory_semantic_search,
+#                    thompson_select_template, db_save_reasoning_trace — each is
+#                    called only when `type` finds it; failures are ignored.
+#   Emits:           a "reasoning.trace" event through emit_event (best-effort).
+#   Defaults:        complexity=medium, risk=50, template=standard when no
+#                    analysis is available.
+generate_reasoning_trace() {
+    local job_id="${SHIPWRIGHT_PIPELINE_ID:-$$}"
+    local issue="${ISSUE_NUMBER:-}"
+    local goal="${GOAL:-}"
+    # Step 1: Analyze issue complexity and risk
+    local complexity="medium"
+    local risk_score=50
+    # Declared once, up front, so neither the issue path nor the goal path
+    # leaks these names into the caller's (global) scope.
+    local issue_json="" analysis="" comp_num="" success_prob=""
+    if type intelligence_analyze_issue >/dev/null 2>&1; then
+        if [[ -n "$issue" ]]; then
+            # An issue number takes precedence over a free-form goal.
+            issue_json=$(gh issue view "$issue" --json number,title,body,labels 2>/dev/null || echo "{}")
+        elif [[ -n "$goal" ]]; then
+            # No issue: synthesize a minimal issue-shaped document from the goal.
+            issue_json=$(jq -n --arg title "${goal}" --arg body "" '{title: $title, body: $body, labels: []}' 2>/dev/null || echo "{}")
+        fi
+        if [[ -n "$issue_json" && "$issue_json" != "{}" ]]; then
+            analysis=$(intelligence_analyze_issue "$issue_json" 2>/dev/null || echo "")
+        fi
+        if [[ -n "$analysis" ]]; then
+            comp_num=$(jq -r '.complexity // 5' <<< "$analysis" 2>/dev/null || echo "5")
+            # Anything that is not a plain non-negative integer (float, string,
+            # "null") would make the numeric tests below raise an arithmetic
+            # error, so fall back to the same default jq would have supplied.
+            [[ "$comp_num" =~ ^[0-9]+$ ]] || comp_num=5
+            # Force base 10 so a zero-padded value such as "08" is not read as octal.
+            comp_num=$((10#$comp_num))
+            if [[ "$comp_num" -le 3 ]]; then
+                complexity="low"
+            elif [[ "$comp_num" -le 6 ]]; then
+                complexity="medium"
+            else
+                complexity="high"
+            fi
+            success_prob=$(jq -r '.success_probability // 50' <<< "$analysis" 2>/dev/null || echo "50")
+            [[ "$success_prob" =~ ^[0-9]+$ ]] || success_prob=50
+            risk_score=$((100 - 10#$success_prob))
+        fi
+    fi
+    # Step 2: Query similar past issues
+    local similar_context=""
+    if type memory_semantic_search >/dev/null 2>&1 && [[ -n "$goal" ]]; then
+        similar_context=$(memory_semantic_search "$goal" "" 3 2>/dev/null || echo "")
+    fi
+    # Step 3: Select template using Thompson sampling
+    # A template already present in PIPELINE_TEMPLATE is never overridden.
+    local selected_template="${PIPELINE_TEMPLATE:-}"
+    if [[ -z "$selected_template" ]] && type thompson_select_template >/dev/null 2>&1; then
+        selected_template=$(thompson_select_template "$complexity" 2>/dev/null || echo "standard")
+    fi
+    if [[ -z "$selected_template" ]]; then
+        selected_template="standard"
+    fi
+    # Step 4: Predict failure modes from memory
+    local failure_predictions=""
+    if type memory_semantic_search >/dev/null 2>&1 && [[ -n "$goal" ]]; then
+        failure_predictions=$(memory_semantic_search "failure error $goal" "" 3 2>/dev/null || echo "")
+    fi
+    # Save reasoning traces to DB (each write is best-effort)
+    if type db_save_reasoning_trace >/dev/null 2>&1; then
+        db_save_reasoning_trace "$job_id" "complexity_analysis" \
+            "issue=$issue goal=$goal" \
+            "Analyzed complexity=$complexity risk=$risk_score" \
+            "complexity=$complexity risk_score=$risk_score" 0.7 2>/dev/null || true
+        db_save_reasoning_trace "$job_id" "template_selection" \
+            "complexity=$complexity historical_outcomes" \
+            "Thompson sampling over historical success rates" \
+            "template=$selected_template" 0.8 2>/dev/null || true
+        # Search results are only recorded when they are non-empty and not "[]".
+        if [[ -n "$similar_context" && "$similar_context" != "[]" ]]; then
+            db_save_reasoning_trace "$job_id" "similar_issues" \
+                "$goal" \
+                "Found similar past issues for context injection" \
+                "$similar_context" 0.6 2>/dev/null || true
+        fi
+        if [[ -n "$failure_predictions" && "$failure_predictions" != "[]" ]]; then
+            db_save_reasoning_trace "$job_id" "failure_prediction" \
+                "$goal" \
+                "Predicted potential failure modes from history" \
+                "$failure_predictions" 0.5 2>/dev/null || true
+        fi
+    fi
+    # Export for use by pipeline stages
+    if [[ -z "${PIPELINE_TEMPLATE:-}" ]]; then
+        export PIPELINE_TEMPLATE="$selected_template"
+    fi
+    emit_event "reasoning.trace" "job_id=$job_id" "complexity=$complexity" "risk=$risk_score" "template=${selected_template:-standard}" 2>/dev/null || true
+}
 # ─── Subcommands ────────────────────────────────────────────────────────────
 pipeline_start() {
@@ -1898,6 +2071,13 @@ pipeline_start() {
         info "Using repository: $ORIGINAL_REPO_DIR"
     fi
+    # Bootstrap optimization & memory if cold start (before first intelligence use)
+    if [[ -f "$SCRIPT_DIR/lib/bootstrap.sh" ]]; then
+        source "$SCRIPT_DIR/lib/bootstrap.sh"
+        [[ ! -f "$HOME/.shipwright/optimization/iteration-model.json" ]] && bootstrap_optimization 2>/dev/null || true
+        [[ ! -f "$HOME/.shipwright/memory/patterns.json" ]] && bootstrap_memory 2>/dev/null || true
+    fi
     if [[ -z "$GOAL" && -z "$ISSUE_NUMBER" ]]; then
         error "Must provide --goal or --issue"
         echo -e "  Example: ${DIM}shipwright pipeline start --goal \"Add JWT auth\"${RESET}"
@@ -1905,7 +2085,7 @@ pipeline_start() {
         exit 1
     fi
-    if ! command -v jq &>/dev/null; then
+    if ! command -v jq >/dev/null 2>&1; then
         error "jq is required. Install it: brew install jq"
         exit 1
     fi
@@ -1923,6 +2103,13 @@ pipeline_start() {
     setup_dirs
+    # Generate reasoning trace (complexity analysis, template selection, failure predictions)
+    local user_specified_pipeline="$PIPELINE_NAME"
+    generate_reasoning_trace 2>/dev/null || true
+    if [[ -n "${PIPELINE_TEMPLATE:-}" && "$user_specified_pipeline" == "standard" ]]; then
+        PIPELINE_NAME="$PIPELINE_TEMPLATE"
+    fi
     # Check for existing pipeline
     if [[ -f "$STATE_FILE" ]]; then
         local existing_status
@@ -1942,7 +2129,87 @@ pipeline_start() {
     gh_init
     load_pipeline_config
-    initialize_state
+    # Checkpoint resume: when --resume is passed, try DB first, then file-based
+    checkpoint_stage=""
+    checkpoint_iteration=0
+    if $RESUME_FROM_CHECKPOINT && type db_load_checkpoint >/dev/null 2>&1; then
+        local saved_checkpoint
+        saved_checkpoint=$(db_load_checkpoint "pipeline-${SHIPWRIGHT_PIPELINE_ID:-$$}" 2>/dev/null || echo "")
+        if [[ -n "$saved_checkpoint" ]]; then
+            checkpoint_stage=$(echo "$saved_checkpoint" | jq -r '.stage // ""' 2>/dev/null || echo "")
+            if [[ -n "$checkpoint_stage" ]]; then
+                info "Resuming from DB checkpoint: stage=$checkpoint_stage"
+                checkpoint_iteration=$(echo "$saved_checkpoint" | jq -r '.iteration // 0' 2>/dev/null || echo "0")
+                # Build COMPLETED_STAGES: all enabled stages before checkpoint_stage
+                local enabled_list before_list=""
+                enabled_list=$(jq -r '.stages[] | select(.enabled == true) | .id' "$PIPELINE_CONFIG" 2>/dev/null) || true
+                local s
+                while IFS= read -r s; do
+                    [[ -z "$s" ]] && continue
+                    if [[ "$s" == "$checkpoint_stage" ]]; then
+                        break
+                    fi
+                    [[ -n "$before_list" ]] && before_list="${before_list},${s}" || before_list="$s"
+                done <<< "$enabled_list"
+                if [[ -n "$before_list" ]]; then
+                    COMPLETED_STAGES="${before_list}"
+                    SELF_HEAL_COUNT="${checkpoint_iteration}"
+                fi
+            fi
+        fi
+    fi
+    if $RESUME_FROM_CHECKPOINT && [[ -z "$checkpoint_stage" ]] && [[ -d "${ARTIFACTS_DIR}/checkpoints" ]]; then
+        local cp_dir="${ARTIFACTS_DIR}/checkpoints"
+        local latest_cp="" latest_mtime=0
+        local f
+        for f in "$cp_dir"/*-checkpoint.json; do
+            [[ -f "$f" ]] || continue
+            local mtime
+            mtime=$(file_mtime "$f" 2>/dev/null || echo "0")
+            if [[ "${mtime:-0}" -gt "$latest_mtime" ]]; then
+                latest_mtime="${mtime}"
+                latest_cp="$f"
+            fi
+        done
+        if [[ -n "$latest_cp" && -x "$SCRIPT_DIR/sw-checkpoint.sh" ]]; then
+            checkpoint_stage="$(basename "$latest_cp" -checkpoint.json)"
+            local cp_json
+            cp_json="$("$SCRIPT_DIR/sw-checkpoint.sh" restore --stage "$checkpoint_stage" 2>/dev/null)" || true
+            if [[ -n "$cp_json" ]] && command -v jq >/dev/null 2>&1; then
+                checkpoint_iteration="$(echo "$cp_json" | jq -r '.iteration // 0' 2>/dev/null)" || checkpoint_iteration=0
+                info "Checkpoint resume: stage=${checkpoint_stage} iteration=${checkpoint_iteration}"
+                # Build COMPLETED_STAGES: all enabled stages before checkpoint_stage
+                local enabled_list before_list=""
+                enabled_list="$(jq -r '.stages[] | select(.enabled == true) | .id' "$PIPELINE_CONFIG" 2>/dev/null)" || true
+                local s
+                while IFS= read -r s; do
+                    [[ -z "$s" ]] && continue
+                    if [[ "$s" == "$checkpoint_stage" ]]; then
+                        break
+                    fi
+                    [[ -n "$before_list" ]] && before_list="${before_list},${s}" || before_list="$s"
+                done <<< "$enabled_list"
+                if [[ -n "$before_list" ]]; then
+                    COMPLETED_STAGES="${before_list}"
+                    SELF_HEAL_COUNT="${checkpoint_iteration}"
+                fi
+            fi
+        fi
+    fi
+    # Restore from state file if resuming (failed/interrupted pipeline); else initialize fresh
+    if $RESUME_FROM_CHECKPOINT && [[ -f "$STATE_FILE" ]]; then
+        local existing_status
+        existing_status="$(sed -n 's/^status: *//p' "$STATE_FILE" | head -1)"
+        if [[ "$existing_status" == "failed" || "$existing_status" == "interrupted" ]]; then
+            resume_state
+        else
+            initialize_state
+        fi
+    else
+        initialize_state
+    fi
     # CI resume: restore branch + goal context when intake is skipped
     if [[ -n "${COMPLETED_STAGES:-}" ]] && echo "$COMPLETED_STAGES" | tr ',' '\n' | grep -qx "intake"; then
@@ -1951,7 +2218,7 @@ pipeline_start() {
         # Restore GOAL from issue if not already set
         if [[ -z "$GOAL" && -n "$ISSUE_NUMBER" ]]; then
-            GOAL=$(gh issue view "$ISSUE_NUMBER" --json title -q .title 2>/dev/null || echo "Issue #${ISSUE_NUMBER}")
+            GOAL=$(_timeout "$(_config_get_int "network.gh_timeout" 30 2>/dev/null || echo 30)" gh issue view "$ISSUE_NUMBER" --json title -q .title 2>/dev/null || echo "Issue #${ISSUE_NUMBER}")
             info "CI resume: goal from issue — ${GOAL}"
         fi
@@ -2018,11 +2285,38 @@ pipeline_start() {
         return $?
     fi
+    # Capture predictions for feedback loop (intelligence → actuals → learning)
+    if type intelligence_analyze_issue >/dev/null 2>&1 && (type intelligence_estimate_iterations >/dev/null 2>&1 || type intelligence_predict_cost >/dev/null 2>&1); then
+        local issue_json="${INTELLIGENCE_ANALYSIS:-}"
+        if [[ -z "$issue_json" || "$issue_json" == "{}" ]]; then
+            if [[ -n "$ISSUE_NUMBER" ]]; then
+                issue_json=$(gh issue view "$ISSUE_NUMBER" --json number,title,body,labels 2>/dev/null || echo "{}")
+            else
+                issue_json=$(jq -n --arg title "${GOAL:-untitled}" --arg body "" '{title: $title, body: $body, labels: []}')
+            fi
+            if [[ -n "$issue_json" && "$issue_json" != "{}" ]]; then
+                issue_json=$(intelligence_analyze_issue "$issue_json" 2>/dev/null || echo "{}")
+            fi
+        fi
+        if [[ -n "$issue_json" && "$issue_json" != "{}" ]]; then
+            if type intelligence_estimate_iterations >/dev/null 2>&1; then
+                PREDICTED_ITERATIONS=$(intelligence_estimate_iterations "$issue_json" "" 2>/dev/null || echo "")
+                export PREDICTED_ITERATIONS
+            fi
+            if type intelligence_predict_cost >/dev/null 2>&1; then
+                local cost_json
+                cost_json=$(intelligence_predict_cost "$issue_json" "{}" 2>/dev/null || echo "{}")
+                PREDICTED_COST=$(echo "$cost_json" | jq -r '.estimated_cost_usd // empty' 2>/dev/null || echo "")
+                export PREDICTED_COST
+            fi
+        fi
+    fi
     # Start background heartbeat writer
     start_heartbeat
     # Initialize GitHub Check Runs for all pipeline stages
-    if [[ "${NO_GITHUB:-false}" != "true" ]] && type gh_checks_pipeline_start &>/dev/null 2>&1; then
+    if [[ "${NO_GITHUB:-false}" != "true" ]] && type gh_checks_pipeline_start >/dev/null 2>&1; then
         local head_sha
         head_sha=$(git rev-parse HEAD 2>/dev/null || echo "")
         if [[ -n "$head_sha" && -n "$REPO_OWNER" && -n "$REPO_NAME" ]]; then
@@ -2038,12 +2332,15 @@ pipeline_start() {
     emit_event "pipeline.started" \
         "issue=${ISSUE_NUMBER:-0}" \
+        "template=${PIPELINE_NAME}" \
+        "complexity=${INTELLIGENCE_COMPLEXITY:-0}" \
+        "machine=$(hostname 2>/dev/null || echo "unknown")" \
         "pipeline=${PIPELINE_NAME}" \
         "model=${MODEL:-opus}" \
         "goal=${GOAL}"
     # Durable WAL: publish pipeline start event
-    if type publish_event &>/dev/null 2>&1; then
+    if type publish_event >/dev/null 2>&1; then
         publish_event "pipeline.started" "{\"issue\":\"${ISSUE_NUMBER:-0}\",\"pipeline\":\"${PIPELINE_NAME}\",\"goal\":\"${GOAL:0:200}\"}" 2>/dev/null || true
     fi
@@ -2051,6 +2348,18 @@ pipeline_start() {
     local exit_code=$?
     PIPELINE_EXIT_CODE="$exit_code"
+    # Compute total cost for pipeline.completed (prefer actual from Claude when available)
+    local model_key="${MODEL:-sonnet}"
+    local total_cost
+    if [[ -n "${TOTAL_COST_USD:-}" && "${TOTAL_COST_USD}" != "0" && "${TOTAL_COST_USD}" != "null" ]]; then
+        total_cost="${TOTAL_COST_USD}"
+    else
+        local input_cost output_cost
+        input_cost=$(awk -v tokens="$TOTAL_INPUT_TOKENS" -v rate="$(echo "$COST_MODEL_RATES" | jq -r ".${model_key}.input // 3")" 'BEGIN{printf "%.4f", (tokens / 1000000) * rate}')
+        output_cost=$(awk -v tokens="$TOTAL_OUTPUT_TOKENS" -v rate="$(echo "$COST_MODEL_RATES" | jq -r ".${model_key}.output // 15")" 'BEGIN{printf "%.4f", (tokens / 1000000) * rate}')
+        total_cost=$(awk -v i="$input_cost" -v o="$output_cost" 'BEGIN{printf "%.4f", i + o}')
+    fi
     # Send completion notification + event
     local total_dur_s=""
     [[ -n "$PIPELINE_START_EPOCH" ]] && total_dur_s=$(( $(now_epoch) - PIPELINE_START_EPOCH ))
@@ -2064,10 +2373,16 @@ pipeline_start() {
             "issue=${ISSUE_NUMBER:-0}" \
             "result=success" \
             "duration_s=${total_dur_s:-0}" \
+            "iterations=$((SELF_HEAL_COUNT + 1))" \
+            "template=${PIPELINE_NAME}" \
+            "complexity=${INTELLIGENCE_COMPLEXITY:-0}" \
+            "stages_passed=${PIPELINE_STAGES_PASSED:-0}" \
+            "slowest_stage=${PIPELINE_SLOWEST_STAGE:-}" \
             "pr_url=${pr_url:-}" \
             "agent_id=${PIPELINE_AGENT_ID}" \
             "input_tokens=$TOTAL_INPUT_TOKENS" \
             "output_tokens=$TOTAL_OUTPUT_TOKENS" \
+            "total_cost=$total_cost" \
             "self_heal_count=$SELF_HEAL_COUNT"
         # Auto-ingest pipeline outcome into recruit profiles
@@ -2080,10 +2395,15 @@ pipeline_start() {
             "issue=${ISSUE_NUMBER:-0}" \
             "result=failure" \
             "duration_s=${total_dur_s:-0}" \
+            "iterations=$((SELF_HEAL_COUNT + 1))" \
+            "template=${PIPELINE_NAME}" \
+            "complexity=${INTELLIGENCE_COMPLEXITY:-0}" \
             "failed_stage=${CURRENT_STAGE_ID:-unknown}" \
+            "error_class=${LAST_STAGE_ERROR_CLASS:-unknown}" \
             "agent_id=${PIPELINE_AGENT_ID}" \
             "input_tokens=$TOTAL_INPUT_TOKENS" \
             "output_tokens=$TOTAL_OUTPUT_TOKENS" \
+            "total_cost=$total_cost" \
             "self_heal_count=$SELF_HEAL_COUNT"
         # Auto-ingest pipeline outcome into recruit profiles
@@ -2121,7 +2441,7 @@ pipeline_start() {
         "success=$pipeline_success"
     # Close intelligence prediction feedback loop — validate predicted vs actual
-    if type intelligence_validate_prediction &>/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
+    if type intelligence_validate_prediction >/dev/null 2>&1 && [[ -n "${ISSUE_NUMBER:-}" ]]; then
         intelligence_validate_prediction \
             "$ISSUE_NUMBER" \
             "${INTELLIGENCE_COMPLEXITY:-0}" \
@@ -2129,6 +2449,12 @@ pipeline_start() {
             "$pipeline_success" 2>/dev/null || true
     fi
+    # Validate iterations prediction against actuals (cost validation moved below after total_cost is computed)
+    local ACTUAL_ITERATIONS=$((SELF_HEAL_COUNT + 1))
+    if [[ -n "${PREDICTED_ITERATIONS:-}" ]] && type intelligence_validate_prediction >/dev/null 2>&1; then
+        intelligence_validate_prediction "iterations" "$PREDICTED_ITERATIONS" "$ACTUAL_ITERATIONS" 2>/dev/null || true
+    fi
     # Close predictive anomaly feedback loop — confirm whether flagged anomalies were real
     if [[ -x "$SCRIPT_DIR/sw-predictive.sh" ]]; then
         local _actual_failure="false"
@@ -2144,7 +2470,8 @@ pipeline_start() {
         "issue=${ISSUE_NUMBER:-0}" \
         "template=${PIPELINE_NAME}" \
         "success=$pipeline_success" \
-        "duration_s=${total_dur_s:-0}"
+        "duration_s=${total_dur_s:-0}" \
+        "complexity=${INTELLIGENCE_COMPLEXITY:-0}"
     # Risk prediction vs actual failure
     local predicted_risk="${INTELLIGENCE_RISK_SCORE:-0}"
@@ -2167,20 +2494,26 @@ pipeline_start() {
     fi
     # Record pipeline outcome for model routing feedback loop
-    if type optimize_analyze_outcome &>/dev/null 2>&1; then
+    if type optimize_analyze_outcome >/dev/null 2>&1; then
         optimize_analyze_outcome "$STATE_FILE" 2>/dev/null || true
-        # Tune template weights based on accumulated outcomes
-        if type optimize_tune_templates &>/dev/null 2>&1; then
-            optimize_tune_templates 2>/dev/null || true
-        fi
     fi
-    if type memory_finalize_pipeline &>/dev/null 2>&1; then
+    # Auto-learn after pipeline completion (non-blocking)
+    if type optimize_tune_templates &>/dev/null; then
+        (
+            optimize_tune_templates 2>/dev/null
+            optimize_learn_iterations 2>/dev/null
+            optimize_route_models 2>/dev/null
+            optimize_learn_risk_keywords 2>/dev/null
+        ) &
+    fi
+    if type memory_finalize_pipeline >/dev/null 2>&1; then
         memory_finalize_pipeline "$STATE_FILE" "$ARTIFACTS_DIR" 2>/dev/null || true
     fi
     # Broadcast discovery for cross-pipeline learning
-    if type broadcast_discovery &>/dev/null 2>&1; then
+    if type broadcast_discovery >/dev/null 2>&1; then
         local _disc_result="failure"
         [[ "$exit_code" -eq 0 ]] && _disc_result="success"
         local _disc_files=""
@@ -2209,6 +2542,29 @@ pipeline_start() {
         "model=$model_key" \
         "cost_usd=$total_cost"
+    # Record pipeline outcome for Thompson sampling / outcome-based learning
+    if type db_record_outcome >/dev/null 2>&1; then
+        local _outcome_success=0
+        [[ "$exit_code" -eq 0 ]] && _outcome_success=1
+        local _outcome_complexity="medium"
+        [[ "${INTELLIGENCE_COMPLEXITY:-5}" -le 3 ]] && _outcome_complexity="low"
+        [[ "${INTELLIGENCE_COMPLEXITY:-5}" -ge 7 ]] && _outcome_complexity="high"
+        db_record_outcome \
+            "${SHIPWRIGHT_PIPELINE_ID:-pipeline-$$-${ISSUE_NUMBER:-0}}" \
+            "${ISSUE_NUMBER:-}" \
+            "${PIPELINE_NAME:-standard}" \
+            "$_outcome_success" \
+            "${total_dur_s:-0}" \
+            "${SELF_HEAL_COUNT:-0}" \
+            "${total_cost:-0}" \
+            "$_outcome_complexity" 2>/dev/null || true
+    fi
+    # Validate cost prediction against actual (after total_cost is computed)
+    if [[ -n "${PREDICTED_COST:-}" ]] && type intelligence_validate_prediction >/dev/null 2>&1; then
+        intelligence_validate_prediction "cost" "$PREDICTED_COST" "$total_cost" 2>/dev/null || true
+    fi
     return $exit_code
 }