npm - shipwright-cli - Versions diffs - 1.9.0 → 2.0.0 - Mend

shipwright-cli 1.9.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117)

package/.claude/hooks/post-tool-use.sh +12 -5
package/README.md +114 -36
package/completions/_shipwright +212 -32
package/completions/shipwright.bash +97 -25
package/docs/strategy/01-market-research.md +619 -0
package/docs/strategy/02-mission-and-brand.md +587 -0
package/docs/strategy/03-gtm-and-roadmap.md +759 -0
package/docs/strategy/QUICK-START.txt +289 -0
package/docs/strategy/README.md +172 -0
package/package.json +4 -2
package/scripts/sw +217 -2
package/scripts/sw-activity.sh +500 -0
package/scripts/sw-adaptive.sh +925 -0
package/scripts/sw-adversarial.sh +1 -1
package/scripts/sw-architecture-enforcer.sh +1 -1
package/scripts/sw-auth.sh +613 -0
package/scripts/sw-autonomous.sh +664 -0
package/scripts/sw-changelog.sh +704 -0
package/scripts/sw-checkpoint.sh +79 -1
package/scripts/sw-ci.sh +602 -0
package/scripts/sw-cleanup.sh +192 -7
package/scripts/sw-code-review.sh +637 -0
package/scripts/sw-connect.sh +1 -1
package/scripts/sw-context.sh +605 -0
package/scripts/sw-cost.sh +1 -1
package/scripts/sw-daemon.sh +812 -138
package/scripts/sw-dashboard.sh +1 -1
package/scripts/sw-db.sh +540 -0
package/scripts/sw-decompose.sh +539 -0
package/scripts/sw-deps.sh +551 -0
package/scripts/sw-developer-simulation.sh +1 -1
package/scripts/sw-discovery.sh +412 -0
package/scripts/sw-docs-agent.sh +539 -0
package/scripts/sw-docs.sh +1 -1
package/scripts/sw-doctor.sh +59 -1
package/scripts/sw-dora.sh +615 -0
package/scripts/sw-durable.sh +710 -0
package/scripts/sw-e2e-orchestrator.sh +535 -0
package/scripts/sw-eventbus.sh +393 -0
package/scripts/sw-feedback.sh +471 -0
package/scripts/sw-fix.sh +1 -1
package/scripts/sw-fleet-discover.sh +567 -0
package/scripts/sw-fleet-viz.sh +404 -0
package/scripts/sw-fleet.sh +8 -1
package/scripts/sw-github-app.sh +596 -0
package/scripts/sw-github-checks.sh +1 -1
package/scripts/sw-github-deploy.sh +1 -1
package/scripts/sw-github-graphql.sh +1 -1
package/scripts/sw-guild.sh +569 -0
package/scripts/sw-heartbeat.sh +1 -1
package/scripts/sw-hygiene.sh +559 -0
package/scripts/sw-incident.sh +617 -0
package/scripts/sw-init.sh +88 -1
package/scripts/sw-instrument.sh +699 -0
package/scripts/sw-intelligence.sh +1 -1
package/scripts/sw-jira.sh +1 -1
package/scripts/sw-launchd.sh +366 -31
package/scripts/sw-linear.sh +1 -1
package/scripts/sw-logs.sh +1 -1
package/scripts/sw-loop.sh +507 -51
package/scripts/sw-memory.sh +198 -3
package/scripts/sw-mission-control.sh +487 -0
package/scripts/sw-model-router.sh +545 -0
package/scripts/sw-otel.sh +596 -0
package/scripts/sw-oversight.sh +689 -0
package/scripts/sw-pipeline-composer.sh +8 -8
package/scripts/sw-pipeline-vitals.sh +1096 -0
package/scripts/sw-pipeline.sh +2451 -180
package/scripts/sw-pm.sh +693 -0
package/scripts/sw-pr-lifecycle.sh +522 -0
package/scripts/sw-predictive.sh +1 -1
package/scripts/sw-prep.sh +1 -1
package/scripts/sw-ps.sh +4 -3
package/scripts/sw-public-dashboard.sh +798 -0
package/scripts/sw-quality.sh +595 -0
package/scripts/sw-reaper.sh +5 -3
package/scripts/sw-recruit.sh +573 -0
package/scripts/sw-regression.sh +642 -0
package/scripts/sw-release-manager.sh +736 -0
package/scripts/sw-release.sh +706 -0
package/scripts/sw-remote.sh +1 -1
package/scripts/sw-replay.sh +520 -0
package/scripts/sw-retro.sh +691 -0
package/scripts/sw-scale.sh +444 -0
package/scripts/sw-security-audit.sh +505 -0
package/scripts/sw-self-optimize.sh +109 -8
package/scripts/sw-session.sh +31 -9
package/scripts/sw-setup.sh +1 -1
package/scripts/sw-standup.sh +712 -0
package/scripts/sw-status.sh +192 -1
package/scripts/sw-strategic.sh +658 -0
package/scripts/sw-stream.sh +450 -0
package/scripts/sw-swarm.sh +583 -0
package/scripts/sw-team-stages.sh +511 -0
package/scripts/sw-templates.sh +1 -1
package/scripts/sw-testgen.sh +515 -0
package/scripts/sw-tmux-pipeline.sh +554 -0
package/scripts/sw-tmux.sh +1 -1
package/scripts/sw-trace.sh +485 -0
package/scripts/sw-tracker-github.sh +188 -0
package/scripts/sw-tracker-jira.sh +172 -0
package/scripts/sw-tracker-linear.sh +251 -0
package/scripts/sw-tracker.sh +117 -2
package/scripts/sw-triage.sh +603 -0
package/scripts/sw-upgrade.sh +1 -1
package/scripts/sw-ux.sh +677 -0
package/scripts/sw-webhook.sh +627 -0
package/scripts/sw-widgets.sh +530 -0
package/scripts/sw-worktree.sh +1 -1
package/templates/pipelines/autonomous.json +8 -1
package/templates/pipelines/cost-aware.json +21 -0
package/templates/pipelines/deployed.json +40 -6
package/templates/pipelines/enterprise.json +16 -2
package/templates/pipelines/fast.json +19 -0
package/templates/pipelines/full.json +16 -2
package/templates/pipelines/hotfix.json +19 -0
package/templates/pipelines/standard.json +19 -0

package/scripts/sw-loop.sh CHANGED

@@ -10,6 +10,11 @@
 set -euo pipefail
 trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
+# Allow spawning Claude CLI from within a Claude Code session (daemon, fleet, etc.)
+unset CLAUDECODE 2>/dev/null || true
+# Ignore SIGHUP so tmux attach/detach doesn't kill long-running agent sessions
+trap '' HUP
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # ─── Colors (matches shipwright theme) ──────────────────────────────────────────────
@@ -34,17 +39,25 @@ error()   { echo -e "${RED}${BOLD}✗${RESET} $*" >&2; }
 # ─── Defaults ─────────────────────────────────────────────────────────────────
 GOAL=""
+ORIGINAL_GOAL=""  # Preserved across restarts — GOAL gets appended to
 MAX_ITERATIONS="${SW_MAX_ITERATIONS:-20}"
 TEST_CMD=""
+FAST_TEST_CMD=""
+FAST_TEST_INTERVAL=5
+TEST_LOG_FILE=""
 MODEL="${SW_MODEL:-opus}"
 AGENTS=1
+AGENT_ROLES=""
 USE_WORKTREE=false
 SKIP_PERMISSIONS=false
 MAX_TURNS=""
 RESUME=false
 VERBOSE=false
 MAX_ITERATIONS_EXPLICIT=false
-VERSION="1.9.0"
+MAX_RESTARTS=0
+SESSION_RESTART=false
+RESTART_COUNT=0
+VERSION="2.0.0"
 # ─── Flexible Iteration Defaults ────────────────────────────────────────────
 AUTO_EXTEND=true          # Auto-extend iterations when work is incomplete
@@ -75,12 +88,16 @@ show_help() {
     echo -e "${BOLD}OPTIONS${RESET}"
     echo -e "  ${CYAN}--max-iterations${RESET} N       Max loop iterations (default: 20)"
     echo -e "  ${CYAN}--test-cmd${RESET} \"cmd\"         Test command to run between iterations"
+    echo -e "  ${CYAN}--fast-test-cmd${RESET} \"cmd\"      Fast/subset test command (alternates with full)"
+    echo -e "  ${CYAN}--fast-test-interval${RESET} N       Run full tests every N iterations (default: 5)"
     echo -e "  ${CYAN}--model${RESET} MODEL             Claude model to use (default: opus)"
     echo -e "  ${CYAN}--agents${RESET} N                Number of parallel agents (default: 1)"
+    echo -e "  ${CYAN}--roles${RESET} \"r1,r2,...\"        Role per agent: builder,reviewer,tester,optimizer,docs,security"
     echo -e "  ${CYAN}--worktree${RESET}                Use git worktrees for isolation (auto if agents > 1)"
     echo -e "  ${CYAN}--skip-permissions${RESET}        Pass --dangerously-skip-permissions to Claude"
     echo -e "  ${CYAN}--max-turns${RESET} N             Max API turns per Claude session"
     echo -e "  ${CYAN}--resume${RESET}                  Resume from existing .claude/loop-state.md"
+    echo -e "  ${CYAN}--max-restarts${RESET} N          Max session restarts on exhaustion (default: 0)"
     echo -e "  ${CYAN}--verbose${RESET}                 Show full Claude output (default: summary)"
     echo -e "  ${CYAN}--help${RESET}                    Show this help"
     echo ""
@@ -175,6 +192,30 @@ while [[ $# -gt 0 ]]; do
             shift 2
             ;;
         --max-extensions=*) MAX_EXTENSIONS="${1#--max-extensions=}"; shift ;;
+        --fast-test-cmd)
+            FAST_TEST_CMD="${2:-}"
+            [[ -z "$FAST_TEST_CMD" ]] && { error "Missing value for --fast-test-cmd"; exit 1; }
+            shift 2
+            ;;
+        --fast-test-cmd=*) FAST_TEST_CMD="${1#--fast-test-cmd=}"; shift ;;
+        --fast-test-interval)
+            FAST_TEST_INTERVAL="${2:-}"
+            [[ -z "$FAST_TEST_INTERVAL" ]] && { error "Missing value for --fast-test-interval"; exit 1; }
+            shift 2
+            ;;
+        --fast-test-interval=*) FAST_TEST_INTERVAL="${1#--fast-test-interval=}"; shift ;;
+        --max-restarts)
+            MAX_RESTARTS="${2:-}"
+            [[ -z "$MAX_RESTARTS" ]] && { error "Missing value for --max-restarts"; exit 1; }
+            shift 2
+            ;;
+        --max-restarts=*) MAX_RESTARTS="${1#--max-restarts=}"; shift ;;
+        --roles)
+            AGENT_ROLES="${2:-}"
+            [[ -z "$AGENT_ROLES" ]] && { error "Missing value for --roles"; exit 1; }
+            shift 2
+            ;;
+        --roles=*) AGENT_ROLES="${1#--roles=}"; shift ;;
         --help|-h)
             show_help
             exit 0
@@ -203,6 +244,27 @@ if [[ "$AGENTS" -gt 1 ]]; then
     USE_WORKTREE=true
 fi
+# Warn if --roles without --agents
+if [[ -n "$AGENT_ROLES" ]] && [[ "$AGENTS" -le 1 ]]; then
+    warn "--roles requires --agents > 1 (roles are ignored in single-agent mode)"
+fi
+# Warn if --max-restarts with --agents > 1 (not yet supported)
+if [[ "${MAX_RESTARTS:-0}" -gt 0 ]] && [[ "$AGENTS" -gt 1 ]]; then
+    warn "--max-restarts is ignored in multi-agent mode (restart support is single-agent only)"
+    MAX_RESTARTS=0
+fi
+# Validate numeric flags
+if ! [[ "$FAST_TEST_INTERVAL" =~ ^[1-9][0-9]*$ ]]; then
+    error "--fast-test-interval must be a positive integer (got: $FAST_TEST_INTERVAL)"
+    exit 1
+fi
+if ! [[ "$MAX_RESTARTS" =~ ^[0-9]+$ ]]; then
+    error "--max-restarts must be a non-negative integer (got: $MAX_RESTARTS)"
+    exit 1
+fi
 # ─── Validate Inputs ─────────────────────────────────────────────────────────
 if ! $RESUME && [[ -z "$GOAL" ]]; then
@@ -224,6 +286,9 @@ if ! git rev-parse --is-inside-work-tree &>/dev/null 2>&1; then
     exit 1
 fi
+# Preserve original goal before any appending (memory fixes, human feedback)
+ORIGINAL_GOAL="$GOAL"
 # ─── Timeout Detection ────────────────────────────────────────────────────────
 TIMEOUT_CMD=""
 if command -v timeout &>/dev/null; then
@@ -266,6 +331,17 @@ select_adaptive_model() {
         echo "$default_model"
         return 0
     fi
+    # Read learned model routing
+    local _routing_file="${HOME}/.shipwright/optimization/model-routing.json"
+    if [[ -f "$_routing_file" ]] && command -v jq &>/dev/null; then
+        local _routed_model
+        _routed_model=$(jq -r --arg r "$role" '.routes[$r].model // ""' "$_routing_file" 2>/dev/null) || true
+        if [[ -n "${_routed_model:-}" && "${_routed_model:-}" != "null" ]]; then
+            echo "${_routed_model}"
+            return 0
+        fi
+    fi
     # Try intelligence-based recommendation
     if type intelligence_recommend_model &>/dev/null 2>&1; then
         local rec
@@ -317,6 +393,18 @@ apply_adaptive_budget() {
         [[ -n "$tuned_cb" && "$tuned_cb" != "null" ]] && CIRCUIT_BREAKER_THRESHOLD="$tuned_cb"
     fi
+    # Read learned iteration model
+    local _iter_model="${HOME}/.shipwright/optimization/iteration-model.json"
+    if [[ -f "$_iter_model" ]] && ! $MAX_ITERATIONS_EXPLICIT && command -v jq &>/dev/null; then
+        local _complexity="${ISSUE_COMPLEXITY:-${COMPLEXITY:-medium}}"
+        local _predicted_max
+        _predicted_max=$(jq -r --arg c "$_complexity" '.predictions[$c].max_iterations // ""' "$_iter_model" 2>/dev/null) || true
+        if [[ -n "${_predicted_max:-}" && "${_predicted_max:-}" != "null" && "${_predicted_max:-0}" -gt 0 ]]; then
+            MAX_ITERATIONS="${_predicted_max}"
+            info "Iteration model: ${_complexity} complexity → max ${_predicted_max} iterations"
+        fi
+    fi
     # Try intelligence-based iteration estimate
     if type intelligence_estimate_iterations &>/dev/null 2>&1 && ! $MAX_ITERATIONS_EXPLICIT; then
         local est
@@ -481,31 +569,67 @@ resume_state() {
 }
 write_state() {
-    cat > "$STATE_FILE" <<EOF
----
-goal: "$GOAL"
-iteration: $ITERATION
-max_iterations: $MAX_ITERATIONS
-status: $STATUS
-test_cmd: "$TEST_CMD"
-model: $MODEL
-agents: $AGENTS
-started_at: $(now_iso)
-last_iteration_at: $(now_iso)
-consecutive_failures: $CONSECUTIVE_FAILURES
-total_commits: $TOTAL_COMMITS
-audit_enabled: $AUDIT_ENABLED
-audit_agent_enabled: $AUDIT_AGENT_ENABLED
-quality_gates_enabled: $QUALITY_GATES_ENABLED
-dod_file: "$DOD_FILE"
-auto_extend: $AUTO_EXTEND
-extension_count: $EXTENSION_COUNT
-max_extensions: $MAX_EXTENSIONS
----
-## Log
-$LOG_ENTRIES
-EOF
+    local tmp_state="${STATE_FILE}.tmp.$$"
+    # Use printf instead of heredoc to avoid delimiter injection from GOAL
+    {
+        printf -- '---\n'
+        printf 'goal: "%s"\n' "$GOAL"
+        printf 'iteration: %s\n' "$ITERATION"
+        printf 'max_iterations: %s\n' "$MAX_ITERATIONS"
+        printf 'status: %s\n' "$STATUS"
+        printf 'test_cmd: "%s"\n' "$TEST_CMD"
+        printf 'model: %s\n' "$MODEL"
+        printf 'agents: %s\n' "$AGENTS"
+        printf 'started_at: %s\n' "$(now_iso)"
+        printf 'last_iteration_at: %s\n' "$(now_iso)"
+        printf 'consecutive_failures: %s\n' "$CONSECUTIVE_FAILURES"
+        printf 'total_commits: %s\n' "$TOTAL_COMMITS"
+        printf 'audit_enabled: %s\n' "$AUDIT_ENABLED"
+        printf 'audit_agent_enabled: %s\n' "$AUDIT_AGENT_ENABLED"
+        printf 'quality_gates_enabled: %s\n' "$QUALITY_GATES_ENABLED"
+        printf 'dod_file: "%s"\n' "$DOD_FILE"
+        printf 'auto_extend: %s\n' "$AUTO_EXTEND"
+        printf 'extension_count: %s\n' "$EXTENSION_COUNT"
+        printf 'max_extensions: %s\n' "$MAX_EXTENSIONS"
+        printf -- '---\n\n'
+        printf '## Log\n'
+        printf '%s\n' "$LOG_ENTRIES"
+    } > "$tmp_state"
+    if ! mv "$tmp_state" "$STATE_FILE" 2>/dev/null; then
+        warn "Failed to write state file: $STATE_FILE"
+    fi
+}
+write_progress() {
+    local progress_file="$LOG_DIR/progress.md"
+    local recent_commits
+    recent_commits=$(git -C "$PROJECT_ROOT" log --oneline -5 2>/dev/null || echo "(no commits)")
+    local changed_files
+    changed_files=$(git -C "$PROJECT_ROOT" diff --name-only HEAD~3 2>/dev/null | head -20 || echo "(none)")
+    local last_error=""
+    local prev_test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
+    if [[ -f "$prev_test_log" ]] && [[ "${TEST_PASSED:-}" == "false" ]]; then
+        last_error=$(tail -10 "$prev_test_log" 2>/dev/null || true)
+    fi
+    # Use printf to avoid heredoc delimiter injection from GOAL content
+    local tmp_progress="${progress_file}.tmp.$$"
+    {
+        printf '# Session Progress (Auto-Generated)\n\n'
+        printf '## Goal\n%s\n\n' "${GOAL}"
+        printf '## Status\n'
+        printf -- '- Iteration: %s/%s\n' "${ITERATION}" "${MAX_ITERATIONS}"
+        printf -- '- Session restart: %s/%s\n' "${RESTART_COUNT:-0}" "${MAX_RESTARTS:-0}"
+        printf -- '- Tests passing: %s\n' "${TEST_PASSED:-unknown}"
+        printf -- '- Status: %s\n\n' "${STATUS:-running}"
+        printf '## Recent Commits\n%s\n\n' "${recent_commits}"
+        printf '## Changed Files\n%s\n\n' "${changed_files}"
+        if [[ -n "$last_error" ]]; then
+            printf '## Last Error\n%s\n\n' "$last_error"
+        fi
+        printf '## Timestamp\n%s\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
+    } > "$tmp_progress" 2>/dev/null
+    mv "$tmp_progress" "$progress_file" 2>/dev/null || rm -f "$tmp_progress" 2>/dev/null
 }
 append_log_entry() {
@@ -549,11 +673,50 @@ git_auto_commit() {
     return 0
 }
+# ─── Fatal Error Detection ────────────────────────────────────────────────────
+check_fatal_error() {
+    local log_file="$1"
+    local cli_exit_code="${2:-0}"
+    [[ -f "$log_file" ]] || return 1
+    # Known fatal error patterns from Claude CLI / Anthropic API
+    local fatal_patterns="Invalid API key|invalid_api_key|authentication_error|API key expired"
+    fatal_patterns="${fatal_patterns}|rate_limit_error|overloaded_error|billing"
+    fatal_patterns="${fatal_patterns}|Could not resolve host|connection refused|ECONNREFUSED"
+    fatal_patterns="${fatal_patterns}|ANTHROPIC_API_KEY.*not set|No API key"
+    if grep -qiE "$fatal_patterns" "$log_file" 2>/dev/null; then
+        local match
+        match=$(grep -iE "$fatal_patterns" "$log_file" 2>/dev/null | head -1 | cut -c1-120)
+        error "Fatal CLI error: $match"
+        return 0  # fatal error detected
+    fi
+    # Non-zero exit + tiny output = likely CLI crash
+    if [[ "$cli_exit_code" -ne 0 ]]; then
+        local line_count
+        line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || echo 0)
+        if [[ "$line_count" -lt 3 ]]; then
+            local content
+            content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
+            error "CLI exited $cli_exit_code with minimal output: $content"
+            return 0
+        fi
+    fi
+    return 1  # no fatal error
+}
 # ─── Progress & Circuit Breaker ───────────────────────────────────────────────
 check_progress() {
     local changes
-    changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null | tail -1 || echo "")"
+    # Exclude loop bookkeeping files — only count real code changes as progress
+    changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 \
+        -- . ':!.claude/loop-state.md' ':!.claude/pipeline-state.md' \
+        ':!**/progress.md' ':!**/error-summary.json' \
+        2>/dev/null | tail -1 || echo "")"
     local insertions
     insertions="$(echo "$changes" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)"
     if [[ "${insertions:-0}" -lt "$MIN_PROGRESS_LINES" ]]; then
@@ -568,6 +731,30 @@ check_completion() {
 }
 check_circuit_breaker() {
+    # Vitals-driven circuit breaker (preferred over static threshold)
+    if type pipeline_compute_vitals &>/dev/null 2>&1 && type pipeline_health_verdict &>/dev/null 2>&1; then
+        local _vitals_json _verdict
+        local _loop_state="${STATE_FILE:-}"
+        local _loop_artifacts="${ARTIFACTS_DIR:-}"
+        local _loop_issue="${ISSUE_NUMBER:-}"
+        _vitals_json=$(pipeline_compute_vitals "$_loop_state" "$_loop_artifacts" "$_loop_issue" 2>/dev/null) || true
+        if [[ -n "$_vitals_json" && "$_vitals_json" != "{}" ]]; then
+            _verdict=$(echo "$_vitals_json" | jq -r '.verdict // "continue"' 2>/dev/null || echo "continue")
+            if [[ "$_verdict" == "abort" ]]; then
+                local _health_score
+                _health_score=$(echo "$_vitals_json" | jq -r '.health_score // 0' 2>/dev/null || echo "0")
+                error "Vitals circuit breaker: health score ${_health_score}/100 — aborting (${CONSECUTIVE_FAILURES} stagnant iterations)"
+                STATUS="circuit_breaker"
+                return 1
+            fi
+            # Vitals say continue/warn/intervene — don't trip circuit breaker yet
+            if [[ "$_verdict" == "continue" || "$_verdict" == "warn" ]]; then
+                return 0
+            fi
+        fi
+    fi
+    # Fallback: static threshold circuit breaker
     if [[ "$CONSECUTIVE_FAILURES" -ge "$CIRCUIT_BREAKER_THRESHOLD" ]]; then
         error "Circuit breaker tripped: ${CIRCUIT_BREAKER_THRESHOLD} consecutive iterations with no meaningful progress."
         STATUS="circuit_breaker"
@@ -646,16 +833,88 @@ run_test_gate() {
         return
     fi
+    # Determine which test command to use this iteration
+    local active_test_cmd="$TEST_CMD"
+    local test_mode="full"
+    if [[ -n "$FAST_TEST_CMD" ]]; then
+        # Use full test every FAST_TEST_INTERVAL iterations, on first iteration, and on final iteration
+        if [[ "$ITERATION" -eq 1 ]] || [[ $(( ITERATION % FAST_TEST_INTERVAL )) -eq 0 ]] || [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]]; then
+            active_test_cmd="$TEST_CMD"
+            test_mode="full"
+        else
+            active_test_cmd="$FAST_TEST_CMD"
+            test_mode="fast"
+        fi
+    fi
     local test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
-    if bash -c "$TEST_CMD" > "$test_log" 2>&1; then
+    TEST_LOG_FILE="$test_log"
+    echo -e "  ${DIM}Running ${test_mode} tests...${RESET}"
+    # Wrap test command with timeout (5 min default) to prevent hanging
+    local test_timeout="${SW_TEST_TIMEOUT:-300}"
+    local test_wrapper="$active_test_cmd"
+    if command -v timeout &>/dev/null; then
+        test_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
+    elif command -v gtimeout &>/dev/null; then
+        test_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
+    fi
+    if bash -c "$test_wrapper" > "$test_log" 2>&1; then
         TEST_PASSED=true
-        TEST_OUTPUT="All tests passed."
+        TEST_OUTPUT="All tests passed (${test_mode} mode)."
     else
         TEST_PASSED=false
         TEST_OUTPUT="$(tail -50 "$test_log")"
     fi
 }
+write_error_summary() {
+    local error_json="$LOG_DIR/error-summary.json"
+    # Only write on test failure
+    if [[ "${TEST_PASSED:-}" != "false" ]]; then
+        # Clear previous error summary on success
+        rm -f "$error_json" 2>/dev/null || true
+        return
+    fi
+    local test_log="${TEST_LOG_FILE:-$LOG_DIR/tests-iter-${ITERATION}.log}"
+    [[ ! -f "$test_log" ]] && return
+    # Extract error lines (last 30 lines, grep for error patterns)
+    local error_lines_raw
+    error_lines_raw=$(tail -30 "$test_log" 2>/dev/null | grep -iE '(error|fail|assert|exception|panic|FAIL|TypeError|ReferenceError|SyntaxError)' | head -10 || true)
+    local error_count=0
+    if [[ -n "$error_lines_raw" ]]; then
+        error_count=$(echo "$error_lines_raw" | wc -l | tr -d ' ')
+    fi
+    local tmp_json="${error_json}.tmp.$$"
+    # Build JSON with jq (preferred) or plain-text fallback
+    if command -v jq &>/dev/null; then
+        jq -n \
+            --argjson iteration "${ITERATION:-0}" \
+            --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
+            --argjson error_count "${error_count:-0}" \
+            --arg error_lines "$error_lines_raw" \
+            --arg test_cmd "${TEST_CMD:-}" \
+            '{
+                iteration: $iteration,
+                timestamp: $timestamp,
+                error_count: $error_count,
+                error_lines: ($error_lines | split("\n") | map(select(length > 0))),
+                test_cmd: $test_cmd
+            }' > "$tmp_json" 2>/dev/null && mv "$tmp_json" "$error_json" || rm -f "$tmp_json" 2>/dev/null
+    else
+        # Fallback: write plain-text error summary (still machine-parseable)
+        cat > "$tmp_json" <<ERRJSON
+{"iteration":${ITERATION:-0},"error_count":${error_count:-0},"error_lines":[],"test_cmd":"test"}
+ERRJSON
+        mv "$tmp_json" "$error_json" 2>/dev/null || rm -f "$tmp_json" 2>/dev/null
+    fi
+}
 # ─── Audit Agent ─────────────────────────────────────────────────────────────
 run_audit_agent() {
@@ -884,6 +1143,21 @@ compose_prompt() {
 $TEST_OUTPUT"
     fi
+    # Structured error context (machine-readable)
+    local error_summary_section=""
+    local error_json="$LOG_DIR/error-summary.json"
+    if [[ -f "$error_json" ]]; then
+        local err_count err_lines
+        err_count=$(jq -r '.error_count // 0' "$error_json" 2>/dev/null || echo "0")
+        err_lines=$(jq -r '.error_lines[]? // empty' "$error_json" 2>/dev/null | head -10 || true)
+        if [[ "$err_count" -gt 0 ]] && [[ -n "$err_lines" ]]; then
+            error_summary_section="## Structured Error Summary (${err_count} errors detected)
+${err_lines}
+Fix these specific errors. Each line above is one distinct error from the test output."
+        fi
+    fi
     # Build audit sections (captured before heredoc to avoid nested heredoc issues)
     local audit_section
     audit_section="$(compose_audit_section)"
@@ -1006,6 +1280,16 @@ ${last_error}"
     local stuckness_section=""
     stuckness_section="$(detect_stuckness)"
+    # Session restart context — inject previous session progress
+    local restart_section=""
+    if [[ "$SESSION_RESTART" == "true" ]] && [[ -f "$LOG_DIR/progress.md" ]]; then
+        restart_section="## Previous Session Progress
+$(cat "$LOG_DIR/progress.md")
+You are starting a FRESH session after the previous one exhausted its iterations.
+Read the progress above and continue from where it left off. Do NOT repeat work already done."
+    fi
     cat <<PROMPT
 You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
@@ -1021,6 +1305,8 @@ ${git_log}
 ## Test Results (Previous Iteration)
 ${test_section}
+${error_summary_section:+$error_summary_section
+}
 ${memory_section:+## Memory Context
 $memory_section
 }
@@ -1028,6 +1314,8 @@ ${dora_section:+$dora_section
 }
 ${intelligence_section:+$intelligence_section
 }
+${restart_section:+$restart_section
+}
 ## Instructions
 1. Read the codebase and understand the current state
 2. Identify the highest-priority remaining work toward the goal
@@ -1189,6 +1477,37 @@ compose_worker_prompt() {
     local base_prompt
     base_prompt="$(compose_prompt)"
+    # Role-specific instructions
+    local role_section=""
+    if [[ -n "$AGENT_ROLES" ]] && [[ "${agent_num:-0}" -ge 1 ]]; then
+        # Split comma-separated roles and get role for this agent
+        local role=""
+        local IFS_BAK="$IFS"
+        IFS=',' read -ra _roles <<< "$AGENT_ROLES"
+        IFS="$IFS_BAK"
+        if [[ "$agent_num" -le "${#_roles[@]}" ]]; then
+            role="${_roles[$((agent_num - 1))]}"
+            # Trim whitespace and skip empty roles (handles trailing comma)
+            role="$(echo "$role" | tr -d ' ')"
+        fi
+        if [[ -n "$role" ]]; then
+            local role_desc=""
+            case "$role" in
+                builder)   role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
+                reviewer)  role_desc="Focus on code review — look for bugs, security issues, edge cases in recent commits. Make fixes via commits." ;;
+                tester)    role_desc="Focus on test coverage — write new tests, fix failing tests, improve assertions and edge case coverage." ;;
+                optimizer) role_desc="Focus on performance — profile hot paths, reduce complexity, optimize algorithms and data structures." ;;
+                docs)      role_desc="Focus on documentation — update README, add docstrings, write usage guides for new features." ;;
+                security)  role_desc="Focus on security — audit for vulnerabilities, fix injection risks, validate inputs, check auth boundaries." ;;
+                *)         role_desc="Focus on: ${role}. Apply your expertise in this area to advance the goal." ;;
+            esac
+            role_section="## Your Role: ${role}
+${role_desc}
+Prioritize work in your area of expertise. Coordinate with other agents via git log."
+        fi
+    fi
     cat <<PROMPT
 ${base_prompt}
@@ -1196,6 +1515,8 @@ ${base_prompt}
 You are Agent ${agent_num} of ${total_agents}. Other agents are working in parallel.
 Check git log to see what they've done — avoid duplicating their work.
 Focus on areas they haven't touched yet.
+${role_section}
 PROMPT
 }
@@ -1268,7 +1589,14 @@ extract_summary() {
     local summary
     summary="$(grep -v '^$' "$log_file" | tail -5 | head -3 2>/dev/null || echo "(no output)")"
     # Truncate long lines
-    echo "$summary" | cut -c1-120
+    summary="$(echo "$summary" | cut -c1-120)"
+    # Sanitize: if summary is just a CLI/API error, replace with generic text
+    if echo "$summary" | grep -qiE 'Invalid API key|authentication_error|rate_limit|API key expired|ANTHROPIC_API_KEY'; then
+        summary="(CLI error — no useful output this iteration)"
+    fi
+    echo "$summary"
 }
 # ─── Display Helpers ─────────────────────────────────────────────────────────
@@ -1547,17 +1875,21 @@ done
 echo -e "\n${DIM}Agent ${AGENT_NUM} finished after ${ITERATION} iterations${RESET}"
 WORKEREOF
-    # Replace placeholders
+    # Replace placeholders — use awk for all values to avoid sed injection
+    # (sed breaks on & | \ in paths and test commands)
     sed_i "s|__AGENT_NUM__|${agent_num}|g" "$worker_script"
     sed_i "s|__TOTAL_AGENTS__|${total_agents}|g" "$worker_script"
-    sed_i "s|__WORK_DIR__|${wt_path}|g" "$worker_script"
-    sed_i "s|__LOG_DIR__|${LOG_DIR}|g" "$worker_script"
     sed_i "s|__MAX_ITERATIONS__|${MAX_ITERATIONS}|g" "$worker_script"
-    sed_i "s|__TEST_CMD__|${TEST_CMD}|g" "$worker_script"
-    sed_i "s|__CLAUDE_FLAGS__|${claude_flags}|g" "$worker_script"
-    # Goal needs special handling for sed (may contain special chars)
-    # Use awk for safe string replacement without python
-    awk -v goal="$GOAL" '{gsub(/__GOAL__/, goal); print}' "$worker_script" > "${worker_script}.tmp" \
+    # Paths and commands may contain sed-special chars — use awk
+    awk -v val="$wt_path" '{gsub(/__WORK_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
+        && mv "${worker_script}.tmp" "$worker_script"
+    awk -v val="$LOG_DIR" '{gsub(/__LOG_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
+        && mv "${worker_script}.tmp" "$worker_script"
+    awk -v val="$TEST_CMD" '{gsub(/__TEST_CMD__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
+        && mv "${worker_script}.tmp" "$worker_script"
+    awk -v val="$claude_flags" '{gsub(/__CLAUDE_FLAGS__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
+        && mv "${worker_script}.tmp" "$worker_script"
+    awk -v val="$GOAL" '{gsub(/__GOAL__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
         && mv "${worker_script}.tmp" "$worker_script"
     chmod +x "$worker_script"
     echo "$worker_script"
@@ -1577,10 +1909,14 @@ launch_multi_agent() {
     MULTI_WINDOW_NAME="sw-loop-$(date +%s)"
     tmux new-window -n "$MULTI_WINDOW_NAME" -c "$PROJECT_ROOT"
+    # Capture the first pane's ID (stable regardless of pane-base-index)
+    local monitor_pane_id
+    monitor_pane_id="$(tmux list-panes -t "$MULTI_WINDOW_NAME" -F '#{pane_id}' 2>/dev/null | head -1)"
     # First pane becomes monitor
-    tmux send-keys -t "$MULTI_WINDOW_NAME" "printf '\\033]2;loop-monitor\\033\\\\'" Enter
+    tmux send-keys -t "$monitor_pane_id" "printf '\\033]2;loop-monitor\\033\\\\'" Enter
     sleep 0.2
-    tmux send-keys -t "$MULTI_WINDOW_NAME" "clear && echo 'Loop Monitor — watching agent logs...'" Enter
+    tmux send-keys -t "$monitor_pane_id" "clear && echo 'Loop Monitor — watching agent logs...'" Enter
     # Create worker panes
     for i in $(seq 1 "$AGENTS"); do
@@ -1596,12 +1932,12 @@ launch_multi_agent() {
     # Layout: monitor pane on top (35%), worker agents tile below
     tmux select-layout -t "$MULTI_WINDOW_NAME" main-vertical 2>/dev/null || true
-    tmux resize-pane -t "$MULTI_WINDOW_NAME.0" -y 35% 2>/dev/null || true
+    tmux resize-pane -t "$monitor_pane_id" -y 35% 2>/dev/null || true
     # In the monitor pane, tail all agent logs
-    tmux select-pane -t "$MULTI_WINDOW_NAME.0"
+    tmux select-pane -t "$monitor_pane_id"
     sleep 0.5
-    tmux send-keys -t "$MULTI_WINDOW_NAME.0" "clear && tail -f $LOG_DIR/agent-*-iter-*.log 2>/dev/null || echo 'Waiting for agent logs...'" Enter
+    tmux send-keys -t "$monitor_pane_id" "clear && tail -f $LOG_DIR/agent-*-iter-*.log 2>/dev/null || echo 'Waiting for agent logs...'" Enter
     success "Launched $AGENTS worker agents in window: $MULTI_WINDOW_NAME"
     echo ""
@@ -1656,12 +1992,13 @@ wait_for_multi_completion() {
 cleanup_multi_agent() {
     if [[ -n "$MULTI_WINDOW_NAME" ]]; then
-        # Send Ctrl-C to all panes in the worker window
-        local pane_count
-        pane_count="$(tmux list-panes -t "$MULTI_WINDOW_NAME" 2>/dev/null | wc -l | tr -d ' ')"
-        for i in $(seq 0 $(( pane_count - 1 ))); do
-            tmux send-keys -t "$MULTI_WINDOW_NAME.$i" C-c 2>/dev/null || true
-        done
+        # Send Ctrl-C to all panes using stable pane IDs (not indices)
+        # Pane IDs (%0, %1, ...) are unaffected by pane-base-index setting
+        local pane_id
+        while IFS= read -r pane_id; do
+            [[ -z "$pane_id" ]] && continue
+            tmux send-keys -t "$pane_id" C-c 2>/dev/null || true
+        done < <(tmux list-panes -t "$MULTI_WINDOW_NAME" -F '#{pane_id}' 2>/dev/null || true)
         sleep 1
         tmux kill-window -t "$MULTI_WINDOW_NAME" 2>/dev/null || true
     fi
@@ -1673,7 +2010,10 @@ cleanup_multi_agent() {
 # ─── Main: Single-Agent Loop ─────────────────────────────────────────────────
 run_single_agent_loop() {
-    if $RESUME; then
+    if [[ "$SESSION_RESTART" == "true" ]]; then
+        # Restart: state already reset by run_loop_with_restarts, skip init
+        info "Session restart ${RESTART_COUNT}/${MAX_RESTARTS} — fresh context, reading progress"
+    elif $RESUME; then
         resume_state
     else
         initialize_state
@@ -1683,6 +2023,9 @@ run_single_agent_loop() {
     apply_adaptive_budget
     MODEL="$(select_adaptive_model "build" "$MODEL")"
+    # Track applied memory fix patterns for outcome recording
+    _applied_fix_pattern=""
     show_banner
     while true; do
@@ -1691,12 +2034,42 @@ run_single_agent_loop() {
         check_max_iterations || break
         ITERATION=$(( ITERATION + 1 ))
+        # Try memory-based fix suggestion on retry after test failure
+        if [[ "${TEST_PASSED:-}" == "false" ]]; then
+            local _last_error=""
+            local _prev_log="$LOG_DIR/iteration-$(( ITERATION - 1 )).log"
+            if [[ -f "$_prev_log" ]]; then
+                _last_error=$(tail -20 "$_prev_log" 2>/dev/null | grep -iE '(error|fail|exception)' | head -1 || true)
+            fi
+            local _fix_suggestion=""
+            if type memory_closed_loop_inject &>/dev/null 2>&1 && [[ -n "${_last_error:-}" ]]; then
+                _fix_suggestion=$(memory_closed_loop_inject "$_last_error" 2>/dev/null) || true
+            fi
+            if [[ -n "${_fix_suggestion:-}" ]]; then
+                _applied_fix_pattern="${_last_error}"
+                GOAL="KNOWN FIX (from past success): ${_fix_suggestion}
+${GOAL}"
+                info "Memory fix injected: ${_fix_suggestion:0:80}"
+            fi
+        fi
         # Run Claude
         local exit_code=0
         run_claude_iteration || exit_code=$?
         local log_file="$LOG_DIR/iteration-${ITERATION}.log"
+        # Detect fatal CLI errors (API key, auth, network) — abort immediately
+        if check_fatal_error "$log_file" "$exit_code"; then
+            STATUS="error"
+            write_state
+            write_progress
+            error "Fatal CLI error detected — aborting loop (see iteration log)"
+            show_summary
+            return 1
+        fi
         # Mid-loop memory refresh — re-query with current error context after iteration 3
         if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context &>/dev/null 2>&1; then
             local refresh_ctx
@@ -1733,6 +2106,7 @@ run_single_agent_loop() {
         # Test gate
         run_test_gate
+        write_error_summary
         if [[ -n "$TEST_CMD" ]]; then
             if [[ "$TEST_PASSED" == "true" ]]; then
                 echo -e "  ${GREEN}✓${RESET} Tests: passed"
@@ -1741,6 +2115,18 @@ run_single_agent_loop() {
             fi
         fi
+        # Track fix outcome for memory effectiveness
+        if [[ -n "${_applied_fix_pattern:-}" ]]; then
+            if type memory_record_fix_outcome &>/dev/null 2>&1; then
+                if [[ "${TEST_PASSED:-}" == "true" ]]; then
+                    memory_record_fix_outcome "$_applied_fix_pattern" "true" "true" 2>/dev/null || true
+                else
+                    memory_record_fix_outcome "$_applied_fix_pattern" "true" "false" 2>/dev/null || true
+                fi
+            fi
+            _applied_fix_pattern=""
+        fi
         # Audit agent (reviews implementer's work)
         run_audit_agent
@@ -1751,6 +2137,7 @@ run_single_agent_loop() {
         if guard_completion; then
             STATUS="complete"
             write_state
+            write_progress
             show_summary
             return 0
         fi
@@ -1771,6 +2158,7 @@ run_single_agent_loop() {
 $summary
 "
         write_state
+        write_progress
         # Update heartbeat
         "$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
@@ -1799,9 +2187,77 @@ HUMAN FEEDBACK (received after iteration $ITERATION): $human_msg"
     # Write final state after loop exits
     write_state
+    write_progress
     show_summary
 }
+# ─── Session Restart Wrapper ─────────────────────────────────────────────────
+run_loop_with_restarts() {  # Runs run_single_agent_loop repeatedly, resetting session state between runs, until STATUS is "complete" or a restart limit is reached
+    while true; do
+        local loop_exit=0  # exit status of the most recent session; returned to the caller when the wrapper gives up
+        run_single_agent_loop || loop_exit=$?  # record a non-zero status without leaving the wrapper
+        # Stop when the session completed, or when restarts are disabled (MAX_RESTARTS <= 0)
+        if [[ "$STATUS" == "complete" ]]; then
+            return 0
+        fi  # NOTE(review): only "complete" short-circuits here; a session that ended on the fatal-CLI-error path (STATUS="error", return 1) still reaches the restart logic below when MAX_RESTARTS > 0 — confirm that is intended
+        if [[ "$MAX_RESTARTS" -le 0 ]]; then
+            return "$loop_exit"
+        fi
+        if [[ "$RESTART_COUNT" -ge "$MAX_RESTARTS" ]]; then
+            warn "Max restarts ($MAX_RESTARTS) reached — stopping"
+            return "$loop_exit"
+        fi
+        # Hard cap safety net: stop after 5 restarts even if MAX_RESTARTS is configured higher
+        if [[ "$RESTART_COUNT" -ge 5 ]]; then
+            warn "Hard restart cap (5) reached — stopping"
+            return "$loop_exit"
+        fi
+        # Log why the session is being restarted; both branches continue to the restart (only the message differs)
+        if [[ "${TEST_PASSED:-}" == "true" ]]; then
+            info "Tests passing but loop incomplete — restarting session"
+        else
+            info "Tests failing and loop exhausted — restarting with fresh context"
+        fi
+        RESTART_COUNT=$(( RESTART_COUNT + 1 ))
+        if type emit_event &>/dev/null 2>&1; then  # emit the event only when emit_event is available in this shell
+            emit_event "loop.restart" "restart=$RESTART_COUNT" "max=$MAX_RESTARTS" "iteration=$ITERATION"
+        fi
+        info "Session restart ${RESTART_COUNT}/${MAX_RESTARTS} — resetting iteration counter"
+        # Reset ALL iteration-level state for the new session (counters, status, log buffer, test results)
+        # SESSION_RESTART=true makes run_single_agent_loop skip both initialize_state and resume_state
+        SESSION_RESTART=true
+        ITERATION=0
+        CONSECUTIVE_FAILURES=0
+        EXTENSION_COUNT=0
+        STATUS="running"
+        LOG_ENTRIES=""
+        TEST_PASSED=""
+        TEST_OUTPUT=""
+        TEST_LOG_FILE=""
+        # Reset GOAL to the original text — prevents unbounded growth from memory/human injections prepended during a session
+        GOAL="$ORIGINAL_GOAL"
+        # Archive old artifacts into a per-restart directory so they don't get overwritten or pollute the new session
+        local restart_archive="$LOG_DIR/restart-${RESTART_COUNT}"
+        mkdir -p "$restart_archive"
+        for old_log in "$LOG_DIR"/iteration-*.log "$LOG_DIR"/tests-iter-*.log; do
+            [[ -f "$old_log" ]] && mv "$old_log" "$restart_archive/" 2>/dev/null || true  # entries that are not regular files are skipped; move failures are ignored
+        done
+        # Archive the previous session's progress.md (copied, so the original stays in LOG_DIR) and error-summary.json (moved out of LOG_DIR)
+        [[ -f "$LOG_DIR/progress.md" ]] && cp "$LOG_DIR/progress.md" "$restart_archive/progress.md" 2>/dev/null || true
+        [[ -f "$LOG_DIR/error-summary.json" ]] && mv "$LOG_DIR/error-summary.json" "$restart_archive/" 2>/dev/null || true
+        write_state  # presumably persists the reset state; helper is defined outside this view — TODO confirm
+        sleep 2  # pause two seconds before the next session starts
+    done
+}
 # ─── Main: Entry Point ───────────────────────────────────────────────────────
 main() {
@@ -1815,7 +2271,7 @@ main() {
         launch_multi_agent
         show_summary
     else
-        run_single_agent_loop
+        run_loop_with_restarts
     fi
 }