npm - shipwright-cli - Versions diffs - 1.10.0 → 2.1.0 - Mend

shipwright-cli 1.10.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (121) hide show

package/README.md +221 -55
package/completions/_shipwright +264 -32
package/completions/shipwright.bash +118 -26
package/completions/shipwright.fish +80 -2
package/dashboard/server.ts +208 -0
package/docs/strategy/01-market-research.md +619 -0
package/docs/strategy/02-mission-and-brand.md +587 -0
package/docs/strategy/03-gtm-and-roadmap.md +759 -0
package/docs/strategy/QUICK-START.txt +289 -0
package/docs/strategy/README.md +172 -0
package/docs/tmux-research/TMUX-ARCHITECTURE.md +567 -0
package/docs/tmux-research/TMUX-AUDIT.md +925 -0
package/docs/tmux-research/TMUX-BEST-PRACTICES-2025-2026.md +829 -0
package/docs/tmux-research/TMUX-QUICK-REFERENCE.md +543 -0
package/docs/tmux-research/TMUX-RESEARCH-INDEX.md +438 -0
package/package.json +4 -2
package/scripts/lib/helpers.sh +7 -0
package/scripts/sw +323 -2
package/scripts/sw-activity.sh +500 -0
package/scripts/sw-adaptive.sh +925 -0
package/scripts/sw-adversarial.sh +1 -1
package/scripts/sw-architecture-enforcer.sh +1 -1
package/scripts/sw-auth.sh +613 -0
package/scripts/sw-autonomous.sh +754 -0
package/scripts/sw-changelog.sh +704 -0
package/scripts/sw-checkpoint.sh +1 -1
package/scripts/sw-ci.sh +602 -0
package/scripts/sw-cleanup.sh +1 -1
package/scripts/sw-code-review.sh +698 -0
package/scripts/sw-connect.sh +1 -1
package/scripts/sw-context.sh +605 -0
package/scripts/sw-cost.sh +44 -3
package/scripts/sw-daemon.sh +568 -138
package/scripts/sw-dashboard.sh +1 -1
package/scripts/sw-db.sh +1380 -0
package/scripts/sw-decompose.sh +539 -0
package/scripts/sw-deps.sh +551 -0
package/scripts/sw-developer-simulation.sh +1 -1
package/scripts/sw-discovery.sh +412 -0
package/scripts/sw-docs-agent.sh +539 -0
package/scripts/sw-docs.sh +1 -1
package/scripts/sw-doctor.sh +107 -1
package/scripts/sw-dora.sh +615 -0
package/scripts/sw-durable.sh +710 -0
package/scripts/sw-e2e-orchestrator.sh +535 -0
package/scripts/sw-eventbus.sh +393 -0
package/scripts/sw-feedback.sh +479 -0
package/scripts/sw-fix.sh +1 -1
package/scripts/sw-fleet-discover.sh +567 -0
package/scripts/sw-fleet-viz.sh +404 -0
package/scripts/sw-fleet.sh +8 -1
package/scripts/sw-github-app.sh +596 -0
package/scripts/sw-github-checks.sh +4 -4
package/scripts/sw-github-deploy.sh +1 -1
package/scripts/sw-github-graphql.sh +1 -1
package/scripts/sw-guild.sh +569 -0
package/scripts/sw-heartbeat.sh +1 -1
package/scripts/sw-hygiene.sh +559 -0
package/scripts/sw-incident.sh +656 -0
package/scripts/sw-init.sh +237 -24
package/scripts/sw-instrument.sh +699 -0
package/scripts/sw-intelligence.sh +1 -1
package/scripts/sw-jira.sh +1 -1
package/scripts/sw-launchd.sh +363 -28
package/scripts/sw-linear.sh +1 -1
package/scripts/sw-logs.sh +1 -1
package/scripts/sw-loop.sh +267 -21
package/scripts/sw-memory.sh +18 -1
package/scripts/sw-mission-control.sh +487 -0
package/scripts/sw-model-router.sh +545 -0
package/scripts/sw-otel.sh +596 -0
package/scripts/sw-oversight.sh +764 -0
package/scripts/sw-pipeline-composer.sh +1 -1
package/scripts/sw-pipeline-vitals.sh +1 -1
package/scripts/sw-pipeline.sh +947 -35
package/scripts/sw-pm.sh +758 -0
package/scripts/sw-pr-lifecycle.sh +522 -0
package/scripts/sw-predictive.sh +8 -1
package/scripts/sw-prep.sh +1 -1
package/scripts/sw-ps.sh +1 -1
package/scripts/sw-public-dashboard.sh +798 -0
package/scripts/sw-quality.sh +595 -0
package/scripts/sw-reaper.sh +1 -1
package/scripts/sw-recruit.sh +2248 -0
package/scripts/sw-regression.sh +642 -0
package/scripts/sw-release-manager.sh +736 -0
package/scripts/sw-release.sh +706 -0
package/scripts/sw-remote.sh +1 -1
package/scripts/sw-replay.sh +520 -0
package/scripts/sw-retro.sh +691 -0
package/scripts/sw-scale.sh +444 -0
package/scripts/sw-security-audit.sh +505 -0
package/scripts/sw-self-optimize.sh +1 -1
package/scripts/sw-session.sh +1 -1
package/scripts/sw-setup.sh +263 -127
package/scripts/sw-standup.sh +712 -0
package/scripts/sw-status.sh +44 -2
package/scripts/sw-strategic.sh +806 -0
package/scripts/sw-stream.sh +450 -0
package/scripts/sw-swarm.sh +620 -0
package/scripts/sw-team-stages.sh +511 -0
package/scripts/sw-templates.sh +4 -4
package/scripts/sw-testgen.sh +566 -0
package/scripts/sw-tmux-pipeline.sh +554 -0
package/scripts/sw-tmux-role-color.sh +58 -0
package/scripts/sw-tmux-status.sh +128 -0
package/scripts/sw-tmux.sh +1 -1
package/scripts/sw-trace.sh +485 -0
package/scripts/sw-tracker-github.sh +188 -0
package/scripts/sw-tracker-jira.sh +172 -0
package/scripts/sw-tracker-linear.sh +251 -0
package/scripts/sw-tracker.sh +117 -2
package/scripts/sw-triage.sh +627 -0
package/scripts/sw-upgrade.sh +1 -1
package/scripts/sw-ux.sh +677 -0
package/scripts/sw-webhook.sh +627 -0
package/scripts/sw-widgets.sh +530 -0
package/scripts/sw-worktree.sh +1 -1
package/templates/pipelines/autonomous.json +2 -2
package/tmux/shipwright-overlay.conf +35 -17
package/tmux/tmux.conf +23 -21

package/scripts/sw-loop.sh CHANGED Viewed

@@ -10,6 +10,11 @@
 set -euo pipefail
 trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
+# Allow spawning Claude CLI from within a Claude Code session (daemon, fleet, etc.)
+unset CLAUDECODE 2>/dev/null || true
+# Ignore SIGHUP so tmux attach/detach doesn't kill long-running agent sessions
+trap '' HUP
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # ─── Colors (matches shipwright theme) ──────────────────────────────────────────────
@@ -52,7 +57,13 @@ MAX_ITERATIONS_EXPLICIT=false
 MAX_RESTARTS=0
 SESSION_RESTART=false
 RESTART_COUNT=0
-VERSION="1.10.0"
+REPO_OVERRIDE=""
+VERSION="2.1.0"
+# ─── Token Tracking ─────────────────────────────────────────────────────────
+LOOP_INPUT_TOKENS=0
+LOOP_OUTPUT_TOKENS=0
+LOOP_COST_MILLICENTS=0
 # ─── Flexible Iteration Defaults ────────────────────────────────────────────
 AUTO_EXTEND=true          # Auto-extend iterations when work is incomplete
@@ -81,6 +92,8 @@ show_help() {
     echo -e "  ${CYAN}shipwright loop${RESET} \"<goal>\" [options]"
     echo ""
     echo -e "${BOLD}OPTIONS${RESET}"
+    echo -e "  ${CYAN}--repo <path>${RESET}             Change to directory before running (must be a git repo)"
+    echo -e "  ${CYAN}--local${RESET}                   Disable GitHub operations (local-only mode)"
     echo -e "  ${CYAN}--max-iterations${RESET} N       Max loop iterations (default: 20)"
     echo -e "  ${CYAN}--test-cmd${RESET} \"cmd\"         Test command to run between iterations"
     echo -e "  ${CYAN}--fast-test-cmd${RESET} \"cmd\"      Fast/subset test command (alternates with full)"
@@ -130,6 +143,16 @@ show_help() {
 while [[ $# -gt 0 ]]; do
     case "$1" in
+        --repo)
+            REPO_OVERRIDE="${2:-}"
+            [[ -z "$REPO_OVERRIDE" ]] && { error "Missing value for --repo"; exit 1; }
+            shift 2
+            ;;
+        --repo=*) REPO_OVERRIDE="${1#--repo=}"; shift ;;
+        --local)
+            # Skip GitHub operations in loop
+            export NO_GITHUB=true
+            shift ;;
         --max-iterations)
             MAX_ITERATIONS="${2:-}"
             MAX_ITERATIONS_EXPLICIT=true
@@ -239,16 +262,28 @@ if [[ "$AGENTS" -gt 1 ]]; then
     USE_WORKTREE=true
 fi
+# Multi-agent run without explicit roles: let sw-recruit.sh propose the team.
+# Every step is best-effort — any failure leaves AGENT_ROLES untouched.
+if [[ "$AGENTS" -gt 1 && -z "$AGENT_ROLES" && -x "${SCRIPT_DIR:-}/sw-recruit.sh" ]]; then
+    _recruit_goal="${GOAL:-}"
+    if [[ -n "$_recruit_goal" ]]; then
+        _recruit_team=$(bash "$SCRIPT_DIR/sw-recruit.sh" team --json "$_recruit_goal" 2>/dev/null) || true
+        if [[ -n "$_recruit_team" ]]; then
+            # .team is joined into the comma-separated form used for AGENT_ROLES
+            _recruit_roles=$(jq -r '.team | join(",")' <<< "$_recruit_team" 2>/dev/null) || true
+            case "$_recruit_roles" in
+                ""|null)
+                    ;;  # nothing usable came back — keep roles unset
+                *)
+                    AGENT_ROLES="$_recruit_roles"
+                    info "Recruit assigned roles: ${AGENT_ROLES}"
+                    ;;
+            esac
+        fi
+    fi
+fi
 # Warn if --roles without --agents
 if [[ -n "$AGENT_ROLES" ]] && [[ "$AGENTS" -le 1 ]]; then
     warn "--roles requires --agents > 1 (roles are ignored in single-agent mode)"
 fi
-# Warn if --max-restarts with --agents > 1 (not yet supported)
-if [[ "${MAX_RESTARTS:-0}" -gt 0 ]] && [[ "$AGENTS" -gt 1 ]]; then
-    warn "--max-restarts is ignored in multi-agent mode (restart support is single-agent only)"
-    MAX_RESTARTS=0
-fi
+# max-restarts is supported in both single-agent and multi-agent mode
+# In multi-agent mode, restarts apply per-agent (agent can be respawned up to MAX_RESTARTS)
 # Validate numeric flags
 if ! [[ "$FAST_TEST_INTERVAL" =~ ^[1-9][0-9]*$ ]]; then
@@ -270,6 +305,23 @@ if ! $RESUME && [[ -z "$GOAL" ]]; then
     exit 1
 fi
+# Handle --repo flag: change to directory before running
+if [[ -n "$REPO_OVERRIDE" ]]; then
+    if [[ ! -d "$REPO_OVERRIDE" ]]; then
+        error "Directory does not exist: $REPO_OVERRIDE"
+        exit 1
+    fi
+    if ! cd "$REPO_OVERRIDE" 2>/dev/null; then
+        error "Cannot cd to: $REPO_OVERRIDE"
+        exit 1
+    fi
+    if ! git rev-parse --show-toplevel >/dev/null 2>&1; then
+        error "Not a git repository: $REPO_OVERRIDE"
+        exit 1
+    fi
+    info "Using repository: $(pwd)"
+fi
 if ! command -v claude &>/dev/null; then
     error "Claude Code CLI not found. Install it first:"
     echo -e "  ${DIM}npm install -g @anthropic-ai/claude-code${RESET}"
@@ -368,6 +420,110 @@ select_audit_model() {
     echo "$default_model"
 }
+# ─── Token Accumulation ─────────────────────────────────────────────────────
+# Parse token counts from Claude CLI JSON output and accumulate running totals.
+# With --output-format json, the output is a JSON array containing a "result"
+# object with usage.input_tokens, usage.output_tokens, and total_cost_usd.
+accumulate_loop_tokens() {
+    local log_file="$1"
+    [[ ! -f "$log_file" ]] && return 0
+    # If jq is available and the file looks like JSON, parse structured output
+    if command -v jq &>/dev/null && head -c1 "$log_file" 2>/dev/null | grep -q '\['; then
+        local input_tok output_tok cache_read cache_create cost_usd
+        # The result object is the last element in the JSON array
+        input_tok=$(jq -r '.[-1].usage.input_tokens // 0' "$log_file" 2>/dev/null || echo "0")
+        output_tok=$(jq -r '.[-1].usage.output_tokens // 0' "$log_file" 2>/dev/null || echo "0")
+        cache_read=$(jq -r '.[-1].usage.cache_read_input_tokens // 0' "$log_file" 2>/dev/null || echo "0")
+        cache_create=$(jq -r '.[-1].usage.cache_creation_input_tokens // 0' "$log_file" 2>/dev/null || echo "0")
+        cost_usd=$(jq -r '.[-1].total_cost_usd // 0' "$log_file" 2>/dev/null || echo "0")
+        LOOP_INPUT_TOKENS=$(( LOOP_INPUT_TOKENS + ${input_tok:-0} + ${cache_read:-0} + ${cache_create:-0} ))
+        LOOP_OUTPUT_TOKENS=$(( LOOP_OUTPUT_TOKENS + ${output_tok:-0} ))
+        # Accumulate cost in millicents for integer arithmetic
+        if [[ -n "$cost_usd" && "$cost_usd" != "0" && "$cost_usd" != "null" ]]; then
+            local cost_millicents
+            cost_millicents=$(echo "$cost_usd" | awk '{printf "%.0f", $1 * 100000}' 2>/dev/null || echo "0")
+            LOOP_COST_MILLICENTS=$(( ${LOOP_COST_MILLICENTS:-0} + ${cost_millicents:-0} ))
+        fi
+    else
+        # Fallback: regex-based parsing for non-JSON output
+        local input_tok output_tok
+        input_tok=$(grep -oE 'input[_ ]tokens?[: ]+[0-9,]+' "$log_file" 2>/dev/null | tail -1 | grep -oE '[0-9,]+' | tr -d ',' || echo "0")
+        output_tok=$(grep -oE 'output[_ ]tokens?[: ]+[0-9,]+' "$log_file" 2>/dev/null | tail -1 | grep -oE '[0-9,]+' | tr -d ',' || echo "0")
+        LOOP_INPUT_TOKENS=$(( LOOP_INPUT_TOKENS + ${input_tok:-0} ))
+        LOOP_OUTPUT_TOKENS=$(( LOOP_OUTPUT_TOKENS + ${output_tok:-0} ))
+    fi
+}
+# ─── JSON→Text Extraction ──────────────────────────────────────────────────
+# Extract plain text from Claude's --output-format json response into a log file.
+# Arguments: $1 - file holding the raw CLI stdout
+#            $2 - destination text log
+#            $3 - optional stderr capture, used when stdout is empty
+# Accepts a JSON array (result record = last element) or a single JSON object;
+# malformed JSON, non-JSON output and empty output are handled case by case.
+# Every branch writes $2 and returns 0.
+_extract_text_from_json() {
+    local json_file="$1" log_file="$2" err_file="${3:-}"
+    # Case 1: File doesn't exist or is empty — use stderr if it has content
+    if [[ ! -s "$json_file" ]]; then
+        if [[ -n "$err_file" && -s "$err_file" ]]; then
+            cp "$err_file" "$log_file"
+        else
+            echo "(no output)" > "$log_file"
+        fi
+        return 0
+    fi
+    local first_char
+    first_char=$(head -c1 "$json_file" 2>/dev/null || true)
+    # Case 2: Not JSON at all (plain text, error message, etc.) — use as-is
+    if [[ "$first_char" != "[" && "$first_char" != "{" ]]; then
+        cp "$json_file" "$log_file"
+        return 0
+    fi
+    # Case 3: Looks like JSON but no jq — can't parse, use raw
+    if ! command -v jq &>/dev/null; then
+        warn "JSON output but jq not available — using raw output"
+        cp "$json_file" "$log_file"
+        return 0
+    fi
+    # Case 4: JSON and jq is available — take .result from the last array
+    # element, or from the document itself when the top level is an object
+    local extracted
+    extracted=$(jq -r '(if type == "array" then .[-1] else . end) | .result // empty' "$json_file" 2>/dev/null) || true
+    if [[ -n "$extracted" ]]; then
+        # printf rather than echo: the text is arbitrary and could look like an echo option
+        printf '%s\n' "$extracted" > "$log_file"
+        return 0
+    fi
+    # Object without a usable .result (for example an error payload): keep the
+    # raw text, as before, so anything scanning the log still sees the message
+    if [[ "$first_char" == "{" ]]; then
+        cp "$json_file" "$log_file"
+        return 0
+    fi
+    # Array whose last element has no .result — try any .content fields instead
+    extracted=$(jq -r '.[].content // empty' "$json_file" 2>/dev/null | head -500) || true
+    if [[ -n "$extracted" ]]; then
+        printf '%s\n' "$extracted" > "$log_file"
+        return 0
+    fi
+    # JSON parsed (or failed to parse) but no text found — write placeholder
+    warn "JSON output has no .result field — check $json_file"
+    echo "(no text result in JSON output)" > "$log_file"
+    return 0
+}
+# Write accumulated token totals to a JSON file for the pipeline to read.
+write_loop_tokens() {
+    local token_file="$LOG_DIR/loop-tokens.json"
+    local cost_usd="0"
+    if [[ "${LOOP_COST_MILLICENTS:-0}" -gt 0 ]]; then
+        cost_usd=$(awk "BEGIN {printf \"%.6f\", ${LOOP_COST_MILLICENTS} / 100000}" 2>/dev/null || echo "0")
+    fi
+    local tmp_file
+    tmp_file=$(mktemp "${token_file}.XXXXXX" 2>/dev/null || mktemp)
+    cat > "$tmp_file" <<TOKJSON
+{"input_tokens":${LOOP_INPUT_TOKENS},"output_tokens":${LOOP_OUTPUT_TOKENS},"cost_usd":${cost_usd},"iterations":${ITERATION:-0}}
+TOKJSON
+    mv "$tmp_file" "$token_file"
+}
 # ─── Adaptive Iteration Budget ──────────────────────────────────────────────
 # Reads tuning config for smarter iteration/circuit-breaker thresholds.
 apply_adaptive_budget() {
@@ -668,11 +824,50 @@ git_auto_commit() {
     return 0
 }
+# ─── Fatal Error Detection ────────────────────────────────────────────────────
+check_fatal_error() {
+    local log_file="$1"
+    local cli_exit_code="${2:-0}"
+    [[ -f "$log_file" ]] || return 1
+    # Known fatal error patterns from Claude CLI / Anthropic API
+    local fatal_patterns="Invalid API key|invalid_api_key|authentication_error|API key expired"
+    fatal_patterns="${fatal_patterns}|rate_limit_error|overloaded_error|billing"
+    fatal_patterns="${fatal_patterns}|Could not resolve host|connection refused|ECONNREFUSED"
+    fatal_patterns="${fatal_patterns}|ANTHROPIC_API_KEY.*not set|No API key"
+    if grep -qiE "$fatal_patterns" "$log_file" 2>/dev/null; then
+        local match
+        match=$(grep -iE "$fatal_patterns" "$log_file" 2>/dev/null | head -1 | cut -c1-120)
+        error "Fatal CLI error: $match"
+        return 0  # fatal error detected
+    fi
+    # Non-zero exit + tiny output = likely CLI crash
+    if [[ "$cli_exit_code" -ne 0 ]]; then
+        local line_count
+        line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || echo 0)
+        if [[ "$line_count" -lt 3 ]]; then
+            local content
+            content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
+            error "CLI exited $cli_exit_code with minimal output: $content"
+            return 0
+        fi
+    fi
+    return 1  # no fatal error
+}
 # ─── Progress & Circuit Breaker ───────────────────────────────────────────────
 check_progress() {
     local changes
-    changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null | tail -1 || echo "")"
+    # Exclude loop bookkeeping files — only count real code changes as progress
+    changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 \
+        -- . ':!.claude/loop-state.md' ':!.claude/pipeline-state.md' \
+        ':!**/progress.md' ':!**/error-summary.json' \
+        2>/dev/null | tail -1 || echo "")"
     local insertions
     insertions="$(echo "$changes" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)"
     if [[ "${insertions:-0}" -lt "$MIN_PROGRESS_LINES" ]]; then
@@ -1449,15 +1644,27 @@ compose_worker_prompt() {
         if [[ -n "$role" ]]; then
             local role_desc=""
-            case "$role" in
-                builder)   role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
-                reviewer)  role_desc="Focus on code review — look for bugs, security issues, edge cases in recent commits. Make fixes via commits." ;;
-                tester)    role_desc="Focus on test coverage — write new tests, fix failing tests, improve assertions and edge case coverage." ;;
-                optimizer) role_desc="Focus on performance — profile hot paths, reduce complexity, optimize algorithms and data structures." ;;
-                docs)      role_desc="Focus on documentation — update README, add docstrings, write usage guides for new features." ;;
-                security)  role_desc="Focus on security — audit for vulnerabilities, fix injection risks, validate inputs, check auth boundaries." ;;
-                *)         role_desc="Focus on: ${role}. Apply your expertise in this area to advance the goal." ;;
-            esac
+            # Try to pull description from recruit's roles DB first
+            local recruit_roles_db="${HOME}/.shipwright/recruitment/roles.json"
+            if [[ -f "$recruit_roles_db" ]] && command -v jq &>/dev/null; then
+                local recruit_desc
+                recruit_desc=$(jq -r --arg r "$role" '.[$r].description // ""' "$recruit_roles_db" 2>/dev/null) || true
+                if [[ -n "$recruit_desc" && "$recruit_desc" != "null" ]]; then
+                    role_desc="$recruit_desc"
+                fi
+            fi
+            # Fallback to hardcoded descriptions
+            if [[ -z "$role_desc" ]]; then
+                case "$role" in
+                    builder)   role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
+                    reviewer)  role_desc="Focus on code review — look for bugs, security issues, edge cases in recent commits. Make fixes via commits." ;;
+                    tester)    role_desc="Focus on test coverage — write new tests, fix failing tests, improve assertions and edge case coverage." ;;
+                    optimizer) role_desc="Focus on performance — profile hot paths, reduce complexity, optimize algorithms and data structures." ;;
+                    docs)      role_desc="Focus on documentation — update README, add docstrings, write usage guides for new features." ;;
+                    security)  role_desc="Focus on security — audit for vulnerabilities, fix injection risks, validate inputs, check auth boundaries." ;;
+                    *)         role_desc="Focus on: ${role}. Apply your expertise in this area to advance the goal." ;;
+                esac
+            fi
             role_section="## Your Role: ${role}
 ${role_desc}
 Prioritize work in your area of expertise. Coordinate with other agents via git log."
@@ -1481,6 +1688,7 @@ PROMPT
 build_claude_flags() {
     local flags=()
     flags+=("--model" "$MODEL")
+    flags+=("--output-format" "json")
     if $SKIP_PERMISSIONS; then
         flags+=("--dangerously-skip-permissions")
@@ -1495,6 +1703,7 @@ build_claude_flags() {
 run_claude_iteration() {
     local log_file="$LOG_DIR/iteration-${ITERATION}.log"
+    local json_file="$LOG_DIR/iteration-${ITERATION}.json"
     local prompt
     prompt="$(compose_prompt)"
@@ -1507,12 +1716,14 @@ run_claude_iteration() {
     echo -e "\n${CYAN}${BOLD}▸${RESET} ${BOLD}Iteration ${ITERATION}/${MAX_ITERATIONS}${RESET} — Starting..."
     # Run Claude headless (with timeout + PID capture for signal handling)
+    # Output goes to .json first, then we extract text into .log for compat
     local exit_code=0
     # shellcheck disable=SC2086
+    local err_file="${json_file%.json}.stderr"
     if [[ -n "$TIMEOUT_CMD" ]]; then
-        $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$prompt" $flags > "$log_file" 2>&1 &
+        $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
     else
-        claude -p "$prompt" $flags > "$log_file" 2>&1 &
+        claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
     fi
     CHILD_PID=$!
     wait "$CHILD_PID" 2>/dev/null || exit_code=$?
@@ -1521,12 +1732,19 @@ run_claude_iteration() {
         warn "Claude CLI timed out after ${CLAUDE_TIMEOUT}s"
     fi
+    # Extract text result from JSON into .log for backwards compatibility
+    # With --output-format json, stdout is a JSON array; .[-1].result has the text
+    _extract_text_from_json "$json_file" "$log_file" "$err_file"
     local iter_end
     iter_end="$(now_epoch)"
     local iter_duration=$(( iter_end - iter_start ))
     echo -e "  ${GREEN}✓${RESET} Claude session completed ($(format_duration "$iter_duration"), exit $exit_code)"
+    # Accumulate token usage from this iteration's JSON output
+    accumulate_loop_tokens "$json_file"
     # Show verbose output if requested
     if $VERBOSE; then
         echo -e "  ${DIM}─── Claude Output ───${RESET}"
@@ -1545,7 +1763,14 @@ extract_summary() {
     local summary
     summary="$(grep -v '^$' "$log_file" | tail -5 | head -3 2>/dev/null || echo "(no output)")"
     # Truncate long lines
-    echo "$summary" | cut -c1-120
+    summary="$(echo "$summary" | cut -c1-120)"
+    # Sanitize: if summary is just a CLI/API error, replace with generic text
+    if echo "$summary" | grep -qiE 'Invalid API key|authentication_error|rate_limit|API key expired|ANTHROPIC_API_KEY'; then
+        summary="(CLI error — no useful output this iteration)"
+    fi
+    echo "$summary"
 }
 # ─── Display Helpers ─────────────────────────────────────────────────────────
@@ -1615,10 +1840,16 @@ show_summary() {
     echo -e "  ${BOLD}Duration:${RESET}    $(format_duration "$duration")"
     echo -e "  ${BOLD}Commits:${RESET}     $TOTAL_COMMITS"
     echo -e "  ${BOLD}Tests:${RESET}       $test_display"
+    if [[ "$LOOP_INPUT_TOKENS" -gt 0 || "$LOOP_OUTPUT_TOKENS" -gt 0 ]]; then
+        echo -e "  ${BOLD}Tokens:${RESET}      in=${LOOP_INPUT_TOKENS} out=${LOOP_OUTPUT_TOKENS}"
+    fi
     echo ""
     echo -e "  ${DIM}State: $STATE_FILE${RESET}"
     echo -e "  ${DIM}Logs:  $LOG_DIR/${RESET}"
     echo ""
+    # Write token totals for pipeline cost tracking
+    write_loop_tokens
 }
 # ─── Signal Handling ──────────────────────────────────────────────────────────
@@ -1781,10 +2012,15 @@ Focus on areas they haven't touched yet.
 PROMPT
 )"
-    # Run Claude
+    # Run Claude (output is JSON due to --output-format json in CLAUDE_FLAGS)
+    local JSON_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.json"
+    local ERR_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.stderr"
     LOG_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.log"
     # shellcheck disable=SC2086
-    claude -p "$PROMPT" $CLAUDE_FLAGS > "$LOG_FILE" 2>&1 || true
+    claude -p "$PROMPT" $CLAUDE_FLAGS > "$JSON_FILE" 2>"$ERR_FILE" || true
+    # Extract text result from JSON into .log for backwards compat
+    _extract_text_from_json "$JSON_FILE" "$LOG_FILE" "$ERR_FILE"
     echo -e "  ${GREEN}✓${RESET} Claude session completed"
@@ -2009,6 +2245,16 @@ ${GOAL}"
         local log_file="$LOG_DIR/iteration-${ITERATION}.log"
+        # Detect fatal CLI errors (API key, auth, network) — abort immediately
+        if check_fatal_error "$log_file" "$exit_code"; then
+            STATUS="error"
+            write_state
+            write_progress
+            error "Fatal CLI error detected — aborting loop (see iteration log)"
+            show_summary
+            return 1
+        fi
         # Mid-loop memory refresh — re-query with current error context after iteration 3
         if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context &>/dev/null 2>&1; then
             local refresh_ctx

package/scripts/sw-memory.sh CHANGED Viewed

@@ -6,7 +6,7 @@
 set -euo pipefail
 trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
-VERSION="1.10.0"
+VERSION="2.1.0"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
@@ -1077,6 +1077,20 @@ memory_get_dora_baseline() {
     echo "$metrics"
 }
+# memory_get_baseline <metric_name>
+# Print the stored baseline for one metric (bundle_size_kb, test_duration_s,
+# coverage_pct, etc.), read from .baselines in the repo's metrics.json.
+# Used by pipeline for regression checks. Prints nothing when the metrics file
+# or the baseline is absent. Returns 1 only when no metric name is given.
+memory_get_baseline() {
+    local metric_name="${1:-}"
+    if [[ -z "$metric_name" ]]; then
+        return 1
+    fi
+    ensure_memory_dir
+    local metrics_file
+    metrics_file="$(repo_memory_dir)/metrics.json"
+    if [[ -f "$metrics_file" ]]; then
+        # jq errors are swallowed on purpose: an unreadable file means "no baseline"
+        jq -r --arg m "$metric_name" '.baselines[$m] // empty' "$metrics_file" 2>/dev/null || true
+    fi
+}
 # memory_update_metrics <metric_name> <value>
 # Track performance baselines and flag regressions.
 memory_update_metrics() {
@@ -1602,6 +1616,9 @@ case "$SUBCOMMAND" in
     pattern)
         memory_capture_pattern "$@"
         ;;
+    get)
+        memory_get_baseline "$@"
+        ;;
     metric)
         memory_update_metrics "$@"
         ;;