npm - shipwright-cli - Versions diffs - 3.0.0 → 3.2.0 - Mend

shipwright-cli 3.0.0 → 3.2.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (143)

package/README.md +21 -7
package/completions/_shipwright +247 -93
package/completions/shipwright.bash +69 -15
package/completions/shipwright.fish +309 -41
package/config/decision-tiers.json +55 -0
package/config/defaults.json +25 -2
package/config/event-schema.json +142 -5
package/config/policy.json +8 -0
package/dashboard/public/index.html +6 -0
package/dashboard/public/styles.css +76 -0
package/dashboard/server.ts +51 -0
package/dashboard/src/core/api.ts +5 -0
package/dashboard/src/types/api.ts +10 -0
package/dashboard/src/views/metrics.ts +69 -1
package/package.json +3 -3
package/scripts/lib/architecture.sh +2 -1
package/scripts/lib/bootstrap.sh +0 -0
package/scripts/lib/config.sh +0 -0
package/scripts/lib/daemon-adaptive.sh +4 -2
package/scripts/lib/daemon-dispatch.sh +24 -1
package/scripts/lib/daemon-failure.sh +0 -0
package/scripts/lib/daemon-health.sh +0 -0
package/scripts/lib/daemon-patrol.sh +42 -7
package/scripts/lib/daemon-poll.sh +17 -0
package/scripts/lib/daemon-state.sh +17 -0
package/scripts/lib/daemon-triage.sh +1 -1
package/scripts/lib/decide-autonomy.sh +295 -0
package/scripts/lib/decide-scoring.sh +228 -0
package/scripts/lib/decide-signals.sh +462 -0
package/scripts/lib/fleet-failover.sh +0 -0
package/scripts/lib/helpers.sh +19 -18
package/scripts/lib/pipeline-detection.sh +1 -1
package/scripts/lib/pipeline-github.sh +0 -0
package/scripts/lib/pipeline-intelligence.sh +23 -4
package/scripts/lib/pipeline-quality-checks.sh +11 -6
package/scripts/lib/pipeline-quality.sh +0 -0
package/scripts/lib/pipeline-stages.sh +330 -33
package/scripts/lib/pipeline-state.sh +14 -0
package/scripts/lib/policy.sh +0 -0
package/scripts/lib/test-helpers.sh +0 -0
package/scripts/postinstall.mjs +75 -1
package/scripts/signals/example-collector.sh +36 -0
package/scripts/sw +8 -4
package/scripts/sw-activity.sh +1 -7
package/scripts/sw-adaptive.sh +7 -7
package/scripts/sw-adversarial.sh +1 -1
package/scripts/sw-architecture-enforcer.sh +1 -1
package/scripts/sw-auth.sh +1 -1
package/scripts/sw-autonomous.sh +1 -1
package/scripts/sw-changelog.sh +1 -1
package/scripts/sw-checkpoint.sh +1 -1
package/scripts/sw-ci.sh +11 -6
package/scripts/sw-cleanup.sh +1 -1
package/scripts/sw-code-review.sh +36 -17
package/scripts/sw-connect.sh +1 -1
package/scripts/sw-context.sh +1 -1
package/scripts/sw-cost.sh +71 -5
package/scripts/sw-daemon.sh +6 -3
package/scripts/sw-dashboard.sh +1 -1
package/scripts/sw-db.sh +53 -38
package/scripts/sw-decide.sh +685 -0
package/scripts/sw-decompose.sh +1 -1
package/scripts/sw-deps.sh +1 -1
package/scripts/sw-developer-simulation.sh +1 -1
package/scripts/sw-discovery.sh +80 -4
package/scripts/sw-doc-fleet.sh +1 -1
package/scripts/sw-docs-agent.sh +1 -1
package/scripts/sw-docs.sh +1 -1
package/scripts/sw-doctor.sh +1 -1
package/scripts/sw-dora.sh +1 -1
package/scripts/sw-durable.sh +9 -5
package/scripts/sw-e2e-orchestrator.sh +1 -1
package/scripts/sw-eventbus.sh +7 -4
package/scripts/sw-evidence.sh +1 -1
package/scripts/sw-feedback.sh +1 -1
package/scripts/sw-fix.sh +1 -1
package/scripts/sw-fleet-discover.sh +1 -1
package/scripts/sw-fleet-viz.sh +6 -4
package/scripts/sw-fleet.sh +1 -1
package/scripts/sw-github-app.sh +3 -2
package/scripts/sw-github-checks.sh +1 -1
package/scripts/sw-github-deploy.sh +1 -1
package/scripts/sw-github-graphql.sh +1 -1
package/scripts/sw-guild.sh +1 -1
package/scripts/sw-heartbeat.sh +1 -1
package/scripts/sw-hygiene.sh +5 -3
package/scripts/sw-incident.sh +9 -5
package/scripts/sw-init.sh +1 -1
package/scripts/sw-instrument.sh +1 -1
package/scripts/sw-intelligence.sh +11 -6
package/scripts/sw-jira.sh +1 -1
package/scripts/sw-launchd.sh +1 -1
package/scripts/sw-linear.sh +1 -1
package/scripts/sw-logs.sh +1 -1
package/scripts/sw-loop.sh +338 -32
package/scripts/sw-memory.sh +23 -6
package/scripts/sw-mission-control.sh +1 -1
package/scripts/sw-model-router.sh +3 -2
package/scripts/sw-otel.sh +8 -4
package/scripts/sw-oversight.sh +1 -1
package/scripts/sw-pipeline-composer.sh +3 -1
package/scripts/sw-pipeline-vitals.sh +11 -6
package/scripts/sw-pipeline.sh +92 -8
package/scripts/sw-pm.sh +5 -4
package/scripts/sw-pr-lifecycle.sh +7 -4
package/scripts/sw-predictive.sh +11 -5
package/scripts/sw-prep.sh +1 -1
package/scripts/sw-ps.sh +1 -1
package/scripts/sw-public-dashboard.sh +3 -2
package/scripts/sw-quality.sh +21 -10
package/scripts/sw-reaper.sh +1 -1
package/scripts/sw-recruit.sh +1 -1
package/scripts/sw-regression.sh +1 -1
package/scripts/sw-release-manager.sh +1 -1
package/scripts/sw-release.sh +1 -1
package/scripts/sw-remote.sh +1 -1
package/scripts/sw-replay.sh +1 -1
package/scripts/sw-retro.sh +1 -1
package/scripts/sw-review-rerun.sh +1 -1
package/scripts/sw-scale.sh +69 -11
package/scripts/sw-security-audit.sh +1 -1
package/scripts/sw-self-optimize.sh +168 -4
package/scripts/sw-session.sh +3 -3
package/scripts/sw-setup.sh +1 -1
package/scripts/sw-standup.sh +1 -1
package/scripts/sw-status.sh +1 -1
package/scripts/sw-strategic.sh +11 -6
package/scripts/sw-stream.sh +7 -4
package/scripts/sw-swarm.sh +3 -2
package/scripts/sw-team-stages.sh +1 -1
package/scripts/sw-templates.sh +3 -3
package/scripts/sw-testgen.sh +11 -6
package/scripts/sw-tmux-pipeline.sh +1 -1
package/scripts/sw-tmux.sh +35 -1
package/scripts/sw-trace.sh +1 -1
package/scripts/sw-tracker.sh +1 -1
package/scripts/sw-triage.sh +7 -7
package/scripts/sw-upgrade.sh +1 -1
package/scripts/sw-ux.sh +1 -1
package/scripts/sw-webhook.sh +3 -2
package/scripts/sw-widgets.sh +7 -4
package/scripts/sw-worktree.sh +1 -1
package/scripts/update-homebrew-sha.sh +21 -15

package/scripts/sw-loop.sh CHANGED

@@ -14,6 +14,7 @@ trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
 unset CLAUDECODE 2>/dev/null || true
 # Ignore SIGHUP so tmux attach/detach doesn't kill long-running agent sessions
 trap '' HUP
+trap '' SIGPIPE
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
@@ -24,6 +25,14 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # shellcheck source=lib/helpers.sh
 [[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
 [[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
+# Source DB for dual-write (emit_event → JSONL + SQLite).
+# Note: do NOT call init_schema here — the pipeline (sw-pipeline.sh) owns schema
+# initialization. Calling it here would create an empty DB that shadows JSON cost data.
+if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
+    source "$SCRIPT_DIR/sw-db.sh" 2>/dev/null || true
+fi
+# Cross-pipeline discovery (learnings from other pipeline runs)
+[[ -f "$SCRIPT_DIR/sw-discovery.sh" ]] && source "$SCRIPT_DIR/sw-discovery.sh" 2>/dev/null || true
 # Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
 [[ "$(type -t info 2>/dev/null)" == "function" ]]    || info()    { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
 [[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
@@ -63,7 +72,7 @@ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
 SESSION_RESTART=false
 RESTART_COUNT=0
 REPO_OVERRIDE=""
-VERSION="3.0.0"
+VERSION="3.2.0"
 # ─── Token Tracking ─────────────────────────────────────────────────────────
 LOOP_INPUT_TOKENS=0
@@ -655,6 +664,9 @@ initialize_state() {
     STATUS="running"
     LOG_ENTRIES=""
+    # Record starting commit for cumulative diff in quality gates
+    LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
     write_state
 }
@@ -726,6 +738,11 @@ resume_state() {
     START_EPOCH="$(now_epoch)"
     STATUS="running"
+    # Set starting commit for cumulative diff (approximate: use earliest tracked commit)
+    if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
+        LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
+    fi
     # If we hit max iterations before, warn user to extend
     if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
         warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
@@ -872,7 +889,8 @@ validate_claude_output() {
     # Check for obviously corrupt output (API errors dumped as code)
     local total_changed
-    total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || echo "0")
+    total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || true)
+    total_changed="${total_changed:-0}"
     if [[ "$total_changed" -eq 0 ]]; then
         warn "Claude iteration produced no file changes"
         issues=$((issues + 1))
@@ -960,13 +978,14 @@ check_fatal_error() {
         local match
         match=$(grep -iE "$fatal_patterns" "$log_file" 2>/dev/null | head -1 | cut -c1-120)
         error "Fatal CLI error: $match"
-        return 0  # fatal error detected
+        return 1  # fatal error detected
     fi
     # Non-zero exit + tiny output = likely CLI crash
     if [[ "$cli_exit_code" -ne 0 ]]; then
         local line_count
-        line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || echo 0)
+        line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
+        line_count="${line_count:-0}"
         if [[ "$line_count" -lt 3 ]]; then
             local content
             content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
@@ -1140,7 +1159,8 @@ diagnose_failure() {
     local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
     local repeat_count=0
     if [[ -f "$diagnosis_file" ]]; then
-        repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || echo "0")
+        repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || true)
+        repeat_count="${repeat_count:-0}"
     fi
     echo "$diagnosis" >> "$diagnosis_file"
@@ -1317,33 +1337,60 @@ run_audit_agent() {
     local log_file="$LOG_DIR/iteration-${ITERATION}.log"
     local audit_log="$LOG_DIR/audit-iter-${ITERATION}.log"
-    # Gather context: tail of implementer output + git diff
+    # Gather context: tail of implementer output + cumulative diff
     local impl_tail
     impl_tail="$(tail -100 "$log_file" 2>/dev/null || echo "(no output)")"
-    local diff_stat
-    diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
+    # Use cumulative diff from loop start so auditor sees ALL work, not just latest commit
+    local diff_stat cumulative_note=""
+    if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
+        diff_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no changes)")"
+        cumulative_note="Note: This diff shows ALL changes since the loop started (iteration 1 through ${ITERATION}), not just the latest commit."
+    else
+        diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
+    fi
+    # Include verified test status so auditor doesn't have to guess
+    local test_context=""
+    if [[ -n "$TEST_CMD" ]]; then
+        if [[ "${TEST_PASSED:-}" == "true" ]]; then
+            test_context="## Verified Test Status (from harness, not from agent)
+Tests: ALL PASSING (command: ${TEST_CMD})"
+        else
+            test_context="## Verified Test Status (from harness)
+Tests: FAILING (command: ${TEST_CMD})
+$(echo "${TEST_OUTPUT:-}" | tail -10)"
+        fi
+    fi
     local audit_prompt
     read -r -d '' audit_prompt <<AUDIT_PROMPT || true
-You are an independent code auditor reviewing an autonomous coding agent.
+You are an independent code auditor reviewing an autonomous coding agent's CUMULATIVE work.
+This is iteration ${ITERATION}. The agent may have done most of the work in earlier iterations.
 ## Goal the agent was working toward
 ${GOAL}
-## Agent Output (last 100 lines)
+## Agent Output This Iteration (last 100 lines)
 ${impl_tail}
-## Changes Made (git diff --stat)
+## Cumulative Changes Made (git diff --stat)
+${cumulative_note}
 ${diff_stat}
+${test_context}
 ## Your Task
-Critically review the work:
-1. Did the agent make meaningful progress toward the goal?
-2. Are there obvious bugs, logic errors, or security issues?
+Critically review the CUMULATIVE work (not just the latest iteration):
+1. Has the agent made meaningful progress toward the goal across all iterations?
+2. Are there obvious bugs, logic errors, or security issues in the current codebase?
 3. Did the agent leave incomplete work (TODOs, placeholder code)?
 4. Are there any regressions or broken patterns?
 5. Is the code quality acceptable?
+IMPORTANT: If the current iteration made small or no code changes, that may be acceptable
+if earlier iterations already completed the substantive work. Judge the whole body of work.
 If the work is acceptable and moves toward the goal, output exactly: AUDIT_PASS
 Otherwise, list the specific issues that need fixing.
 AUDIT_PROMPT
@@ -1429,21 +1476,52 @@ check_definition_of_done() {
     local dod_content
     dod_content="$(cat "$DOD_FILE")"
+    # Use cumulative diff from loop start (not just HEAD~1) so the evaluator
+    # can see ALL work done across every iteration, not just the latest commit.
     local diff_content
-    diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
+    if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
+        diff_content="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no diff)")"
+        diff_content="${diff_content}
+## Detailed Changes (cumulative diff, truncated to 200 lines)
+$(git -C "$PROJECT_ROOT" diff "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -200 || echo "(no diff)")"
+    else
+        diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
+    fi
+    # Inject verified runtime facts so the evaluator doesn't have to guess
+    local runtime_facts=""
+    if [[ -n "$TEST_CMD" ]]; then
+        if [[ "${TEST_PASSED:-}" == "true" ]]; then
+            runtime_facts="## Verified Runtime Facts (from the loop harness, not from the agent)
+- Tests: ALL PASSING (verified by running '${TEST_CMD}' after this iteration)
+- Test output (last 10 lines):
+$(echo "${TEST_OUTPUT:-}" | tail -10)"
+        else
+            runtime_facts="## Verified Runtime Facts
+- Tests: FAILING (verified by running '${TEST_CMD}')
+- Test output (last 10 lines):
+$(echo "${TEST_OUTPUT:-}" | tail -10)"
+        fi
+    fi
     local dod_prompt
     read -r -d '' dod_prompt <<DOD_PROMPT || true
-You are evaluating whether code changes satisfy a Definition of Done checklist.
+You are evaluating whether a project satisfies a Definition of Done checklist.
+You are reviewing the CUMULATIVE work across all iterations, not just the latest commit.
 ## Definition of Done
 ${dod_content}
-## Changes Made (git diff)
+${runtime_facts}
+## Cumulative Changes Made (git diff from start of loop to now)
 ${diff_content}
 ## Your Task
-For each item in the Definition of Done, determine if the changes satisfy it.
+For each item in the Definition of Done, determine if the project satisfies it.
+The runtime facts above are verified by the harness — trust them as ground truth.
 If ALL items are satisfied, output exactly: DOD_PASS
 Otherwise, list which items are NOT satisfied and why.
 DOD_PROMPT
@@ -1497,6 +1575,14 @@ guard_completion() {
         rejection_reasons+=("tests failing")
     fi
+    # Holistic final gate: when all other gates pass, run a project-level assessment
+    # that evaluates the entire codebase against the goal (not just the latest diff)
+    if [[ ${#rejection_reasons[@]} -eq 0 ]]; then
+        if ! run_holistic_gate; then
+            rejection_reasons+=("holistic project assessment found gaps")
+        fi
+    fi
     if [[ ${#rejection_reasons[@]} -gt 0 ]]; then
         local reasons_str
         reasons_str="$(printf ', %s' "${rejection_reasons[@]}")"
@@ -1510,17 +1596,88 @@ guard_completion() {
     return 0
 }
+# Holistic gate: evaluates the full project against the original goal.
+# Only runs when all other gates pass (final checkpoint before acceptance).
+run_holistic_gate() {
+    # Skip if no starting commit (can't compute cumulative diff)
+    [[ -z "${LOOP_START_COMMIT:-}" ]] && return 0
+    local holistic_log="$LOG_DIR/holistic-iter-${ITERATION}.log"
+    # Build a project summary: file tree, test count, cumulative diff stats
+    local file_count
+    file_count=$(git -C "$PROJECT_ROOT" ls-files | wc -l | tr -d ' ')
+    local cumulative_stat
+    cumulative_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || echo "(no changes)")"
+    local test_summary=""
+    if [[ -n "${TEST_OUTPUT:-}" ]]; then
+        test_summary="$(echo "$TEST_OUTPUT" | tail -5)"
+    fi
+    local holistic_prompt
+    read -r -d '' holistic_prompt <<HOLISTIC_PROMPT || true
+You are a final quality gate evaluating whether an autonomous coding agent has FULLY achieved its goal.
+## Original Goal
+${GOAL}
+## Project Stats
+- Files in repo: ${file_count}
+- Iterations completed: ${ITERATION}
+- Cumulative changes: ${cumulative_stat}
+- Tests: ${TEST_PASSED:-unknown} (command: ${TEST_CMD:-none})
+${test_summary:+- Test output: ${test_summary}}
+## Cumulative Git Changes (diff --stat from start)
+$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -40 || echo "(none)")
+## Your Task
+Based on the goal and the cumulative work done:
+1. Has the goal been FULLY achieved (not partially)?
+2. Is there any critical gap that would make this unacceptable for production?
+If the goal is fully achieved, output exactly: HOLISTIC_PASS
+Otherwise, list the specific gaps remaining.
+HOLISTIC_PROMPT
+    echo -e "  ${PURPLE}▸${RESET} Running holistic project assessment..."
+    local hol_model
+    hol_model="$(select_audit_model)"
+    local hol_flags=("--model" "$hol_model")
+    if $SKIP_PERMISSIONS; then
+        hol_flags+=("--dangerously-skip-permissions")
+    fi
+    claude -p "$holistic_prompt" "${hol_flags[@]}" > "$holistic_log" 2>&1 || true
+    if grep -q "HOLISTIC_PASS" "$holistic_log" 2>/dev/null; then
+        echo -e "  ${GREEN}✓${RESET} Holistic assessment: passed"
+        return 0
+    else
+        echo -e "  ${YELLOW}⚠${RESET} Holistic assessment: gaps found"
+        return 1
+    fi
+}
 # ─── Context Window Management ───────────────────────────────────────────────
 # Prevents prompt from exceeding Claude's context limit (~200K tokens).
 # Trims least-critical sections first when over budget.
-CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-180000}"  # ~45K tokens at 4 chars/token
+CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-$(_config_get_int "loop.context_budget_chars" 180000 2>/dev/null || echo 180000)}"  # ~45K tokens at 4 chars/token
 manage_context_window() {
     local prompt="$1"
     local budget="${CONTEXT_BUDGET_CHARS}"
     local current_len=${#prompt}
+    # Read trimming tunables from config (env > daemon-config > policy > defaults.json)
+    local trim_memory_chars trim_git_entries trim_hotspot_files trim_test_lines
+    trim_memory_chars=$(_config_get_int "loop.context_trim_memory_chars" 20000 2>/dev/null || echo 20000)
+    trim_git_entries=$(_config_get_int "loop.context_trim_git_entries" 10 2>/dev/null || echo 10)
+    trim_hotspot_files=$(_config_get_int "loop.context_trim_hotspot_files" 5 2>/dev/null || echo 5)
+    trim_test_lines=$(_config_get_int "loop.context_trim_test_lines" 50 2>/dev/null || echo 50)
     if [[ "$current_len" -le "$budget" ]]; then
         echo "$prompt"
         return
@@ -1534,19 +1691,19 @@ manage_context_window() {
         trimmed=$(echo "$trimmed" | awk '/^## Performance Baselines/{skip=1; next} skip && /^## [^#]/{skip=0} !skip{print}')
     fi
-    # 2. Trim file hotspots to top 5
+    # 2. Trim file hotspots to top N
     if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>5) next} {print}')
+        trimmed=$(echo "$trimmed" | awk -v max="$trim_hotspot_files" '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>max) next} {print}')
     fi
-    # 3. Trim git log to last 10 entries
+    # 3. Trim git log to last N entries
     if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>10) next} {print}')
+        trimmed=$(echo "$trimmed" | awk -v max="$trim_git_entries" '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>max) next} {print}')
     fi
-    # 4. Truncate memory context to first 20K chars
+    # 4. Truncate memory context to first N chars
     if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk -v max=20000 '
+        trimmed=$(echo "$trimmed" | awk -v max="$trim_memory_chars" '
             /## Memory Context/{mem=1; skip_rest=0; chars=0; print; next}
             mem && /^## [^#]/{mem=0; print; next}
             mem{chars+=length($0)+1; if(chars>max){print "... (memory truncated for context budget)"; skip_rest=1; mem=0; next}}
@@ -1556,11 +1713,11 @@ manage_context_window() {
         ')
     fi
-    # 5. Truncate test output to last 50 lines
+    # 5. Truncate test output to last N lines
     if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk '
+        trimmed=$(echo "$trimmed" | awk -v max="$trim_test_lines" '
             /## Test Results/{found=1; buf=""; print; next}
-            found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>50)?(n-49):1; for(i=start;i<=n;i++) if(arr[i]!="") print arr[i]; print; next}
+            found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>max)?(n-max+1):1; for(i=start;i<=n;i++) if(arr[i]!="") print arr[i]; print; next}
             found{buf=buf $0 "\n"; next}
             {print}
         ')
@@ -1639,6 +1796,16 @@ Fix these specific errors. Each line above is one distinct error from the test o
         memory_section="$("$SCRIPT_DIR/sw-memory.sh" inject build 2>/dev/null || true)"
     fi
+    # Cross-pipeline discovery injection (learnings from other pipeline runs)
+    local discovery_section=""
+    if type inject_discoveries >/dev/null 2>&1; then
+        local disc_output
+        disc_output="$(inject_discoveries "${GOAL:-}" 2>/dev/null || true)"
+        if [[ -n "$disc_output" ]]; then
+            discovery_section="$disc_output"
+        fi
+    fi
     # DORA baselines for context
     local dora_section=""
     if type memory_get_dora_baseline >/dev/null 2>&1; then
@@ -1810,12 +1977,25 @@ ${_test_tail}
         RESUMED_TEST_OUTPUT=""
     fi
+    # Build cumulative progress summary showing all iterations' work
+    local cumulative_section=""
+    if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
+        local cum_stat
+        cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
+        if [[ -n "$cum_stat" ]]; then
+            cumulative_section="## Cumulative Progress (all iterations combined)
+${cum_stat}
+"
+        fi
+    fi
     cat <<PROMPT
 You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
 ${resume_section}
 ## Your Goal
 ${GOAL}
+${cumulative_section}
 ## Current Progress
 ${recent_log}
@@ -1830,6 +2010,9 @@ ${error_summary_section:+$error_summary_section
 ${memory_section:+## Memory Context
 $memory_section
 }
+${discovery_section:+## Cross-Pipeline Learnings
+$discovery_section
+}
 ${dora_section:+$dora_section
 }
 ${intelligence_section:+$intelligence_section
@@ -1844,6 +2027,13 @@ ${restart_section:+$restart_section
 5. Commit your work with a descriptive message
 6. When the goal is FULLY achieved, output exactly: LOOP_COMPLETE
+## Context Efficiency
+- Batch independent tool calls in parallel — avoid sequential round-trips
+- Use targeted file reads (offset/limit) instead of reading entire large files
+- Delegate large searches to subagents — only import the summary
+- Filter tool results with grep/jq before reasoning over them
+- Keep working memory lean — summarize completed steps, don't preserve full outputs
 ${audit_section}
 ${audit_feedback_section}
@@ -1861,6 +2051,58 @@ ${stuckness_section}
 PROMPT
 }
+# ─── Alternative Strategy Exploration ─────────────────────────────────────────
+# When stuckness is detected, generate a context-aware alternative strategy.
+# Uses pattern matching on error type + iteration count to suggest different approaches.
+explore_alternative_strategy() {
+    local last_error="${1:-unknown}"
+    local iteration="${2:-0}"
+    local diagnosis="${3:-}"
+    # Track attempted strategies to avoid repeating them
+    local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
+    local attempted
+    attempted=$(cat "$strategy_file" 2>/dev/null || true)
+    local strategy=""
+    # If quality gates are passing but evaluators disagree, suggest focusing on evaluator alignment
+    if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
+        if ! echo "$attempted" | grep -q "evaluator_alignment"; then
+            echo "evaluator_alignment" >> "$strategy_file"
+            strategy="## Alternative Strategy: Evaluator Alignment
+The code appears functionally complete (tests pass). Focus on satisfying the remaining
+quality gate evaluators. Check the DoD log and audit log for specific complaints, then
+address those exact points rather than adding new features."
+        fi
+    fi
+    # If no code changes in last iteration, suggest verifying existing work
+    if echo "$last_error" | grep -qi "no code changes" || [[ "$diagnosis" == *"no code"* ]]; then
+        if ! echo "$attempted" | grep -q "verify_existing"; then
+            echo "verify_existing" >> "$strategy_file"
+            strategy="## Alternative Strategy: Verify Existing Work
+Recent iterations made no code changes. The work may already be complete.
+Run the full test suite, verify all features work, and if everything passes,
+commit a verification message and declare LOOP_COMPLETE with evidence."
+        fi
+    fi
+    # Generic fallback: break the problem down
+    if [[ -z "$strategy" ]]; then
+        if ! echo "$attempted" | grep -q "decompose"; then
+            echo "decompose" >> "$strategy_file"
+            strategy="## Alternative Strategy: Decompose
+Break the remaining work into smaller, independent steps. Focus on one specific
+file or function at a time. Read error messages literally — the root cause may
+differ from your assumption."
+        fi
+    fi
+    echo "$strategy"
+}
 # ─── Stuckness Detection ─────────────────────────────────────────────────────
 # Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
 # Returns 0 when stuck, 1 when not. Outputs stuckness section and sets STUCKNESS_HINT when stuck.
@@ -1890,7 +2132,8 @@ detect_stuckness() {
     local stuckness_reasons=()
     local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
     local tracking_lines
-    tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || echo "0")
+    tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || true)
+    tracking_lines="${tracking_lines:-0}"
     # Signal 1: Text overlap (existing logic) — compare last 2 iteration logs
     if [[ "$iteration" -ge 3 ]]; then
@@ -1905,7 +2148,8 @@ detect_stuckness() {
             if [[ -n "$lines1" && -n "$lines2" ]]; then
                 total=$(echo "$lines1" | wc -l | tr -d ' ')
-                common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || echo "0")
+                common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || true)
+                common="${common:-0}"
                 if [[ "$total" -gt 0 ]]; then
                     overlap_pct=$(( common * 100 / total ))
                 else
@@ -1977,7 +2221,8 @@ detect_stuckness() {
     # Signal 6: Git diff size — no or minimal code changes (existing)
     local diff_lines
-    diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || echo "0")
+    diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || true)
+    diff_lines="${diff_lines:-0}"
     if [[ "${diff_lines:-0}" -lt 5 ]] && [[ "$iteration" -gt 2 ]]; then
         stuckness_signals=$((stuckness_signals + 1))
         stuckness_reasons+=("no code changes in last iteration")
@@ -1994,6 +2239,17 @@ detect_stuckness() {
         stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
     fi
+    # Gate-aware dampening: if tests pass and the agent has made progress overall,
+    # reduce stuckness signal count. The "no code changes" and "identical diffs" signals
+    # fire when code is already complete and the agent is fighting evaluator quirks —
+    # that's not genuine stuckness, it's "done but gates disagree."
+    if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
+        # If at least one quality signal is positive, dampen by 1
+        if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
+            stuckness_signals=$((stuckness_signals - 1))
+        fi
+    fi
     # Decision: 2+ signals = stuck
     if [[ "$stuckness_signals" -ge 2 ]]; then
         STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
@@ -2133,7 +2389,7 @@ compose_worker_prompt() {
                     role_desc="$recruit_desc"
                 fi
             fi
-            # Fallback to hardcoded descriptions
+            # Fallback to built-in role descriptions
             if [[ -z "$role_desc" ]]; then
                 case "$role" in
                     builder)   role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
@@ -2189,10 +2445,33 @@ run_claude_iteration() {
     local final_prompt
     final_prompt=$(manage_context_window "$prompt")
+    local raw_prompt_chars=${#prompt}
     local prompt_chars=${#final_prompt}
     local approx_tokens=$((prompt_chars / 4))
     info "Prompt: ~${approx_tokens} tokens (${prompt_chars} chars)"
+    # Emit context efficiency metrics
+    if type emit_event >/dev/null 2>&1; then
+        local trim_ratio=0
+        local budget_utilization=0
+        if [[ "$raw_prompt_chars" -gt 0 ]]; then
+            trim_ratio=$(awk -v raw="$raw_prompt_chars" -v trimmed="$prompt_chars" \
+                'BEGIN { printf "%.1f", ((raw - trimmed) / raw) * 100 }')
+        fi
+        if [[ "${CONTEXT_BUDGET_CHARS:-0}" -gt 0 ]]; then
+            budget_utilization=$(awk -v used="$prompt_chars" -v budget="${CONTEXT_BUDGET_CHARS}" \
+                'BEGIN { printf "%.1f", (used / budget) * 100 }')
+        fi
+        emit_event "loop.context_efficiency" \
+            "iteration=$ITERATION" \
+            "raw_prompt_chars=$raw_prompt_chars" \
+            "trimmed_prompt_chars=$prompt_chars" \
+            "trim_ratio=$trim_ratio" \
+            "budget_utilization=$budget_utilization" \
+            "budget_chars=${CONTEXT_BUDGET_CHARS:-0}" \
+            "job_id=${PIPELINE_JOB_ID:-loop-$$}" 2>/dev/null || true
+    fi
     local flags
     flags="$(build_claude_flags)"
@@ -2719,6 +2998,11 @@ run_single_agent_loop() {
         initialize_state
     fi
+    # Ensure LOOP_START_COMMIT is set (may not be on resume/restart)
+    if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
+        LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
+    fi
     # Apply adaptive budget/model before showing banner
     apply_adaptive_budget
     MODEL="$(select_adaptive_model "build" "$MODEL")"
@@ -2746,6 +3030,16 @@ run_single_agent_loop() {
         }
         ITERATION=$(( ITERATION + 1 ))
+        # Emit iteration start event for pipeline visibility
+        if type emit_event >/dev/null 2>&1; then
+            emit_event "loop.iteration_start" \
+                "iteration=$ITERATION" \
+                "max=$MAX_ITERATIONS" \
+                "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
+                "agent=${AGENT_NUM:-1}" \
+                "test_passed=${TEST_PASSED:-unknown}"
+        fi
         # Root-cause diagnosis and memory-based fix on retry after test failure
         if [[ "${TEST_PASSED:-}" == "false" ]]; then
             # Source memory module for diagnosis and fix lookup
@@ -2915,6 +3209,18 @@ $summary
         write_state
         write_progress
+        # Emit iteration complete event for pipeline visibility
+        if type emit_event >/dev/null 2>&1; then
+            emit_event "loop.iteration_complete" \
+                "iteration=$ITERATION" \
+                "max=$MAX_ITERATIONS" \
+                "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
+                "agent=${AGENT_NUM:-1}" \
+                "test_passed=${TEST_PASSED:-unknown}" \
+                "commits=$TOTAL_COMMITS" \
+                "status=${STATUS:-running}"
+        fi
         # Update heartbeat
         "$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
             --pid $$ \