npm - shipwright-cli - Versions diffs - 3.2.0 → 3.3.0 - Mend

shipwright-cli 3.2.0 → 3.3.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (279)

package/.claude/agents/code-reviewer.md +2 -0
package/.claude/agents/devops-engineer.md +2 -0
package/.claude/agents/doc-fleet-agent.md +2 -0
package/.claude/agents/pipeline-agent.md +2 -0
package/.claude/agents/shell-script-specialist.md +2 -0
package/.claude/agents/test-specialist.md +2 -0
package/.claude/hooks/agent-crash-capture.sh +32 -0
package/.claude/hooks/post-tool-use.sh +3 -2
package/.claude/hooks/pre-tool-use.sh +35 -3
package/README.md +4 -4
package/claude-code/hooks/config-change.sh +18 -0
package/claude-code/hooks/instructions-reloaded.sh +7 -0
package/claude-code/hooks/worktree-create.sh +25 -0
package/claude-code/hooks/worktree-remove.sh +20 -0
package/config/code-constitution.json +130 -0
package/dashboard/middleware/auth.ts +134 -0
package/dashboard/middleware/constants.ts +21 -0
package/dashboard/public/index.html +2 -6
package/dashboard/public/styles.css +100 -97
package/dashboard/routes/auth.ts +38 -0
package/dashboard/server.ts +66 -25
package/dashboard/services/config.ts +26 -0
package/dashboard/services/db.ts +118 -0
package/dashboard/src/canvas/pixel-agent.ts +298 -0
package/dashboard/src/canvas/pixel-sprites.ts +440 -0
package/dashboard/src/canvas/shipyard-effects.ts +367 -0
package/dashboard/src/canvas/shipyard-scene.ts +616 -0
package/dashboard/src/canvas/submarine-layout.ts +267 -0
package/dashboard/src/components/header.ts +8 -7
package/dashboard/src/core/router.ts +1 -0
package/dashboard/src/design/submarine-theme.ts +253 -0
package/dashboard/src/main.ts +2 -0
package/dashboard/src/types/api.ts +2 -1
package/dashboard/src/views/activity.ts +2 -1
package/dashboard/src/views/shipyard.ts +39 -0
package/dashboard/types/index.ts +166 -0
package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
package/docs/research/RESEARCH_INDEX.md +439 -0
package/docs/research/RESEARCH_SOURCES.md +440 -0
package/docs/research/RESEARCH_SUMMARY.txt +275 -0
package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
package/package.json +2 -2
package/scripts/lib/adaptive-model.sh +427 -0
package/scripts/lib/adaptive-timeout.sh +316 -0
package/scripts/lib/audit-trail.sh +309 -0
package/scripts/lib/auto-recovery.sh +471 -0
package/scripts/lib/bandit-selector.sh +431 -0
package/scripts/lib/bootstrap.sh +104 -2
package/scripts/lib/causal-graph.sh +455 -0
package/scripts/lib/compat.sh +126 -0
package/scripts/lib/compound-audit.sh +337 -0
package/scripts/lib/constitutional.sh +454 -0
package/scripts/lib/context-budget.sh +359 -0
package/scripts/lib/convergence.sh +594 -0
package/scripts/lib/cost-optimizer.sh +634 -0
package/scripts/lib/daemon-adaptive.sh +10 -0
package/scripts/lib/daemon-dispatch.sh +106 -17
package/scripts/lib/daemon-failure.sh +34 -4
package/scripts/lib/daemon-patrol.sh +23 -2
package/scripts/lib/daemon-poll-github.sh +361 -0
package/scripts/lib/daemon-poll-health.sh +299 -0
package/scripts/lib/daemon-poll.sh +27 -611
package/scripts/lib/daemon-state.sh +112 -66
package/scripts/lib/daemon-triage.sh +10 -0
package/scripts/lib/dod-scorecard.sh +442 -0
package/scripts/lib/error-actionability.sh +300 -0
package/scripts/lib/formal-spec.sh +461 -0
package/scripts/lib/helpers.sh +177 -4
package/scripts/lib/intent-analysis.sh +409 -0
package/scripts/lib/loop-convergence.sh +350 -0
package/scripts/lib/loop-iteration.sh +682 -0
package/scripts/lib/loop-progress.sh +48 -0
package/scripts/lib/loop-restart.sh +185 -0
package/scripts/lib/memory-effectiveness.sh +506 -0
package/scripts/lib/mutation-executor.sh +352 -0
package/scripts/lib/outcome-feedback.sh +521 -0
package/scripts/lib/pipeline-cli.sh +336 -0
package/scripts/lib/pipeline-commands.sh +1216 -0
package/scripts/lib/pipeline-detection.sh +100 -2
package/scripts/lib/pipeline-execution.sh +897 -0
package/scripts/lib/pipeline-github.sh +28 -3
package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
package/scripts/lib/pipeline-intelligence.sh +100 -1136
package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
package/scripts/lib/pipeline-quality-checks.sh +17 -715
package/scripts/lib/pipeline-quality-gates.sh +563 -0
package/scripts/lib/pipeline-stages-build.sh +730 -0
package/scripts/lib/pipeline-stages-delivery.sh +965 -0
package/scripts/lib/pipeline-stages-intake.sh +1133 -0
package/scripts/lib/pipeline-stages-monitor.sh +407 -0
package/scripts/lib/pipeline-stages-review.sh +1022 -0
package/scripts/lib/pipeline-stages.sh +59 -2929
package/scripts/lib/pipeline-state.sh +36 -5
package/scripts/lib/pipeline-util.sh +487 -0
package/scripts/lib/policy-learner.sh +438 -0
package/scripts/lib/process-reward.sh +493 -0
package/scripts/lib/project-detect.sh +649 -0
package/scripts/lib/quality-profile.sh +334 -0
package/scripts/lib/recruit-commands.sh +885 -0
package/scripts/lib/recruit-learning.sh +739 -0
package/scripts/lib/recruit-roles.sh +648 -0
package/scripts/lib/reward-aggregator.sh +458 -0
package/scripts/lib/rl-optimizer.sh +362 -0
package/scripts/lib/root-cause.sh +427 -0
package/scripts/lib/scope-enforcement.sh +445 -0
package/scripts/lib/session-restart.sh +493 -0
package/scripts/lib/skill-memory.sh +300 -0
package/scripts/lib/skill-registry.sh +775 -0
package/scripts/lib/spec-driven.sh +476 -0
package/scripts/lib/test-helpers.sh +18 -7
package/scripts/lib/test-holdout.sh +429 -0
package/scripts/lib/test-optimizer.sh +511 -0
package/scripts/shipwright-file-suggest.sh +45 -0
package/scripts/skills/adversarial-quality.md +61 -0
package/scripts/skills/api-design.md +44 -0
package/scripts/skills/architecture-design.md +50 -0
package/scripts/skills/brainstorming.md +43 -0
package/scripts/skills/data-pipeline.md +44 -0
package/scripts/skills/deploy-safety.md +64 -0
package/scripts/skills/documentation.md +38 -0
package/scripts/skills/frontend-design.md +45 -0
package/scripts/skills/generated/.gitkeep +0 -0
package/scripts/skills/generated/_refinements/.gitkeep +0 -0
package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
package/scripts/skills/generated/cli-version-management.md +29 -0
package/scripts/skills/generated/collection-system-validation.md +99 -0
package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
package/scripts/skills/generated/test-parallelization-detection.md +65 -0
package/scripts/skills/observability.md +79 -0
package/scripts/skills/performance.md +48 -0
package/scripts/skills/pr-quality.md +49 -0
package/scripts/skills/product-thinking.md +43 -0
package/scripts/skills/security-audit.md +49 -0
package/scripts/skills/systematic-debugging.md +40 -0
package/scripts/skills/testing-strategy.md +47 -0
package/scripts/skills/two-stage-review.md +52 -0
package/scripts/skills/validation-thoroughness.md +55 -0
package/scripts/sw +9 -3
package/scripts/sw-activity.sh +9 -2
package/scripts/sw-adaptive.sh +2 -1
package/scripts/sw-adversarial.sh +2 -1
package/scripts/sw-architecture-enforcer.sh +3 -1
package/scripts/sw-auth.sh +12 -2
package/scripts/sw-autonomous.sh +5 -1
package/scripts/sw-changelog.sh +4 -1
package/scripts/sw-checkpoint.sh +2 -1
package/scripts/sw-ci.sh +5 -1
package/scripts/sw-cleanup.sh +4 -26
package/scripts/sw-code-review.sh +10 -4
package/scripts/sw-connect.sh +2 -1
package/scripts/sw-context.sh +2 -1
package/scripts/sw-cost.sh +48 -3
package/scripts/sw-daemon.sh +66 -9
package/scripts/sw-dashboard.sh +3 -1
package/scripts/sw-db.sh +59 -16
package/scripts/sw-decide.sh +8 -2
package/scripts/sw-decompose.sh +360 -17
package/scripts/sw-deps.sh +4 -1
package/scripts/sw-developer-simulation.sh +4 -1
package/scripts/sw-discovery.sh +325 -2
package/scripts/sw-doc-fleet.sh +4 -1
package/scripts/sw-docs-agent.sh +3 -1
package/scripts/sw-docs.sh +2 -1
package/scripts/sw-doctor.sh +453 -2
package/scripts/sw-dora.sh +4 -1
package/scripts/sw-durable.sh +4 -3
package/scripts/sw-e2e-orchestrator.sh +17 -16
package/scripts/sw-eventbus.sh +7 -1
package/scripts/sw-evidence.sh +364 -12
package/scripts/sw-feedback.sh +550 -9
package/scripts/sw-fix.sh +20 -1
package/scripts/sw-fleet-discover.sh +6 -2
package/scripts/sw-fleet-viz.sh +4 -1
package/scripts/sw-fleet.sh +5 -1
package/scripts/sw-github-app.sh +16 -3
package/scripts/sw-github-checks.sh +3 -2
package/scripts/sw-github-deploy.sh +3 -2
package/scripts/sw-github-graphql.sh +18 -7
package/scripts/sw-guild.sh +5 -1
package/scripts/sw-heartbeat.sh +5 -30
package/scripts/sw-hello.sh +67 -0
package/scripts/sw-hygiene.sh +6 -1
package/scripts/sw-incident.sh +265 -1
package/scripts/sw-init.sh +18 -2
package/scripts/sw-instrument.sh +10 -2
package/scripts/sw-intelligence.sh +42 -6
package/scripts/sw-jira.sh +5 -1
package/scripts/sw-launchd.sh +2 -1
package/scripts/sw-linear.sh +4 -1
package/scripts/sw-logs.sh +4 -1
package/scripts/sw-loop.sh +432 -1128
package/scripts/sw-memory.sh +356 -2
package/scripts/sw-mission-control.sh +6 -1
package/scripts/sw-model-router.sh +481 -26
package/scripts/sw-otel.sh +13 -4
package/scripts/sw-oversight.sh +14 -5
package/scripts/sw-patrol-meta.sh +334 -0
package/scripts/sw-pipeline-composer.sh +5 -1
package/scripts/sw-pipeline-vitals.sh +2 -1
package/scripts/sw-pipeline.sh +53 -2664
package/scripts/sw-pm.sh +12 -5
package/scripts/sw-pr-lifecycle.sh +2 -1
package/scripts/sw-predictive.sh +7 -1
package/scripts/sw-prep.sh +185 -2
package/scripts/sw-ps.sh +5 -25
package/scripts/sw-public-dashboard.sh +15 -3
package/scripts/sw-quality.sh +2 -1
package/scripts/sw-reaper.sh +8 -25
package/scripts/sw-recruit.sh +156 -2303
package/scripts/sw-regression.sh +19 -12
package/scripts/sw-release-manager.sh +3 -1
package/scripts/sw-release.sh +4 -1
package/scripts/sw-remote.sh +3 -1
package/scripts/sw-replay.sh +7 -1
package/scripts/sw-retro.sh +158 -1
package/scripts/sw-review-rerun.sh +3 -1
package/scripts/sw-scale.sh +10 -3
package/scripts/sw-security-audit.sh +6 -1
package/scripts/sw-self-optimize.sh +6 -3
package/scripts/sw-session.sh +9 -3
package/scripts/sw-setup.sh +3 -1
package/scripts/sw-stall-detector.sh +406 -0
package/scripts/sw-standup.sh +15 -7
package/scripts/sw-status.sh +3 -1
package/scripts/sw-strategic.sh +4 -1
package/scripts/sw-stream.sh +7 -1
package/scripts/sw-swarm.sh +18 -6
package/scripts/sw-team-stages.sh +13 -6
package/scripts/sw-templates.sh +5 -29
package/scripts/sw-testgen.sh +7 -1
package/scripts/sw-tmux-pipeline.sh +4 -1
package/scripts/sw-tmux-role-color.sh +2 -0
package/scripts/sw-tmux-status.sh +1 -1
package/scripts/sw-tmux.sh +3 -1
package/scripts/sw-trace.sh +3 -1
package/scripts/sw-tracker-github.sh +3 -0
package/scripts/sw-tracker-jira.sh +3 -0
package/scripts/sw-tracker-linear.sh +3 -0
package/scripts/sw-tracker.sh +3 -1
package/scripts/sw-triage.sh +2 -1
package/scripts/sw-upgrade.sh +3 -1
package/scripts/sw-ux.sh +5 -2
package/scripts/sw-webhook.sh +3 -1
package/scripts/sw-widgets.sh +3 -1
package/scripts/sw-worktree.sh +15 -3
package/scripts/test-skill-injection.sh +1233 -0
package/templates/pipelines/autonomous.json +27 -3
package/templates/pipelines/cost-aware.json +34 -8
package/templates/pipelines/deployed.json +12 -0
package/templates/pipelines/enterprise.json +12 -0
package/templates/pipelines/fast.json +6 -0
package/templates/pipelines/full.json +27 -3
package/templates/pipelines/hotfix.json +6 -0
package/templates/pipelines/standard.json +12 -0
package/templates/pipelines/tdd.json +12 -0

package/scripts/sw-loop.sh CHANGED Viewed

@@ -33,6 +33,40 @@ if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
 fi
 # Cross-pipeline discovery (learnings from other pipeline runs)
 [[ -f "$SCRIPT_DIR/sw-discovery.sh" ]] && source "$SCRIPT_DIR/sw-discovery.sh" 2>/dev/null || true
+# Source loop sub-modules for modular iteration management
+[[ -f "$SCRIPT_DIR/lib/loop-iteration.sh" ]] && source "$SCRIPT_DIR/lib/loop-iteration.sh"
+[[ -f "$SCRIPT_DIR/lib/loop-convergence.sh" ]] && source "$SCRIPT_DIR/lib/loop-convergence.sh"
+[[ -f "$SCRIPT_DIR/lib/loop-restart.sh" ]] && source "$SCRIPT_DIR/lib/loop-restart.sh"
+[[ -f "$SCRIPT_DIR/lib/loop-progress.sh" ]] && source "$SCRIPT_DIR/lib/loop-progress.sh"
+# Intelligent session restart with enhanced briefings and cross-session tracking
+[[ -f "$SCRIPT_DIR/lib/session-restart.sh" ]] && source "$SCRIPT_DIR/lib/session-restart.sh"
+# Context window budget monitoring (issue #209)
+# shellcheck source=lib/context-budget.sh
+[[ -f "$SCRIPT_DIR/lib/context-budget.sh" ]] && source "$SCRIPT_DIR/lib/context-budget.sh" 2>/dev/null || true
+# Convergence detection and scoring (issue #203)
+[[ -f "$SCRIPT_DIR/lib/convergence.sh" ]] && source "$SCRIPT_DIR/lib/convergence.sh" 2>/dev/null || true
+# Error actionability scoring and enhancement for better error context
+# shellcheck source=lib/error-actionability.sh
+[[ -f "$SCRIPT_DIR/lib/error-actionability.sh" ]] && source "$SCRIPT_DIR/lib/error-actionability.sh" 2>/dev/null || true
+# Autonomous error recovery with model escalation
+# shellcheck source=lib/auto-recovery.sh
+[[ -f "$SCRIPT_DIR/lib/auto-recovery.sh" ]] && source "$SCRIPT_DIR/lib/auto-recovery.sh" 2>/dev/null || true
+# Test execution optimization (issue #200)
+# shellcheck source=lib/test-optimizer.sh
+[[ -f "$SCRIPT_DIR/lib/test-optimizer.sh" ]] && source "$SCRIPT_DIR/lib/test-optimizer.sh" 2>/dev/null || true
+# Audit trail for compliance-grade pipeline traceability
+# shellcheck source=lib/audit-trail.sh
+[[ -f "$SCRIPT_DIR/lib/audit-trail.sh" ]] && source "$SCRIPT_DIR/lib/audit-trail.sh" 2>/dev/null || true
+# Process reward model for per-step iteration scoring (Phase 3)
+# shellcheck source=lib/process-reward.sh
+[[ -f "$SCRIPT_DIR/lib/process-reward.sh" ]] && source "$SCRIPT_DIR/lib/process-reward.sh" 2>/dev/null || true
+# Cross-session reinforcement learning optimizer (Phase 7)
+# shellcheck source=lib/rl-optimizer.sh
+[[ -f "$SCRIPT_DIR/lib/rl-optimizer.sh" ]] && source "$SCRIPT_DIR/lib/rl-optimizer.sh" 2>/dev/null || true
+# Autoresearch RL modules (Phase 8): reward aggregation, bandit selection, policy learning
+[[ -f "$SCRIPT_DIR/lib/reward-aggregator.sh" ]] && source "$SCRIPT_DIR/lib/reward-aggregator.sh" 2>/dev/null || true
+[[ -f "$SCRIPT_DIR/lib/bandit-selector.sh" ]] && source "$SCRIPT_DIR/lib/bandit-selector.sh" 2>/dev/null || true
+[[ -f "$SCRIPT_DIR/lib/policy-learner.sh" ]] && source "$SCRIPT_DIR/lib/policy-learner.sh" 2>/dev/null || true
 # Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
 [[ "$(type -t info 2>/dev/null)" == "function" ]]    || info()    { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
 [[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
@@ -45,6 +79,7 @@ fi
 if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
   emit_event() {
     local event_type="$1"; shift; mkdir -p "${HOME}/.shipwright"
+    # shellcheck disable=SC2155
     local payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
     while [[ $# -gt 0 ]]; do local key="${1%%=*}" val="${1#*=}"; payload="${payload},\"${key}\":\"${val}\""; shift; done
     echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
@@ -72,22 +107,27 @@ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
 SESSION_RESTART=false
 RESTART_COUNT=0
 REPO_OVERRIDE=""
-VERSION="3.2.0"
+VERSION="3.3.0"
 # ─── Token Tracking ─────────────────────────────────────────────────────────
 LOOP_INPUT_TOKENS=0
 LOOP_OUTPUT_TOKENS=0
 LOOP_COST_MILLICENTS=0
-# ─── Flexible Iteration Defaults ────────────────────────────────────────────
-AUTO_EXTEND=true          # Auto-extend iterations when work is incomplete
-EXTENSION_SIZE=5          # Additional iterations per extension
-MAX_EXTENSIONS=3          # Max number of extensions (hard cap safety net)
-EXTENSION_COUNT=0         # Current number of extensions applied
+# ─── Flexible Iteration Defaults (all config-driven) ───────────────────────
+AUTO_EXTEND=true
+EXTENSION_SIZE=$(_smart_int "loop.extension_size" 5)
+MAX_EXTENSIONS=$(_smart_int "loop.max_extensions" 3)
+EXTENSION_COUNT=0
-# ─── Circuit Breaker Defaults ──────────────────────────────────────────────
-CIRCUIT_BREAKER_THRESHOLD=3       # Consecutive low-progress iterations before stopping
-MIN_PROGRESS_LINES=5              # Minimum insertions to count as progress
+# ─── Circuit Breaker Defaults (config-driven) ─────────────────────────────
+CIRCUIT_BREAKER_THRESHOLD=$(_smart_int "loop.circuit_breaker_threshold" 3)
+MIN_PROGRESS_LINES=$(_smart_int "loop.min_progress_lines" 5)
+# ─── Context Exhaustion Recovery ────────────────────────────────────────────────
+CONTEXT_EXHAUSTION_PATTERNS="context.length.exceeded|maximum context length|context_length_exceeded|prompt is too long"
+CONTEXT_RESTART_COUNT=0
+CONTEXT_RESTART_LIMIT=$(_smart_int "loop.context_restart_limit" 2)
 # ─── Audit & Quality Gate Defaults ───────────────────────────────────────────
 AUDIT_ENABLED=false
@@ -98,6 +138,16 @@ AUDIT_RESULT=""
 COMPLETION_REJECTED=false
 QUALITY_GATE_PASSED=true
+# ─── Multi-Test Defaults ──────────────────────────────────────────────────
+ADDITIONAL_TEST_CMDS=()   # Array of extra test commands (from --additional-test-cmds)
+# ─── Context Budget ──────────────────────────────────────────────────────────
+CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-200000}"  # Max prompt chars before trimming
+# ─── Claude CLI Flags ─────────────────────────────────────────────────────────
+EFFORT_LEVEL="${SW_EFFORT_LEVEL:-}"
+FALLBACK_MODEL="${SW_FALLBACK_MODEL:-}"  # Empty = no fallback flag (intelligent default)
 # ─── Parse Arguments ──────────────────────────────────────────────────────────
 show_help() {
     echo -e "${CYAN}${BOLD}shipwright${RESET} ${DIM}v${VERSION}${RESET} — ${BOLD}Continuous Loop${RESET}"
@@ -112,7 +162,10 @@ show_help() {
     echo -e "  ${CYAN}--test-cmd${RESET} \"cmd\"         Test command to run between iterations"
     echo -e "  ${CYAN}--fast-test-cmd${RESET} \"cmd\"      Fast/subset test command (alternates with full)"
     echo -e "  ${CYAN}--fast-test-interval${RESET} N       Run full tests every N iterations (default: 5)"
+    echo -e "  ${CYAN}--additional-test-cmds${RESET} \"cmd\" Extra test command (repeatable)"
     echo -e "  ${CYAN}--model${RESET} MODEL             Claude model to use (default: opus)"
+    echo -e "  ${CYAN}--effort${RESET} low|medium|high   Effort level for Claude reasoning (default: auto per stage)"
+    echo -e "  ${CYAN}--fallback-model${RESET} MODEL      Fallback model on rate limits (default: sonnet)"
     echo -e "  ${CYAN}--agents${RESET} N                Number of parallel agents (default: 1)"
     echo -e "  ${CYAN}--roles${RESET} \"r1,r2,...\"        Role per agent: builder,reviewer,tester,optimizer,docs,security"
     echo -e "  ${CYAN}--worktree${RESET}                Use git worktrees for isolation (auto if agents > 1)"
@@ -186,6 +239,18 @@ while [[ $# -gt 0 ]]; do
             shift 2
             ;;
         --model=*) MODEL="${1#--model=}"; shift ;;
+        --effort)
+            EFFORT_LEVEL="${2:-}"
+            [[ -z "$EFFORT_LEVEL" ]] && { error "Missing value for --effort"; exit 1; }
+            shift 2
+            ;;
+        --effort=*) EFFORT_LEVEL="${1#--effort=}"; shift ;;
+        --fallback-model)
+            FALLBACK_MODEL="${2:-}"
+            [[ -z "$FALLBACK_MODEL" ]] && { error "Missing value for --fallback-model"; exit 1; }
+            shift 2
+            ;;
+        --fallback-model=*) FALLBACK_MODEL="${1#--fallback-model=}"; shift ;;
         --agents)
             AGENTS="${2:-}"
             [[ -z "$AGENTS" ]] && { error "Missing value for --agents"; exit 1; }
@@ -236,6 +301,12 @@ while [[ $# -gt 0 ]]; do
             shift 2
             ;;
         --fast-test-interval=*) FAST_TEST_INTERVAL="${1#--fast-test-interval=}"; shift ;;
+        --additional-test-cmds)
+            ADDITIONAL_TEST_CMDS+=("${2:-}")
+            [[ -z "${2:-}" ]] && { error "Missing value for --additional-test-cmds"; exit 1; }
+            shift 2
+            ;;
+        --additional-test-cmds=*) ADDITIONAL_TEST_CMDS+=("${1#--additional-test-cmds=}"); shift ;;
         --max-restarts)
             MAX_RESTARTS="${2:-}"
             [[ -z "$MAX_RESTARTS" ]] && { error "Missing value for --max-restarts"; exit 1; }
@@ -273,6 +344,7 @@ done
 # Auto-enable worktree for multi-agent
 if [[ "$AGENTS" -gt 1 ]]; then
+    # shellcheck disable=SC2034
     USE_WORKTREE=true
 fi
@@ -309,6 +381,12 @@ if ! [[ "$MAX_RESTARTS" =~ ^[0-9]+$ ]]; then
     exit 1
 fi
+# Validate effort level
+if [[ -n "$EFFORT_LEVEL" ]] && [[ "$EFFORT_LEVEL" != "low" && "$EFFORT_LEVEL" != "medium" && "$EFFORT_LEVEL" != "high" ]]; then
+    error "--effort must be low, medium, or high (got: $EFFORT_LEVEL)"
+    exit 1
+fi
 # ─── Validate Inputs ─────────────────────────────────────────────────────────
 if ! $RESUME && [[ -z "$GOAL" ]]; then
@@ -382,6 +460,16 @@ WORKTREE_DIR="$PROJECT_ROOT/.worktrees"
 mkdir -p "$STATE_DIR" "$LOG_DIR"
+# ─── Context Budget Initialization ────────────────────────────────────────────
+# Initialize context window budget tracker (issue #209)
+ARTIFACTS_DIR="${STATE_DIR}/pipeline-artifacts"
+mkdir -p "$ARTIFACTS_DIR"
+if type context_budget_init >/dev/null 2>&1; then
+    # Set total budget (default 800K, configurable via env/config)
+    CONTEXT_BUDGET="${CONTEXT_BUDGET_TOKENS:-800000}"
+    context_budget_init "$CONTEXT_BUDGET" "$ARTIFACTS_DIR" 2>/dev/null || true
+fi
 # ─── Adaptive Model Selection ────────────────────────────────────────────────
 # Uses intelligence engine when available, falls back to defaults.
 select_adaptive_model() {
@@ -505,16 +593,28 @@ _extract_text_from_json() {
     local first_char
     first_char=$(head -c1 "$json_file" 2>/dev/null || true)
-    # Case 2: Valid JSON array — extract .result from last element
-    if [[ "$first_char" == "[" ]] && command -v jq >/dev/null 2>&1; then
+    # Case 2: Valid JSON (array or object) — extract text with jq
+    if [[ ("$first_char" == "[" || "$first_char" == "{") ]] && command -v jq >/dev/null 2>&1; then
         local extracted
-        extracted=$(jq -r '.[-1].result // empty' "$json_file" 2>/dev/null) || true
-        if [[ -n "$extracted" ]]; then
-            echo "$extracted" > "$log_file"
-            return 0
+        if [[ "$first_char" == "[" ]]; then
+            # Array: extract .result from last element
+            extracted=$(jq -r '.[-1].result // empty' "$json_file" 2>/dev/null) || true
+            if [[ -n "$extracted" ]]; then
+                echo "$extracted" > "$log_file"
+                return 0
+            fi
+            # Try .content fields
+            extracted=$(jq -r '.[].content // empty' "$json_file" 2>/dev/null | head -500) || true
+        else
+            # Object: extract .result directly
+            extracted=$(jq -r '.result // empty' "$json_file" 2>/dev/null) || true
+            if [[ -n "$extracted" ]]; then
+                echo "$extracted" > "$log_file"
+                return 0
+            fi
+            # Try .content field
+            extracted=$(jq -r '.content // empty' "$json_file" 2>/dev/null) || true
         fi
-        # jq succeeded but result was null/empty — try .content or raw text
-        extracted=$(jq -r '.[].content // empty' "$json_file" 2>/dev/null | head -500) || true
         if [[ -n "$extracted" ]]; then
             echo "$extracted" > "$log_file"
             return 0
@@ -525,7 +625,7 @@ _extract_text_from_json() {
         return 0
     fi
-    # Case 3: Looks like JSON but no jq — can't parse, use raw
+    # Case 3: Looks like JSON but jq is not available — can't parse, use raw
     if [[ "$first_char" == "[" || "$first_char" == "{" ]]; then
         warn "JSON output but jq not available — using raw output"
         cp "$json_file" "$log_file"
@@ -546,6 +646,7 @@ write_loop_tokens() {
     fi
     local tmp_file
     tmp_file=$(mktemp "${token_file}.XXXXXX" 2>/dev/null || mktemp)
+    # shellcheck disable=SC2064
     trap "rm -f '$tmp_file'" RETURN
     cat > "$tmp_file" <<TOKJSON
 {"input_tokens":${LOOP_INPUT_TOKENS},"output_tokens":${LOOP_OUTPUT_TOKENS},"cost_usd":${cost_usd},"iterations":${ITERATION:-0}}
@@ -599,38 +700,8 @@ apply_adaptive_budget() {
 ITERATION_LINES_CHANGED=""
 VELOCITY_HISTORY=""
-track_iteration_velocity() {
-    local changes
-    changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null | tail -1 || echo "")"
-    local insertions
-    insertions="$(echo "$changes" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)"
-    ITERATION_LINES_CHANGED="${insertions:-0}"
-    if [[ -n "$VELOCITY_HISTORY" ]]; then
-        VELOCITY_HISTORY="${VELOCITY_HISTORY},${ITERATION_LINES_CHANGED}"
-    else
-        VELOCITY_HISTORY="${ITERATION_LINES_CHANGED}"
-    fi
-}
 # Compute average lines/iteration from recent history
-compute_velocity_avg() {
-    if [[ -z "$VELOCITY_HISTORY" ]]; then
-        echo "0"
-        return 0
-    fi
-    local total=0 count=0
-    local IFS=','
-    local val
-    for val in $VELOCITY_HISTORY; do
-        total=$((total + val))
-        count=$((count + 1))
-    done
-    if [[ "$count" -gt 0 ]]; then
-        echo $((total / count))
-    else
-        echo "0"
-    fi
-}
 # ─── Timing Helpers ───────────────────────────────────────────────────────────
@@ -656,191 +727,10 @@ TEST_PASSED=""
 TEST_OUTPUT=""
 LOG_ENTRIES=""
-initialize_state() {
-    ITERATION=0
-    CONSECUTIVE_FAILURES=0
-    TOTAL_COMMITS=0
-    START_EPOCH="$(now_epoch)"
-    STATUS="running"
-    LOG_ENTRIES=""
-    # Record starting commit for cumulative diff in quality gates
-    LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
-    write_state
-}
-resume_state() {
-    if [[ ! -f "$STATE_FILE" ]]; then
-        error "No state file found at $STATE_FILE"
-        echo -e "  Start a new loop instead: ${DIM}shipwright loop \"<goal>\"${RESET}"
-        exit 1
-    fi
-    info "Resuming from $STATE_FILE"
-    # Save CLI values before parsing state (CLI takes precedence)
-    local cli_max_iterations="$MAX_ITERATIONS"
-    # Parse YAML front matter
-    local in_frontmatter=false
-    while IFS= read -r line; do
-        if [[ "$line" == "---" ]]; then
-            if $in_frontmatter; then
-                break
-            else
-                in_frontmatter=true
-                continue
-            fi
-        fi
-        if $in_frontmatter; then
-            case "$line" in
-                goal:*)          [[ -z "$GOAL" ]] && GOAL="$(echo "${line#goal:}" | sed 's/^ *"//;s/" *$//')" ;;
-                iteration:*)     ITERATION="$(echo "${line#iteration:}" | tr -d ' ')" ;;
-                max_iterations:*) MAX_ITERATIONS="$(echo "${line#max_iterations:}" | tr -d ' ')" ;;
-                status:*)        STATUS="$(echo "${line#status:}" | tr -d ' ')" ;;
-                test_cmd:*)      [[ -z "$TEST_CMD" ]] && TEST_CMD="$(echo "${line#test_cmd:}" | sed 's/^ *"//;s/" *$//')" ;;
-                model:*)         MODEL="$(echo "${line#model:}" | tr -d ' ')" ;;
-                agents:*)        AGENTS="$(echo "${line#agents:}" | tr -d ' ')" ;;
-                consecutive_failures:*) CONSECUTIVE_FAILURES="$(echo "${line#consecutive_failures:}" | tr -d ' ')" ;;
-                total_commits:*) TOTAL_COMMITS="$(echo "${line#total_commits:}" | tr -d ' ')" ;;
-                audit_enabled:*)         AUDIT_ENABLED="$(echo "${line#audit_enabled:}" | tr -d ' ')" ;;
-                audit_agent_enabled:*)   AUDIT_AGENT_ENABLED="$(echo "${line#audit_agent_enabled:}" | tr -d ' ')" ;;
-                quality_gates_enabled:*) QUALITY_GATES_ENABLED="$(echo "${line#quality_gates_enabled:}" | tr -d ' ')" ;;
-                dod_file:*)              DOD_FILE="$(echo "${line#dod_file:}" | sed 's/^ *"//;s/" *$//')" ;;
-                auto_extend:*)           AUTO_EXTEND="$(echo "${line#auto_extend:}" | tr -d ' ')" ;;
-                extension_count:*)       EXTENSION_COUNT="$(echo "${line#extension_count:}" | tr -d ' ')" ;;
-                max_extensions:*)        MAX_EXTENSIONS="$(echo "${line#max_extensions:}" | tr -d ' ')" ;;
-            esac
-        fi
-    done < "$STATE_FILE"
-    # CLI --max-iterations overrides state file
-    if $MAX_ITERATIONS_EXPLICIT; then
-        MAX_ITERATIONS="$cli_max_iterations"
-    fi
-    # Extract the log section (everything after ## Log)
-    LOG_ENTRIES="$(sed -n '/^## Log$/,$ { /^## Log$/d; p; }' "$STATE_FILE" 2>/dev/null || true)"
-    if [[ -z "$GOAL" ]]; then
-        error "Could not parse goal from state file."
-        exit 1
-    fi
-    if [[ "$STATUS" == "complete" ]]; then
-        warn "Previous loop completed. Start a new one or edit the state file."
-        exit 0
-    fi
-    # Reset circuit breaker on resume
-    CONSECUTIVE_FAILURES=0
-    START_EPOCH="$(now_epoch)"
-    STATUS="running"
-    # Set starting commit for cumulative diff (approximate: use earliest tracked commit)
-    if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
-        LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
-    fi
-    # If we hit max iterations before, warn user to extend
-    if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
-        warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
-        echo -e "  Extend with: ${DIM}shipwright loop --resume --max-iterations $(( MAX_ITERATIONS + 10 ))${RESET}"
-        exit 0
-    fi
-    # Restore Claude context for meaningful resume (source so exports persist to this shell)
-    if [[ -f "$SCRIPT_DIR/sw-checkpoint.sh" ]] && [[ -d "${PROJECT_ROOT:-}" ]]; then
-        source "$SCRIPT_DIR/sw-checkpoint.sh"
-        local _orig_pwd="$PWD"
-        cd "$PROJECT_ROOT" 2>/dev/null || true
-        if checkpoint_restore_context "build" 2>/dev/null; then
-            RESUMED_FROM_ITERATION="${RESTORED_ITERATION:-}"
-            RESUMED_MODIFIED="${RESTORED_MODIFIED:-}"
-            RESUMED_FINDINGS="${RESTORED_FINDINGS:-}"
-            RESUMED_TEST_OUTPUT="${RESTORED_TEST_OUTPUT:-}"
-            [[ -n "${RESTORED_ITERATION:-}" && "${RESTORED_ITERATION:-0}" -gt 0 ]] && info "Restored context from iteration ${RESTORED_ITERATION}"
-        fi
-        cd "$_orig_pwd" 2>/dev/null || true
-    fi
-    success "Resumed: iteration $ITERATION/$MAX_ITERATIONS"
-}
-write_state() {
-    local tmp_state="${STATE_FILE}.tmp.$$"
-    # Use printf instead of heredoc to avoid delimiter injection from GOAL
-    {
-        printf -- '---\n'
-        printf 'goal: "%s"\n' "$GOAL"
-        printf 'iteration: %s\n' "$ITERATION"
-        printf 'max_iterations: %s\n' "$MAX_ITERATIONS"
-        printf 'status: %s\n' "$STATUS"
-        printf 'test_cmd: "%s"\n' "$TEST_CMD"
-        printf 'model: %s\n' "$MODEL"
-        printf 'agents: %s\n' "$AGENTS"
-        printf 'started_at: %s\n' "$(now_iso)"
-        printf 'last_iteration_at: %s\n' "$(now_iso)"
-        printf 'consecutive_failures: %s\n' "$CONSECUTIVE_FAILURES"
-        printf 'total_commits: %s\n' "$TOTAL_COMMITS"
-        printf 'audit_enabled: %s\n' "$AUDIT_ENABLED"
-        printf 'audit_agent_enabled: %s\n' "$AUDIT_AGENT_ENABLED"
-        printf 'quality_gates_enabled: %s\n' "$QUALITY_GATES_ENABLED"
-        printf 'dod_file: "%s"\n' "$DOD_FILE"
-        printf 'auto_extend: %s\n' "$AUTO_EXTEND"
-        printf 'extension_count: %s\n' "$EXTENSION_COUNT"
-        printf 'max_extensions: %s\n' "$MAX_EXTENSIONS"
-        printf -- '---\n\n'
-        printf '## Log\n'
-        printf '%s\n' "$LOG_ENTRIES"
-    } > "$tmp_state"
-    if ! mv "$tmp_state" "$STATE_FILE" 2>/dev/null; then
-        warn "Failed to write state file: $STATE_FILE"
-    fi
-}
-write_progress() {
-    local progress_file="$LOG_DIR/progress.md"
-    local recent_commits
-    recent_commits=$(git -C "$PROJECT_ROOT" log --oneline -5 2>/dev/null || echo "(no commits)")
-    local changed_files
-    changed_files=$(git -C "$PROJECT_ROOT" diff --name-only HEAD~3 2>/dev/null | head -20 || echo "(none)")
-    local last_error=""
-    local prev_test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
-    if [[ -f "$prev_test_log" ]] && [[ "${TEST_PASSED:-}" == "false" ]]; then
-        last_error=$(tail -10 "$prev_test_log" 2>/dev/null || true)
-    fi
-    # Use printf to avoid heredoc delimiter injection from GOAL content
-    local tmp_progress="${progress_file}.tmp.$$"
-    {
-        printf '# Session Progress (Auto-Generated)\n\n'
-        printf '## Goal\n%s\n\n' "${GOAL}"
-        printf '## Status\n'
-        printf -- '- Iteration: %s/%s\n' "${ITERATION}" "${MAX_ITERATIONS}"
-        printf -- '- Session restart: %s/%s\n' "${RESTART_COUNT:-0}" "${MAX_RESTARTS:-0}"
-        printf -- '- Tests passing: %s\n' "${TEST_PASSED:-unknown}"
-        printf -- '- Status: %s\n\n' "${STATUS:-running}"
-        printf '## Recent Commits\n%s\n\n' "${recent_commits}"
-        printf '## Changed Files\n%s\n\n' "${changed_files}"
-        if [[ -n "$last_error" ]]; then
-            printf '## Last Error\n%s\n\n' "$last_error"
-        fi
-        printf '## Timestamp\n%s\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
-    } > "$tmp_progress" 2>/dev/null
-    mv "$tmp_progress" "$progress_file" 2>/dev/null || rm -f "$tmp_progress" 2>/dev/null
-}
-append_log_entry() {
-    local entry="$1"
-    if [[ -n "$LOG_ENTRIES" ]]; then
-        LOG_ENTRIES="${LOG_ENTRIES}
-${entry}"
-    else
-        LOG_ENTRIES="$entry"
-    fi
-}
 # ─── Semantic Validation for Claude Output ─────────────────────────────────────
 # Validates changed files before commit to catch syntax errors and API error leakage.
@@ -963,155 +853,12 @@ git_auto_commit() {
 # ─── Fatal Error Detection ────────────────────────────────────────────────────
-check_fatal_error() {
-    local log_file="$1"
-    local cli_exit_code="${2:-0}"
-    [[ -f "$log_file" ]] || return 1
-    # Known fatal error patterns from Claude CLI / Anthropic API
-    local fatal_patterns="Invalid API key|invalid_api_key|authentication_error|API key expired"
-    fatal_patterns="${fatal_patterns}|rate_limit_error|overloaded_error|billing"
-    fatal_patterns="${fatal_patterns}|Could not resolve host|connection refused|ECONNREFUSED"
-    fatal_patterns="${fatal_patterns}|ANTHROPIC_API_KEY.*not set|No API key"
-    if grep -qiE "$fatal_patterns" "$log_file" 2>/dev/null; then
-        local match
-        match=$(grep -iE "$fatal_patterns" "$log_file" 2>/dev/null | head -1 | cut -c1-120)
-        error "Fatal CLI error: $match"
-        return 1  # fatal error detected
-    fi
-    # Non-zero exit + tiny output = likely CLI crash
-    if [[ "$cli_exit_code" -ne 0 ]]; then
-        local line_count
-        line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
-        line_count="${line_count:-0}"
-        if [[ "$line_count" -lt 3 ]]; then
-            local content
-            content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
-            error "CLI exited $cli_exit_code with minimal output: $content"
-            return 0
-        fi
-    fi
-    return 1  # no fatal error
-}
 # ─── Progress & Circuit Breaker ───────────────────────────────────────────────
-check_progress() {
-    local changes
-    # Exclude loop bookkeeping files — only count real code changes as progress
-    changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 \
-        -- . ':!.claude/loop-state.md' ':!.claude/pipeline-state.md' \
-        ':!**/progress.md' ':!**/error-summary.json' \
-        2>/dev/null | tail -1 || echo "")"
-    local insertions
-    insertions="$(echo "$changes" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)"
-    if [[ "${insertions:-0}" -lt "$MIN_PROGRESS_LINES" ]]; then
-        return 1  # No meaningful progress
-    fi
-    return 0
-}
-check_completion() {
-    local log_file="$1"
-    grep -q "LOOP_COMPLETE" "$log_file" 2>/dev/null
-}
-# Decide whether the loop should be aborted because it is not progressing.
-# Globals read: STATE_FILE, ARTIFACTS_DIR, ISSUE_NUMBER, CONSECUTIVE_FAILURES,
-#   CIRCUIT_BREAKER_THRESHOLD. Globals written: STATUS (set to
-#   "circuit_breaker" when the breaker trips).
-# Returns: 0 to keep looping, 1 when the breaker has tripped.
-check_circuit_breaker() {
-    # Vitals-driven circuit breaker (preferred over static threshold)
-    # Only used when both vitals helpers are defined in the current shell.
-    if type pipeline_compute_vitals >/dev/null 2>&1 && type pipeline_health_verdict >/dev/null 2>&1; then
-        local _vitals_json _verdict
-        local _loop_state="${STATE_FILE:-}"
-        local _loop_artifacts="${ARTIFACTS_DIR:-}"
-        local _loop_issue="${ISSUE_NUMBER:-}"
-        _vitals_json=$(pipeline_compute_vitals "$_loop_state" "$_loop_artifacts" "$_loop_issue" 2>/dev/null) || true
-        # An empty result or "{}" is treated as "no vitals" and falls back to the static check.
-        if [[ -n "$_vitals_json" && "$_vitals_json" != "{}" ]]; then
-            _verdict=$(echo "$_vitals_json" | jq -r '.verdict // "continue"' 2>/dev/null || echo "continue")
-            if [[ "$_verdict" == "abort" ]]; then
-                local _health_score
-                _health_score=$(echo "$_vitals_json" | jq -r '.health_score // 0' 2>/dev/null || echo "0")
-                error "Vitals circuit breaker: health score ${_health_score}/100 — aborting (${CONSECUTIVE_FAILURES} stagnant iterations)"
-                STATUS="circuit_breaker"
-                return 1
-            fi
-            # Vitals say continue/warn — don't trip circuit breaker yet.
-            # Any other verdict (e.g. "intervene") falls through to the static threshold below.
-            if [[ "$_verdict" == "continue" || "$_verdict" == "warn" ]]; then
-                return 0
-            fi
-        fi
-    fi
-    # Fallback: static threshold circuit breaker
-    if [[ "$CONSECUTIVE_FAILURES" -ge "$CIRCUIT_BREAKER_THRESHOLD" ]]; then
-        error "Circuit breaker tripped: ${CIRCUIT_BREAKER_THRESHOLD} consecutive iterations with no meaningful progress."
-        STATUS="circuit_breaker"
-        return 1
-    fi
-    return 0
-}
-# Enforce the iteration cap, optionally extending it when work is still moving.
-# Globals read: ITERATION, AUTO_EXTEND, MAX_EXTENSIONS, EXTENSION_SIZE,
-#   CONSECUTIVE_FAILURES, TEST_PASSED, QUALITY_GATE_PASSED, LOG_DIR.
-# Globals written: MAX_ITERATIONS and EXTENSION_COUNT (when an extension is
-#   granted), STATUS (set to "max_iterations" when the loop must stop).
-# Returns: 0 if the loop may continue, 1 if the cap has been reached for good.
-check_max_iterations() {
-    if [[ "$ITERATION" -le "$MAX_ITERATIONS" ]]; then
-        return 0
-    fi
-    # Hit the cap — check if we should auto-extend
-    # AUTO_EXTEND is executed as a command, so it is expected to hold "true" or "false".
-    if ! $AUTO_EXTEND || [[ "$EXTENSION_COUNT" -ge "$MAX_EXTENSIONS" ]]; then
-        if [[ "$EXTENSION_COUNT" -ge "$MAX_EXTENSIONS" ]]; then
-            warn "Hard cap reached: ${EXTENSION_COUNT} extensions applied (max ${MAX_EXTENSIONS})."
-        fi
-        warn "Max iterations ($MAX_ITERATIONS) reached."
-        STATUS="max_iterations"
-        return 1
-    fi
-    # Checkpoint audit: is there meaningful progress worth extending for?
-    echo -e "\n  ${CYAN}${BOLD}▸ Checkpoint${RESET} — max iterations ($MAX_ITERATIONS) reached, evaluating progress..."
-    local should_extend=false
-    local extension_reason=""
-    # Check 1: recent meaningful progress (not stuck)
-    if [[ "${CONSECUTIVE_FAILURES:-0}" -lt 2 ]]; then
-        # Check 2: agent hasn't signaled completion (if it did, guard_completion handles it)
-        local last_log="$LOG_DIR/iteration-$(( ITERATION - 1 )).log"
-        if [[ -f "$last_log" ]] && ! grep -q "LOOP_COMPLETE" "$last_log" 2>/dev/null; then
-            should_extend=true
-            extension_reason="work in progress with recent progress"
-        fi
-    fi
-    # Check 3: if quality gates or tests are failing, extend to let agent fix them
-    # This check runs regardless of checks 1-2 and overwrites their reason text.
-    if [[ "$TEST_PASSED" == "false" ]] || ! $QUALITY_GATE_PASSED; then
-        should_extend=true
-        extension_reason="quality gates or tests not yet passing"
-    fi
-    if $should_extend; then
-        # Scale extension size by velocity — good progress earns more iterations
-        local velocity_avg
-        velocity_avg="$(compute_velocity_avg)"
-        local effective_extension="$EXTENSION_SIZE"
-        if [[ "$velocity_avg" -gt 20 ]]; then
-            # High velocity: grant more iterations
-            effective_extension=$(( EXTENSION_SIZE + 3 ))
-        elif [[ "$velocity_avg" -lt 5 ]]; then
-            # Low velocity: grant fewer iterations
-            # (never less than one iteration)
-            effective_extension=$(( EXTENSION_SIZE > 2 ? EXTENSION_SIZE - 2 : 1 ))
-        fi
-        EXTENSION_COUNT=$(( EXTENSION_COUNT + 1 ))
-        MAX_ITERATIONS=$(( MAX_ITERATIONS + effective_extension ))
-        echo -e "  ${GREEN}✓${RESET} Auto-extending: +${effective_extension} iterations (now ${MAX_ITERATIONS} max, extension ${EXTENSION_COUNT}/${MAX_EXTENSIONS})"
-        echo -e "  ${DIM}Reason: ${extension_reason} | velocity: ~${velocity_avg} lines/iter${RESET}"
-        return 0
-    fi
-    warn "Max iterations reached — no recent progress detected."
-    STATUS="max_iterations"
-    return 1
-}
 # ─── Failure Diagnosis ─────────────────────────────────────────────────────────
 # Pattern-based root-cause classification for smarter retries (no Claude needed).
@@ -1156,7 +903,7 @@ diagnose_failure() {
     fi
     # Check if we've seen this diagnosis before in this session
-    local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
+    local diagnosis_file="${LOG_DIR}/diagnoses.txt"
     local repeat_count=0
     if [[ -f "$diagnosis_file" ]]; then
         repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || true)
@@ -1224,7 +971,7 @@ INSTRUCTION: This error has occurred $repeat_count times. The previous approach
 # ─── Test Gate ────────────────────────────────────────────────────────────────
 run_test_gate() {
-    if [[ -z "$TEST_CMD" ]]; then
+    if [[ -z "$TEST_CMD" ]] && [[ ${#ADDITIONAL_TEST_CMDS[@]} -eq 0 ]]; then
         TEST_PASSED=""
         TEST_OUTPUT=""
         return
@@ -1244,24 +991,91 @@ run_test_gate() {
         fi
     fi
-    local test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
-    TEST_LOG_FILE="$test_log"
-    echo -e "  ${DIM}Running ${test_mode} tests...${RESET}"
-    # Wrap test command with timeout (5 min default) to prevent hanging
-    local test_timeout="${SW_TEST_TIMEOUT:-300}"
-    local test_wrapper="$active_test_cmd"
-    if command -v timeout >/dev/null 2>&1; then
-        test_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
-    elif command -v gtimeout >/dev/null 2>&1; then
-        test_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
-    fi
-    if bash -c "$test_wrapper" > "$test_log" 2>&1; then
-        TEST_PASSED=true
-        TEST_OUTPUT="All tests passed (${test_mode} mode)."
-    else
-        TEST_PASSED=false
-        TEST_OUTPUT="$(tail -50 "$test_log")"
+    local all_passed=true
+    local test_results="[]"
+    local combined_output=""
+    local test_timeout="${SW_TEST_TIMEOUT:-900}"
+    # Run primary test command
+    if [[ -n "$active_test_cmd" ]]; then
+        local test_log="$LOG_DIR/tests-iter-${ITERATION}.log"
+        TEST_LOG_FILE="$test_log"
+        echo -e "  ${DIM}Running ${test_mode} tests...${RESET}"
+        local test_wrapper="$active_test_cmd"
+        if command -v timeout >/dev/null 2>&1; then
+            test_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
+        elif command -v gtimeout >/dev/null 2>&1; then
+            test_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
+        fi
+        local start_ts exit_code=0
+        start_ts=$(date +%s)
+        bash -c "$test_wrapper" > "$test_log" 2>&1 || exit_code=$?
+        local duration=$(( $(date +%s) - start_ts ))
+        if command -v jq >/dev/null 2>&1; then
+            test_results=$(echo "$test_results" | jq --arg cmd "$active_test_cmd" \
+                --argjson exit "$exit_code" --argjson dur "$duration" \
+                '. + [{"command": $cmd, "exit_code": $exit, "duration_s": $dur}]')
+        fi
+        [[ "$exit_code" -ne 0 ]] && all_passed=false
+        combined_output+="$(cat "$test_log" 2>/dev/null)"$'\n'
+    fi
+    # Run additional test commands (discovered or explicit)
+    # Mid-build discovery: find test files created since loop start
+    local mid_build_cmds=()
+    if [[ -n "${LOOP_START_COMMIT:-}" ]] && type detect_created_test_files >/dev/null 2>&1; then
+        while IFS= read -r _cmd; do
+            [[ -n "$_cmd" ]] && mid_build_cmds+=("$_cmd")
+        done < <(detect_created_test_files "$LOOP_START_COMMIT" 2>/dev/null || true)
     fi
+    local all_extra=("${ADDITIONAL_TEST_CMDS[@]+"${ADDITIONAL_TEST_CMDS[@]}"}" "${mid_build_cmds[@]+"${mid_build_cmds[@]}"}")
+    for extra_cmd in "${all_extra[@]+"${all_extra[@]}"}"; do
+        [[ -z "$extra_cmd" ]] && continue
+        local extra_log="${LOG_DIR}/tests-extra-iter-${ITERATION}.log"
+        echo -e "  ${DIM}Running additional: ${extra_cmd}${RESET}"
+        local extra_wrapper="$extra_cmd"
+        if command -v timeout >/dev/null 2>&1; then
+            extra_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$extra_cmd")"
+        elif command -v gtimeout >/dev/null 2>&1; then
+            extra_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$extra_cmd")"
+        fi
+        local start_ts exit_code=0
+        start_ts=$(date +%s)
+        bash -c "$extra_wrapper" >> "$extra_log" 2>&1 || exit_code=$?
+        local duration=$(( $(date +%s) - start_ts ))
+        if command -v jq >/dev/null 2>&1; then
+            test_results=$(echo "$test_results" | jq --arg cmd "$extra_cmd" \
+                --argjson exit "$exit_code" --argjson dur "$duration" \
+                '. + [{"command": $cmd, "exit_code": $exit, "duration_s": $dur}]')
+        fi
+        [[ "$exit_code" -ne 0 ]] && all_passed=false
+        combined_output+="$(cat "$extra_log" 2>/dev/null)"$'\n'
+    done
+    # Write structured test evidence
+    if command -v jq >/dev/null 2>&1; then
+        echo "$test_results" > "${LOG_DIR}/test-evidence-iter-${ITERATION}.json"
+    fi
+    # Audit: emit test gate event
+    if type audit_emit >/dev/null 2>&1; then
+        local cmd_count=0
+        command -v jq >/dev/null 2>&1 && cmd_count=$(echo "$test_results" | jq 'length' 2>/dev/null || echo 0)
+        audit_emit "loop.test_gate" "iteration=$ITERATION" "commands=$cmd_count" \
+            "all_passed=$all_passed" "evidence_path=test-evidence-iter-${ITERATION}.json" || true
+    fi
+    TEST_PASSED=$all_passed
+    TEST_OUTPUT="$(echo "$combined_output" | tail -50)"
 }
 write_error_summary() {
@@ -1352,7 +1166,18 @@ run_audit_agent() {
     # Include verified test status so auditor doesn't have to guess
     local test_context=""
-    if [[ -n "$TEST_CMD" ]]; then
+    local evidence_file="${LOG_DIR}/test-evidence-iter-${ITERATION}.json"
+    if [[ -f "$evidence_file" ]] && command -v jq >/dev/null 2>&1; then
+        local cmd_count total_cmds evidence_detail
+        cmd_count=$(jq 'length' "$evidence_file" 2>/dev/null || echo 0)
+        total_cmds=$(jq -r '[.[].command] | join(", ")' "$evidence_file" 2>/dev/null || echo "unknown")
+        evidence_detail=$(jq -r '.[] | "- \(.command): exit \(.exit_code) (\(.duration_s)s)"' "$evidence_file" 2>/dev/null || echo "")
+        test_context="## Verified Test Status (from harness, not from agent)
+Test commands run: ${cmd_count} (${total_cmds})
+${evidence_detail}
+Overall: $(if [[ "${TEST_PASSED:-}" == "true" ]]; then echo "ALL PASSING"; else echo "FAILING"; fi)"
+    elif [[ -n "$TEST_CMD" ]]; then
+        # Fallback to existing boolean
         if [[ "${TEST_PASSED:-}" == "true" ]]; then
             test_context="## Verified Test Status (from harness, not from agent)
 Tests: ALL PASSING (command: ${TEST_CMD})"
@@ -1406,6 +1231,12 @@ AUDIT_PROMPT
         audit_flags+=("--dangerously-skip-permissions")
     fi
+    # Use structured output for machine-parseable audit results
+    local schema_file="${SCRIPT_DIR}/../schemas/audit-result.json"
+    if [[ -f "$schema_file" ]]; then
+        audit_flags+=("--json-schema" "$(cat "$schema_file")")
+    fi
     local exit_code=0
     claude -p "$audit_prompt" "${audit_flags[@]}" > "$audit_log" 2>&1 || exit_code=$?
@@ -1442,9 +1273,11 @@ run_quality_gates() {
         gate_failures+=("uncommitted changes present")
     fi
-    # Gate 3: No TODO/FIXME/HACK/XXX in new code
+    # Gate 3: No TODO/FIXME/HACK/XXX in new source code
+    # Exclude .claude/, docs/plans/, and markdown files (which legitimately contain task markers)
     local todo_count
-    todo_count="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null | grep -cE '^\+.*(TODO|FIXME|HACK|XXX)' || true)"
+    todo_count="$(git -C "$PROJECT_ROOT" diff HEAD~1 -- ':!.claude/' ':!docs/plans/' ':!*.md' 2>/dev/null \
+        | grep -cE '^\+.*(TODO|FIXME|HACK|XXX)' || true)"
     todo_count="${todo_count:-0}"
     if [[ "${todo_count:-0}" -gt 0 ]]; then
         gate_failures+=("${todo_count} TODO/FIXME/HACK/XXX markers in new code")
@@ -1661,447 +1494,14 @@ HOLISTIC_PROMPT
 }
 # ─── Context Window Management ───────────────────────────────────────────────
-# Prevents prompt from exceeding Claude's context limit (~200K tokens).
-# Trims least-critical sections first when over budget.
-CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-$(_config_get_int "loop.context_budget_chars" 180000 2>/dev/null || echo 180000)}"  # ~45K tokens at 4 chars/token
-# Fit a prompt into the CONTEXT_BUDGET_CHARS character budget.
-# Arguments: $1 - the full prompt text.
-# Outputs:   the prompt on stdout, unchanged when it already fits, otherwise
-#            trimmed in stages (each stage runs only while still over budget).
-# Side effects: when trimming happened, calls warn and emit_event.
-manage_context_window() {
-    local prompt="$1"
-    local budget="${CONTEXT_BUDGET_CHARS}"
-    local current_len=${#prompt}
-    # Read trimming tunables from config (env > daemon-config > policy > defaults.json)
-    local trim_memory_chars trim_git_entries trim_hotspot_files trim_test_lines
-    trim_memory_chars=$(_config_get_int "loop.context_trim_memory_chars" 20000 2>/dev/null || echo 20000)
-    trim_git_entries=$(_config_get_int "loop.context_trim_git_entries" 10 2>/dev/null || echo 10)
-    trim_hotspot_files=$(_config_get_int "loop.context_trim_hotspot_files" 5 2>/dev/null || echo 5)
-    trim_test_lines=$(_config_get_int "loop.context_trim_test_lines" 50 2>/dev/null || echo 50)
-    # Fast path: already within budget, emit as-is.
-    if [[ "$current_len" -le "$budget" ]]; then
-        echo "$prompt"
-        return
-    fi
-    # Over budget — progressively trim sections (least important first)
-    local trimmed="$prompt"
-    # 1. Trim DORA/Performance baselines (least critical for code generation)
-    # Drops everything from the "## Performance Baselines" heading up to the next "## " heading.
-    if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk '/^## Performance Baselines/{skip=1; next} skip && /^## [^#]/{skip=0} !skip{print}')
-    fi
-    # 2. Trim file hotspots to top N
-    if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk -v max="$trim_hotspot_files" '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>max) next} {print}')
-    fi
-    # 3. Trim git log to last N entries
-    # NOTE(review): the awk keeps the first N matching lines after the heading and never resets p — confirm this is intended.
-    if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk -v max="$trim_git_entries" '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>max) next} {print}')
-    fi
-    # 4. Truncate memory context to first N chars
-    if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk -v max="$trim_memory_chars" '
-            /## Memory Context/{mem=1; skip_rest=0; chars=0; print; next}
-            mem && /^## [^#]/{mem=0; print; next}
-            mem{chars+=length($0)+1; if(chars>max){print "... (memory truncated for context budget)"; skip_rest=1; mem=0; next}}
-            skip_rest && /^## [^#]/{skip_rest=0; print; next}
-            skip_rest{next}
-            {print}
-        ')
-    fi
-    # 5. Truncate test output to last N lines
-    # The section body is buffered and only its last N non-empty lines are printed when the next "## " heading is seen.
-    if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed=$(echo "$trimmed" | awk -v max="$trim_test_lines" '
-            /## Test Results/{found=1; buf=""; print; next}
-            found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>max)?(n-max+1):1; for(i=start;i<=n;i++) if(arr[i]!="") print arr[i]; print; next}
-            found{buf=buf $0 "\n"; next}
-            {print}
-        ')
-    fi
-    # 6. Last resort: hard truncate with notice
-    # The notice is appended after the cut, so the result is slightly longer than the budget.
-    if [[ "${#trimmed}" -gt "$budget" ]]; then
-        trimmed="${trimmed:0:$budget}
-... [CONTEXT TRUNCATED: prompt exceeded ${budget} char budget. Focus on the goal and most recent errors.]"
-    fi
-    # Log the trimming
-    local final_len=${#trimmed}
-    if [[ "$final_len" -lt "$current_len" ]]; then
-        warn "Context trimmed from ${current_len} to ${final_len} chars (budget: ${budget})"
-        emit_event "loop.context_trimmed" "original=$current_len" "trimmed=$final_len" "budget=$budget" 2>/dev/null || true
-    fi
-    echo "$trimmed"
-}
 # ─── Prompt Composition ──────────────────────────────────────────────────────
-# Assemble the full agent prompt for the current iteration and print it on stdout.
-# Takes no arguments; everything comes from globals (ITERATION, MAX_ITERATIONS,
-# GOAL, LOG_ENTRIES, TEST_CMD, TEST_PASSED, TEST_OUTPUT, LOG_DIR, SCRIPT_DIR, ...)
-# and from optional helper functions that are used only when they are defined.
-# Side effects visible in this function: may append an alternative strategy to
-# GOAL, may change MODEL, unsets ESCALATE_MODEL and clears the RESUMED_* variables.
-compose_prompt() {
-    local recent_log
-    # Take the last 15 lines of the accumulated log entries
-    recent_log="$(echo "$LOG_ENTRIES" | tail -15)"
-    if [[ -z "$recent_log" ]]; then
-        recent_log="(first iteration — no previous progress)"
-    fi
-    local git_log
-    git_log="$(git_recent_log)"
-    local test_section
-    # TEST_PASSED is tri-state here: empty (no run yet), "true" or "false".
-    if [[ -z "$TEST_CMD" ]]; then
-        test_section="No test command configured."
-    elif [[ -z "$TEST_PASSED" ]]; then
-        test_section="No test results yet (first iteration). Test command: $TEST_CMD"
-    elif $TEST_PASSED; then
-        test_section="$TEST_OUTPUT"
-    else
-        test_section="TESTS FAILED — fix these before proceeding:
-$TEST_OUTPUT"
-    fi
-    # Structured error context (machine-readable)
-    local error_summary_section=""
-    local error_json="$LOG_DIR/error-summary.json"
-    if [[ -f "$error_json" ]]; then
-        local err_count err_lines
-        err_count=$(jq -r '.error_count // 0' "$error_json" 2>/dev/null || echo "0")
-        err_lines=$(jq -r '.error_lines[]? // empty' "$error_json" 2>/dev/null | head -10 || true)
-        if [[ "$err_count" -gt 0 ]] && [[ -n "$err_lines" ]]; then
-            error_summary_section="## Structured Error Summary (${err_count} errors detected)
-${err_lines}
-Fix these specific errors. Each line above is one distinct error from the test output."
-        fi
-    fi
-    # Build audit sections (captured before heredoc to avoid nested heredoc issues)
-    local audit_section
-    audit_section="$(compose_audit_section)"
-    local audit_feedback_section
-    audit_feedback_section="$(compose_audit_feedback_section)"
-    local rejection_notice_section
-    rejection_notice_section="$(compose_rejection_notice_section)"
-    # Memory context injection (failure patterns + past learnings)
-    # Prefer the sourced function; otherwise fall back to running the script.
-    local memory_section=""
-    if type memory_inject_context >/dev/null 2>&1; then
-        memory_section="$(memory_inject_context "build" 2>/dev/null || true)"
-    elif [[ -f "$SCRIPT_DIR/sw-memory.sh" ]]; then
-        memory_section="$("$SCRIPT_DIR/sw-memory.sh" inject build 2>/dev/null || true)"
-    fi
-    # Cross-pipeline discovery injection (learnings from other pipeline runs)
-    local discovery_section=""
-    if type inject_discoveries >/dev/null 2>&1; then
-        local disc_output
-        disc_output="$(inject_discoveries "${GOAL:-}" 2>/dev/null || true)"
-        if [[ -n "$disc_output" ]]; then
-            discovery_section="$disc_output"
-        fi
-    fi
-    # DORA baselines for context
-    local dora_section=""
-    if type memory_get_dora_baseline >/dev/null 2>&1; then
-        local dora_json
-        dora_json="$(memory_get_dora_baseline 7 2>/dev/null || echo "{}")"
-        local dora_total
-        dora_total=$(echo "$dora_json" | jq -r '.total // 0' 2>/dev/null || echo "0")
-        if [[ "$dora_total" -gt 0 ]]; then
-            local dora_df dora_cfr
-            dora_df=$(echo "$dora_json" | jq -r '.deploy_freq // 0' 2>/dev/null || echo "0")
-            dora_cfr=$(echo "$dora_json" | jq -r '.cfr // 0' 2>/dev/null || echo "0")
-            dora_section="## Performance Baselines (Last 7 Days)
-- Deploy frequency: ${dora_df}/week
-- Change failure rate: ${dora_cfr}%
-- Total pipeline runs: ${dora_total}"
-        fi
-    fi
-    # Append mid-loop memory refresh if available
-    local memory_refresh_file="$LOG_DIR/memory-refresh-$(( ITERATION - 1 )).txt"
-    if [[ -f "$memory_refresh_file" ]]; then
-        memory_section="${memory_section}
-## Fresh Context (from iteration $(( ITERATION - 1 )) analysis)
-$(cat "$memory_refresh_file")"
-    fi
-    # GitHub intelligence context (gated by availability)
-    local intelligence_section=""
-    if [[ "${NO_GITHUB:-}" != "true" ]]; then
-        # File hotspots — top 5 most-changed files
-        if type gh_file_change_frequency >/dev/null 2>&1; then
-            local hotspots
-            hotspots=$(gh_file_change_frequency 2>/dev/null | head -5 || true)
-            if [[ -n "$hotspots" ]]; then
-                intelligence_section="${intelligence_section}
-## File Hotspots (most frequently changed)
-${hotspots}"
-            fi
-        fi
-        # CODEOWNERS context
-        if type gh_codeowners >/dev/null 2>&1; then
-            local owners
-            owners=$(gh_codeowners 2>/dev/null | head -10 || true)
-            if [[ -n "$owners" ]]; then
-                intelligence_section="${intelligence_section}
-## Code Owners
-${owners}"
-            fi
-        fi
-        # Active security alerts
-        if type gh_security_alerts >/dev/null 2>&1; then
-            local alerts
-            alerts=$(gh_security_alerts 2>/dev/null | head -5 || true)
-            if [[ -n "$alerts" ]]; then
-                intelligence_section="${intelligence_section}
-## Active Security Alerts
-${alerts}"
-            fi
-        fi
-    fi
-    # Architecture rules (from intelligence layer)
-    # Per-repo data is keyed by the first 12 hex chars of the SHA-256 of the current directory path.
-    local repo_hash
-    repo_hash=$(echo -n "$(pwd)" | shasum -a 256 2>/dev/null | cut -c1-12 || echo "unknown")
-    local arch_file="${HOME}/.shipwright/memory/${repo_hash}/architecture.json"
-    if [[ -f "$arch_file" ]]; then
-        local arch_rules
-        arch_rules=$(jq -r '.rules[]? // empty' "$arch_file" 2>/dev/null | head -10 || true)
-        if [[ -n "$arch_rules" ]]; then
-            intelligence_section="${intelligence_section}
-## Architecture Rules
-${arch_rules}"
-        fi
-    fi
-    # Coverage baseline
-    local coverage_file="${HOME}/.shipwright/baselines/${repo_hash}/coverage.json"
-    if [[ -f "$coverage_file" ]]; then
-        local coverage_pct
-        coverage_pct=$(jq -r '.coverage_percent // empty' "$coverage_file" 2>/dev/null || true)
-        if [[ -n "$coverage_pct" ]]; then
-            intelligence_section="${intelligence_section}
-## Coverage Baseline
-Current coverage: ${coverage_pct}% — do not decrease this."
-        fi
-    fi
-    # Error classification from last failure
-    # NOTE(review): this path is relative to the current directory, while the stuckness
-    # branch below resolves error-log.jsonl via ARTIFACTS_DIR/PROJECT_ROOT — confirm both point at the same file.
-    local error_log=".claude/pipeline-artifacts/error-log.jsonl"
-    if [[ -f "$error_log" ]]; then
-        local last_error
-        last_error=$(tail -1 "$error_log" 2>/dev/null | jq -r '"Type: \(.type), Exit: \(.exit_code), Error: \(.error | split("\n") | first)"' 2>/dev/null || true)
-        if [[ -n "$last_error" ]]; then
-            intelligence_section="${intelligence_section}
-## Last Error Context
-${last_error}"
-        fi
-    fi
-    # Stuckness detection — compare last 3 iteration outputs
-    # detect_stuckness's exit status is captured on the next line; status 0 means "stuck".
-    local stuckness_section=""
-    stuckness_section="$(detect_stuckness)"
-    local _stuck_ret=$?
-    local stuckness_detected=false
-    [[ "$_stuck_ret" -eq 0 ]] && stuckness_detected=true
-    # Strategy exploration when stuck — append alternative strategy to GOAL
-    if [[ "$stuckness_detected" == "true" ]]; then
-        local last_error diagnosis
-        last_error=$(tail -1 "${ARTIFACTS_DIR:-${PROJECT_ROOT:-.}/.claude/pipeline-artifacts}/error-log.jsonl" 2>/dev/null | jq -r '"Type: \(.type), Exit: \(.exit_code), Error: \(.error | split("\n") | first)"' 2>/dev/null || true)
-        [[ -z "$last_error" || "$last_error" == "null" ]] && last_error="unknown"
-        diagnosis="${STUCKNESS_DIAGNOSIS:-}"
-        local alt_strategy
-        alt_strategy=$(explore_alternative_strategy "$last_error" "${ITERATION:-0}" "$diagnosis")
-        GOAL="${GOAL}
-${alt_strategy}"
-        # Handle model escalation
-        if [[ "${ESCALATE_MODEL:-}" == "true" ]]; then
-            if [[ -f "$SCRIPT_DIR/sw-model-router.sh" ]]; then
-                source "$SCRIPT_DIR/sw-model-router.sh" 2>/dev/null || true
-            fi
-            if type escalate_model &>/dev/null; then
-                MODEL=$(escalate_model "${MODEL:-sonnet}")
-                info "Escalated to model: $MODEL"
-            fi
-            unset ESCALATE_MODEL
-        fi
-    fi
-    # Session restart context — inject previous session progress
-    local restart_section=""
-    if [[ "$SESSION_RESTART" == "true" ]] && [[ -f "$LOG_DIR/progress.md" ]]; then
-        restart_section="## Previous Session Progress
-$(cat "$LOG_DIR/progress.md")
-You are starting a FRESH session after the previous one exhausted its iterations.
-Read the progress above and continue from where it left off. Do NOT repeat work already done."
-    fi
-    # Resume-from-checkpoint context — reconstruct Claude context for meaningful resume
-    local resume_section=""
-    if [[ -n "${RESUMED_FROM_ITERATION:-}" && "${RESUMED_FROM_ITERATION:-0}" -gt 0 ]]; then
-        local _test_tail="  (none recorded)"
-        [[ -n "${RESUMED_TEST_OUTPUT:-}" ]] && _test_tail="$(echo "$RESUMED_TEST_OUTPUT" | tail -20)"
-        resume_section="## RESUMING FROM ITERATION ${RESUMED_FROM_ITERATION}
-Continue from where you left off. Do NOT repeat work already done.
-Previous work modified these files:
-${RESUMED_MODIFIED:-  (none recorded)}
-Previous findings/errors from earlier iterations:
-${RESUMED_FINDINGS:-  (none recorded)}
-Last test output (fix any failures, tail):
-${_test_tail}
----
-"
-        # Clear after first use so we don't keep injecting on every iteration
-        RESUMED_FROM_ITERATION=""
-        RESUMED_MODIFIED=""
-        RESUMED_FINDINGS=""
-        RESUMED_TEST_OUTPUT=""
-    fi
-    # Build cumulative progress summary showing all iterations' work
-    local cumulative_section=""
-    if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
-        local cum_stat
-        cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
-        if [[ -n "$cum_stat" ]]; then
-            cumulative_section="## Cumulative Progress (all iterations combined)
-${cum_stat}
-"
-        fi
-    fi
-    # Emit the prompt. The heredoc delimiter is unquoted, so variables are expanded;
-    # the ${var:+...} forms make optional sections vanish when their variable is empty.
-    cat <<PROMPT
-You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
-${resume_section}
-## Your Goal
-${GOAL}
-${cumulative_section}
-## Current Progress
-${recent_log}
-## Recent Git Activity
-${git_log}
-## Test Results (Previous Iteration)
-${test_section}
-${error_summary_section:+$error_summary_section
-}
-${memory_section:+## Memory Context
-$memory_section
-}
-${discovery_section:+## Cross-Pipeline Learnings
-$discovery_section
-}
-${dora_section:+$dora_section
-}
-${intelligence_section:+$intelligence_section
-}
-${restart_section:+$restart_section
-}
-## Instructions
-1. Read the codebase and understand the current state
-2. Identify the highest-priority remaining work toward the goal
-3. Implement ONE meaningful chunk of progress
-4. Run tests if a test command exists: ${TEST_CMD:-"(none)"}
-5. Commit your work with a descriptive message
-6. When the goal is FULLY achieved, output exactly: LOOP_COMPLETE
-## Context Efficiency
-- Batch independent tool calls in parallel — avoid sequential round-trips
-- Use targeted file reads (offset/limit) instead of reading entire large files
-- Delegate large searches to subagents — only import the summary
-- Filter tool results with grep/jq before reasoning over them
-- Keep working memory lean — summarize completed steps, don't preserve full outputs
-${audit_section}
-${audit_feedback_section}
-${rejection_notice_section}
-${stuckness_section}
-## Rules
-- Focus on ONE task per iteration — do it well
-- Always commit with descriptive messages
-- If tests fail, fix them before ending
-- If stuck on the same issue for 2+ iterations, try a different approach
-- Do NOT output LOOP_COMPLETE unless the goal is genuinely achieved
-PROMPT
-}
+# NOTE: compose_prompt() is now in lib/loop-iteration.sh (extracted upstream)
 # ─── Alternative Strategy Exploration ─────────────────────────────────────────
 # When stuckness is detected, generate a context-aware alternative strategy.
 # Uses pattern matching on error type + iteration count to suggest different approaches.
-# Arguments:
-#   $1 - description of the last error (default "unknown")
-#   $2 - current iteration number (default 0; not used in the body below)
-#   $3 - stuckness diagnosis text (default empty)
-# Outputs: the chosen strategy text on stdout (an empty line when every
-#   applicable strategy has already been attempted).
-# Side effects: appends the chosen strategy key to strategy-attempts.txt.
-explore_alternative_strategy() {
-    local last_error="${1:-unknown}"
-    local iteration="${2:-0}"
-    local diagnosis="${3:-}"
-    # Track attempted strategies to avoid repeating them
-    local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
-    local attempted
-    attempted=$(cat "$strategy_file" 2>/dev/null || true)
-    local strategy=""
-    # If quality gates are passing but evaluators disagree, suggest focusing on evaluator alignment
-    if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
-        if ! echo "$attempted" | grep -q "evaluator_alignment"; then
-            echo "evaluator_alignment" >> "$strategy_file"
-            strategy="## Alternative Strategy: Evaluator Alignment
-The code appears functionally complete (tests pass). Focus on satisfying the remaining
-quality gate evaluators. Check the DoD log and audit log for specific complaints, then
-address those exact points rather than adding new features."
-        fi
-    fi
-    # If no code changes in last iteration, suggest verifying existing work
-    # This block is independent of the previous one and overwrites its strategy when both apply.
-    if echo "$last_error" | grep -qi "no code changes" || [[ "$diagnosis" == *"no code"* ]]; then
-        if ! echo "$attempted" | grep -q "verify_existing"; then
-            echo "verify_existing" >> "$strategy_file"
-            strategy="## Alternative Strategy: Verify Existing Work
-Recent iterations made no code changes. The work may already be complete.
-Run the full test suite, verify all features work, and if everything passes,
-commit a verification message and declare LOOP_COMPLETE with evidence."
-        fi
-    fi
-    # Generic fallback: break the problem down
-    if [[ -z "$strategy" ]]; then
-        if ! echo "$attempted" | grep -q "decompose"; then
-            echo "decompose" >> "$strategy_file"
-            strategy="## Alternative Strategy: Decompose
-Break the remaining work into smaller, independent steps. Focus on one specific
-file or function at a time. Read error messages literally — the root cause may
-differ from your assumption."
-        fi
-    fi
-    echo "$strategy"
-}
 # ─── Stuckness Detection ─────────────────────────────────────────────────────
 # Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
@@ -2110,189 +1510,7 @@ differ from your assumption."
 STUCKNESS_COUNT=0
 STUCKNESS_TRACKING_FILE=""
-record_iteration_stuckness_data() {
-    local exit_code="${1:-0}"
-    [[ -z "$LOG_DIR" ]] && return 0
-    local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
-    local diff_hash error_hash
-    diff_hash=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | (md5 -q 2>/dev/null || md5sum 2>/dev/null | cut -d' ' -f1) || echo "none")
-    local error_log="${ARTIFACTS_DIR:-${STATE_DIR:-${PROJECT_ROOT:-.}/.claude}/pipeline-artifacts}/error-log.jsonl"
-    if [[ -f "$error_log" ]]; then
-        error_hash=$(tail -5 "$error_log" 2>/dev/null | sort -u | (md5 -q 2>/dev/null || md5sum 2>/dev/null | cut -d' ' -f1) || echo "none")
-    else
-        error_hash="none"
-    fi
-    echo "${diff_hash}|${error_hash}|${exit_code}" >> "$tracking_file"
-}
-detect_stuckness() {
-    STUCKNESS_HINT=""
-    local iteration="${ITERATION:-0}"
-    local stuckness_signals=0
-    local stuckness_reasons=()
-    local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
-    local tracking_lines
-    tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || true)
-    tracking_lines="${tracking_lines:-0}"
-    # Signal 1: Text overlap (existing logic) — compare last 2 iteration logs
-    if [[ "$iteration" -ge 3 ]]; then
-        local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
-        local log2="$LOG_DIR/iteration-$(( iteration - 2 )).log"
-        local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
-        if [[ -f "$log1" && -f "$log2" ]]; then
-            local lines1 lines2 common total overlap_pct
-            lines1=$(tail -50 "$log1" 2>/dev/null | grep -v '^$' | sort || true)
-            lines2=$(tail -50 "$log2" 2>/dev/null | grep -v '^$' | sort || true)
-            if [[ -n "$lines1" && -n "$lines2" ]]; then
-                total=$(echo "$lines1" | wc -l | tr -d ' ')
-                common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || true)
-                common="${common:-0}"
-                if [[ "$total" -gt 0 ]]; then
-                    overlap_pct=$(( common * 100 / total ))
-                else
-                    overlap_pct=0
-                fi
-                if [[ "${overlap_pct:-0}" -ge 90 ]]; then
-                    stuckness_signals=$((stuckness_signals + 1))
-                    stuckness_reasons+=("high text overlap (${overlap_pct}%) between iterations")
-                fi
-            fi
-        fi
-    fi
-    # Signal 2: Git diff hash — last 3 iterations produced zero or identical diffs
-    if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
-        local last_three
-        last_three=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f1 || true)
-        local unique_hashes
-        unique_hashes=$(echo "$last_three" | sort -u | grep -v '^$' | wc -l | tr -d ' ')
-        if [[ "$unique_hashes" -le 1 ]] && [[ -n "$last_three" ]]; then
-            stuckness_signals=$((stuckness_signals + 1))
-            stuckness_reasons+=("identical or zero git diffs in last 3 iterations")
-        fi
-    fi
-    # Signal 3: Error repetition — same error hash in last 3 iterations
-    if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
-        local last_three_errors
-        last_three_errors=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f2 || true)
-        local unique_error_hashes
-        unique_error_hashes=$(echo "$last_three_errors" | sort -u | grep -v '^none$' | grep -v '^$' | wc -l | tr -d ' ')
-        if [[ "$unique_error_hashes" -eq 1 ]] && [[ -n "$(echo "$last_three_errors" | grep -v '^none$')" ]]; then
-            stuckness_signals=$((stuckness_signals + 1))
-            stuckness_reasons+=("same error in last 3 iterations")
-        fi
-    fi
-    # Signal 4: Same error repeating 3+ times (legacy check on error-log content)
-    local error_log
-    error_log="${ARTIFACTS_DIR:-$PROJECT_ROOT/.claude/pipeline-artifacts}/error-log.jsonl"
-    if [[ -f "$error_log" ]]; then
-        local last_errors
-        last_errors=$(tail -5 "$error_log" 2>/dev/null | jq -r '.error // .message // .error_hash // empty' 2>/dev/null | sort | uniq -c | sort -rn | head -1 || true)
-        local repeat_count
-        repeat_count=$(echo "$last_errors" | awk '{print $1}' 2>/dev/null || echo "0")
-        if [[ "${repeat_count:-0}" -ge 3 ]]; then
-            stuckness_signals=$((stuckness_signals + 1))
-            stuckness_reasons+=("same error repeated ${repeat_count} times")
-        fi
-    fi
-    # Signal 5: Exit code pattern — last 3 iterations had same non-zero exit code
-    if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
-        local last_three_exits
-        last_three_exits=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f3 || true)
-        local first_exit
-        first_exit=$(echo "$last_three_exits" | head -1)
-        if [[ "$first_exit" =~ ^[0-9]+$ ]] && [[ "$first_exit" -ne 0 ]]; then
-            local all_same=true
-            while IFS= read -r ex; do
-                [[ "$ex" != "$first_exit" ]] && all_same=false
-            done <<< "$last_three_exits"
-            if [[ "$all_same" == true ]]; then
-                stuckness_signals=$((stuckness_signals + 1))
-                stuckness_reasons+=("same non-zero exit code (${first_exit}) in last 3 iterations")
-            fi
-        fi
-    fi
-    # Signal 6: Git diff size — no or minimal code changes (existing)
-    local diff_lines
-    diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || true)
-    diff_lines="${diff_lines:-0}"
-    if [[ "${diff_lines:-0}" -lt 5 ]] && [[ "$iteration" -gt 2 ]]; then
-        stuckness_signals=$((stuckness_signals + 1))
-        stuckness_reasons+=("no code changes in last iteration")
-    fi
-    # Signal 7: Iteration budget — used >70% without passing tests
-    local max_iter="${MAX_ITERATIONS:-20}"
-    local progress_pct=0
-    if [[ "$max_iter" -gt 0 ]]; then
-        progress_pct=$(( iteration * 100 / max_iter ))
-    fi
-    if [[ "$progress_pct" -gt 70 ]] && [[ "${TEST_PASSED:-false}" != "true" ]]; then
-        stuckness_signals=$((stuckness_signals + 1))
-        stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
-    fi
-    # Gate-aware dampening: if tests pass and the agent has made progress overall,
-    # reduce stuckness signal count. The "no code changes" and "identical diffs" signals
-    # fire when code is already complete and the agent is fighting evaluator quirks —
-    # that's not genuine stuckness, it's "done but gates disagree."
-    if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
-        # If at least one quality signal is positive, dampen by 1
-        if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
-            stuckness_signals=$((stuckness_signals - 1))
-        fi
-    fi
-    # Decision: 2+ signals = stuck
-    if [[ "$stuckness_signals" -ge 2 ]]; then
-        STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
-        STUCKNESS_DIAGNOSIS="${stuckness_reasons[*]}"
-        if type emit_event >/dev/null 2>&1; then
-            emit_event "loop.stuckness_detected" "signals=$stuckness_signals" "count=$STUCKNESS_COUNT" "iteration=$iteration" "reasons=${stuckness_reasons[*]}"
-        fi
-        STUCKNESS_HINT="IMPORTANT: The loop appears stuck. Previous approaches have not worked. You MUST try a fundamentally different strategy. Reasons: ${stuckness_reasons[*]}"
-        warn "Stuckness detected (${stuckness_signals} signals, count ${STUCKNESS_COUNT}): ${stuckness_reasons[*]}"
-        local diff_summary=""
-        local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
-        local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
-        if [[ -f "$log3" && -f "$log1" ]]; then
-            diff_summary=$(diff <(tail -30 "$log3" 2>/dev/null) <(tail -30 "$log1" 2>/dev/null) 2>/dev/null | head -10 || true)
-        fi
-        local alternatives=""
-        if type memory_inject_context >/dev/null 2>&1; then
-            alternatives=$(memory_inject_context "build" 2>/dev/null | grep -i "fix:" | head -3 || true)
-        fi
-        cat <<STUCK_SECTION
-## Stuckness Detected
-${STUCKNESS_HINT}
-${diff_summary:+Changes between recent iterations:
-$diff_summary
-}
-${alternatives:+Consider these alternative approaches from past fixes:
-$alternatives
-}
-Try a fundamentally different approach:
-- Break the problem into smaller steps
-- Look for an entirely different implementation strategy
-- Check if there's a dependency or configuration issue blocking progress
-- Read error messages more carefully — the root cause may differ from your assumption
-STUCK_SECTION
-        return 0
-    fi
-    return 1
-}
 compose_audit_section() {
     if ! $AUDIT_ENABLED; then
@@ -2421,122 +1639,10 @@ PROMPT
 # ─── Claude Execution ────────────────────────────────────────────────────────
-build_claude_flags() {
-    local flags=()
-    flags+=("--model" "$MODEL")
-    flags+=("--output-format" "json")
-    if $SKIP_PERMISSIONS; then
-        flags+=("--dangerously-skip-permissions")
-    fi
-    if [[ -n "$MAX_TURNS" ]]; then
-        flags+=("--max-turns" "$MAX_TURNS")
-    fi
-    echo "${flags[*]}"
-}
-run_claude_iteration() {
-    local log_file="$LOG_DIR/iteration-${ITERATION}.log"
-    local json_file="$LOG_DIR/iteration-${ITERATION}.json"
-    local prompt
-    prompt="$(compose_prompt)"
-    local final_prompt
-    final_prompt=$(manage_context_window "$prompt")
-    local raw_prompt_chars=${#prompt}
-    local prompt_chars=${#final_prompt}
-    local approx_tokens=$((prompt_chars / 4))
-    info "Prompt: ~${approx_tokens} tokens (${prompt_chars} chars)"
-    # Emit context efficiency metrics
-    if type emit_event >/dev/null 2>&1; then
-        local trim_ratio=0
-        local budget_utilization=0
-        if [[ "$raw_prompt_chars" -gt 0 ]]; then
-            trim_ratio=$(awk -v raw="$raw_prompt_chars" -v trimmed="$prompt_chars" \
-                'BEGIN { printf "%.1f", ((raw - trimmed) / raw) * 100 }')
-        fi
-        if [[ "${CONTEXT_BUDGET_CHARS:-0}" -gt 0 ]]; then
-            budget_utilization=$(awk -v used="$prompt_chars" -v budget="${CONTEXT_BUDGET_CHARS}" \
-                'BEGIN { printf "%.1f", (used / budget) * 100 }')
-        fi
-        emit_event "loop.context_efficiency" \
-            "iteration=$ITERATION" \
-            "raw_prompt_chars=$raw_prompt_chars" \
-            "trimmed_prompt_chars=$prompt_chars" \
-            "trim_ratio=$trim_ratio" \
-            "budget_utilization=$budget_utilization" \
-            "budget_chars=${CONTEXT_BUDGET_CHARS:-0}" \
-            "job_id=${PIPELINE_JOB_ID:-loop-$$}" 2>/dev/null || true
-    fi
-    local flags
-    flags="$(build_claude_flags)"
-    local iter_start
-    iter_start="$(now_epoch)"
-    echo -e "\n${CYAN}${BOLD}▸${RESET} ${BOLD}Iteration ${ITERATION}/${MAX_ITERATIONS}${RESET} — Starting..."
-    # Run Claude headless (with timeout + PID capture for signal handling)
-    # Output goes to .json first, then we extract text into .log for compat
-    local exit_code=0
-    # shellcheck disable=SC2086
-    local err_file="${json_file%.json}.stderr"
-    if [[ -n "$TIMEOUT_CMD" ]]; then
-        $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
-    else
-        claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
-    fi
-    CHILD_PID=$!
-    wait "$CHILD_PID" 2>/dev/null || exit_code=$?
-    CHILD_PID=""
-    if [[ "$exit_code" -eq 124 ]]; then
-        warn "Claude CLI timed out after ${CLAUDE_TIMEOUT}s"
-    fi
-    # Extract text result from JSON into .log for backwards compatibility
-    # With --output-format json, stdout is a JSON array; .[-1].result has the text
-    _extract_text_from_json "$json_file" "$log_file" "$err_file"
-    local iter_end
-    iter_end="$(now_epoch)"
-    local iter_duration=$(( iter_end - iter_start ))
-    echo -e "  ${GREEN}✓${RESET} Claude session completed ($(format_duration "$iter_duration"), exit $exit_code)"
-    # Accumulate token usage from this iteration's JSON output
-    accumulate_loop_tokens "$json_file"
-    # Show verbose output if requested
-    if $VERBOSE; then
-        echo -e "  ${DIM}─── Claude Output ───${RESET}"
-        sed 's/^/  /' "$log_file" | head -100
-        echo -e "  ${DIM}─────────────────────${RESET}"
-    fi
-    return $exit_code
-}
 # ─── Iteration Summary Extraction ────────────────────────────────────────────
-extract_summary() {
-    local log_file="$1"
-    # Grab last meaningful lines from Claude output, skipping empty lines
-    local summary
-    summary="$(grep -v '^$' "$log_file" | tail -5 | head -3 2>/dev/null || echo "(no output)")"
-    # Truncate long lines
-    summary="$(echo "$summary" | cut -c1-120)"
-    # Sanitize: if summary is just a CLI/API error, replace with generic text
-    if echo "$summary" | grep -qiE 'Invalid API key|authentication_error|rate_limit|API key expired|ANTHROPIC_API_KEY'; then
-        summary="(CLI error — no useful output this iteration)"
-    fi
-    echo "$summary"
-}
 # ─── Display Helpers ─────────────────────────────────────────────────────────
@@ -2652,6 +1758,7 @@ cleanup() {
     export SW_LOOP_STATUS="$STATUS"
     export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
     export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
+    # shellcheck disable=SC2155
     export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
     "$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
@@ -2735,7 +1842,7 @@ DIM='\033[2m'
 BOLD='\033[1m'
 RESET='\033[0m'
-cd "$WORK_DIR"
+cd "$WORK_DIR" || { echo "ERROR: Cannot cd to WORK_DIR: $WORK_DIR" >&2; exit 1; }
 ITERATION=0
 CONSECUTIVE_FAILURES=0
@@ -2818,8 +1925,11 @@ PROMPT
         break
     fi
-    # Auto-commit
+    # Auto-commit — stage only source files, exclude build artifacts
     git add -A 2>/dev/null || true
+    git reset -- .claude/loop-logs/ .claude/loop-state.md .claude/intelligence-cache.json \
+        .claude/platform-hygiene.json .claude/pipeline-artifacts/ .claude/code-review.json \
+        .claude/hygiene-report.json .claude/pr-draft.md 2>/dev/null || true
     if git commit -m "agent-${AGENT_NUM}: iteration ${ITERATION}" --no-verify 2>/dev/null; then
         if ! git push origin "loop/agent-${AGENT_NUM}" 2>/dev/null; then
             echo -e "  ${YELLOW}⚠${RESET} git push failed for loop/agent-${AGENT_NUM} — remote may be out of sync"
@@ -2989,8 +2099,16 @@ cleanup_multi_agent() {
 # ─── Main: Single-Agent Loop ─────────────────────────────────────────────────
 run_single_agent_loop() {
+    # Save original environment variables before loop starts
+    local SAVED_CLAUDE_MODEL="${CLAUDE_MODEL:-}"
+    local SAVED_ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
     if [[ "$SESSION_RESTART" == "true" ]]; then
         # Restart: state already reset by run_loop_with_restarts, skip init
+        # Restore environment variables for clean iteration state
+        [[ -n "$SAVED_CLAUDE_MODEL" ]] && export CLAUDE_MODEL="$SAVED_CLAUDE_MODEL"
+        # Reset context exhaustion counter for this session (it tracks restarts WITHIN a single session)
+        CONTEXT_RESTART_COUNT=0
         info "Session restart ${RESTART_COUNT}/${MAX_RESTARTS} — fresh context, reading progress"
     elif $RESUME; then
         resume_state
@@ -3012,11 +2130,16 @@ run_single_agent_loop() {
     STUCKNESS_COUNT=0
     STUCKNESS_TRACKING_FILE="$LOG_DIR/stuckness-tracking.txt"
     : > "$STUCKNESS_TRACKING_FILE" 2>/dev/null || true
-    : > "${LOG_DIR:-/tmp}/strategy-attempts.txt" 2>/dev/null || true
+    : > "${LOG_DIR}/strategy-attempts.txt" 2>/dev/null || true
     show_banner
     while true; do
+        # Reset environment variables at start of each iteration
+        # Prevents previous iterations from affecting model selection or API keys
+        [[ -n "$SAVED_CLAUDE_MODEL" ]] && export CLAUDE_MODEL="$SAVED_CLAUDE_MODEL"
+        [[ -n "$SAVED_ANTHROPIC_API_KEY" ]] && export ANTHROPIC_API_KEY="$SAVED_ANTHROPIC_API_KEY"
         # Pre-checks (before incrementing — ITERATION tracks completed count)
         check_circuit_breaker || break
         check_max_iterations || break
@@ -3100,6 +2223,11 @@ ${GOAL}"
         # Record iteration data for stuckness detection (diff hash, error hash, exit code)
         record_iteration_stuckness_data "$exit_code"
+        # Dark factory: score this iteration with process reward model
+        if type process_reward_score_iteration >/dev/null 2>&1; then
+            process_reward_score_iteration "$PROJECT_ROOT" "${TEST_OUTPUT:-}" "$ITERATION" 2>/dev/null || true
+        fi
         # Detect fatal CLI errors (API key, auth, network) — abort immediately
         if check_fatal_error "$log_file" "$exit_code"; then
             STATUS="error"
@@ -3110,6 +2238,32 @@ ${GOAL}"
             return 1
         fi
+        # Detect context exhaustion and trigger intelligent restart
+        local log_content=""
+        [[ -f "$log_file" ]] && log_content=$(cat "$log_file" 2>/dev/null || true)
+        local stderr_file="${LOG_DIR}/iteration-${ITERATION}.stderr"
+        local stderr_content=""
+        [[ -f "$stderr_file" ]] && stderr_content=$(cat "$stderr_file" 2>/dev/null || true)
+        if echo "${log_content}${stderr_content}" | grep -qiE "$CONTEXT_EXHAUSTION_PATTERNS" 2>/dev/null; then
+            if [[ "${CONTEXT_RESTART_COUNT:-0}" -lt "${CONTEXT_RESTART_LIMIT:-2}" ]]; then
+                CONTEXT_RESTART_COUNT=$(( CONTEXT_RESTART_COUNT + 1 ))
+                STATUS="context_exhaustion_restart"
+                write_state
+                write_progress
+                warn "Context exhaustion detected (iteration $ITERATION) — triggering intelligent restart ($CONTEXT_RESTART_COUNT/$CONTEXT_RESTART_LIMIT)"
+                if type emit_event >/dev/null 2>&1; then
+                    emit_event "loop.context_exhaustion" "iteration=$ITERATION" "restart_count=$CONTEXT_RESTART_COUNT" "max_restarts=$MAX_RESTARTS"
+                fi
+                break
+            else
+                warn "Context exhaustion detected but restart limit ($CONTEXT_RESTART_LIMIT) reached"
+                STATUS="context_exhaustion_fatal"
+                write_state
+                write_progress
+            fi
+        fi
         # Mid-loop memory refresh — re-query with current error context after iteration 3
         if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context >/dev/null 2>&1; then
             local refresh_ctx
@@ -3155,6 +2309,15 @@ ${GOAL}"
             fi
         fi
+        # Dark factory: update RL weights based on test outcome
+        if type rl_update_weights >/dev/null 2>&1; then
+            if [[ "${TEST_PASSED:-}" == "true" ]]; then
+                rl_update_weights "success" 2>/dev/null || true
+            elif [[ "${TEST_PASSED:-}" == "false" ]]; then
+                rl_update_weights "failure" 2>/dev/null || true
+            fi
+        fi
         # Track fix outcome for memory effectiveness
         if [[ -n "${_applied_fix_pattern:-}" ]]; then
             if type memory_record_fix_outcome >/dev/null 2>&1; then
@@ -3173,15 +2336,98 @@ ${GOAL}"
         export SW_LOOP_STATUS="${STATUS:-running}"
         export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
         export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
+        # shellcheck disable=SC2155
         export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
         "$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
         # Audit agent (reviews implementer's work)
         run_audit_agent
+        # Verification gap detection: audit failed but tests passed
+        # Instead of a full retry (which causes context bloat/timeout), run targeted verification
+        if [[ "${AUDIT_RESULT:-}" != "pass" ]] && [[ "${TEST_PASSED:-}" == "true" ]]; then
+            echo -e "  ${YELLOW}▸${RESET} Verification gap detected (tests pass, audit disagrees)"
+            local verification_passed=true
+            # 1. Re-run ALL test commands to double-check
+            local recheck_log="${LOG_DIR}/verification-iter-${ITERATION}.log"
+            if [[ -n "$TEST_CMD" ]]; then
+                eval "$TEST_CMD" > "$recheck_log" 2>&1 || verification_passed=false
+            fi
+            for _vg_cmd in "${ADDITIONAL_TEST_CMDS[@]+"${ADDITIONAL_TEST_CMDS[@]}"}"; do
+                [[ -z "$_vg_cmd" ]] && continue
+                eval "$_vg_cmd" >> "$recheck_log" 2>&1 || verification_passed=false
+            done
+            # 2. Check for uncommitted changes (quality gate)
+            if ! git -C "$PROJECT_ROOT" diff --quiet 2>/dev/null; then
+                echo -e "  ${YELLOW}⚠${RESET} Uncommitted changes detected"
+                verification_passed=false
+            fi
+            if [[ "$verification_passed" == "true" ]]; then
+                echo -e "  ${GREEN}✓${RESET} Verification passed — overriding audit"
+                AUDIT_RESULT="pass"
+                emit_event "loop.verification_gap_resolved" \
+                    "iteration=$ITERATION" "action=override_audit"
+                if type audit_emit >/dev/null 2>&1; then
+                    audit_emit "loop.verification_gap" "iteration=$ITERATION" \
+                        "resolution=override" "tests_recheck=pass" || true
+                fi
+            else
+                echo -e "  ${RED}✗${RESET} Verification failed — audit stands"
+                emit_event "loop.verification_gap_confirmed" \
+                    "iteration=$ITERATION" "action=retry"
+                if type audit_emit >/dev/null 2>&1; then
+                    audit_emit "loop.verification_gap" "iteration=$ITERATION" \
+                        "resolution=retry" "tests_recheck=fail" || true
+                fi
+            fi
+        fi
+        # Auto-commit any remaining changes before quality gates
+        # (audit agent, verification handler, or test evidence may create files)
+        if ! git -C "$PROJECT_ROOT" diff --quiet 2>/dev/null || \
+           ! git -C "$PROJECT_ROOT" diff --cached --quiet 2>/dev/null || \
+           [[ -n "$(git -C "$PROJECT_ROOT" ls-files --others --exclude-standard 2>/dev/null | head -1)" ]]; then
+            git -C "$PROJECT_ROOT" add -A 2>/dev/null || true
+            git -C "$PROJECT_ROOT" commit -m "loop: iteration $ITERATION — post-audit cleanup" --no-verify 2>/dev/null || true
+        fi
         # Quality gates (automated checks)
         run_quality_gates
+        # Convergence detection (issue #203) — score iteration progress and detect convergence
+        if type convergence_integrate >/dev/null 2>&1; then
+            local conv_exit=0
+            convergence_integrate || conv_exit=$?
+            case "$conv_exit" in
+                1)
+                    # Converged — stop successfully
+                    info "Build loop converged — stopping"
+                    STATUS="complete"
+                    write_state
+                    write_progress
+                    show_summary
+                    return 0
+                    ;;
+                2)
+                    # Diverging — stop with failure
+                    warn "Build loop diverging — stopping (scores declining consistently)"
+                    STATUS="diverging"
+                    write_state
+                    write_progress
+                    show_summary
+                    return 1
+                    ;;
+                3)
+                    # Oscillating — escalate to manual review
+                    warn "Build loop oscillating — consider manual review or model escalation"
+                    ;;
+            esac
+        fi
         # Guarded completion (replaces naive grep check)
         if guard_completion; then
             STATUS="complete"
@@ -3194,6 +2440,10 @@ ${GOAL}"
         # Check progress (circuit breaker)
         if check_progress; then
             CONSECUTIVE_FAILURES=0
+            # Reset auto-recovery state on progress (tests passing, code advancing)
+            if type recovery_reset >/dev/null 2>&1; then
+                recovery_reset
+            fi
             echo -e "  ${GREEN}✓${RESET} Progress detected — continuing"
         else
             CONSECUTIVE_FAILURES=$(( CONSECUTIVE_FAILURES + 1 ))
@@ -3272,6 +2522,52 @@ run_loop_with_restarts() {
         if [[ "$STATUS" == "complete" ]]; then
             return 0
         fi
+        # Context exhaustion: treat as restart, not failure (unless restart limit hit)
+        if [[ "$STATUS" == "context_exhaustion_restart" ]]; then
+            if [[ "$CONTEXT_RESTART_COUNT" -lt "$CONTEXT_RESTART_LIMIT" ]]; then
+                RESTART_COUNT=$(( RESTART_COUNT + 1 ))
+                if type emit_event >/dev/null 2>&1; then
+                    emit_event "loop.restart" "restart=$RESTART_COUNT" "reason=context_exhaustion" "context_restart=$CONTEXT_RESTART_COUNT" "iteration=$ITERATION"
+                fi
+                info "Context exhaustion auto-recovery: restart $RESTART_COUNT/$MAX_RESTARTS (context restart $CONTEXT_RESTART_COUNT/$CONTEXT_RESTART_LIMIT)"
+                # Capture comprehensive state and generate briefing before restart
+                if type restart_before_restart >/dev/null 2>&1; then
+                    restart_before_restart || warn "Failed to prepare restart briefing (continuing anyway)"
+                fi
+                # Reset iteration-level state for fresh session
+                SESSION_RESTART=true
+                ITERATION=0
+                CONSECUTIVE_FAILURES=0
+                EXTENSION_COUNT=0
+                STUCKNESS_COUNT=0
+                STATUS="running"
+                LOG_ENTRIES=""
+                TEST_PASSED=""
+                TEST_OUTPUT=""
+                TEST_LOG_FILE=""
+                GOAL="$ORIGINAL_GOAL"
+                # Archive old artifacts
+                local restart_archive="$LOG_DIR/restart-${RESTART_COUNT}"
+                mkdir -p "$restart_archive"
+                for old_log in "$LOG_DIR"/iteration-*.log "$LOG_DIR"/tests-iter-*.log; do
+                    [[ -f "$old_log" ]] && mv "$old_log" "$restart_archive/" 2>/dev/null || true
+                done
+                [[ -f "$LOG_DIR/progress.md" ]] && cp "$LOG_DIR/progress.md" "$restart_archive/progress.md" 2>/dev/null || true
+                [[ -f "$LOG_DIR/error-summary.json" ]] && cp "$LOG_DIR/error-summary.json" "$restart_archive/" 2>/dev/null || true
+                write_state
+                sleep "$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)"
+                continue
+            else
+                warn "Context exhaustion limit reached — failing build"
+                return "$loop_exit"
+            fi
+        fi
         if [[ "$MAX_RESTARTS" -le 0 ]]; then
             return "$loop_exit"
         fi
@@ -3279,9 +2575,11 @@ run_loop_with_restarts() {
             warn "Max restarts ($MAX_RESTARTS) reached — stopping"
             return "$loop_exit"
         fi
-        # Hard cap safety net
-        if [[ "$RESTART_COUNT" -ge 5 ]]; then
-            warn "Hard restart cap (5) reached — stopping"
+        # Hard cap safety net (configurable)
+        local _hard_cap
+        _hard_cap=$(_smart_int "loop.hard_restart_cap" 5)
+        if [[ "$RESTART_COUNT" -ge "$_hard_cap" ]]; then
+            warn "Hard restart cap ($_hard_cap) reached — stopping"
             return "$loop_exit"
         fi
@@ -3293,6 +2591,12 @@ run_loop_with_restarts() {
         fi
         RESTART_COUNT=$(( RESTART_COUNT + 1 ))
+        # Capture comprehensive state and generate briefing before restart
+        if type restart_before_restart >/dev/null 2>&1; then
+            restart_before_restart || warn "Failed to prepare restart briefing (continuing anyway)"
+        fi
         if type emit_event >/dev/null 2>&1; then
             emit_event "loop.restart" "restart=$RESTART_COUNT" "max=$MAX_RESTARTS" "iteration=$ITERATION"
         fi