npm - shipwright-cli - Versions diffs - 3.2.0 → 3.3.0 - Mend

shipwright-cli 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (279) hide show

package/.claude/agents/code-reviewer.md +2 -0
package/.claude/agents/devops-engineer.md +2 -0
package/.claude/agents/doc-fleet-agent.md +2 -0
package/.claude/agents/pipeline-agent.md +2 -0
package/.claude/agents/shell-script-specialist.md +2 -0
package/.claude/agents/test-specialist.md +2 -0
package/.claude/hooks/agent-crash-capture.sh +32 -0
package/.claude/hooks/post-tool-use.sh +3 -2
package/.claude/hooks/pre-tool-use.sh +35 -3
package/README.md +4 -4
package/claude-code/hooks/config-change.sh +18 -0
package/claude-code/hooks/instructions-reloaded.sh +7 -0
package/claude-code/hooks/worktree-create.sh +25 -0
package/claude-code/hooks/worktree-remove.sh +20 -0
package/config/code-constitution.json +130 -0
package/dashboard/middleware/auth.ts +134 -0
package/dashboard/middleware/constants.ts +21 -0
package/dashboard/public/index.html +2 -6
package/dashboard/public/styles.css +100 -97
package/dashboard/routes/auth.ts +38 -0
package/dashboard/server.ts +66 -25
package/dashboard/services/config.ts +26 -0
package/dashboard/services/db.ts +118 -0
package/dashboard/src/canvas/pixel-agent.ts +298 -0
package/dashboard/src/canvas/pixel-sprites.ts +440 -0
package/dashboard/src/canvas/shipyard-effects.ts +367 -0
package/dashboard/src/canvas/shipyard-scene.ts +616 -0
package/dashboard/src/canvas/submarine-layout.ts +267 -0
package/dashboard/src/components/header.ts +8 -7
package/dashboard/src/core/router.ts +1 -0
package/dashboard/src/design/submarine-theme.ts +253 -0
package/dashboard/src/main.ts +2 -0
package/dashboard/src/types/api.ts +2 -1
package/dashboard/src/views/activity.ts +2 -1
package/dashboard/src/views/shipyard.ts +39 -0
package/dashboard/types/index.ts +166 -0
package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
package/docs/research/RESEARCH_INDEX.md +439 -0
package/docs/research/RESEARCH_SOURCES.md +440 -0
package/docs/research/RESEARCH_SUMMARY.txt +275 -0
package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
package/package.json +2 -2
package/scripts/lib/adaptive-model.sh +427 -0
package/scripts/lib/adaptive-timeout.sh +316 -0
package/scripts/lib/audit-trail.sh +309 -0
package/scripts/lib/auto-recovery.sh +471 -0
package/scripts/lib/bandit-selector.sh +431 -0
package/scripts/lib/bootstrap.sh +104 -2
package/scripts/lib/causal-graph.sh +455 -0
package/scripts/lib/compat.sh +126 -0
package/scripts/lib/compound-audit.sh +337 -0
package/scripts/lib/constitutional.sh +454 -0
package/scripts/lib/context-budget.sh +359 -0
package/scripts/lib/convergence.sh +594 -0
package/scripts/lib/cost-optimizer.sh +634 -0
package/scripts/lib/daemon-adaptive.sh +10 -0
package/scripts/lib/daemon-dispatch.sh +106 -17
package/scripts/lib/daemon-failure.sh +34 -4
package/scripts/lib/daemon-patrol.sh +23 -2
package/scripts/lib/daemon-poll-github.sh +361 -0
package/scripts/lib/daemon-poll-health.sh +299 -0
package/scripts/lib/daemon-poll.sh +27 -611
package/scripts/lib/daemon-state.sh +112 -66
package/scripts/lib/daemon-triage.sh +10 -0
package/scripts/lib/dod-scorecard.sh +442 -0
package/scripts/lib/error-actionability.sh +300 -0
package/scripts/lib/formal-spec.sh +461 -0
package/scripts/lib/helpers.sh +177 -4
package/scripts/lib/intent-analysis.sh +409 -0
package/scripts/lib/loop-convergence.sh +350 -0
package/scripts/lib/loop-iteration.sh +682 -0
package/scripts/lib/loop-progress.sh +48 -0
package/scripts/lib/loop-restart.sh +185 -0
package/scripts/lib/memory-effectiveness.sh +506 -0
package/scripts/lib/mutation-executor.sh +352 -0
package/scripts/lib/outcome-feedback.sh +521 -0
package/scripts/lib/pipeline-cli.sh +336 -0
package/scripts/lib/pipeline-commands.sh +1216 -0
package/scripts/lib/pipeline-detection.sh +100 -2
package/scripts/lib/pipeline-execution.sh +897 -0
package/scripts/lib/pipeline-github.sh +28 -3
package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
package/scripts/lib/pipeline-intelligence.sh +100 -1136
package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
package/scripts/lib/pipeline-quality-checks.sh +17 -715
package/scripts/lib/pipeline-quality-gates.sh +563 -0
package/scripts/lib/pipeline-stages-build.sh +730 -0
package/scripts/lib/pipeline-stages-delivery.sh +965 -0
package/scripts/lib/pipeline-stages-intake.sh +1133 -0
package/scripts/lib/pipeline-stages-monitor.sh +407 -0
package/scripts/lib/pipeline-stages-review.sh +1022 -0
package/scripts/lib/pipeline-stages.sh +59 -2929
package/scripts/lib/pipeline-state.sh +36 -5
package/scripts/lib/pipeline-util.sh +487 -0
package/scripts/lib/policy-learner.sh +438 -0
package/scripts/lib/process-reward.sh +493 -0
package/scripts/lib/project-detect.sh +649 -0
package/scripts/lib/quality-profile.sh +334 -0
package/scripts/lib/recruit-commands.sh +885 -0
package/scripts/lib/recruit-learning.sh +739 -0
package/scripts/lib/recruit-roles.sh +648 -0
package/scripts/lib/reward-aggregator.sh +458 -0
package/scripts/lib/rl-optimizer.sh +362 -0
package/scripts/lib/root-cause.sh +427 -0
package/scripts/lib/scope-enforcement.sh +445 -0
package/scripts/lib/session-restart.sh +493 -0
package/scripts/lib/skill-memory.sh +300 -0
package/scripts/lib/skill-registry.sh +775 -0
package/scripts/lib/spec-driven.sh +476 -0
package/scripts/lib/test-helpers.sh +18 -7
package/scripts/lib/test-holdout.sh +429 -0
package/scripts/lib/test-optimizer.sh +511 -0
package/scripts/shipwright-file-suggest.sh +45 -0
package/scripts/skills/adversarial-quality.md +61 -0
package/scripts/skills/api-design.md +44 -0
package/scripts/skills/architecture-design.md +50 -0
package/scripts/skills/brainstorming.md +43 -0
package/scripts/skills/data-pipeline.md +44 -0
package/scripts/skills/deploy-safety.md +64 -0
package/scripts/skills/documentation.md +38 -0
package/scripts/skills/frontend-design.md +45 -0
package/scripts/skills/generated/.gitkeep +0 -0
package/scripts/skills/generated/_refinements/.gitkeep +0 -0
package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
package/scripts/skills/generated/cli-version-management.md +29 -0
package/scripts/skills/generated/collection-system-validation.md +99 -0
package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
package/scripts/skills/generated/test-parallelization-detection.md +65 -0
package/scripts/skills/observability.md +79 -0
package/scripts/skills/performance.md +48 -0
package/scripts/skills/pr-quality.md +49 -0
package/scripts/skills/product-thinking.md +43 -0
package/scripts/skills/security-audit.md +49 -0
package/scripts/skills/systematic-debugging.md +40 -0
package/scripts/skills/testing-strategy.md +47 -0
package/scripts/skills/two-stage-review.md +52 -0
package/scripts/skills/validation-thoroughness.md +55 -0
package/scripts/sw +9 -3
package/scripts/sw-activity.sh +9 -2
package/scripts/sw-adaptive.sh +2 -1
package/scripts/sw-adversarial.sh +2 -1
package/scripts/sw-architecture-enforcer.sh +3 -1
package/scripts/sw-auth.sh +12 -2
package/scripts/sw-autonomous.sh +5 -1
package/scripts/sw-changelog.sh +4 -1
package/scripts/sw-checkpoint.sh +2 -1
package/scripts/sw-ci.sh +5 -1
package/scripts/sw-cleanup.sh +4 -26
package/scripts/sw-code-review.sh +10 -4
package/scripts/sw-connect.sh +2 -1
package/scripts/sw-context.sh +2 -1
package/scripts/sw-cost.sh +48 -3
package/scripts/sw-daemon.sh +66 -9
package/scripts/sw-dashboard.sh +3 -1
package/scripts/sw-db.sh +59 -16
package/scripts/sw-decide.sh +8 -2
package/scripts/sw-decompose.sh +360 -17
package/scripts/sw-deps.sh +4 -1
package/scripts/sw-developer-simulation.sh +4 -1
package/scripts/sw-discovery.sh +325 -2
package/scripts/sw-doc-fleet.sh +4 -1
package/scripts/sw-docs-agent.sh +3 -1
package/scripts/sw-docs.sh +2 -1
package/scripts/sw-doctor.sh +453 -2
package/scripts/sw-dora.sh +4 -1
package/scripts/sw-durable.sh +4 -3
package/scripts/sw-e2e-orchestrator.sh +17 -16
package/scripts/sw-eventbus.sh +7 -1
package/scripts/sw-evidence.sh +364 -12
package/scripts/sw-feedback.sh +550 -9
package/scripts/sw-fix.sh +20 -1
package/scripts/sw-fleet-discover.sh +6 -2
package/scripts/sw-fleet-viz.sh +4 -1
package/scripts/sw-fleet.sh +5 -1
package/scripts/sw-github-app.sh +16 -3
package/scripts/sw-github-checks.sh +3 -2
package/scripts/sw-github-deploy.sh +3 -2
package/scripts/sw-github-graphql.sh +18 -7
package/scripts/sw-guild.sh +5 -1
package/scripts/sw-heartbeat.sh +5 -30
package/scripts/sw-hello.sh +67 -0
package/scripts/sw-hygiene.sh +6 -1
package/scripts/sw-incident.sh +265 -1
package/scripts/sw-init.sh +18 -2
package/scripts/sw-instrument.sh +10 -2
package/scripts/sw-intelligence.sh +42 -6
package/scripts/sw-jira.sh +5 -1
package/scripts/sw-launchd.sh +2 -1
package/scripts/sw-linear.sh +4 -1
package/scripts/sw-logs.sh +4 -1
package/scripts/sw-loop.sh +432 -1128
package/scripts/sw-memory.sh +356 -2
package/scripts/sw-mission-control.sh +6 -1
package/scripts/sw-model-router.sh +481 -26
package/scripts/sw-otel.sh +13 -4
package/scripts/sw-oversight.sh +14 -5
package/scripts/sw-patrol-meta.sh +334 -0
package/scripts/sw-pipeline-composer.sh +5 -1
package/scripts/sw-pipeline-vitals.sh +2 -1
package/scripts/sw-pipeline.sh +53 -2664
package/scripts/sw-pm.sh +12 -5
package/scripts/sw-pr-lifecycle.sh +2 -1
package/scripts/sw-predictive.sh +7 -1
package/scripts/sw-prep.sh +185 -2
package/scripts/sw-ps.sh +5 -25
package/scripts/sw-public-dashboard.sh +15 -3
package/scripts/sw-quality.sh +2 -1
package/scripts/sw-reaper.sh +8 -25
package/scripts/sw-recruit.sh +156 -2303
package/scripts/sw-regression.sh +19 -12
package/scripts/sw-release-manager.sh +3 -1
package/scripts/sw-release.sh +4 -1
package/scripts/sw-remote.sh +3 -1
package/scripts/sw-replay.sh +7 -1
package/scripts/sw-retro.sh +158 -1
package/scripts/sw-review-rerun.sh +3 -1
package/scripts/sw-scale.sh +10 -3
package/scripts/sw-security-audit.sh +6 -1
package/scripts/sw-self-optimize.sh +6 -3
package/scripts/sw-session.sh +9 -3
package/scripts/sw-setup.sh +3 -1
package/scripts/sw-stall-detector.sh +406 -0
package/scripts/sw-standup.sh +15 -7
package/scripts/sw-status.sh +3 -1
package/scripts/sw-strategic.sh +4 -1
package/scripts/sw-stream.sh +7 -1
package/scripts/sw-swarm.sh +18 -6
package/scripts/sw-team-stages.sh +13 -6
package/scripts/sw-templates.sh +5 -29
package/scripts/sw-testgen.sh +7 -1
package/scripts/sw-tmux-pipeline.sh +4 -1
package/scripts/sw-tmux-role-color.sh +2 -0
package/scripts/sw-tmux-status.sh +1 -1
package/scripts/sw-tmux.sh +3 -1
package/scripts/sw-trace.sh +3 -1
package/scripts/sw-tracker-github.sh +3 -0
package/scripts/sw-tracker-jira.sh +3 -0
package/scripts/sw-tracker-linear.sh +3 -0
package/scripts/sw-tracker.sh +3 -1
package/scripts/sw-triage.sh +2 -1
package/scripts/sw-upgrade.sh +3 -1
package/scripts/sw-ux.sh +5 -2
package/scripts/sw-webhook.sh +3 -1
package/scripts/sw-widgets.sh +3 -1
package/scripts/sw-worktree.sh +15 -3
package/scripts/test-skill-injection.sh +1233 -0
package/templates/pipelines/autonomous.json +27 -3
package/templates/pipelines/cost-aware.json +34 -8
package/templates/pipelines/deployed.json +12 -0
package/templates/pipelines/enterprise.json +12 -0
package/templates/pipelines/fast.json +6 -0
package/templates/pipelines/full.json +27 -3
package/templates/pipelines/hotfix.json +6 -0
package/templates/pipelines/standard.json +12 -0
package/templates/pipelines/tdd.json +12 -0

package/scripts/sw-model-router.sh CHANGED Viewed

@@ -7,8 +7,10 @@
 set -euo pipefail
 trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
-VERSION="3.2.0"
+# shellcheck disable=SC2034
+VERSION="3.3.0"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC2034
 REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 # ─── Cross-platform compatibility ──────────────────────────────────────────
@@ -30,7 +32,8 @@ fi
 if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
   emit_event() {
     local event_type="$1"; shift; mkdir -p "${HOME}/.shipwright"
-    local payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
+    local payload
+    payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
     while [[ $# -gt 0 ]]; do local key="${1%%=*}" val="${1#*=}"; payload="${payload},\"${key}\":\"${val}\""; shift; done
     echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
   }
@@ -42,29 +45,61 @@ MODEL_ROUTING_OPTIMIZATION="${OPTIMIZATION_DIR}/model-routing.json"
 MODEL_ROUTING_LEGACY="${HOME}/.shipwright/model-routing.json"
 MODEL_USAGE_LOG="${OPTIMIZATION_DIR}/model-usage.jsonl"
 AB_RESULTS_FILE="${HOME}/.shipwright/ab-results.jsonl"
+CHAIN_CONFIG_FILE="${OPTIMIZATION_DIR}/reasoning-chains.json"
+CHAIN_EXECUTION_LOG="${OPTIMIZATION_DIR}/chain-executions.jsonl"
 # Resolve which config file to use (set by _resolve_routing_config)
 MODEL_ROUTING_CONFIG=""
-# ─── Model Costs (per million tokens) ───────────────────────────────────────
-HAIKU_INPUT_COST="0.80"
-HAIKU_OUTPUT_COST="4.00"
-SONNET_INPUT_COST="3.00"
-SONNET_OUTPUT_COST="15.00"
-OPUS_INPUT_COST="15.00"
-OPUS_OUTPUT_COST="75.00"
-# ─── Default Routing Rules ──────────────────────────────────────────────────
-# Stages that default to haiku (low complexity, fast)
+# ─── Model Costs (per million tokens, config-driven) ──────────────────────
+# Read from ~/.shipwright/pricing.json if exists, otherwise use defaults
+# Load per-million-token prices into the six *_COST globals.
+# Globals written: HAIKU_INPUT_COST, HAIKU_OUTPUT_COST, SONNET_INPUT_COST,
+#                  SONNET_OUTPUT_COST, OPUS_INPUT_COST, OPUS_OUTPUT_COST
+# Built-in defaults are applied first; each one is overridden only when
+# ~/.shipwright/pricing.json supplies a plain non-negative decimal for it.
+# Anything else (missing key, null, free text, unreadable file, jq absent)
+# keeps the default, so a malformed pricing file cannot feed non-numeric
+# text into the awk arithmetic that consumes these values.
+# Returns: always 0.
+_load_pricing() {
+    local pricing_file="${HOME}/.shipwright/pricing.json"
+    local numeric_re='^[0-9]+(\.[0-9]+)?$'
+    local spec var_name jq_path value
+    HAIKU_INPUT_COST="0.80"
+    HAIKU_OUTPUT_COST="4.00"
+    SONNET_INPUT_COST="3.00"
+    SONNET_OUTPUT_COST="15.00"
+    OPUS_INPUT_COST="15.00"
+    OPUS_OUTPUT_COST="75.00"
+    if [[ ! -f "$pricing_file" ]]; then
+        return 0
+    fi
+    # Each entry is "<global name>=<jq path inside pricing.json>".
+    for spec in \
+        "HAIKU_INPUT_COST=.haiku.input" \
+        "HAIKU_OUTPUT_COST=.haiku.output" \
+        "SONNET_INPUT_COST=.sonnet.input" \
+        "SONNET_OUTPUT_COST=.sonnet.output" \
+        "OPUS_INPUT_COST=.opus.input" \
+        "OPUS_OUTPUT_COST=.opus.output"
+    do
+        var_name="${spec%%=*}"
+        jq_path="${spec#*=}"
+        # jq_path is one of the constants above, never user input.
+        value=$(jq -r "${jq_path} // empty" "$pricing_file" 2>/dev/null || true)
+        if [[ "$value" =~ $numeric_re ]]; then
+            printf -v "$var_name" '%s' "$value"
+        fi
+    done
+    return 0
+}
+_load_pricing
+# ─── Default Routing Rules (config-driven) ────────────────────────────────
+# Read from daemon-config model_routing.stages if configured
+# Override the default stage-to-model groupings from the daemon config.
+# Globals read:    DAEMON_CONFIG, WORK_DIR (both optional)
+# Globals written: HAIKU_STAGES, SONNET_STAGES, OPUS_STAGES (each only when
+#                  the matching model_routing.<tier>_stages key is set)
+# Returns: always 0. A missing file or missing key means "keep the current
+#          value"; it is not reported as a failure.
+_load_routing_rules() {
+    local cfg="${DAEMON_CONFIG:-${WORK_DIR:-.}/.claude/daemon-config.json}"
+    local h s o
+    if [[ ! -f "$cfg" ]]; then
+        return 0
+    fi
+    h=$(jq -r '.model_routing.haiku_stages // empty' "$cfg" 2>/dev/null || true)
+    s=$(jq -r '.model_routing.sonnet_stages // empty' "$cfg" 2>/dev/null || true)
+    o=$(jq -r '.model_routing.opus_stages // empty' "$cfg" 2>/dev/null || true)
+    # Explicit if-blocks (not `[[ … ]] && x=y`) so an unset key does not become
+    # the function's exit status.
+    if [[ -n "$h" && "$h" != "null" ]]; then
+        HAIKU_STAGES="$h"
+    fi
+    if [[ -n "$s" && "$s" != "null" ]]; then
+        SONNET_STAGES="$s"
+    fi
+    if [[ -n "$o" && "$o" != "null" ]]; then
+        OPUS_STAGES="$o"
+    fi
+    return 0
+}
 HAIKU_STAGES="intake|monitor"
-# Stages that default to sonnet (medium complexity)
 SONNET_STAGES="test|review"
-# Stages that default to opus (high complexity, needs deep thinking)
 OPUS_STAGES="plan|design|build|compound_quality"
-# ─── Complexity Thresholds ──────────────────────────────────────────────────
-COMPLEXITY_LOW=30          # Below this: use sonnet
-COMPLEXITY_HIGH=80         # Above this: use opus
+_load_routing_rules 2>/dev/null || true
+# ─── Complexity Thresholds (config-driven) ────────────────────────────────
+if type _smart_int >/dev/null 2>&1; then
+    COMPLEXITY_LOW=$(_smart_int "model_routing.complexity_low" 30)
+    COMPLEXITY_HIGH=$(_smart_int "model_routing.complexity_high" 80)
+else
+    COMPLEXITY_LOW=30
+    COMPLEXITY_HIGH=80
+fi
 # ─── Resolve Routing Config Path ────────────────────────────────────────────
 # Priority: optimization (self-optimize writes) > legacy > create in optimization
@@ -243,6 +278,7 @@ set_config() {
     # Use jq to safely update the config
     local tmp_config
     tmp_config=$(mktemp)
+    # shellcheck disable=SC2064
     trap "rm -f '$tmp_config'" RETURN
     if [[ "$value" == "true" ]] || [[ "$value" == "false" ]]; then
@@ -353,7 +389,8 @@ record_usage() {
             ;;
     esac
-    local record="{\"ts\":\"$(now_iso)\",\"stage\":\"$stage\",\"model\":\"$model\",\"input_tokens\":$input_tokens,\"output_tokens\":$output_tokens,\"cost\":$cost}"
+    local record
+    record="{\"ts\":\"$(now_iso)\",\"stage\":\"$stage\",\"model\":\"$model\",\"input_tokens\":$input_tokens,\"output_tokens\":$output_tokens,\"cost\":$cost}"
     echo "$record" >> "$MODEL_USAGE_LOG"
 }
@@ -376,6 +413,7 @@ configure_ab_test() {
     local tmp_config
     tmp_config=$(mktemp)
+    # shellcheck disable=SC2064
     trap "rm -f '$tmp_config'" RETURN
     jq ".a_b_test = {\"enabled\": true, \"percentage\": $percentage, \"variant\": \"$variant\"}" \
@@ -395,7 +433,8 @@ log_ab_result() {
     mkdir -p "${HOME}/.shipwright"
-    local record="{\"ts\":\"$(now_iso)\",\"run_id\":\"$run_id\",\"variant\":\"$variant\",\"success\":$success_status,\"cost\":$cost,\"duration_seconds\":$duration}"
+    local record
+    record="{\"ts\":\"$(now_iso)\",\"run_id\":\"$run_id\",\"variant\":\"$variant\",\"success\":$success_status,\"cost\":$cost,\"duration_seconds\":$duration}"
     echo "$record" >> "$AB_RESULTS_FILE"
 }
@@ -492,6 +531,380 @@ show_ab_results() {
     ' "$AB_RESULTS_FILE" 2>/dev/null | jq -r '.[] | "\(.variant):\n  Runs: \(.total_runs)\n  Success: \(.successful)/\(.total_runs) (\(.success_rate | round)%)\n  Avg Cost: $\(.avg_cost | round)\n  Total Cost: $\(.total_cost | round)\n  Avg Duration: \(.avg_duration | round)s"' || true
 }
+# ─── Initialize Chain Templates ────────────────────────────────────────────
+# Seed CHAIN_CONFIG_FILE with the built-in chain templates on first use.
+# Creates the parent directory if needed and leaves an existing file untouched.
+_ensure_chain_templates() {
+    local config_dir
+    config_dir="$(dirname "$CHAIN_CONFIG_FILE")"
+    mkdir -p "$config_dir"
+    # Nothing to do once a config is already in place.
+    if [[ -f "$CHAIN_CONFIG_FILE" ]]; then
+        return 0
+    fi
+    cat > "$CHAIN_CONFIG_FILE" <<'CHAINS'
+{
+  "version": "1.0",
+  "templates": {
+    "explore-decide": [
+      {"step": "explore", "model": "haiku", "max_tokens": 4000, "description": "Fast exploration with haiku"},
+      {"step": "decide", "model": "opus", "max_tokens": 8000, "description": "Final decision with opus"}
+    ],
+    "explore-synthesize-decide": [
+      {"step": "explore", "model": "haiku", "max_tokens": 4000, "description": "Explore with haiku"},
+      {"step": "synthesize", "model": "sonnet", "max_tokens": 6000, "description": "Synthesize with sonnet"},
+      {"step": "decide", "model": "opus", "max_tokens": 8000, "description": "Decide with opus"}
+    ],
+    "fast-verify": [
+      {"step": "generate", "model": "sonnet", "max_tokens": 6000, "description": "Generate with sonnet"},
+      {"step": "verify", "model": "haiku", "max_tokens": 2000, "description": "Verify with haiku"}
+    ],
+    "deep-analysis": [
+      {"step": "analyze", "model": "opus", "max_tokens": 8000, "description": "Deep analysis with opus"},
+      {"step": "validate", "model": "opus", "max_tokens": 4000, "description": "Validate with opus"}
+    ]
+  },
+  "confidence_threshold": 50,
+  "escalation_threshold": 80,
+  "max_escalations_per_step": 1,
+  "custom_chains": {}
+}
+CHAINS
+}
+# ─── Define a Custom Reasoning Chain ────────────────────────────────────────
+# chain_define <name> <steps_json>
+# Store a custom reasoning chain under .custom_chains[<name>] in
+# CHAIN_CONFIG_FILE.
+# Arguments: $1 - chain name
+#            $2 - JSON array of step objects
+# Returns:   0 on success; 1 on missing arguments, missing jq, invalid or
+#            non-array JSON, or a failed config rewrite.
+chain_define() {
+    # ${n:-} keeps `set -u` from aborting before the usage message is shown.
+    local chain_name="${1:-}"
+    local steps_json="${2:-}"
+    if [[ -z "$chain_name" ]] || [[ -z "$steps_json" ]]; then
+        error "Usage: chain_define <name> <steps_json>"
+        return 1
+    fi
+    _ensure_chain_templates
+    if ! command -v jq >/dev/null 2>&1; then
+        error "jq is required for chain definitions"
+        return 1
+    fi
+    # Validate the input before creating any temp file.
+    if ! jq empty <<< "$steps_json" 2>/dev/null; then
+        error "Invalid JSON for chain steps"
+        return 1
+    fi
+    # chain_execute indexes the steps by position, so only arrays are usable.
+    if ! jq -e 'type == "array"' <<< "$steps_json" >/dev/null 2>&1; then
+        error "Chain steps must be a JSON array"
+        return 1
+    fi
+    local tmp_config
+    tmp_config=$(mktemp)
+    # shellcheck disable=SC2064
+    trap "rm -f '$tmp_config'" RETURN
+    # Write to the temp file first so a jq failure never truncates the config.
+    if ! jq --argjson steps "$steps_json" --arg name "$chain_name" \
+        '.custom_chains[$name] = $steps' \
+        "$CHAIN_CONFIG_FILE" > "$tmp_config"; then
+        error "Failed to update $CHAIN_CONFIG_FILE"
+        return 1
+    fi
+    mv "$tmp_config" "$CHAIN_CONFIG_FILE"
+    success "Defined custom chain: $chain_name"
+}
+# ─── Score Confidence from Output ──────────────────────────────────────────
+# chain_score_confidence <output> [step_type]
+# Keyword heuristic that prints a 0-100 confidence score for a model response.
+# Starts at 50, adds 15 for reasoning words, adds 20 for conclusion words and
+# subtracts 10 for hedging words. Matching is case-insensitive.
+# Arguments: $1 - text to score
+#            $2 - step type (accepted, but not consulted by the heuristic)
+# Outputs:   the integer score on stdout
+chain_score_confidence() {
+    local output="$1"
+    local step_type="${2:-general}"
+    local score=50
+    # Reasoning markers.
+    if grep -qiE "(because|reason|since|based|due to)" <<< "$output"; then
+        score=$((score + 15))
+    fi
+    # Conclusion markers.
+    if grep -qiE "(therefore|thus|conclud|result|found|identify)" <<< "$output"; then
+        score=$((score + 20))
+    fi
+    # Hedging markers lower the score.
+    if grep -qiE "(however|but|though|caveat|limitation|uncertain)" <<< "$output"; then
+        score=$((score - 10))
+    fi
+    # Keep the result inside 0-100.
+    if (( score < 0 )); then
+        score=0
+    fi
+    if (( score > 100 )); then
+        score=100
+    fi
+    echo "$score"
+}
+# ─── Get Next Escalation Model ─────────────────────────────────────────────
+# _get_escalation_model <model>
+# Print the next tier up for a model: haiku -> sonnet -> opus. opus maps to
+# itself because there is nothing above it.
+# Returns: 0 for a known tier; 1 (after reporting an error) otherwise.
+_get_escalation_model() {
+    local tier="$1"
+    if [[ "$tier" == "haiku" ]]; then
+        echo "sonnet"
+    elif [[ "$tier" == "sonnet" || "$tier" == "opus" ]]; then
+        echo "opus"
+    else
+        error "Unknown model: $tier"
+        return 1
+    fi
+}
+# ─── Execute a Single Chain Step ───────────────────────────────────────────
+# Print the current wall-clock time in milliseconds since the epoch.
+# GNU date expands %N to nanoseconds; BSD/macOS date emits a literal "N", so
+# fall back to whole seconds * 1000 whenever the result is not 13 digits.
+_chain_now_ms() {
+    local stamp
+    stamp=$(date +%s%N 2>/dev/null | cut -b1-13 || true)
+    if [[ ! "$stamp" =~ ^[0-9]{13}$ ]]; then
+        stamp=$(( $(date +%s) * 1000 ))
+    fi
+    echo "$stamp"
+}
+# _execute_chain_step <step_name> <model> <prompt> [max_tokens]
+# Run one step of a reasoning chain and print an execution record as JSON:
+#   {step, model, tokens_in, tokens_out, duration_ms, output}
+# Both branches currently return a synthetic response with fixed token counts
+# (500 in / 300 out); the prompt and max_tokens arguments are accepted for the
+# future real API call and are not used yet.
+# Globals read: CLAUDE_API_KEY, NO_GITHUB (both optional)
+_execute_chain_step() {
+    local step_name="$1"
+    local model="$2"
+    # shellcheck disable=SC2034
+    local prompt="$3"
+    # shellcheck disable=SC2034
+    local max_tokens="${4:-4000}"
+    local output=""
+    local tokens_in=0
+    local tokens_out=0
+    local duration_ms=0
+    local start_time
+    start_time=$(_chain_now_ms)
+    # ${NO_GITHUB:-} avoids an "unbound variable" abort under `set -u` when the
+    # variable is not exported and an API key is present.
+    if [[ -z "${CLAUDE_API_KEY:-}" ]] || [[ "${NO_GITHUB:-}" == "true" ]]; then
+        # Mock/test mode
+        output="{\"status\": \"success\", \"step\": \"$step_name\", \"model\": \"$model\", \"content\": \"Mock response from $model for step $step_name\"}"
+        tokens_in=500
+        tokens_out=300
+    else
+        # Real Claude API call would happen here
+        # For now, return mock response
+        output="{\"status\": \"success\", \"step\": \"$step_name\", \"model\": \"$model\", \"content\": \"Mock response from $model\"}"
+        tokens_in=500
+        tokens_out=300
+    fi
+    local end_time
+    end_time=$(_chain_now_ms)
+    duration_ms=$((end_time - start_time))
+    # output is passed with --arg, so it is embedded as a JSON string and the
+    # record stays valid even if step_name or model contain quotes.
+    jq -n \
+        --arg step "$step_name" \
+        --arg model "$model" \
+        --argjson tokens_in "$tokens_in" \
+        --argjson tokens_out "$tokens_out" \
+        --argjson duration_ms "$duration_ms" \
+        --arg output "$output" \
+        '{step: $step, model: $model, tokens_in: $tokens_in, tokens_out: $tokens_out, duration_ms: $duration_ms, output: $output}'
+}
+# ─── Execute a Complete Reasoning Chain ────────────────────────────────────
+# chain_execute <chain_name> <prompt>
+# Run every step of a built-in or custom reasoning chain in order, feeding
+# each step's output to the next step as its prompt.
+#   * If a non-final step scores above escalation_threshold, the remaining
+#     steps are skipped and that step's output becomes the chain output.
+#   * If a non-final step scores below confidence_threshold, it is re-run once
+#     on the next model tier. The escalated record keeps the score that
+#     triggered the escalation and gains "escalated": true.
+# The execution record is appended to CHAIN_EXECUTION_LOG and printed.
+# NOTE(review): "total_cost" is the sum of tokens_in and tokens_out over all
+# steps, not a dollar amount. Confirm whether chain_step_cost should be used.
+# Returns: 0 on success; 1 on bad usage, missing jq, or an unknown/empty chain.
+chain_execute() {
+    # ${n:-} keeps `set -u` from aborting before the usage message is shown.
+    local chain_name="${1:-}"
+    local prompt="${2:-}"
+    if [[ -z "$chain_name" ]] || [[ -z "$prompt" ]]; then
+        error "Usage: chain_execute <chain_name> <prompt>"
+        return 1
+    fi
+    _ensure_chain_templates
+    if ! command -v jq >/dev/null 2>&1; then
+        error "jq is required for chain execution"
+        return 1
+    fi
+    local steps
+    # The chain name is passed as data (--arg), never spliced into the filter.
+    steps=$(jq --arg name "$chain_name" \
+        '.templates[$name] // .custom_chains[$name] // empty' \
+        "$CHAIN_CONFIG_FILE" 2>/dev/null || true)
+    if [[ -z "$steps" || "$steps" == "null" ]]; then
+        error "Chain not found: $chain_name"
+        return 1
+    fi
+    local step_count
+    step_count=$(jq 'if type == "array" then length else 0 end' <<< "$steps" 2>/dev/null || echo "0")
+    if ! [[ "$step_count" =~ ^[0-9]+$ ]] || [[ "$step_count" -eq 0 ]]; then
+        error "Chain has no steps: $chain_name"
+        return 1
+    fi
+    mkdir -p "$(dirname "$CHAIN_EXECUTION_LOG")"
+    local execution_id
+    execution_id=$(date +%s)-$(od -An -N4 -tx4 /dev/urandom 2>/dev/null | tr -d ' ' | cut -c1-6 || echo "000000")
+    # Initialised explicitly: a bare `local chain_output` stays unset, and the
+    # later -z test would abort under `set -u` when no step terminated early.
+    local chain_output=""
+    local confidence_threshold
+    confidence_threshold=$(jq -r '.confidence_threshold // 50' "$CHAIN_CONFIG_FILE" 2>/dev/null || echo "50")
+    local escalation_threshold
+    escalation_threshold=$(jq -r '.escalation_threshold // 80' "$CHAIN_CONFIG_FILE" 2>/dev/null || echo "80")
+    # The thresholds feed integer comparisons; fall back to defaults otherwise.
+    if ! [[ "$confidence_threshold" =~ ^[0-9]+$ ]]; then confidence_threshold=50; fi
+    if ! [[ "$escalation_threshold" =~ ^[0-9]+$ ]]; then escalation_threshold=80; fi
+    local total_cost="0"
+    local execution_trace="[]"
+    local current_prompt="$prompt"
+    local i step_obj step_name model max_tokens step_result step_output confidence next_model
+    for ((i = 0; i < step_count; i++)); do
+        step_obj=$(jq --argjson idx "$i" '.[$idx]' <<< "$steps")
+        step_name=$(jq -r '.step' <<< "$step_obj")
+        model=$(jq -r '.model' <<< "$step_obj")
+        max_tokens=$(jq -r '.max_tokens // 4000' <<< "$step_obj")
+        # Execute this step
+        step_result=$(_execute_chain_step "$step_name" "$model" "$current_prompt" "$max_tokens")
+        # Extract output for next step
+        step_output=$(jq -r '.output' <<< "$step_result")
+        # Score confidence and attach it to the step record
+        confidence=$(chain_score_confidence "$step_output" "$step_name")
+        step_result=$(jq --argjson conf "$confidence" '.confidence = $conf' <<< "$step_result")
+        # Early termination: confident enough that later steps add nothing
+        if [[ "$confidence" -gt "$escalation_threshold" ]] && [[ "$i" -lt $((step_count - 1)) ]]; then
+            info "Step $step_name high confidence ($confidence%), skipping remaining steps"
+            execution_trace=$(jq --argjson entry "$step_result" '. += [$entry]' <<< "$execution_trace")
+            chain_output="$step_output"
+            break
+        fi
+        # Low-confidence escalation (never applied to the final step)
+        if [[ "$confidence" -lt "$confidence_threshold" ]] && [[ "$i" -lt $((step_count - 1)) ]]; then
+            next_model=$(_get_escalation_model "$model") || return 1
+            if [[ "$next_model" != "$model" ]]; then
+                warn "Step $step_name low confidence ($confidence%), escalating to $next_model"
+                # Re-execute with escalated model
+                step_result=$(_execute_chain_step "$step_name" "$next_model" "$current_prompt" "$max_tokens")
+                step_result=$(jq --argjson conf "$confidence" '.confidence = $conf | .escalated = true' <<< "$step_result")
+                step_output=$(jq -r '.output' <<< "$step_result")
+            fi
+        fi
+        # Add step to trace
+        execution_trace=$(jq --argjson entry "$step_result" '. += [$entry]' <<< "$execution_trace")
+        # Use output as input to next step
+        current_prompt="$step_output"
+    done
+    # Without early termination, the chain output is the last step's output
+    if [[ -z "$chain_output" ]]; then
+        chain_output=$(jq -r '.[-1].output' <<< "$execution_trace")
+    fi
+    # Token total across all steps (see NOTE(review) in the header)
+    total_cost=$(jq '[.[].tokens_in, .[].tokens_out] | add' <<< "$execution_trace" 2>/dev/null || echo "0")
+    # Log execution
+    local execution_record
+    execution_record=$(jq -n \
+        --arg id "$execution_id" \
+        --arg chain "$chain_name" \
+        --argjson trace "$execution_trace" \
+        --arg output "$chain_output" \
+        --argjson total_cost "$total_cost" \
+        --arg ts "$(now_iso)" \
+        '{id: $id, chain: $chain, ts: $ts, steps: $trace, output: $output, total_cost: $total_cost}')
+    echo "$execution_record" >> "$CHAIN_EXECUTION_LOG"
+    # Return execution record
+    echo "$execution_record"
+}
+# ─── Calculate Cost for a Single Step ──────────────────────────────────────
+# chain_step_cost [tokens_in] [tokens_out] [model]
+# Print the dollar cost of one step to six decimal places, using the
+# per-million-token *_COST globals.
+# Arguments: $1 - input tokens (non-numeric values count as 0; default 0)
+#            $2 - output tokens (non-numeric values count as 0; default 0)
+#            $3 - haiku | sonnet | opus (default sonnet)
+# Outputs:   the cost on stdout; "0" for an unrecognised model.
+chain_step_cost() {
+    local tokens_in="${1:-0}"
+    local tokens_out="${2:-0}"
+    local model="${3:-sonnet}"
+    if ! [[ "$tokens_in" =~ ^[0-9]+$ ]]; then tokens_in=0; fi
+    if ! [[ "$tokens_out" =~ ^[0-9]+$ ]]; then tokens_out=0; fi
+    local rate_in rate_out
+    case "$model" in
+        haiku)
+            rate_in="$HAIKU_INPUT_COST"
+            rate_out="$HAIKU_OUTPUT_COST"
+            ;;
+        sonnet)
+            rate_in="$SONNET_INPUT_COST"
+            rate_out="$SONNET_OUTPUT_COST"
+            ;;
+        opus)
+            rate_in="$OPUS_INPUT_COST"
+            rate_out="$OPUS_OUTPUT_COST"
+            ;;
+        *)
+            # Unknown tier: keep the existing contract of reporting zero cost.
+            echo "0"
+            return 0
+            ;;
+    esac
+    local cost
+    # Values go in through -v so config-supplied prices are treated as data
+    # and never become part of the awk program text.
+    cost=$(awk -v tin="$tokens_in" -v tout="$tokens_out" \
+        -v rin="$rate_in" -v rout="$rate_out" \
+        'BEGIN { printf "%.6f", (tin * rin + tout * rout) / 1000000 }')
+    echo "$cost"
+}
+# ─── Show Chain Configuration ──────────────────────────────────────────────
+# Print the reasoning-chain configuration, creating the default templates
+# first when no config file exists yet. Output is pretty-printed through jq
+# when jq is available and dumped verbatim otherwise.
+show_chain_config() {
+    # Record whether the file is missing BEFORE seeding it. Checking afterwards
+    # can never be true, so the "Created ..." notice was unreachable.
+    local created="false"
+    if [[ ! -f "$CHAIN_CONFIG_FILE" ]]; then
+        created="true"
+    fi
+    _ensure_chain_templates
+    if [[ "$created" == "true" && -f "$CHAIN_CONFIG_FILE" ]]; then
+        success "Created default chain templates at $CHAIN_CONFIG_FILE"
+    fi
+    info "Reasoning Chain Configuration"
+    echo ""
+    if command -v jq >/dev/null 2>&1; then
+        jq . "$CHAIN_CONFIG_FILE" 2>/dev/null || cat "$CHAIN_CONFIG_FILE"
+    else
+        cat "$CHAIN_CONFIG_FILE"
+    fi
+}
+# ─── Show Chain Execution Report ───────────────────────────────────────────
+# Summarise CHAIN_EXECUTION_LOG: the number of executions, the summed
+# total_cost field, and a per-chain breakdown sorted by chain name.
+# Returns: 0 when there is no log yet or the report was printed; 1 if jq is
+#          missing.
+show_chain_report() {
+    info "Chain Execution Report"
+    echo ""
+    if [[ ! -f "$CHAIN_EXECUTION_LOG" ]]; then
+        warn "No chain execution data yet."
+        return 0
+    fi
+    if ! command -v jq >/dev/null 2>&1; then
+        error "jq is required to view reports"
+        return 1
+    fi
+    local total_executions
+    total_executions=$(wc -l < "$CHAIN_EXECUTION_LOG" 2>/dev/null || echo "0")
+    # BSD/macOS wc left-pads the count with spaces; strip them for clean output.
+    total_executions="${total_executions//[[:space:]]/}"
+    local total_cost
+    total_cost=$(jq -s 'map(.total_cost) | add // 0' "$CHAIN_EXECUTION_LOG" 2>/dev/null || echo "0")
+    echo -e "${BOLD}Summary${RESET}"
+    echo "  Total chain executions: $total_executions"
+    echo "  Total cost: \$$total_cost"
+    echo ""
+    echo -e "${BOLD}Cost Per Chain${RESET}"
+    # One jq pass: group by chain, aggregate, then format one line per chain.
+    # Best-effort: a malformed log prints nothing rather than aborting.
+    jq -rs '
+        group_by(.chain) |
+        map({
+            chain: .[0].chain,
+            executions: length,
+            total_cost: (map(.total_cost) | add),
+            avg_cost: (map(.total_cost) | add / length)
+        }) |
+        sort_by(.chain) |
+        .[] | "  \(.chain): \(.executions) executions, $\(.total_cost | tostring), avg $\(.avg_cost | round)"
+    ' "$CHAIN_EXECUTION_LOG" 2>/dev/null || true
+}
 # ─── Help Text ──────────────────────────────────────────────────────────────
 show_help() {
     echo -e "${BOLD}shipwright model${RESET} — Intelligent Model Routing & Optimization"
@@ -499,7 +912,7 @@ show_help() {
     echo -e "${BOLD}USAGE${RESET}"
     echo "  ${CYAN}shipwright model${RESET} <subcommand> [options]"
     echo ""
-    echo -e "${BOLD}SUBCOMMANDS${RESET}"
+    echo -e "${BOLD}SUBCOMMANDS — Routing${RESET}"
     echo "  ${CYAN}route${RESET} <stage> [complexity]    Route task to optimal model (returns: haiku|sonnet|opus)"
     echo "  ${CYAN}escalate${RESET} <model>              Get next tier model (haiku→sonnet→opus)"
     echo "  ${CYAN}config${RESET} [show|set <key> <val>] Show/set routing configuration"
@@ -507,15 +920,28 @@ show_help() {
     echo "  ${CYAN}ab-test${RESET} [enable|disable] [pct] [variant]  Configure A/B testing"
     echo "  ${CYAN}report${RESET}                        Show model usage and cost report"
     echo "  ${CYAN}ab-results${RESET}                     Show A/B test results"
-    echo "  ${CYAN}help${RESET}                          Show this help message"
+    echo ""
+    echo -e "${BOLD}SUBCOMMANDS — Multi-Model Reasoning Chains${RESET}"
+    echo "  ${CYAN}chain${RESET} [config|define|execute|report|step-cost]"
+    echo "    ${CYAN}config${RESET}                      Show chain configuration & templates"
+    echo "    ${CYAN}define${RESET} <name> <json>       Define custom reasoning chain"
+    echo "    ${CYAN}execute${RESET} <chain> <prompt>   Execute a reasoning chain"
+    echo "    ${CYAN}report${RESET}                      Show chain execution report"
+    echo "    ${CYAN}step-cost${RESET} <in> <out> <model> Calculate cost for one step"
+    echo ""
+    echo -e "${BOLD}BUILT-IN CHAINS${RESET}"
+    echo "  ${DIM}explore-decide${RESET}                2-step: haiku explores → opus decides"
+    echo "  ${DIM}explore-synthesize-decide${RESET}     3-step: haiku → sonnet → opus"
+    echo "  ${DIM}fast-verify${RESET}                   2-step: sonnet generates → haiku verifies"
+    echo "  ${DIM}deep-analysis${RESET}                 2-step: opus analyzes → opus validates"
     echo ""
     echo -e "${BOLD}EXAMPLES${RESET}"
     echo "  ${DIM}shipwright model route plan 65${RESET}        # Route 'plan' stage with 65% complexity"
     echo "  ${DIM}shipwright model escalate haiku${RESET}      # Upgrade from haiku"
-    echo "  ${DIM}shipwright model config show${RESET}         # View routing rules"
-    echo "  ${DIM}shipwright model estimate standard 50${RESET}  # Estimate standard pipeline cost"
-    echo "  ${DIM}shipwright model ab-test enable 15 cost-optimized${RESET}  # 15% A/B test"
-    echo "  ${DIM}shipwright model report${RESET}              # Show usage stats"
+    echo "  ${DIM}shipwright model chain config${RESET}        # Show chain templates"
+    echo "  ${DIM}shipwright model chain execute explore-decide \"analyze this code\"${RESET}"
+    echo "  ${DIM}shipwright model chain report${RESET}         # Show chain execution stats"
+    echo "  ${DIM}shipwright model chain step-cost 1000 500 sonnet${RESET}  # Cost for step"
 }
 # ─── Main ───────────────────────────────────────────────────────────────────
@@ -563,6 +989,7 @@ main() {
                 if command -v jq >/dev/null 2>&1; then
                     local tmp_config
                     tmp_config=$(mktemp)
+    # shellcheck disable=SC2064
                     trap "rm -f '$tmp_config'" RETURN
                     jq ".a_b_test.enabled = false" "$MODEL_ROUTING_CONFIG" > "$tmp_config"
                     mv "$tmp_config" "$MODEL_ROUTING_CONFIG"
@@ -582,6 +1009,34 @@ main() {
         ab-results)
             show_ab_results
             ;;
+        chain)
+            shift 2>/dev/null || true
+            case "${1:-config}" in
+                config)
+                    show_chain_config
+                    ;;
+                define)
+                    shift 2>/dev/null || true
+                    chain_define "$@"
+                    ;;
+                execute)
+                    shift 2>/dev/null || true
+                    chain_execute "$@"
+                    ;;
+                report)
+                    show_chain_report
+                    ;;
+                step-cost)
+                    shift 2>/dev/null || true
+                    chain_step_cost "$@"
+                    ;;
+                *)
+                    error "Unknown chain subcommand: ${1:-}"
+                    show_help
+                    exit 1
+                    ;;
+            esac
+            ;;
         help|--help|-h)
             show_help
             ;;