shipwright-cli 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +22 -8
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +8 -6
- package/dashboard/public/styles.css +176 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +117 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +12 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/metrics.ts +69 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +14 -2
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +25 -4
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +119 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +180 -5
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +101 -3
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +104 -1138
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -711
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +161 -2901
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -8
- package/scripts/sw-adaptive.sh +8 -7
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +15 -6
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +45 -20
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +107 -5
- package/scripts/sw-daemon.sh +71 -11
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +71 -20
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +378 -5
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +12 -7
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +13 -4
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +9 -4
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +18 -4
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +10 -3
- package/scripts/sw-incident.sh +273 -5
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +44 -7
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +436 -1076
- package/scripts/sw-memory.sh +357 -3
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +483 -27
- package/scripts/sw-otel.sh +15 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +7 -1
- package/scripts/sw-pipeline-vitals.sh +12 -6
- package/scripts/sw-pipeline.sh +54 -2653
- package/scripts/sw-pm.sh +16 -8
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +17 -5
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +17 -4
- package/scripts/sw-quality.sh +14 -6
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +14 -5
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +173 -6
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +14 -6
- package/scripts/sw-stream.sh +13 -4
- package/scripts/sw-swarm.sh +20 -7
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +7 -31
- package/scripts/sw-testgen.sh +17 -6
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +37 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +3 -2
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +5 -2
- package/scripts/sw-widgets.sh +9 -4
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
|
3
|
+
# ║ adaptive-model.sh — Real-Time Adaptive Model Selection During Build Loop ║
|
|
4
|
+
# ║ ║
|
|
5
|
+
# ║ Changes model choice mid-iteration based on real-time signals: ║
|
|
6
|
+
# ║ - Tests passing + converging: downgrade to cheaper model (save cost) ║
|
|
7
|
+
# ║ - Tests failing + same error 2x: escalate to stronger model ║
|
|
8
|
+
# ║ - Convergence score dropping: escalate model ║
|
|
9
|
+
# ║ - Rate limit hit: fallback to next available model ║
|
|
10
|
+
# ║ ║
|
|
11
|
+
# ║ Usage: Source from sw-loop.sh, call adaptive_model_select() ║
|
|
12
|
+
# ║ before each Claude invocation in the build loop ║
|
|
13
|
+
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
|
14
|
+
|
|
15
|
+
# Source guard: when this library was already loaded into the current shell,
# stop here instead of redefining everything.
if [[ -n "${_ADAPTIVE_MODEL_LOADED:-}" ]]; then
    return 0
fi
_ADAPTIVE_MODEL_LOADED=1

# ─── Defaults ──────────────────────────────────────────────────────────────
# Values already provided by the caller win; otherwise fall back to defaults.
: "${ARTIFACTS_DIR:=.claude/pipeline-artifacts}"
if [[ -z "${SCRIPT_DIR:-}" ]]; then
    # Parent of the directory that contains this file.
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
fi
|
|
21
|
+
|
|
22
|
+
# ─── Load helpers ──────────────────────────────────────────────────────────
|
|
23
|
+
# Fallback log helpers. They are installed only when the caller has not already
# defined an `info` function (the single check covers all four helpers).
# Each one prints a coloured glyph followed by the message; only error()
# writes to stderr.
if ! [[ "$(type -t info 2>/dev/null)" == "function" ]]; then
    info() {
        echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"
    }
    success() {
        echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"
    }
    warn() {
        echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"
    }
    error() {
        echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2
    }
fi
|
|
29
|
+
# Fallback time helpers, installed only when the caller has not already
# defined `now_iso` (now_epoch is defined under the same check).
if ! [[ "$(type -t now_iso 2>/dev/null)" == "function" ]]; then
    # Current UTC time formatted as YYYY-MM-DDTHH:MM:SSZ.
    now_iso() {
        date -u '+%Y-%m-%dT%H:%M:%SZ'
    }
    # Current time as seconds since the Unix epoch.
    now_epoch() {
        date '+%s'
    }
fi
|
|
33
|
+
# Fallback event emitter, installed only when the caller has not already
# defined `emit_event`.
if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
    # Append one JSON object (one line) to ~/.shipwright/events.jsonl.
    #
    # Arguments:
    #   $1    event type, stored under "type"
    #   $2... key=value pairs; every value is stored as a JSON string
    #
    # Values are escaped (backslash, double quote, newline, CR, tab) so that
    # arbitrary text such as an error message cannot corrupt the line.
    emit_event() {
        local event_type="$1"; shift
        local events_dir="${HOME}/.shipwright"
        mkdir -p "$events_dir"

        local nl=$'\n' cr=$'\r' tab=$'\t'
        local payload key val
        payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""
        while [[ $# -gt 0 ]]; do
            key="${1%%=*}"
            val="${1#*=}"
            # Backslashes must be doubled first, before other escapes add more.
            val="${val//\\/\\\\}"
            val="${val//\"/\\\"}"
            val="${val//$nl/\\n}"
            val="${val//$cr/\\r}"
            val="${val//$tab/\\t}"
            payload="${payload},\"${key}\":\"${val}\""
            shift
        done
        printf '%s\n' "${payload}}" >> "${events_dir}/events.jsonl"
    }
fi
|
|
41
|
+
|
|
42
|
+
# ─── Model Ranking & Escalation Chains ─────────────────────────────────────
|
|
43
|
+
# Models ordered from the cheapest tier to the strongest tier.
MODEL_HIERARCHY=(haiku sonnet opus)

# ─── Thresholds for Adaptive Behavior ──────────────────────────────────────
# Only ESCALATION_ERROR_THRESHOLD is read by the functions visible in this
# file; the other values are declared here but not referenced below.
# Escalate once the same error has repeated this many times.
ESCALATION_ERROR_THRESHOLD=2
# Downgrade after this many consecutive successes.
DOWNGRADE_SUCCESS_THRESHOLD=3
# Escalate if the convergence score drops by this much.
CONVERGENCE_DROP_THRESHOLD=10
# Iterations to wait before escalating again.
ESCALATION_COOLDOWN=2
# Cooldown on rate limit, in seconds (handled by Claude CLI).
RATE_LIMIT_BACKOFF_SECONDS=60
|
|
51
|
+
|
|
52
|
+
# ─── Initialize adaptive tracking for a pipeline run ──────────────────────
|
|
53
|
+
# Prepare adaptive-model tracking for one pipeline run.
#
# Globals read: ARTIFACTS_DIR, HOME
# Effects:
#   - resets ${ARTIFACTS_DIR}/adaptive-model-history.json to an empty array
#     (written to a temp file first, then moved into place)
#   - creates ~/.shipwright/optimization/model-preferences.json with an empty
#     skeleton when it does not exist yet; an existing file is left untouched
#   - emits an "adaptive_model.init" event (best effort)
adaptive_model_init() {
    local run_history="${ARTIFACTS_DIR}/adaptive-model-history.json"
    local staging="${run_history}.tmp.$$"
    local prefs_path="${HOME}/.shipwright/optimization/model-preferences.json"

    # Every run starts with a fresh, empty history.
    mkdir -p "$ARTIFACTS_DIR"
    printf '%s\n' "[]" > "$staging"
    mv "$staging" "$run_history"

    # Learned preferences outlive a single run, so only seed them once.
    mkdir -p "$(dirname "$prefs_path")"
    if ! [[ -f "$prefs_path" ]]; then
        printf '%s\n' \
            '{' \
            '  "version": "1.0",' \
            '  "stage_priors": {},' \
            '  "learned_escalations": {},' \
            '  "learned_downgrades": {},' \
            '  "last_updated": ""' \
            '}' > "$prefs_path"
    fi

    emit_event "adaptive_model.init" "artifacts_dir=$ARTIFACTS_DIR" 2>/dev/null || true
}
|
|
79
|
+
|
|
80
|
+
# ─── Select model based on real-time signals ───────────────────────────────
|
|
81
|
+
#
|
|
82
|
+
# Inputs:
|
|
83
|
+
# stage: pipeline stage name (optional, default "build")
|
|
84
|
+
# iteration_number: current iteration (0-based)
|
|
85
|
+
# last_test_result: "pass" or "fail"
|
|
86
|
+
# error_count: number of times same error appeared
|
|
87
|
+
# convergence_score: 0-100 quality score
|
|
88
|
+
# current_model: model currently in use
|
|
89
|
+
#
|
|
90
|
+
# Returns: Selected model (haiku|sonnet|opus) on stdout
|
|
91
|
+
#
|
|
92
|
+
# Choose the model for the next build-loop iteration.
#
# Arguments (all optional, positional):
#   $1 stage              pipeline stage name (default "build"); accepted for
#                         interface compatibility, not used in the decision
#   $2 iteration_number   current iteration, 0-based (default 0)
#   $3 last_test_result   "pass" or "fail" (default "unknown")
#   $4 error_count        how many times the same error repeated (default 0)
#   $5 convergence_score  0-100 quality score (default 50)
#   $6 current_model      haiku|sonnet|opus (default "opus")
#
# Decision order (first match wins):
#   1. iteration 0                         -> keep current model
#   2. tests pass and score >= 75          -> step down one tier if possible
#   3. tests fail and error_count >=
#      ESCALATION_ERROR_THRESHOLD (def. 2) -> step up one tier if possible
#   4. score < 30                          -> step up one tier if possible
#   5. otherwise                           -> keep current model
# A model name outside haiku/sonnet/opus is never changed.
#
# Outputs: the selected model on stdout (and nothing else).
# Side effect: records the decision through adaptive_model_record.
adaptive_model_select() {
    # shellcheck disable=SC2034  # kept so positional arguments stay stable
    local stage="${1:-build}"
    local iteration_number="${2:-0}"
    local last_test_result="${3:-unknown}"
    local error_count="${4:-0}"
    local convergence_score="${5:-50}"
    local current_model="${6:-opus}"

    # Non-numeric input falls back to the defaults.
    [[ "$iteration_number" =~ ^[0-9]+$ ]] || iteration_number=0
    [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0
    [[ "$convergence_score" =~ ^[0-9]+$ ]] || convergence_score=50
    # Force base 10: a zero-padded value such as "08" is otherwise parsed as
    # (invalid) octal and makes every numeric comparison below fail.
    iteration_number=$((10#$iteration_number))
    error_count=$((10#$error_count))
    convergence_score=$((10#$convergence_score))

    # The threshold is a plain shell variable; it is not present when this
    # exported function runs in a child shell, so fall back to its default.
    local error_threshold="${ESCALATION_ERROR_THRESHOLD:-2}"
    [[ "$error_threshold" =~ ^[0-9]+$ ]] || error_threshold=2
    error_threshold=$((10#$error_threshold))

    # Neighbouring tiers of the current model (empty when there is none).
    local cheaper="" stronger=""
    case "$current_model" in
        haiku)  stronger="sonnet" ;;
        sonnet) cheaper="haiku"; stronger="opus" ;;
        opus)   cheaper="sonnet" ;;
    esac

    local selected_model="$current_model"
    local reason="no_change"
    local escalated=false
    local downgraded=false
    local escalation_trigger=""

    if (( iteration_number == 0 )); then
        # First iteration: use the default (don't adapt).
        reason="first_iteration"
    elif [[ "$last_test_result" == "pass" ]] && (( convergence_score >= 75 )); then
        # Tests passing + high convergence: downgrade for cost savings.
        if [[ -n "$cheaper" ]]; then
            selected_model="$cheaper"
            reason="tests_passing_high_convergence_downgrade_${current_model}_to_${cheaper}"
            downgraded=true
        else
            reason="already_at_minimum_model"
        fi
    elif [[ "$last_test_result" == "fail" ]] && (( error_count >= error_threshold )); then
        # Tests failing with a repeated error: try a stronger model.
        escalation_trigger="test_failure_repeated_error"
    elif (( convergence_score < 30 )); then
        # Low convergence score: try a stronger model.
        escalation_trigger="low_convergence_score"
    else
        reason="stable_conditions_keep_current"
    fi

    if [[ -n "$escalation_trigger" ]]; then
        if [[ -n "$stronger" ]]; then
            selected_model="$stronger"
            reason="${escalation_trigger}_escalate_${current_model}_to_${stronger}"
            escalated=true
        elif [[ "$current_model" == "opus" ]]; then
            reason="already_at_maximum_model"
        fi
        # Unrecognised model names keep reason="no_change".
    fi

    # Record the selection. Its stdout is discarded so that callers capturing
    # "$(adaptive_model_select ...)" only ever receive the model name.
    adaptive_model_record "$iteration_number" "$selected_model" "$last_test_result" \
        "$error_count" "$convergence_score" "$reason" "$escalated" "$downgraded" >/dev/null

    printf '%s\n' "$selected_model"
}
|
|
175
|
+
|
|
176
|
+
# ─── Record model selection and outcome ────────────────────────────────────
|
|
177
|
+
# Append one model-selection record to the run history and emit an event.
#
# Arguments (all optional, positional):
#   $1 iteration          non-negative integer (default 0)
#   $2 model_used         model name (default "opus")
#   $3 test_result        e.g. "pass" / "fail" (default "unknown")
#   $4 error_count        non-negative integer (default 0)
#   $5 convergence_score  non-negative integer (default 50)
#   $6 reason             free-form reason string (default "unknown")
#   $7 escalated          "true" or "false" (default false)
#   $8 downgraded         "true" or "false" (default false)
#
# Globals read: ARTIFACTS_DIR
# Effects: updates ${ARTIFACTS_DIR}/adaptive-model-history.json and emits an
#          "adaptive_model.recorded" event (best effort).
adaptive_model_record() {
    local iteration="${1:-0}"
    local model_used="${2:-opus}"
    local test_result="${3:-unknown}"
    local error_count="${4:-0}"
    local convergence_score="${5:-50}"
    local reason="${6:-unknown}"
    local escalated="${7:-false}"
    local downgraded="${8:-false}"

    [[ "$iteration" =~ ^[0-9]+$ ]] || iteration=0
    [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0
    [[ "$convergence_score" =~ ^[0-9]+$ ]] || convergence_score=50
    # Strip leading zeros: "07" is not a valid JSON number.
    iteration=$((10#$iteration))
    error_count=$((10#$error_count))
    convergence_score=$((10#$convergence_score))
    # The flags are written as JSON booleans, so anything but "true" is false.
    [[ "$escalated" == "true" ]] || escalated=false
    [[ "$downgraded" == "true" ]] || downgraded=false

    local history_file="${ARTIFACTS_DIR}/adaptive-model-history.json"
    mkdir -p "$ARTIFACTS_DIR"

    local ts
    ts="$(now_iso)"

    if command -v jq >/dev/null 2>&1; then
        # Without a prior adaptive_model_init the file may be missing or empty;
        # seed it so the record is kept instead of being dropped.
        [[ -s "$history_file" ]] || printf '%s\n' "[]" > "$history_file"

        # jq builds the record from typed arguments, so strings are escaped
        # correctly, and appends it to the array via a temp file.
        local tmp_hist="${history_file}.tmp.$$"
        if jq \
            --arg ts "$ts" \
            --argjson iteration "$iteration" \
            --arg model "$model_used" \
            --arg test_result "$test_result" \
            --argjson error_count "$error_count" \
            --argjson convergence_score "$convergence_score" \
            --arg reason "$reason" \
            --argjson escalated "$escalated" \
            --argjson downgraded "$downgraded" \
            '. + [{
                ts: $ts,
                iteration: $iteration,
                model: $model,
                test_result: $test_result,
                error_count: $error_count,
                convergence_score: $convergence_score,
                reason: $reason,
                escalated: $escalated,
                downgraded: $downgraded
            }]' "$history_file" > "$tmp_hist" 2>/dev/null; then
            mv "$tmp_hist" "$history_file"
        else
            # The existing file is not a JSON array; leave it untouched.
            rm -f "$tmp_hist"
            warn "Could not append to adaptive model history: $history_file" >&2
        fi
    else
        # Fallback without jq: append the record as one line. The file as a
        # whole is then no longer a single valid JSON array.
        local field
        local -a escaped=()
        for field in "$ts" "$model_used" "$test_result" "$reason"; do
            field="${field//\\/\\\\}"
            field="${field//\"/\\\"}"
            escaped+=("$field")
        done
        printf '{"ts": "%s", "iteration": %s, "model": "%s", "test_result": "%s", "error_count": %s, "convergence_score": %s, "reason": "%s", "escalated": %s, "downgraded": %s}\n' \
            "${escaped[0]}" "$iteration" "${escaped[1]}" "${escaped[2]}" \
            "$error_count" "$convergence_score" "${escaped[3]}" \
            "$escalated" "$downgraded" >> "$history_file"
    fi

    emit_event "adaptive_model.recorded" \
        "iteration=$iteration" \
        "model=$model_used" \
        "test_result=$test_result" \
        "reason=$reason" \
        "escalated=$escalated" \
        "downgraded=$downgraded" 2>/dev/null || true
}
|
|
232
|
+
|
|
233
|
+
# ─── Learn from history after pipeline completes ───────────────────────────
|
|
234
|
+
#
|
|
235
|
+
# Analyzes adaptive history to answer: which model changes helped?
|
|
236
|
+
# Writes learned preferences to optimization/model-preferences.json
|
|
237
|
+
#
|
|
238
|
+
# Summarise the run history into persistent model preferences.
#
# Reads ${ARTIFACTS_DIR}/adaptive-model-history.json, counts escalations and
# downgrades plus how many of those records have test_result == "pass", and
# stores totals and integer success percentages under .learned_escalations /
# .learned_downgrades in ~/.shipwright/optimization/model-preferences.json.
#
# NOTE(review): "success" is judged on the same record that carries the
# escalated/downgraded flag. When records come from adaptive_model_select,
# that test_result is the one that *triggered* the change, not the outcome of
# the next iteration — confirm this is the intended measure.
#
# Returns: 0 on success or when there is no history; 1 when jq is missing or
#          the preferences file could not be updated (it is then left as is).
adaptive_model_learn() {
    local history_file="${ARTIFACTS_DIR}/adaptive-model-history.json"
    [[ -f "$history_file" ]] || return 0

    if ! command -v jq >/dev/null 2>&1; then
        warn "jq required for adaptive model learning"
        return 1
    fi

    local preferences_file="${HOME}/.shipwright/optimization/model-preferences.json"
    mkdir -p "$(dirname "$preferences_file")"

    # One pass over the history yields all four counters; an unreadable
    # history yields zeros.
    local counts=""
    counts=$(jq -r '
        [ ([.[] | select(.escalated == true)] | length),
          ([.[] | select(.escalated == true and .test_result == "pass")] | length),
          ([.[] | select(.downgraded == true)] | length),
          ([.[] | select(.downgraded == true and .test_result == "pass")] | length)
        ] | map(tostring) | join(" ")' "$history_file" 2>/dev/null) || counts=""

    local escalation_total escalation_success downgrade_total downgrade_success
    read -r escalation_total escalation_success downgrade_total downgrade_success <<< "$counts"
    [[ "$escalation_total" =~ ^[0-9]+$ ]] || escalation_total=0
    [[ "$escalation_success" =~ ^[0-9]+$ ]] || escalation_success=0
    [[ "$downgrade_total" =~ ^[0-9]+$ ]] || downgrade_total=0
    [[ "$downgrade_success" =~ ^[0-9]+$ ]] || downgrade_success=0

    # Integer percentages; 0 when there were no attempts.
    local escalation_rate=0
    local downgrade_rate=0
    if (( escalation_total > 0 )); then
        escalation_rate=$((escalation_success * 100 / escalation_total))
    fi
    if (( downgrade_total > 0 )); then
        downgrade_rate=$((downgrade_success * 100 / downgrade_total))
    fi

    # Learning may run without a prior adaptive_model_init; give jq a valid
    # document to update in that case.
    if [[ ! -s "$preferences_file" ]]; then
        printf '%s\n' '{"version": "1.0", "stage_priors": {}}' > "$preferences_file"
    fi

    # Temp file lives next to the target so the final mv is a same-directory
    # rename. It is removed explicitly on failure; no RETURN trap is installed
    # because such a trap would stay active in the caller.
    local tmp_prefs
    if ! tmp_prefs=$(mktemp "${preferences_file}.XXXXXX"); then
        warn "Could not create temp file for model preferences"
        return 1
    fi

    if jq \
        --argjson esc_success "$escalation_success" \
        --argjson esc_total "$escalation_total" \
        --argjson down_success "$downgrade_success" \
        --argjson down_total "$downgrade_total" \
        --argjson esc_rate "$escalation_rate" \
        --argjson down_rate "$downgrade_rate" \
        --arg timestamp "$(now_iso)" \
        '.learned_escalations = {
            "total_attempts": $esc_total,
            "successful": $esc_success,
            "success_rate": $esc_rate
        } |
        .learned_downgrades = {
            "total_attempts": $down_total,
            "successful": $down_success,
            "success_rate": $down_rate
        } |
        .last_updated = $timestamp' \
        "$preferences_file" > "$tmp_prefs" 2>/dev/null && [[ -s "$tmp_prefs" ]]; then
        mv "$tmp_prefs" "$preferences_file"
    else
        # Never replace the preferences with an empty or partial result.
        rm -f "$tmp_prefs"
        warn "Could not update model preferences: $preferences_file"
        return 1
    fi

    success "Learned model preferences: escalation=${escalation_rate}% success, downgrade=${downgrade_rate}% success"
    emit_event "adaptive_model.learned" \
        "escalation_rate=$escalation_rate" \
        "downgrade_rate=$downgrade_rate" 2>/dev/null || true
}
|
|
316
|
+
|
|
317
|
+
# ─── Show adaptive selection stats ─────────────────────────────────────────
|
|
318
|
+
#
|
|
319
|
+
# Reports on model usage distribution, cost savings, and effectiveness
|
|
320
|
+
#
|
|
321
|
+
# Print a human-readable summary of the current run's adaptive history.
#
# Reads ${ARTIFACTS_DIR}/adaptive-model-history.json (a JSON array of records)
# and prints: model usage distribution, number of escalations/downgrades with
# their success rates, test-result counts and the five most frequent reasons.
#
# The success rates count records that carry the escalated/downgraded flag
# and have test_result == "pass" on that same record.
#
# Returns: 0 normally (also when there is no history yet); 1 when jq is missing.
adaptive_model_report() {
    local history_file="${ARTIFACTS_DIR}/adaptive-model-history.json"

    if [[ ! -f "$history_file" ]]; then
        info "No adaptive model history yet"
        return 0
    fi

    if ! command -v jq >/dev/null 2>&1; then
        error "jq required for reports"
        return 1
    fi

    echo ""
    info "Adaptive Model Selection Report"
    echo ""

    # The history file is a single JSON array, so it is read directly (no
    # --slurp, which would wrap it in another array). The total is captured
    # before grouping so each share is relative to all records.
    echo "▸ Model Usage Distribution:"
    jq -r 'length as $total
        | group_by(.model)
        | map({
            model: .[0].model,
            count: length,
            percentage: ((length / $total) * 100 | round)
          })
        | sort_by(.count) | reverse | .[]
        | " \(.model): \(.count) iterations (\(.percentage)%)"' \
        "$history_file" 2>/dev/null || true

    echo ""
    echo "▸ Adaptive Actions:"
    local escalations downgrades
    escalations=$(jq '[.[] | select(.escalated == true)] | length' "$history_file" 2>/dev/null || echo "0")
    downgrades=$(jq '[.[] | select(.downgraded == true)] | length' "$history_file" 2>/dev/null || echo "0")
    [[ "$escalations" =~ ^[0-9]+$ ]] || escalations=0
    [[ "$downgrades" =~ ^[0-9]+$ ]] || downgrades=0

    echo " Escalations: $escalations"
    echo " Downgrades: $downgrades"

    # Effectiveness (only shown when there was at least one action).
    if (( escalations > 0 )); then
        local esc_success
        esc_success=$(jq '[.[] | select(.escalated == true and .test_result == "pass")] | length' "$history_file" 2>/dev/null || echo "0")
        [[ "$esc_success" =~ ^[0-9]+$ ]] || esc_success=0
        local esc_rate=$((esc_success * 100 / escalations))
        echo " Escalation Success Rate: ${esc_rate}% ($esc_success/$escalations)"
    fi

    if (( downgrades > 0 )); then
        local down_success
        down_success=$(jq '[.[] | select(.downgraded == true and .test_result == "pass")] | length' "$history_file" 2>/dev/null || echo "0")
        [[ "$down_success" =~ ^[0-9]+$ ]] || down_success=0
        local down_rate=$((down_success * 100 / downgrades))
        echo " Downgrade Success Rate: ${down_rate}% ($down_success/$downgrades)"
    fi

    # Test result summary
    echo ""
    echo "▸ Test Result Summary:"
    jq -r 'group_by(.test_result)
        | map({result: .[0].test_result, count: length})
        | .[]
        | " \(.result): \(.count)"' "$history_file" 2>/dev/null || true

    # Top reasons (at most five, most frequent first)
    echo ""
    echo "▸ Top Adaptation Reasons:"
    jq -r 'group_by(.reason)
        | map({reason: .[0].reason, count: length})
        | sort_by(.count) | reverse | .[0:5] | .[]
        | " \(.reason): \(.count) times"' "$history_file" 2>/dev/null || true

    echo ""
}
|
|
392
|
+
|
|
393
|
+
# ─── Load learned preferences from previous runs ────────────────────────────
|
|
394
|
+
# Load the success rates learned in previous runs.
#
# Reads .learned_escalations.success_rate and .learned_downgrades.success_rate
# from ~/.shipwright/optimization/model-preferences.json. The values do not
# influence model selection yet; they are only logged through an
# "adaptive_model.using_learned_preferences" event, and only when the learned
# escalation rate is greater than zero.
#
# Returns: 0 always (missing file, missing jq and unusable values are all
#          treated as "nothing learned yet").
adaptive_model_apply_learned_preferences() {
    local preferences_file="${HOME}/.shipwright/optimization/model-preferences.json"

    [[ -f "$preferences_file" ]] || return 0
    command -v jq >/dev/null 2>&1 || return 0

    local learned_escalation_rate learned_downgrade_rate
    learned_escalation_rate=$(jq -r '.learned_escalations.success_rate // 0' "$preferences_file" 2>/dev/null || echo "0")
    learned_downgrade_rate=$(jq -r '.learned_downgrades.success_rate // 0' "$preferences_file" 2>/dev/null || echo "0")

    # Compare on the integer part only: a fractional or otherwise non-integer
    # value would make a shell numeric test print an arithmetic error.
    local escalation_whole="${learned_escalation_rate%%.*}"
    [[ "$escalation_whole" =~ ^[0-9]+$ ]] || return 0

    # Could use these to adjust thresholds dynamically in future runs.
    # For now, just log them.
    if (( 10#$escalation_whole > 0 )); then
        emit_event "adaptive_model.using_learned_preferences" \
            "escalation_rate=$learned_escalation_rate" \
            "downgrade_rate=$learned_downgrade_rate" 2>/dev/null || true
    fi
    return 0
}
|
|
420
|
+
|
|
421
|
+
# ─── Export functions for use by sw-loop.sh ───────────────────────────────
|
|
422
|
+
# Export the public functions so child bash processes started by the caller
# can use them as well.
export -f \
    adaptive_model_init \
    adaptive_model_select \
    adaptive_model_record \
    adaptive_model_learn \
    adaptive_model_report \
    adaptive_model_apply_learned_preferences
|