shipwright-cli 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +22 -8
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +8 -6
- package/dashboard/public/styles.css +176 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +117 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +12 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/metrics.ts +69 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +14 -2
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +25 -4
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +119 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +180 -5
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +101 -3
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +104 -1138
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -711
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +161 -2901
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -8
- package/scripts/sw-adaptive.sh +8 -7
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +15 -6
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +45 -20
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +107 -5
- package/scripts/sw-daemon.sh +71 -11
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +71 -20
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +378 -5
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +12 -7
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +13 -4
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +9 -4
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +18 -4
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +10 -3
- package/scripts/sw-incident.sh +273 -5
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +44 -7
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +436 -1076
- package/scripts/sw-memory.sh +357 -3
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +483 -27
- package/scripts/sw-otel.sh +15 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +7 -1
- package/scripts/sw-pipeline-vitals.sh +12 -6
- package/scripts/sw-pipeline.sh +54 -2653
- package/scripts/sw-pm.sh +16 -8
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +17 -5
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +17 -4
- package/scripts/sw-quality.sh +14 -6
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +14 -5
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +173 -6
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +14 -6
- package/scripts/sw-stream.sh +13 -4
- package/scripts/sw-swarm.sh +20 -7
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +7 -31
- package/scripts/sw-testgen.sh +17 -6
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +37 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +3 -2
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +5 -2
- package/scripts/sw-widgets.sh +9 -4
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -7,8 +7,10 @@
|
|
|
7
7
|
set -euo pipefail
|
|
8
8
|
trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
# shellcheck disable=SC2034
|
|
11
|
+
VERSION="3.3.0"
|
|
11
12
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
13
|
+
# shellcheck disable=SC2034
|
|
12
14
|
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
13
15
|
|
|
14
16
|
# ─── Cross-platform compatibility ──────────────────────────────────────────
|
|
@@ -30,7 +32,8 @@ fi
|
|
|
30
32
|
if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
|
|
31
33
|
# Fallback event emitter, used only when no emit_event function is already
# defined. Appends one JSON object per call to ~/.shipwright/events.jsonl.
# Arguments:
#   $1   - event type
#   $2.. - key=value pairs; each becomes a JSON string field
# Values are escaped (backslash, double quote, newline, CR, tab) so that a
# value containing those characters cannot break the one-object-per-line log.
emit_event() {
    local event_type="$1"
    shift
    mkdir -p "${HOME}/.shipwright"

    local payload field key val
    payload="{\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"type\":\"$event_type\""

    for field in "$@"; do
        key="${field%%=*}"
        val="${field#*=}"
        # Backslashes first, otherwise the escapes added below get re-escaped.
        val="${val//\\/\\\\}"
        val="${val//\"/\\\"}"
        val="${val//$'\n'/\\n}"
        val="${val//$'\r'/\\r}"
        val="${val//$'\t'/\\t}"
        payload="${payload},\"${key}\":\"${val}\""
    done

    echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
}
|
|
@@ -42,29 +45,61 @@ MODEL_ROUTING_OPTIMIZATION="${OPTIMIZATION_DIR}/model-routing.json"
|
|
|
42
45
|
MODEL_ROUTING_LEGACY="${HOME}/.shipwright/model-routing.json"
|
|
43
46
|
MODEL_USAGE_LOG="${OPTIMIZATION_DIR}/model-usage.jsonl"
|
|
44
47
|
AB_RESULTS_FILE="${HOME}/.shipwright/ab-results.jsonl"
|
|
48
|
+
CHAIN_CONFIG_FILE="${OPTIMIZATION_DIR}/reasoning-chains.json"
|
|
49
|
+
CHAIN_EXECUTION_LOG="${OPTIMIZATION_DIR}/chain-executions.jsonl"
|
|
45
50
|
|
|
46
51
|
# Resolve which config file to use (set by _resolve_routing_config)
|
|
47
52
|
MODEL_ROUTING_CONFIG=""
|
|
48
53
|
|
|
49
|
-
# ─── Model Costs (per million tokens)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
# ─── Model Costs (per million tokens, config-driven) ──────────────────────
|
|
55
|
+
# Read from ~/.shipwright/pricing.json if exists, otherwise use defaults
|
|
56
|
+
# Load per-model token prices (USD per million tokens) into the globals
# HAIKU_INPUT_COST, HAIKU_OUTPUT_COST, SONNET_INPUT_COST, SONNET_OUTPUT_COST,
# OPUS_INPUT_COST and OPUS_OUTPUT_COST.
#
# Built-in defaults are applied first; ~/.shipwright/pricing.json, when it
# exists, may override individual fields (.haiku.input, .haiku.output, ...).
# An override is accepted only if it is a plain non-negative number: these
# values are later spliced into awk expressions, so an empty or non-numeric
# value (empty file, malformed JSON, wrong type) must never reach them.
_load_pricing() {
    local pricing_file="${HOME}/.shipwright/pricing.json"

    HAIKU_INPUT_COST="0.80"
    HAIKU_OUTPUT_COST="4.00"
    SONNET_INPUT_COST="3.00"
    SONNET_OUTPUT_COST="15.00"
    OPUS_INPUT_COST="15.00"
    OPUS_OUTPUT_COST="75.00"

    # No overrides available, or no way to read them: keep the defaults.
    [[ -f "$pricing_file" ]] || return 0
    command -v jq >/dev/null 2>&1 || return 0

    local numeric_re='^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
    local spec var path value
    for spec in \
        "HAIKU_INPUT_COST=.haiku.input" \
        "HAIKU_OUTPUT_COST=.haiku.output" \
        "SONNET_INPUT_COST=.sonnet.input" \
        "SONNET_OUTPUT_COST=.sonnet.output" \
        "OPUS_INPUT_COST=.opus.input" \
        "OPUS_OUTPUT_COST=.opus.output"; do
        var="${spec%%=*}"
        path="${spec#*=}"
        # A jq failure (bad JSON, unexpected shape) simply leaves the default.
        value=$(jq -r "${path} // empty" "$pricing_file" 2>/dev/null || true)
        if [[ "$value" =~ $numeric_re ]]; then
            printf -v "$var" '%s' "$value"
        fi
    done
    return 0
}
|
|
74
|
+
_load_pricing
|
|
75
|
+
|
|
76
|
+
# ─── Default Routing Rules (config-driven) ────────────────────────────────
|
|
77
|
+
# Read per-model stage lists from daemon-config if configured (model_routing.haiku_stages / sonnet_stages / opus_stages)
|
|
78
|
+
# Override the default stage-to-model routing from the daemon config.
#
# Reads .model_routing.haiku_stages / .sonnet_stages / .opus_stages from
# $DAEMON_CONFIG (default: ${WORK_DIR:-.}/.claude/daemon-config.json) and,
# for each one that is present, replaces HAIKU_STAGES / SONNET_STAGES /
# OPUS_STAGES. Missing file, missing keys, or a jq failure leave the current
# values untouched.
#
# Always returns 0: an absent optional key is not an error. (Ending the
# function on a bare `[[ ... ]] && VAR=...` would make the return status
# depend on whether the last key happened to be configured.)
_load_routing_rules() {
    local cfg="${DAEMON_CONFIG:-${WORK_DIR:-.}/.claude/daemon-config.json}"
    [[ -f "$cfg" ]] || return 0

    local h s o
    h=$(jq -r '.model_routing.haiku_stages // empty' "$cfg" 2>/dev/null || true)
    s=$(jq -r '.model_routing.sonnet_stages // empty' "$cfg" 2>/dev/null || true)
    o=$(jq -r '.model_routing.opus_stages // empty' "$cfg" 2>/dev/null || true)

    if [[ -n "$h" && "$h" != "null" ]]; then
        HAIKU_STAGES="$h"
    fi
    if [[ -n "$s" && "$s" != "null" ]]; then
        SONNET_STAGES="$s"
    fi
    if [[ -n "$o" && "$o" != "null" ]]; then
        OPUS_STAGES="$o"
    fi
    return 0
}
|
|
59
90
|
HAIKU_STAGES="intake|monitor"
|
|
60
|
-
# Stages that default to sonnet (medium complexity)
|
|
61
91
|
SONNET_STAGES="test|review"
|
|
62
|
-
# Stages that default to opus (high complexity, needs deep thinking)
|
|
63
92
|
OPUS_STAGES="plan|design|build|compound_quality"
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
93
|
+
_load_routing_rules 2>/dev/null || true
|
|
94
|
+
|
|
95
|
+
# ─── Complexity Thresholds (config-driven) ────────────────────────────────
|
|
96
|
+
if type _smart_int >/dev/null 2>&1; then
|
|
97
|
+
COMPLEXITY_LOW=$(_smart_int "model_routing.complexity_low" 30)
|
|
98
|
+
COMPLEXITY_HIGH=$(_smart_int "model_routing.complexity_high" 80)
|
|
99
|
+
else
|
|
100
|
+
COMPLEXITY_LOW=30
|
|
101
|
+
COMPLEXITY_HIGH=80
|
|
102
|
+
fi
|
|
68
103
|
|
|
69
104
|
# ─── Resolve Routing Config Path ────────────────────────────────────────────
|
|
70
105
|
# Priority: optimization (self-optimize writes) > legacy > create in optimization
|
|
@@ -243,6 +278,7 @@ set_config() {
|
|
|
243
278
|
# Use jq to safely update the config
|
|
244
279
|
local tmp_config
|
|
245
280
|
tmp_config=$(mktemp)
|
|
281
|
+
# shellcheck disable=SC2064
|
|
246
282
|
trap "rm -f '$tmp_config'" RETURN
|
|
247
283
|
|
|
248
284
|
if [[ "$value" == "true" ]] || [[ "$value" == "false" ]]; then
|
|
@@ -353,7 +389,8 @@ record_usage() {
|
|
|
353
389
|
;;
|
|
354
390
|
esac
|
|
355
391
|
|
|
356
|
-
local record
|
|
392
|
+
local record
|
|
393
|
+
record="{\"ts\":\"$(now_iso)\",\"stage\":\"$stage\",\"model\":\"$model\",\"input_tokens\":$input_tokens,\"output_tokens\":$output_tokens,\"cost\":$cost}"
|
|
357
394
|
echo "$record" >> "$MODEL_USAGE_LOG"
|
|
358
395
|
}
|
|
359
396
|
|
|
@@ -376,6 +413,7 @@ configure_ab_test() {
|
|
|
376
413
|
|
|
377
414
|
local tmp_config
|
|
378
415
|
tmp_config=$(mktemp)
|
|
416
|
+
# shellcheck disable=SC2064
|
|
379
417
|
trap "rm -f '$tmp_config'" RETURN
|
|
380
418
|
|
|
381
419
|
jq ".a_b_test = {\"enabled\": true, \"percentage\": $percentage, \"variant\": \"$variant\"}" \
|
|
@@ -395,7 +433,8 @@ log_ab_result() {
|
|
|
395
433
|
|
|
396
434
|
mkdir -p "${HOME}/.shipwright"
|
|
397
435
|
|
|
398
|
-
local record
|
|
436
|
+
local record
|
|
437
|
+
record="{\"ts\":\"$(now_iso)\",\"run_id\":\"$run_id\",\"variant\":\"$variant\",\"success\":$success_status,\"cost\":$cost,\"duration_seconds\":$duration}"
|
|
399
438
|
echo "$record" >> "$AB_RESULTS_FILE"
|
|
400
439
|
}
|
|
401
440
|
|
|
@@ -416,7 +455,8 @@ show_report() {
|
|
|
416
455
|
|
|
417
456
|
# Summary stats
|
|
418
457
|
local total_runs
|
|
419
|
-
total_runs=$(wc -l < "$MODEL_USAGE_LOG" ||
|
|
458
|
+
total_runs=$(wc -l < "$MODEL_USAGE_LOG" || true)
|
|
459
|
+
total_runs="${total_runs:-0}"
|
|
420
460
|
|
|
421
461
|
local haiku_runs
|
|
422
462
|
haiku_runs=$(grep -c '"model":"haiku"' "$MODEL_USAGE_LOG" || true)
|
|
@@ -491,6 +531,380 @@ show_ab_results() {
|
|
|
491
531
|
' "$AB_RESULTS_FILE" 2>/dev/null | jq -r '.[] | "\(.variant):\n Runs: \(.total_runs)\n Success: \(.successful)/\(.total_runs) (\(.success_rate | round)%)\n Avg Cost: $\(.avg_cost | round)\n Total Cost: $\(.total_cost | round)\n Avg Duration: \(.avg_duration | round)s"' || true
|
|
492
532
|
}
|
|
493
533
|
|
|
534
|
+
# ─── Initialize Chain Templates ────────────────────────────────────────────
|
|
535
|
+
# Make sure the reasoning-chain config ($CHAIN_CONFIG_FILE) exists, creating
# its parent directory and seeding it with the built-in chain templates when
# needed. An existing non-empty file is never touched.
#
# A zero-byte file is treated the same as a missing one: it cannot contain
# any chain definition, so leaving it in place would make every later lookup
# fail.
_ensure_chain_templates() {
    mkdir -p "$(dirname "$CHAIN_CONFIG_FILE")"

    # -s: file exists and has a size greater than zero.
    if [[ -s "$CHAIN_CONFIG_FILE" ]]; then
        return 0
    fi

    cat > "$CHAIN_CONFIG_FILE" <<'CHAINS'
{
  "version": "1.0",
  "templates": {
    "explore-decide": [
      {"step": "explore", "model": "haiku", "max_tokens": 4000, "description": "Fast exploration with haiku"},
      {"step": "decide", "model": "opus", "max_tokens": 8000, "description": "Final decision with opus"}
    ],
    "explore-synthesize-decide": [
      {"step": "explore", "model": "haiku", "max_tokens": 4000, "description": "Explore with haiku"},
      {"step": "synthesize", "model": "sonnet", "max_tokens": 6000, "description": "Synthesize with sonnet"},
      {"step": "decide", "model": "opus", "max_tokens": 8000, "description": "Decide with opus"}
    ],
    "fast-verify": [
      {"step": "generate", "model": "sonnet", "max_tokens": 6000, "description": "Generate with sonnet"},
      {"step": "verify", "model": "haiku", "max_tokens": 2000, "description": "Verify with haiku"}
    ],
    "deep-analysis": [
      {"step": "analyze", "model": "opus", "max_tokens": 8000, "description": "Deep analysis with opus"},
      {"step": "validate", "model": "opus", "max_tokens": 4000, "description": "Validate with opus"}
    ]
  },
  "confidence_threshold": 50,
  "escalation_threshold": 80,
  "max_escalations_per_step": 1,
  "custom_chains": {}
}
CHAINS
}
|
|
569
|
+
|
|
570
|
+
# ─── Define a Custom Reasoning Chain ────────────────────────────────────────
|
|
571
|
+
# Register (or replace) a custom reasoning chain in $CHAIN_CONFIG_FILE.
#
# Arguments:
#   $1 - chain name (stored under .custom_chains[<name>])
#   $2 - chain steps as a JSON array
# Returns:
#   0 on success; 1 on missing arguments, missing jq, invalid steps, or a
#   failed config update. On failure the existing config file is left as is.
chain_define() {
    # ${n:-} so that a missing argument reaches the usage message instead of
    # aborting with "unbound variable" under `set -u`.
    local chain_name="${1:-}"
    local steps_json="${2:-}"

    if [[ -z "$chain_name" ]] || [[ -z "$steps_json" ]]; then
        error "Usage: chain_define <name> <steps_json>"
        return 1
    fi

    _ensure_chain_templates

    if ! command -v jq >/dev/null 2>&1; then
        error "jq is required for chain definitions"
        return 1
    fi

    # Validate that steps_json is valid JSON
    if ! jq empty <<< "$steps_json" 2>/dev/null; then
        error "Invalid JSON for chain steps"
        return 1
    fi

    # chain_execute walks the steps by index, so anything but an array
    # (object, string, number) would only fail later at execution time.
    if ! jq -e 'type == "array"' <<< "$steps_json" >/dev/null 2>&1; then
        error "Chain steps must be a JSON array"
        return 1
    fi

    local tmp_config
    tmp_config=$(mktemp)

    # Only replace the config once jq has produced the new document; a jq
    # failure (e.g. corrupt config) must not leave an empty file behind.
    if ! jq --argjson steps "$steps_json" --arg name "$chain_name" \
        '.custom_chains[$name] = $steps' \
        "$CHAIN_CONFIG_FILE" > "$tmp_config"; then
        rm -f "$tmp_config"
        error "Failed to update chain config: $CHAIN_CONFIG_FILE"
        return 1
    fi

    mv "$tmp_config" "$CHAIN_CONFIG_FILE"
    success "Defined custom chain: $chain_name"
}
|
|
606
|
+
|
|
607
|
+
# ─── Score Confidence from Output ──────────────────────────────────────────
|
|
608
|
+
# Heuristic confidence score (0-100) for a step's textual output.
#
# Arguments:
#   $1 - output text to score
#   $2 - step type (optional, defaults to "general"; not used by the scoring)
# Outputs:
#   the score on stdout: base 50, +15 when reasoning wording is present,
#   +20 when conclusion wording is present, -10 when caveat wording is present.
chain_score_confidence() {
    local text="$1"
    local step_type="${2:-general}"

    # Keyword-based only; matching is case-insensitive.
    local score=50

    if grep -qiE "(because|reason|since|based|due to)" <<< "$text"; then
        score=$((score + 15))
    fi

    if grep -qiE "(therefore|thus|conclud|result|found|identify)" <<< "$text"; then
        score=$((score + 20))
    fi

    if grep -qiE "(however|but|though|caveat|limitation|uncertain)" <<< "$text"; then
        score=$((score - 10))
    fi

    # Keep the result inside 0-100
    if (( score < 0 )); then
        score=0
    elif (( score > 100 )); then
        score=100
    fi

    echo "$score"
}
|
|
642
|
+
|
|
643
|
+
# ─── Get Next Escalation Model ─────────────────────────────────────────────
|
|
644
|
+
# Print the next model up the escalation ladder (haiku -> sonnet -> opus).
# opus is the top tier and maps to itself.
#
# Arguments: $1 - current model name
# Returns:   0 with the model name on stdout; 1 (after reporting via `error`)
#            for an unrecognised model.
_get_escalation_model() {
    local from_model="$1"

    if [[ "$from_model" == "haiku" ]]; then
        echo "sonnet"
    elif [[ "$from_model" == "sonnet" || "$from_model" == "opus" ]]; then
        # sonnet steps up to opus; opus is already at the top
        echo "opus"
    else
        error "Unknown model: $from_model"
        return 1
    fi
}
|
|
653
|
+
|
|
654
|
+
# ─── Execute a Single Chain Step ───────────────────────────────────────────
|
|
655
|
+
# Current time in milliseconds since the epoch.
# `date +%s%N` is a GNU extension; where %N is unsupported the output is not
# purely numeric, so fall back to whole seconds scaled to milliseconds.
_chain_now_ms() {
    local stamp
    stamp=$(date +%s%N 2>/dev/null || true)
    if [[ "$stamp" =~ ^[0-9]{13,}$ ]]; then
        echo "${stamp:0:13}"
    else
        echo "$(( $(date +%s) * 1000 ))"
    fi
}

# Run one step of a reasoning chain and print an execution record.
#
# Arguments:
#   $1 - step name
#   $2 - model to run the step with
#   $3 - prompt for the step
#   $4 - max tokens (default 4000)
# Outputs:
#   a JSON object on stdout with the fields
#   step, model, tokens_in, tokens_out, duration_ms, output
#
# NOTE: both branches currently produce a synthetic response with fixed
# token counts; prompt and max_tokens are accepted but not yet used.
_execute_chain_step() {
    local step_name="$1"
    local model="$2"
    local prompt="$3"
    local max_tokens="${4:-4000}"

    local output=""
    local tokens_in=0
    local tokens_out=0
    local duration_ms=0
    local start_time end_time

    start_time=$(_chain_now_ms)

    # NO_GITHUB may be unset; ${NO_GITHUB:-} keeps `set -u` from aborting
    # when CLAUDE_API_KEY is set and the second test is actually evaluated.
    if [[ -z "${CLAUDE_API_KEY:-}" ]] || [[ "${NO_GITHUB:-}" == "true" ]]; then
        # Mock/test mode
        output="{\"status\": \"success\", \"step\": \"$step_name\", \"model\": \"$model\", \"content\": \"Mock response from $model for step $step_name\"}"
        tokens_in=500
        tokens_out=300
    else
        # Real Claude API call would happen here
        # For now, return mock response
        output="{\"status\": \"success\", \"step\": \"$step_name\", \"model\": \"$model\", \"content\": \"Mock response from $model\"}"
        tokens_in=500
        tokens_out=300
    fi

    end_time=$(_chain_now_ms)
    duration_ms=$((end_time - start_time))

    # Return execution record as JSON
    jq -n \
        --arg step "$step_name" \
        --arg model "$model" \
        --argjson tokens_in "$tokens_in" \
        --argjson tokens_out "$tokens_out" \
        --argjson duration_ms "$duration_ms" \
        --arg output "$output" \
        '{step: $step, model: $model, tokens_in: $tokens_in, tokens_out: $tokens_out, duration_ms: $duration_ms, output: $output}'
}
|
|
697
|
+
|
|
698
|
+
# ─── Execute a Complete Reasoning Chain ────────────────────────────────────
|
|
699
|
+
# Execute a named reasoning chain step by step.
#
# Arguments:
#   $1 - chain name; looked up in .templates first, then .custom_chains, of
#        $CHAIN_CONFIG_FILE
#   $2 - initial prompt; each step's output becomes the next step's prompt
# Behaviour:
#   - a non-final step scoring above .escalation_threshold (default 80) ends
#     the chain early with that step's output;
#   - a non-final step scoring below .confidence_threshold (default 50) is
#     re-run once with the model returned by _get_escalation_model.
# Outputs:
#   the execution record (id, chain, ts, steps, output, total_cost) as JSON on
#   stdout; the same record is appended as a single line to
#   $CHAIN_EXECUTION_LOG.
# Returns:
#   0 on success; 1 on bad usage, missing jq, unknown chain, or an unknown
#   model during escalation.
chain_execute() {
    # ${n:-} so that missing arguments reach the usage message under `set -u`.
    local chain_name="${1:-}"
    local prompt="${2:-}"

    if [[ -z "$chain_name" ]] || [[ -z "$prompt" ]]; then
        error "Usage: chain_execute <chain_name> <prompt>"
        return 1
    fi

    _ensure_chain_templates

    if ! command -v jq >/dev/null 2>&1; then
        error "jq is required for chain execution"
        return 1
    fi

    # The name is passed as data (--arg), never spliced into the jq program.
    # A jq failure (unreadable/corrupt config) is reported as "not found".
    local steps
    steps=$(jq -c --arg name "$chain_name" \
        '.templates[$name] // .custom_chains[$name] // empty' \
        "$CHAIN_CONFIG_FILE" 2>/dev/null || true)

    if [[ -z "$steps" || "$steps" == "null" ]]; then
        error "Chain not found: $chain_name"
        return 1
    fi

    mkdir -p "$(dirname "$CHAIN_EXECUTION_LOG")"

    local execution_id
    execution_id=$(date +%s)-$(od -An -N4 -tx4 /dev/urandom 2>/dev/null | tr -d ' ' | cut -c1-6 || echo "000000")

    local confidence_threshold escalation_threshold
    confidence_threshold=$(jq -r '.confidence_threshold // 50' "$CHAIN_CONFIG_FILE")
    escalation_threshold=$(jq -r '.escalation_threshold // 80' "$CHAIN_CONFIG_FILE")
    # Both are used in integer comparisons below; ignore non-integer config.
    [[ "$confidence_threshold" =~ ^[0-9]+$ ]] || confidence_threshold=50
    [[ "$escalation_threshold" =~ ^[0-9]+$ ]] || escalation_threshold=80

    local step_count
    step_count=$(jq 'length' <<< "$steps")

    local execution_trace="[]"
    # Must start out empty (not merely declared): it is only assigned on
    # early termination and is tested after the loop.
    local chain_output=""
    local current_prompt="$prompt"
    local total_cost="0"

    local i step_obj step_name model max_tokens
    local step_result step_output confidence next_model

    for ((i = 0; i < step_count; i++)); do
        step_obj=$(jq -c --argjson idx "$i" '.[$idx]' <<< "$steps")
        step_name=$(jq -r '.step' <<< "$step_obj")
        model=$(jq -r '.model' <<< "$step_obj")
        max_tokens=$(jq -r '.max_tokens // 4000' <<< "$step_obj")

        # Execute this step
        step_result=$(_execute_chain_step "$step_name" "$model" "$current_prompt" "$max_tokens")

        # Extract output for next step
        step_output=$(jq -r '.output' <<< "$step_result")

        # Score confidence and attach it to the step record
        confidence=$(chain_score_confidence "$step_output" "$step_name")
        step_result=$(jq --argjson conf "$confidence" '.confidence = $conf' <<< "$step_result")

        # Check for early termination
        if [[ "$confidence" -gt "$escalation_threshold" ]] && [[ "$i" -lt $((step_count - 1)) ]]; then
            # Confidence is high enough, skip remaining steps
            info "Step $step_name high confidence ($confidence%), skipping remaining steps"
            execution_trace=$(jq --argjson step "$step_result" '. += [$step]' <<< "$execution_trace")
            chain_output="$step_output"
            break
        fi

        # Check for low confidence escalation (never applied to the last step)
        if [[ "$confidence" -lt "$confidence_threshold" ]] && [[ "$i" -lt $((step_count - 1)) ]]; then
            next_model=$(_get_escalation_model "$model") || return 1
            if [[ "$next_model" != "$model" ]]; then
                warn "Step $step_name low confidence ($confidence%), escalating to $next_model"
                # Re-execute with escalated model
                step_result=$(_execute_chain_step "$step_name" "$next_model" "$current_prompt" "$max_tokens")
                # NOTE(review): .confidence on an escalated step is the score
                # that triggered the escalation, not a score of the new output.
                step_result=$(jq --argjson conf "$confidence" '.confidence = $conf | .escalated = true' <<< "$step_result")
                step_output=$(jq -r '.output' <<< "$step_result")
            fi
        fi

        # Add step to trace
        execution_trace=$(jq --argjson step "$step_result" '. += [$step]' <<< "$execution_trace")

        # Use output as input to next step
        current_prompt="$step_output"
    done

    # Extract final output
    if [[ -z "$chain_output" ]]; then
        chain_output=$(jq -r '.[-1].output' <<< "$execution_trace")
    fi

    # "total_cost" is the sum of input and output token counts across all
    # recorded steps (a simplified stand-in, not a currency amount).
    total_cost=$(jq '[.[].tokens_in, .[].tokens_out] | add' <<< "$execution_trace" 2>/dev/null || echo "0")

    # Build the record compactly so the .jsonl log gets exactly one line.
    local execution_record
    execution_record=$(jq -cn \
        --arg id "$execution_id" \
        --arg chain "$chain_name" \
        --argjson trace "$execution_trace" \
        --arg output "$chain_output" \
        --argjson total_cost "$total_cost" \
        --arg ts "$(now_iso)" \
        '{id: $id, chain: $chain, ts: $ts, steps: $trace, output: $output, total_cost: $total_cost}')

    echo "$execution_record" >> "$CHAIN_EXECUTION_LOG"

    # Return execution record (pretty-printed, as before)
    jq '.' <<< "$execution_record"
}
|
|
825
|
+
|
|
826
|
+
# ─── Calculate Cost for a Single Step ──────────────────────────────────────
|
|
827
|
+
# Compute the cost of a single chain step from its token counts.
# Globals:   HAIKU_INPUT_COST, HAIKU_OUTPUT_COST, SONNET_INPUT_COST,
#            SONNET_OUTPUT_COST, OPUS_INPUT_COST, OPUS_OUTPUT_COST (read).
#            The weighted sum is divided by 1000000, so these are presumably
#            prices per million tokens — confirm where they are defined.
# Arguments: $1 - input token count  (default 0; non-numeric input becomes 0)
#            $2 - output token count (default 0; non-numeric input becomes 0)
#            $3 - model tier: haiku | sonnet | opus (default sonnet)
# Outputs:   the cost with six decimal places on stdout, or a bare "0" when
#            the model name is not one of the three known tiers.
chain_step_cost() {
    local tokens_in="${1:-0}"
    local tokens_out="${2:-0}"
    local model="${3:-sonnet}"

    # Anything that is not a plain non-negative integer counts as zero tokens.
    if ! [[ "$tokens_in" =~ ^[0-9]+$ ]]; then tokens_in=0; fi
    if ! [[ "$tokens_out" =~ ^[0-9]+$ ]]; then tokens_out=0; fi

    # Select the price pair for the requested tier.
    local in_rate out_rate
    case "$model" in
        haiku)
            in_rate="$HAIKU_INPUT_COST"
            out_rate="$HAIKU_OUTPUT_COST"
            ;;
        sonnet)
            in_rate="$SONNET_INPUT_COST"
            out_rate="$SONNET_OUTPUT_COST"
            ;;
        opus)
            in_rate="$OPUS_INPUT_COST"
            out_rate="$OPUS_OUTPUT_COST"
            ;;
        *)
            # Unknown tier: keep the historical plain "0" result.
            echo "0"
            return 0
            ;;
    esac

    # Values are handed to awk with -v instead of being spliced into the
    # program text, so a malformed price setting is treated as data and can
    # never change the expression that is evaluated. LC_ALL=C keeps "." as
    # the decimal separator regardless of the caller's locale.
    local cost
    cost=$(LC_ALL=C awk \
        -v tin="$tokens_in" \
        -v tout="$tokens_out" \
        -v rin="$in_rate" \
        -v rout="$out_rate" \
        'BEGIN { printf "%.6f", (tin * rin + tout * rout) / 1000000 }')

    echo "$cost"
}
|
|
850
|
+
|
|
851
|
+
# ─── Show Chain Configuration ──────────────────────────────────────────────
|
|
852
|
+
# Print the reasoning-chain configuration file.
# Globals:   CHAIN_CONFIG_FILE (read)
# Outputs:   a heading followed by the file contents on stdout, pretty-printed
#            through jq when jq is installed and can parse the file, raw
#            otherwise.
# Returns:   1 when the configuration file does not exist even after
#            _ensure_chain_templates has run.
show_chain_config() {
    # Record whether the file exists before the ensure step; a check made
    # only afterwards cannot tell a freshly created file from an existing one.
    local existed_before=false
    if [[ -f "$CHAIN_CONFIG_FILE" ]]; then
        existed_before=true
    fi

    # NOTE(review): defined outside this view; presumably writes the default
    # templates when the file is absent — confirm.
    _ensure_chain_templates

    # Nothing to show: fail with a clear message rather than falling through
    # to jq/cat on a path that does not exist.
    if [[ ! -f "$CHAIN_CONFIG_FILE" ]]; then
        error "Chain configuration not found at $CHAIN_CONFIG_FILE"
        return 1
    fi

    if [[ "$existed_before" == false ]]; then
        success "Created default chain templates at $CHAIN_CONFIG_FILE"
    fi

    info "Reasoning Chain Configuration"
    echo ""

    if command -v jq >/dev/null 2>&1; then
        # Fall back to the raw file when jq cannot parse it.
        jq . "$CHAIN_CONFIG_FILE" 2>/dev/null || cat "$CHAIN_CONFIG_FILE"
    else
        cat "$CHAIN_CONFIG_FILE"
    fi
}
|
|
868
|
+
|
|
869
|
+
# ─── Show Chain Execution Report ───────────────────────────────────────────
|
|
870
|
+
# Print a summary of logged chain executions and a per-chain breakdown.
# Globals:   CHAIN_EXECUTION_LOG (read), BOLD, RESET (read)
# Outputs:   the report on stdout
# Returns:   0 when there is no log yet or the report was printed;
#            1 when jq is not installed.
show_chain_report() {
    info "Chain Execution Report"
    echo ""

    if [[ ! -f "$CHAIN_EXECUTION_LOG" ]]; then
        warn "No chain execution data yet."
        return 0
    fi

    if ! command -v jq >/dev/null 2>&1; then
        error "jq is required to view reports"
        return 1
    fi

    # Count JSON records, not lines: the log may hold pretty-printed
    # (multi-line) records, so a line count would overstate the total.
    local total_executions
    total_executions=$(jq -s 'length' "$CHAIN_EXECUTION_LOG" 2>/dev/null || echo "0")

    local total_cost
    total_cost=$(jq -s 'map(.total_cost) | add // 0' "$CHAIN_EXECUTION_LOG" 2>/dev/null || echo "0")

    echo -e "${BOLD}Summary${RESET}"
    echo " Total chain executions: $total_executions"
    echo " Total cost: \$$total_cost"
    echo ""

    echo -e "${BOLD}Cost Per Chain${RESET}"
    # One jq pass: group records by chain, total each group, then format.
    # A record without total_cost counts as 0 so a single incomplete entry
    # cannot blank out the whole table. Best-effort: a parse failure leaves
    # the section empty instead of aborting the report.
    jq -rs '
        group_by(.chain)
        | map({
            chain: .[0].chain,
            executions: length,
            total_cost: (map(.total_cost // 0) | add)
          })
        | sort_by(.chain)
        | .[]
        | " \(.chain): \(.executions) executions, $\(.total_cost | tostring), avg $\((.total_cost / .executions) | round)"
    ' "$CHAIN_EXECUTION_LOG" 2>/dev/null || true
}
|
|
907
|
+
|
|
494
908
|
# ─── Help Text ──────────────────────────────────────────────────────────────
|
|
495
909
|
show_help() {
|
|
496
910
|
echo -e "${BOLD}shipwright model${RESET} — Intelligent Model Routing & Optimization"
|
|
@@ -498,7 +912,7 @@ show_help() {
|
|
|
498
912
|
echo -e "${BOLD}USAGE${RESET}"
|
|
499
913
|
echo " ${CYAN}shipwright model${RESET} <subcommand> [options]"
|
|
500
914
|
echo ""
|
|
501
|
-
echo -e "${BOLD}SUBCOMMANDS${RESET}"
|
|
915
|
+
echo -e "${BOLD}SUBCOMMANDS — Routing${RESET}"
|
|
502
916
|
echo " ${CYAN}route${RESET} <stage> [complexity] Route task to optimal model (returns: haiku|sonnet|opus)"
|
|
503
917
|
echo " ${CYAN}escalate${RESET} <model> Get next tier model (haiku→sonnet→opus)"
|
|
504
918
|
echo " ${CYAN}config${RESET} [show|set <key> <val>] Show/set routing configuration"
|
|
@@ -506,15 +920,28 @@ show_help() {
|
|
|
506
920
|
echo " ${CYAN}ab-test${RESET} [enable|disable] [pct] [variant] Configure A/B testing"
|
|
507
921
|
echo " ${CYAN}report${RESET} Show model usage and cost report"
|
|
508
922
|
echo " ${CYAN}ab-results${RESET} Show A/B test results"
|
|
509
|
-
echo "
|
|
923
|
+
echo ""
|
|
924
|
+
echo -e "${BOLD}SUBCOMMANDS — Multi-Model Reasoning Chains${RESET}"
|
|
925
|
+
echo " ${CYAN}chain${RESET} [config|define|execute|report|step-cost]"
|
|
926
|
+
echo " ${CYAN}config${RESET} Show chain configuration & templates"
|
|
927
|
+
echo " ${CYAN}define${RESET} <name> <json> Define custom reasoning chain"
|
|
928
|
+
echo " ${CYAN}execute${RESET} <chain> <prompt> Execute a reasoning chain"
|
|
929
|
+
echo " ${CYAN}report${RESET} Show chain execution report"
|
|
930
|
+
echo " ${CYAN}step-cost${RESET} <in> <out> <model> Calculate cost for one step"
|
|
931
|
+
echo ""
|
|
932
|
+
echo -e "${BOLD}BUILT-IN CHAINS${RESET}"
|
|
933
|
+
echo " ${DIM}explore-decide${RESET} 2-step: haiku explores → opus decides"
|
|
934
|
+
echo " ${DIM}explore-synthesize-decide${RESET} 3-step: haiku → sonnet → opus"
|
|
935
|
+
echo " ${DIM}fast-verify${RESET} 2-step: sonnet generates → haiku verifies"
|
|
936
|
+
echo " ${DIM}deep-analysis${RESET} 2-step: opus analyzes → opus validates"
|
|
510
937
|
echo ""
|
|
511
938
|
echo -e "${BOLD}EXAMPLES${RESET}"
|
|
512
939
|
echo " ${DIM}shipwright model route plan 65${RESET} # Route 'plan' stage with 65% complexity"
|
|
513
940
|
echo " ${DIM}shipwright model escalate haiku${RESET} # Upgrade from haiku"
|
|
514
|
-
echo " ${DIM}shipwright model config
|
|
515
|
-
echo " ${DIM}shipwright model
|
|
516
|
-
echo " ${DIM}shipwright model
|
|
517
|
-
echo " ${DIM}shipwright model
|
|
941
|
+
echo " ${DIM}shipwright model chain config${RESET} # Show chain templates"
|
|
942
|
+
echo " ${DIM}shipwright model chain execute explore-decide \"analyze this code\"${RESET}"
|
|
943
|
+
echo " ${DIM}shipwright model chain report${RESET} # Show chain execution stats"
|
|
944
|
+
echo " ${DIM}shipwright model chain step-cost 1000 500 sonnet${RESET} # Cost for step"
|
|
518
945
|
}
|
|
519
946
|
|
|
520
947
|
# ─── Main ───────────────────────────────────────────────────────────────────
|
|
@@ -562,6 +989,7 @@ main() {
|
|
|
562
989
|
if command -v jq >/dev/null 2>&1; then
|
|
563
990
|
local tmp_config
|
|
564
991
|
tmp_config=$(mktemp)
|
|
992
|
+
# shellcheck disable=SC2064
|
|
565
993
|
trap "rm -f '$tmp_config'" RETURN
|
|
566
994
|
jq ".a_b_test.enabled = false" "$MODEL_ROUTING_CONFIG" > "$tmp_config"
|
|
567
995
|
mv "$tmp_config" "$MODEL_ROUTING_CONFIG"
|
|
@@ -581,6 +1009,34 @@ main() {
|
|
|
581
1009
|
ab-results)
|
|
582
1010
|
show_ab_results
|
|
583
1011
|
;;
|
|
1012
|
+
chain)
|
|
1013
|
+
shift 2>/dev/null || true
|
|
1014
|
+
case "${1:-config}" in
|
|
1015
|
+
config)
|
|
1016
|
+
show_chain_config
|
|
1017
|
+
;;
|
|
1018
|
+
define)
|
|
1019
|
+
shift 2>/dev/null || true
|
|
1020
|
+
chain_define "$@"
|
|
1021
|
+
;;
|
|
1022
|
+
execute)
|
|
1023
|
+
shift 2>/dev/null || true
|
|
1024
|
+
chain_execute "$@"
|
|
1025
|
+
;;
|
|
1026
|
+
report)
|
|
1027
|
+
show_chain_report
|
|
1028
|
+
;;
|
|
1029
|
+
step-cost)
|
|
1030
|
+
shift 2>/dev/null || true
|
|
1031
|
+
chain_step_cost "$@"
|
|
1032
|
+
;;
|
|
1033
|
+
*)
|
|
1034
|
+
error "Unknown chain subcommand: ${1:-}"
|
|
1035
|
+
show_help
|
|
1036
|
+
exit 1
|
|
1037
|
+
;;
|
|
1038
|
+
esac
|
|
1039
|
+
;;
|
|
584
1040
|
help|--help|-h)
|
|
585
1041
|
show_help
|
|
586
1042
|
;;
|