shipwright-cli 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +22 -8
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +8 -6
- package/dashboard/public/styles.css +176 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +117 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +12 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/metrics.ts +69 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +14 -2
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +25 -4
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +119 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +180 -5
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +101 -3
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +104 -1138
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -711
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +161 -2901
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -8
- package/scripts/sw-adaptive.sh +8 -7
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +15 -6
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +45 -20
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +107 -5
- package/scripts/sw-daemon.sh +71 -11
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +71 -20
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +378 -5
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +12 -7
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +13 -4
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +9 -4
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +18 -4
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +10 -3
- package/scripts/sw-incident.sh +273 -5
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +44 -7
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +436 -1076
- package/scripts/sw-memory.sh +357 -3
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +483 -27
- package/scripts/sw-otel.sh +15 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +7 -1
- package/scripts/sw-pipeline-vitals.sh +12 -6
- package/scripts/sw-pipeline.sh +54 -2653
- package/scripts/sw-pm.sh +16 -8
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +17 -5
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +17 -4
- package/scripts/sw-quality.sh +14 -6
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +14 -5
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +173 -6
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +14 -6
- package/scripts/sw-stream.sh +13 -4
- package/scripts/sw-swarm.sh +20 -7
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +7 -31
- package/scripts/sw-testgen.sh +17 -6
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +37 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +3 -2
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +5 -2
- package/scripts/sw-widgets.sh +9 -4
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Module guard: stop here when this library has already been sourced.
if [[ -n "${_POLICY_LEARNER_LOADED:-}" ]]; then
  return 0
fi
_POLICY_LEARNER_LOADED=1

# ╔═══════════════════════════════════════════════════════════════════════════╗
# ║  shipwright policy-learner — Strategy Selection & Prompt Optimization     ║
# ║  Learn optimal strategies from historical RL episodes and rewards.        ║
# ║  Bucket by (language, issue_type, complexity), find best strategy per     ║
# ║  bucket, optimize prompt section weights, inject into agent prompts.      ║
# ╚═══════════════════════════════════════════════════════════════════════════╝

# VERSION is not referenced inside this module, hence the unused-variable waiver.
# shellcheck disable=SC2034
VERSION="3.3.0"
|
|
15
|
+
|
|
16
|
+
# ─── Output Helpers ──────────────────────────────────────────────────────────
# Fallback implementations. Each one is installed only when no function of the
# same name is already defined, so definitions supplied by the sourcing script
# take precedence.
#
# The message is passed to printf as a '%s' argument rather than through
# `echo -e`, so backslashes inside the message (for example in a file path) are
# printed verbatim instead of being expanded as escape sequences.
if [[ "$(type -t info 2>/dev/null)" != "function" ]]; then
  info() { printf '\033[38;2;0;212;255m\033[1m▸\033[0m %s\n' "$*"; }
fi
if [[ "$(type -t success 2>/dev/null)" != "function" ]]; then
  success() { printf '\033[38;2;74;222;128m\033[1m✓\033[0m %s\n' "$*"; }
fi
if [[ "$(type -t warn 2>/dev/null)" != "function" ]]; then
  warn() { printf '\033[38;2;250;204;21m\033[1m⚠\033[0m %s\n' "$*"; }
fi
if [[ "$(type -t error 2>/dev/null)" != "function" ]]; then
  # Errors are the only helper that writes to stderr.
  error() { printf '\033[38;2;248;113;113m\033[1m✗\033[0m %s\n' "$*" >&2; }
fi

# Time helpers are checked independently so that a script providing only one of
# them still ends up with both defined.
if [[ "$(type -t now_iso 2>/dev/null)" != "function" ]]; then
  now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
fi
if [[ "$(type -t now_epoch 2>/dev/null)" != "function" ]]; then
  now_epoch() { date +%s; }
fi

# Event emission falls back to a no-op.
if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
  emit_event() { true; }
fi
|
|
26
|
+
|
|
27
|
+
# ─── Configuration ───────────────────────────────────────────────────────────
# Every setting keeps a non-empty value supplied by the environment and
# otherwise falls back to the default shown here.

: "${POLICY_EPISODES_FILE:=${HOME}/.shipwright/rl-episodes.jsonl}"
: "${POLICY_REWARDS_FILE:=${HOME}/.shipwright/rewards.jsonl}"
: "${POLICY_LEARNED_FILE:=${HOME}/.shipwright/learned-policy.json}"
: "${POLICY_MIN_EPISODES:=3}"
|
|
33
|
+
|
|
34
|
+
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
# Create the parent directory of $POLICY_LEARNED_FILE when it does not exist.
# Returns 0 when the directory is already present, otherwise mkdir's status.
_policy_ensure_dir() {
  local policy_dir
  policy_dir="$(dirname "$POLICY_LEARNED_FILE")"
  if [[ ! -d "$policy_dir" ]]; then
    mkdir -p "$policy_dir"
  fi
}
|
|
41
|
+
|
|
42
|
+
# Build the bucket key used to index learned strategies.
# Args:   $1=language, $2=issue_type, $3=complexity
#         (an unset or empty argument becomes the wildcard "*")
# Output: "ts:bug:medium", or a partial key such as "*:bug:*"
_policy_context_key() {
  printf '%s:%s:%s\n' "${1:-*}" "${2:-*}" "${3:-*}"
}
|
|
53
|
+
|
|
54
|
+
# ─── Core Functions ──────────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
# Learn from all historical episodes and persist the result.
#
# Reads the JSONL file at $POLICY_EPISODES_FILE, buckets the episodes by
# "language:issue_type:complexity", ranks the strategies seen in each bucket by
# success rate (ties broken by fewer average iterations) and writes the result,
# together with per-complexity model preferences and prompt-section weights,
# to $POLICY_LEARNED_FILE.
#
# Globals: POLICY_EPISODES_FILE, POLICY_LEARNED_FILE, POLICY_MIN_EPISODES (read)
# Outputs: status messages through warn / success / error
# Returns: 0 on success or when there is nothing to learn from,
#          1 when the analysis or the write fails
policy_learn_from_history() {
  _policy_ensure_dir

  if [[ ! -f "$POLICY_EPISODES_FILE" ]]; then
    warn "No episodes file found at $POLICY_EPISODES_FILE"
    return 0
  fi

  # Count lines that carry content instead of counting newlines (wc -l), so a
  # file whose last episode has no trailing newline is not mistaken for empty.
  # grep exits non-zero when the count is 0, hence the `|| true`.
  local episode_count
  episode_count=$(grep -c '[^[:space:]]' "$POLICY_EPISODES_FILE" 2>/dev/null || true)
  if [[ "${episode_count:-0}" -eq 0 ]]; then
    warn "No episodes recorded yet"
    return 0
  fi

  local now_ts
  now_ts="$(now_iso)"

  # Use jq to process all episodes: bucket by context key, find best strategy
  # per bucket, compute avg reward/iterations/cost
  local policy_json
  policy_json=$(jq -c -s --arg now "$now_ts" --argjson min_ep "$POLICY_MIN_EPISODES" '
    def ctx_key:
      ((.context.language // "*") + ":" +
       (.context.issue_type // "*") + ":" +
       (.context.complexity // "*"));
    def strategy_key:
      (.actions // [] | [.[] | if type == "object" then (.strategy // tostring) else tostring end] | sort | join(","));
    group_by(ctx_key) |
    [.[] | . as $bucket |
      ($bucket[0] | ctx_key) as $key |
      ($bucket | group_by(strategy_key)) |
      [.[] |
        {
          strategy: (.[0] | strategy_key),
          episodes: length,
          successes: ([.[] | select(.outcome.success == true)] | length),
          avg_reward: (
            [.[] | if .outcome.success == true then 1.0 else 0.0 end] |
            if length > 0 then (add / length)
            else 0 end
          ),
          avg_iterations: ([.[] | .outcome.iterations // 0] | add / length),
          avg_cost: ([.[] | .outcome.cost_usd // 0] | add / length)
        } |
        .success_rate = (if .episodes > 0 then (.successes / .episodes) else 0 end)
      ] |
      sort_by(-.success_rate, .avg_iterations) |
      {
        key: $key,
        best: (if length > 0 then .[0].strategy else "default" end),
        reward: (if length > 0 then .[0].success_rate else 0 end),
        episodes: ([.[] | .episodes] | add // 0),
        avg_iterations: (if length > 0 then .[0].avg_iterations else 0 end),
        avg_cost: (if length > 0 then .[0].avg_cost else 0 end),
        all_strategies: .
      }
    ] |
    {
      updated_at: $now,
      total_episodes: ([.[].episodes] | add // 0),
      min_episodes: $min_ep,
      strategies: (
        [.[] | {(.key): {
          best: .best,
          reward: (.reward * 100 | floor / 100),
          episodes: .episodes,
          avg_iterations: (.avg_iterations * 10 | floor / 10),
          avg_cost: (.avg_cost * 100 | floor / 100),
          confident: (.episodes >= $min_ep)
        }}] | add // {}
      ),
      model_preferences: {},
      prompt_weights: {}
    }
  ' "$POLICY_EPISODES_FILE" 2>/dev/null)

  if [[ -z "$policy_json" ]] || [[ "$policy_json" == "null" ]]; then
    warn "Failed to analyze episodes"
    return 1
  fi

  # Note: partial matching handled at query time in policy_suggest_strategy

  # Model preferences: for each complexity level, the model with the highest
  # success rate among episodes that recorded a model.
  local model_prefs
  model_prefs=$(jq -c -s '
    [.[] | select(.context.model != null)] |
    if length == 0 then {}
    else
      group_by(.context.complexity // "medium") |
      [.[] |
        (.[0].context.complexity // "medium") as $cplx |
        group_by(.context.model) |
        [.[] | {
          model: .[0].context.model,
          success_rate: (([.[] | select(.outcome.success == true)] | length) / length)
        }] |
        sort_by(-.success_rate) |
        if length > 0 then {("build:" + $cplx): .[0].model} else {} end
      ] | add // {}
    end
  ' "$POLICY_EPISODES_FILE" 2>/dev/null || echo "{}")

  # Merge model preferences into policy
  if [[ -n "$model_prefs" ]] && [[ "$model_prefs" != "{}" ]]; then
    policy_json=$(jq -c --argjson mp "$model_prefs" '.model_preferences = $mp' <<<"$policy_json")
  fi

  # Merge prompt-section weights computed from the same episodes file
  local prompt_weights
  prompt_weights=$(_policy_compute_prompt_weights)
  if [[ -n "$prompt_weights" ]] && [[ "$prompt_weights" != "{}" ]]; then
    policy_json=$(jq -c --argjson pw "$prompt_weights" '.prompt_weights = $pw' <<<"$policy_json")
  fi

  # Atomic write: the temp file is created next to the target so the final mv
  # is a rename within one filesystem, and its name is never predictable.
  local tmp
  if ! tmp="$(mktemp "${POLICY_LEARNED_FILE}.XXXXXX" 2>/dev/null)"; then
    error "Failed to write learned policy"
    return 1
  fi

  local written=0
  if jq '.' <<<"$policy_json" > "$tmp" 2>/dev/null && [[ -s "$tmp" ]]; then
    if mv "$tmp" "$POLICY_LEARNED_FILE"; then
      written=1
    fi
  fi

  if [[ "$written" -ne 1 ]]; then
    rm -f "$tmp"
    error "Failed to write learned policy"
    return 1
  fi

  local total strat_count
  total=$(jq '.total_episodes // 0' <<<"$policy_json")
  strat_count=$(jq '.strategies | keys | length' <<<"$policy_json")
  success "Learned policy from $total episodes across $strat_count context buckets"
  emit_event "policy.learned" "episodes=$total" "buckets=$strat_count"
}
|
|
193
|
+
|
|
194
|
+
# Suggest best strategy for a given context.
# Args:    $1=language, $2=issue_type, $3=complexity
# Globals: POLICY_LEARNED_FILE, POLICY_MIN_EPISODES (read)
# Output:  JSON with strategy, expected_reward, confidence, evidence_count and,
#          when a bucket matched, match_tier and avg_iterations.
#          A "default" suggestion with confidence "none" is printed when no
#          learned policy exists, nothing matches, or jq produces no result.
# Returns: 0
policy_suggest_strategy() {
  local language="${1:-}"
  local issue_type="${2:-}"
  local complexity="${3:-}"
  local fallback='{"strategy":"default","expected_reward":0,"confidence":"none","evidence_count":0}'

  if [[ ! -f "$POLICY_LEARNED_FILE" ]]; then
    echo "$fallback"
    return 0
  fi

  local exact_key
  exact_key="$(_policy_context_key "$language" "$issue_type" "$complexity")"

  # Matching order: exact key, then any key sharing language+type, then any
  # key sharing type+complexity, then any key sharing complexity.
  # ts:bug:medium → ts:bug:* → *:bug:medium → *:*:medium → default
  # The partial tiers are resolved inside jq by scanning the stored keys, so no
  # wildcard keys need to be built on the shell side.
  local result
  result=$(jq -c \
    --arg exact "$exact_key" \
    --arg lang "$language" \
    --arg itype "$issue_type" \
    --arg cplx "$complexity" \
    --argjson min_ep "$POLICY_MIN_EPISODES" '
    .strategies as $s |

    # Try exact match first
    (if $s[$exact] then {match: $s[$exact], tier: "exact"}
     else
       # Scan all keys for partial matches
       ($s | to_entries | [
         # Match lang:type:*
         (.[] | select((.key | split(":")[0]) == $lang and (.key | split(":")[1]) == $itype) | {match: .value, tier: "partial_type"}),
         # Match *:type:complexity
         (.[] | select((.key | split(":")[1]) == $itype and (.key | split(":")[2]) == $cplx) | {match: .value, tier: "partial_lang"}),
         # Match *:*:complexity
         (.[] | select((.key | split(":")[2]) == $cplx) | {match: .value, tier: "partial_cplx"})
       ] | first // null)
     end) |

    if . == null then
      {strategy: "default", expected_reward: 0, confidence: "none", evidence_count: 0}
    else
      {
        strategy: .match.best,
        expected_reward: .match.reward,
        confidence: (
          if .match.episodes >= ($min_ep * 3) then "high"
          elif .match.episodes >= $min_ep then "medium"
          else "low" end
        ),
        evidence_count: .match.episodes,
        match_tier: .tier,
        avg_iterations: .match.avg_iterations
      }
    end
  ' "$POLICY_LEARNED_FILE" 2>/dev/null)

  if [[ -z "$result" ]] || [[ "$result" == "null" ]]; then
    echo "$fallback"
  else
    echo "$result"
  fi
}
|
|
265
|
+
|
|
266
|
+
# Compute which prompt sections correlate with better outcomes.
# Internal helper — called by policy_learn_from_history.
#
# For each tracked context flag the episodes are split into those where the
# flag is exactly true and all the others. The weight is
#   0.5 + (success_rate_with - success_rate_without) / 2
# clamped to [0, 1] and truncated to two decimals. A section with fewer than
# two episodes on either side gets the neutral weight 0.5.
#
# Globals: POLICY_EPISODES_FILE (read)
# Output:  JSON object with section weights (0.0–1.0), or "{}" when the
#          episodes file is missing or jq fails
# Returns: 0
_policy_compute_prompt_weights() {
  if [[ ! -f "$POLICY_EPISODES_FILE" ]]; then
    echo "{}"
    return 0
  fi

  # NOTE: the previous jq program had mismatched brackets/parentheses in the
  # success-rate expressions, so it never compiled and this function always
  # fell through to "{}". Errors stay suppressed on purpose (best effort), so
  # keep the program simple enough to verify by reading.
  jq -c -s '
    # Fraction of the input episodes whose outcome.success is true.
    def success_rate:
      ([.[] | select(.outcome.success == true)] | length) / length;
    def section_weight(field):
      [.[] | select(.context[field] == true)] as $with |
      [.[] | select(.context[field] != true)] as $without |
      if ($with | length) < 2 or ($without | length) < 2 then 0.5
      else
        (($with | success_rate) - ($without | success_rate)) |
        (0.5 + . / 2) |
        if . < 0 then 0 elif . > 1 then 1 else (. * 100 | floor / 100) end
      end;
    {
      inject_memory: section_weight("has_memory"),
      inject_architecture: section_weight("has_architecture"),
      inject_coverage_baseline: section_weight("has_coverage"),
      inject_rl_context: section_weight("has_rl_context"),
      inject_error_history: section_weight("has_error_history")
    }
  ' "$POLICY_EPISODES_FILE" 2>/dev/null || echo "{}"
}
|
|
298
|
+
|
|
299
|
+
# Public wrapper — return the learned prompt-section weights, learning first
# when no learned policy file exists yet.
# Globals: POLICY_LEARNED_FILE (read)
# Output:  the .prompt_weights object of the learned policy as compact JSON,
#          or "{}" when no policy is available or it cannot be read
# Returns: 0
policy_optimize_prompt_weights() {
  if [[ ! -f "$POLICY_LEARNED_FILE" ]]; then
    # The learner reports progress through warn/success, whose fallbacks print
    # to stdout. Send that to stderr so this function's stdout carries only
    # JSON. A failed learning run is tolerated: the "{}" fallback covers it.
    policy_learn_from_history >&2 || true
  fi

  if [[ -f "$POLICY_LEARNED_FILE" ]]; then
    jq -c '.prompt_weights // {}' "$POLICY_LEARNED_FILE" 2>/dev/null || echo "{}"
  else
    echo "{}"
  fi
}
|
|
311
|
+
|
|
312
|
+
# Format learned policy suggestions for agent prompt injection.
# Args:    $1=language, $2=issue_type, $3=complexity
# Globals: POLICY_LEARNED_FILE (read)
# Output:  markdown section for the prompt; nothing when the suggestion has
#          confidence "none" or cannot be parsed
# Returns: 0
policy_inject_into_prompt() {
  local language="${1:-}"
  local issue_type="${2:-}"
  local complexity="${3:-}"

  local suggestion
  suggestion="$(policy_suggest_strategy "$language" "$issue_type" "$complexity")"

  local confidence
  confidence=$(jq -r '.confidence // "none"' <<<"$suggestion" 2>/dev/null || true)

  # An empty value means the suggestion could not be parsed; treat it like
  # "none" rather than emitting a section full of blanks.
  if [[ -z "$confidence" || "$confidence" == "none" ]]; then
    return 0
  fi

  local strategy evidence reward avg_iters
  strategy=$(jq -r '.strategy // "default"' <<<"$suggestion")
  evidence=$(jq -r '.evidence_count // 0' <<<"$suggestion")
  reward=$(jq -r '.expected_reward // 0' <<<"$suggestion")
  avg_iters=$(jq -r '.avg_iterations // 0' <<<"$suggestion")

  # Strategy names are comma-joined; show them as a chain "a → b".
  # Parameter expansion is required here: tr maps single characters, so it
  # cannot replace one comma with the multi-character " → ".
  local display_strategy="${strategy//,/ → }"

  # Round instead of truncating: 0.29 * 100 is 28.999… in floating point and
  # "%d" would report 28%.
  local pct
  pct=$(awk -v r="$reward" 'BEGIN { printf "%.0f", r * 100 }')

  printf '%s\n' "## Policy-Learned Strategy"
  printf 'Based on %s similar issues: **%s** (%s%% success rate, avg %s iterations)\n' \
    "$evidence" "$display_strategy" "$pct" "$avg_iters"
  printf 'Confidence: %s\n' "$confidence"

  # Add prompt weight guidance if available
  if [[ -f "$POLICY_LEARNED_FILE" ]]; then
    # Sections with weight >= 0.7 are recommended ...
    local weights
    weights=$(jq -r '
      .prompt_weights // {} |
      to_entries |
      [.[] | select(.value >= 0.7)] |
      if length > 0 then
        "Include: " + ([.[].key | gsub("inject_"; "")] | join(", "))
      else empty end
    ' "$POLICY_LEARNED_FILE" 2>/dev/null || true)

    # ... and sections with weight < 0.3 are flagged as low impact.
    local exclude
    exclude=$(jq -r '
      .prompt_weights // {} |
      to_entries |
      [.[] | select(.value < 0.3)] |
      if length > 0 then
        "Exclude (low impact): " + ([.[].key | gsub("inject_"; "")] | join(", "))
      else empty end
    ' "$POLICY_LEARNED_FILE" 2>/dev/null || true)

    if [[ -n "$weights" ]]; then
      echo "$weights"
    fi
    if [[ -n "$exclude" ]]; then
      echo "$exclude"
    fi
  fi
}
|
|
379
|
+
|
|
380
|
+
# Display a human-readable report of what the policy has learned.
# Globals: POLICY_LEARNED_FILE (read)
# Output:  a header, then three sections (strategy recommendations, model
#          preferences, prompt section weights); each section prints a
#          placeholder line when it has no data
# Returns: 0 when no learned policy file exists
policy_report() {
  if [[ ! -f "$POLICY_LEARNED_FILE" ]]; then
    echo "No learned policy found. Run policy_learn_from_history first."
    return 0
  fi

  local updated total
  updated=$(jq -r '.updated_at // "unknown"' "$POLICY_LEARNED_FILE")
  total=$(jq -r '.total_episodes // 0' "$POLICY_LEARNED_FILE")

  echo "╔═══════════════════════════════════════════════════════════════╗"
  echo "║ Learned Policy Report ║"
  echo "╚═══════════════════════════════════════════════════════════════╝"
  echo ""
  echo "Updated: ${updated}"
  echo "Total episodes: ${total}"
  echo ""

  # Strategy buckets
  echo "── Strategy Recommendations ──────────────────────────────────"
  # Count first, like the other sections: jq exits 0 with no output for an
  # empty map, so a `|| echo` fallback alone would never print the placeholder.
  local strat_count
  strat_count=$(jq '.strategies // {} | keys | length' "$POLICY_LEARNED_FILE" 2>/dev/null || echo "0")
  if [[ "$strat_count" -gt 0 ]]; then
    # "+ 0.5 | floor" rounds the percentage; flooring alone shows 0.29 as 28%.
    jq -r '
      .strategies // {} | to_entries[] |
      " \(.key): \(.value.best) " +
      "(\(.value.reward * 100 + 0.5 | floor)% success, " +
      "\(.value.avg_iterations) avg iters, " +
      "~$\(.value.avg_cost), " +
      "\(.value.episodes) episodes" +
      (if .value.confident then ", confident" else ", low data" end) +
      ")"
    ' "$POLICY_LEARNED_FILE" 2>/dev/null || echo " (none)"
  else
    echo " (none)"
  fi
  echo ""

  # Model preferences
  echo "── Model Preferences ─────────────────────────────────────────"
  local mp_count
  mp_count=$(jq '.model_preferences // {} | keys | length' "$POLICY_LEARNED_FILE" 2>/dev/null || echo "0")
  if [[ "$mp_count" -gt 0 ]]; then
    jq -r '.model_preferences // {} | to_entries[] | " \(.key): \(.value)"' "$POLICY_LEARNED_FILE" 2>/dev/null
  else
    echo " (no model preference data yet)"
  fi
  echo ""

  # Prompt weights, highest first, labelled with the same 0.7 / 0.3 thresholds
  # used when injecting guidance into prompts.
  echo "── Prompt Section Weights ────────────────────────────────────"
  local pw_count
  pw_count=$(jq '.prompt_weights // {} | keys | length' "$POLICY_LEARNED_FILE" 2>/dev/null || echo "0")
  if [[ "$pw_count" -gt 0 ]]; then
    jq -r '
      .prompt_weights // {} | to_entries |
      sort_by(-.value)[] |
      " \(.key): \(.value)" +
      (if .value >= 0.7 then " (include)" elif .value < 0.3 then " (exclude)" else " (neutral)" end)
    ' "$POLICY_LEARNED_FILE" 2>/dev/null
  else
    echo " (no prompt weight data yet)"
  fi
}
|