shipwright-cli 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +4 -4
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +2 -6
- package/dashboard/public/styles.css +100 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +66 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +2 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +10 -0
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +23 -2
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +112 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +177 -4
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +100 -2
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +100 -1136
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -715
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +59 -2929
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -2
- package/scripts/sw-adaptive.sh +2 -1
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +5 -1
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +10 -4
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +48 -3
- package/scripts/sw-daemon.sh +66 -9
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +59 -16
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +325 -2
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +4 -3
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +7 -1
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +4 -1
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +16 -3
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +6 -1
- package/scripts/sw-incident.sh +265 -1
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +42 -6
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +432 -1128
- package/scripts/sw-memory.sh +356 -2
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +481 -26
- package/scripts/sw-otel.sh +13 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +5 -1
- package/scripts/sw-pipeline-vitals.sh +2 -1
- package/scripts/sw-pipeline.sh +53 -2664
- package/scripts/sw-pm.sh +12 -5
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +7 -1
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +15 -3
- package/scripts/sw-quality.sh +2 -1
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +10 -3
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +6 -3
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +4 -1
- package/scripts/sw-stream.sh +7 -1
- package/scripts/sw-swarm.sh +18 -6
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +5 -29
- package/scripts/sw-testgen.sh +7 -1
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +3 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +2 -1
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +3 -1
- package/scripts/sw-widgets.sh +3 -1
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Module guard - prevent double-sourcing
|
|
3
|
+
[[ -n "${_RL_OPTIMIZER_LOADED:-}" ]] && return 0
|
|
4
|
+
_RL_OPTIMIZER_LOADED=1
|
|
5
|
+
|
|
6
|
+
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
|
7
|
+
# ║ shipwright rl-optimizer — Cross-Session Reinforcement Learning (Phase 7)║
|
|
8
|
+
# ║ Record (context, actions, outcome) episodes, weight by success, ║
|
|
9
|
+
# ║ and suggest best approaches for new issues based on past experience. ║
|
|
10
|
+
# ║ Decay: halve weight every 30 days. Min weight: 0.1. ║
|
|
11
|
+
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
|
12
|
+
|
|
13
|
+
# shellcheck disable=SC2034
|
|
14
|
+
VERSION="3.3.0"
|
|
15
|
+
|
|
16
|
+
# ─── Output Helpers ──────────────────────────────────────────────────────────
# Fallback definitions for the logging and time helpers used by this module.
# Each one is installed only when no function of that name already exists, so
# helpers supplied by the sourcing script always take precedence.
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
[[ "$(type -t warn 2>/dev/null)" == "function" ]] || warn() { echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"; }
# error is the only logger that writes to stderr.
[[ "$(type -t error 2>/dev/null)" == "function" ]] || error() { echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2; }
# now_iso and now_epoch are guarded independently: a caller that defines only
# one of them still ends up with both, and neither overrides the caller's copy.
[[ "$(type -t now_iso 2>/dev/null)" == "function" ]] || now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
[[ "$(type -t now_epoch 2>/dev/null)" == "function" ]] || now_epoch() { date +%s; }
# Event emission degrades to a no-op when no emit_event function is defined.
[[ "$(type -t emit_event 2>/dev/null)" == "function" ]] || emit_event() { true; }
|
|
26
|
+
|
|
27
|
+
# ─── Configuration ───────────────────────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
# Tunables. Any of these may be pre-set in the environment; the default on the
# right is applied only when the variable is unset or empty.
: "${RL_EPISODES_FILE:=${HOME}/.shipwright/rl-episodes.jsonl}" # JSONL episode store
: "${RL_DECAY_HALF_LIFE_DAYS:=30}"                             # days for a weight to halve
: "${RL_MIN_WEIGHT:=0.1}"                                      # floor applied to decayed/updated weights
: "${RL_SUCCESS_REWARD:=1.0}"                                  # added to an episode weight on success
: "${RL_FAILURE_PENALTY:=0.5}"                                 # subtracted from an episode weight on failure
: "${RL_MAX_SUGGESTIONS:=3}"                                   # intended cap on suggested approaches
|
|
35
|
+
|
|
36
|
+
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
# Make sure the directory that holds RL_EPISODES_FILE exists, creating it
# (including missing parents) when necessary. The file itself is not created.
_rl_ensure_dir() {
  local parent
  parent="$(dirname "$RL_EPISODES_FILE")"
  if [[ ! -d "$parent" ]]; then
    mkdir -p "$parent"
  fi
}
|
|
43
|
+
|
|
44
|
+
# Decay multiplier for an episode, derived from its age in whole days.
# Args:   $1 = episode timestamp in epoch seconds (default 0)
# Output: "1.0" when the episode is less than one full day old (or dated in
#         the future); otherwise
#         max(0.5^(age_days / RL_DECAY_HALF_LIFE_DAYS), RL_MIN_WEIGHT),
#         printed with four decimal places.
_rl_decay_factor() {
  local recorded_at="${1:-0}"
  local current_ts elapsed_days
  current_ts="$(date +%s)"
  # Integer division: partial days are discarded.
  elapsed_days=$(( (current_ts - recorded_at) / 86400 ))

  if (( elapsed_days > 0 )); then
    local decayed
    decayed=$(awk -v age="$elapsed_days" -v half="$RL_DECAY_HALF_LIFE_DAYS" -v min="$RL_MIN_WEIGHT" \
      'BEGIN { f = 2^(-age/half); if (f < min) f = min; printf "%.4f", f }')
    echo "$decayed"
  else
    echo "1.0"
  fi
}
|
|
63
|
+
|
|
64
|
+
# Convert a UTC ISO-8601 timestamp (YYYY-MM-DDTHH:MM:SSZ) to epoch seconds.
# Args:   $1 = timestamp string
# Output: epoch seconds, or "0" when the input is empty or cannot be parsed
_rl_iso_to_epoch() {
  local ts="${1:-}"
  if [[ -z "$ts" ]]; then
    echo "0"
    return
  fi
  # GNU date understands the trailing "Z" by itself. BSD/macOS date needs an
  # explicit input format, and -u so the literal "Z" is interpreted as UTC
  # instead of the local time zone (which would skew the result by the offset).
  date -d "$ts" +%s 2>/dev/null \
    || date -u -jf "%Y-%m-%dT%H:%M:%SZ" "$ts" +%s 2>/dev/null \
    || echo "0"
}
|
|
71
|
+
|
|
72
|
+
# ─── Core Functions ──────────────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
# Record a completed pipeline episode as one compact JSON line in
# RL_EPISODES_FILE. New episodes always start with weight 1.0.
# Args: $1=context_json, $2=actions_json, $3=outcome_json, $4=rewards_json (optional)
#   context_json: {"language":"ts","complexity":"medium","issue_type":"bug"}
#   actions_json: ["read_code","add_tests","refactor"]
#   outcome_json: {"success":true,"iterations":5,"cost_usd":2.50}
#   rewards_json: [45,55,70,85,95]  (process reward trajectory, optional)
# Returns: 0 once the episode is appended; 1 when an argument is not valid
#          JSON (nothing is written) or the append itself fails.
rl_record_episode() {
  local context_json="${1:-"{}"}"
  local actions_json="${2:-"[]"}"
  local outcome_json="${3:-"{}"}"
  local rewards_json="${4:-"[]"}"

  _rl_ensure_dir

  local timestamp epoch
  timestamp="$(now_iso)"
  epoch="$(date +%s)"

  # Build the episode with jq (compact, one line, for JSONL). jq rejects any
  # argument that is not valid JSON; bail out instead of appending a blank
  # line to the episode store.
  local episode
  episode=$(jq -c -n \
    --arg ts "$timestamp" \
    --argjson epoch "$epoch" \
    --argjson ctx "$context_json" \
    --argjson acts "$actions_json" \
    --argjson out "$outcome_json" \
    --argjson rw "$rewards_json" \
    --argjson w 1.0 \
    '{timestamp: $ts, epoch: $epoch, context: $ctx, actions: $acts, outcome: $out, process_rewards: $rw, weight: $w}') || episode=""
  if [[ -z "$episode" ]]; then
    error "RL episode not recorded: context, actions, outcome and rewards must be valid JSON"
    return 1
  fi

  # A single short write in append mode; no temp file is needed for this.
  if ! printf '%s\n' "$episode" >> "$RL_EPISODES_FILE"; then
    error "RL episode not recorded: cannot append to $RL_EPISODES_FILE"
    return 1
  fi

  # Summary fields for the event and the log line.
  local success_str iter_str actions_count
  success_str=$(printf '%s' "$outcome_json" | jq -r '.success // false')
  iter_str=$(printf '%s' "$outcome_json" | jq -r '.iterations // 0')
  actions_count=$(printf '%s' "$actions_json" | jq 'length')

  emit_event "rl.episode_recorded" \
    "success=$success_str" \
    "iterations=$iter_str" \
    "actions_count=$actions_count"

  info "RL episode recorded (success=$success_str, iterations=$iter_str)"
}
|
|
124
|
+
|
|
125
|
+
# Suggest the best approaches for a new issue based on past episodes.
# Args: $1=language, $2=issue_type, $3=complexity (each optional; an empty
#       value never counts as a match)
# Output: a "Based on N past episodes..." header followed by one markdown
#         bullet per suggested action set, or an empty line when there is no
#         episode file, no episodes, or no matching data.
# The number of bullets is capped by RL_MAX_SUGGESTIONS (default 3).
rl_suggest_approach() {
  local language="${1:-}"
  local issue_type="${2:-}"
  local complexity="${3:-}"

  if [[ ! -f "$RL_EPISODES_FILE" ]]; then
    echo ""
    return 0
  fi

  local episode_count
  episode_count=$(wc -l < "$RL_EPISODES_FILE" | tr -d ' ')
  if [[ "$episode_count" -eq 0 ]]; then
    echo ""
    return 0
  fi

  # Honour the configured cap; fall back to 3 when it is unset or not a
  # non-negative whole number.
  local max_suggestions="${RL_MAX_SUGGESTIONS:-3}"
  [[ "$max_suggestions" =~ ^[0-9]+$ ]] || max_suggestions=3

  local now_epoch
  now_epoch="$(date +%s)"

  # jq filters matching episodes, applies time decay, and ranks action sets by
  # weighted success rate. Any jq failure is treated as "no suggestions".
  local suggestions
  suggestions=$(jq -r -s --arg lang "$language" --arg itype "$issue_type" \
    --arg cplx "$complexity" --argjson now "$now_epoch" \
    --argjson half "$RL_DECAY_HALF_LIFE_DAYS" --argjson min "$RL_MIN_WEIGHT" \
    --argjson max "$max_suggestions" '
    # Similarity score (0-3): one point per matching, non-empty dimension
    def sim:
      (if .context.language == $lang and $lang != "" then 1 else 0 end) +
      (if .context.issue_type == $itype and $itype != "" then 1 else 0 end) +
      (if .context.complexity == $cplx and $cplx != "" then 1 else 0 end);

    # Decay factor: halves every $half days, never below $min
    def decay:
      (($now - (.epoch // 0)) / 86400) as $age |
      if $age <= 0 then 1
      else (pow(0.5; ($age / $half))) | if . < $min then $min else . end
      end;

    # Keep episodes with at least 1 matching dimension
    [.[] | select(sim >= 1)] |

    # Group by action set (order-insensitive: actions are sorted first)
    group_by(.actions | sort | join(",")) |

    # For each action group, compute weighted stats
    [.[] | {
      actions: (.[0].actions | sort),
      action_key: (.[0].actions | sort | join(", ")),
      total_episodes: length,
      weighted_successes: ([.[] | select(.outcome.success == true) | decay * (.weight // 1)] | add // 0),
      weighted_total: ([.[] | decay * (.weight // 1)] | add // 0),
      avg_iterations: ([.[] | .outcome.iterations // 0] | add / length),
      avg_cost: ([.[] | .outcome.cost_usd // 0] | add / length),
      similarity: (.[0] | sim)
    }] |

    # Compute success rate and sort: best rate, then closest match, then
    # fewest iterations
    [.[] | .success_rate = (if .weighted_total > 0 then (.weighted_successes / .weighted_total * 100) else 0 end)] |
    sort_by(-.success_rate, -.similarity, .avg_iterations) |

    # Keep the configured number of top entries
    .[:$max] |

    # Format as output lines
    .[] | "- **\(.action_key)**: \(.success_rate | floor)% success rate (\(.total_episodes) episodes, avg \(.avg_iterations | floor) iterations, ~$\(.avg_cost | . * 100 | floor / 100))"
  ' "$RL_EPISODES_FILE" 2>/dev/null || echo "")

  if [[ -z "$suggestions" ]]; then
    echo ""
    return 0
  fi

  # Human-readable description of the requested context, e.g. "ts bug (low)".
  local context_desc=""
  [[ -n "$language" ]] && context_desc="${language}"
  [[ -n "$issue_type" ]] && context_desc="${context_desc:+$context_desc }${issue_type}"
  [[ -n "$complexity" ]] && context_desc="${context_desc:+$context_desc }(${complexity})"

  echo "Based on ${episode_count} past episodes${context_desc:+ for $context_desc}:
${suggestions}"
}
|
|
209
|
+
|
|
210
|
+
# Report how effective each individual action has been across all episodes.
# Output: one line per action, best weighted success rate first, in the form
#   "<action>: N% success (K episodes, avg I iters, ~$C)" plus a trend note
#   when the action appears in more than two episodes from the last 7 days.
#   Prints "No episodes recorded yet." when the episode file is missing and
#   "No actionable data yet." when jq fails.
rl_effectiveness_score() {
  [[ -f "$RL_EPISODES_FILE" ]] || {
    echo "No episodes recorded yet."
    return 0
  }

  local ts_now
  ts_now="$(date +%s)"

  jq -r -s \
    --argjson now "$ts_now" \
    --argjson half "$RL_DECAY_HALF_LIFE_DAYS" \
    --argjson min "$RL_MIN_WEIGHT" '
    # Time decay: halves every $half days, clamped to at least $min
    def decay:
      (($now - (.epoch // 0)) / 86400) as $days |
      if $days <= 0 then 1
      else (pow(0.5; ($days / $half))) | if . < $min then $min else . end
      end;

    # Decayed weight of one episode
    def weighted: decay * (.weight // 1);

    # True for entries whose episode is under 7 days (604800 s) old
    def is_recent: ($now - (.ep.epoch // 0)) < 604800;

    # One {action, ep} entry per action per episode
    [.[] | . as $episode | .actions[]? | {action: ., ep: $episode}]

    # Bucket the entries by action name
    | group_by(.action)

    # Per-action statistics
    | [.[] | {
        action: .[0].action,
        total: length,
        success_rate: (([.[] | select(.ep.outcome.success == true) | .ep | weighted] | add // 0) /
                       ([.[] | .ep | weighted] | add // 1) * 100),
        avg_iterations: ([.[] | .ep.outcome.iterations // 0] | add / length),
        avg_cost: ([.[] | .ep.outcome.cost_usd // 0] | add / length),
        recent_success: ([.[] | select(is_recent) | select(.ep.outcome.success == true)] | length),
        recent_total: ([.[] | select(is_recent)] | length)
      }]

    | sort_by(-.success_rate)

    | .[]
    | "\(.action): \(.success_rate | floor)% success (\(.total) episodes, avg \(.avg_iterations | floor) iters, ~$\(.avg_cost | . * 100 | floor / 100))" +
      (if .recent_total > 2 then
        # Compare the unweighted recent rate with the overall weighted rate,
        # using a 10-point band either side
        (if (.recent_success / .recent_total) > (.success_rate / 100 + 0.1) then " (trending up)"
         elif (.recent_success / .recent_total) < (.success_rate / 100 - 0.1) then " (declining)"
         else " (stable)" end)
      else "" end)
  ' "$RL_EPISODES_FILE" 2>/dev/null || echo "No actionable data yet."
}
|
|
259
|
+
|
|
260
|
+
# Render RL suggestions as a markdown section for prompt injection.
# Args: $1=language, $2=issue_type, $3=complexity
# Output: a "## RL-Suggested Approaches" section wrapping the text produced by
#         rl_suggest_approach; prints nothing when there are no suggestions.
rl_inject_context() {
  local language="${1:-}" issue_type="${2:-}" complexity="${3:-}"

  local advice
  advice="$(rl_suggest_approach "$language" "$issue_type" "$complexity")"

  # Nothing learned yet for this context: contribute no section at all.
  [[ -n "$advice" ]] || return 0

  printf '%s\n%s\n\n%s\n' \
    "## RL-Suggested Approaches (from past pipelines)" \
    "$advice" \
    "Use these insights to guide your strategy. Approaches with higher success rates should be preferred."
}
|
|
282
|
+
|
|
283
|
+
# Apply the outcome reward or penalty to the most recently recorded episode.
# Args: $1=actions_json (array; accepted for interface compatibility but not
#          used to select the episode — the last line of the file is updated)
#       $2=success (true/false)
# Effect: adds RL_SUCCESS_REWARD when $2 is "true", otherwise subtracts
#         RL_FAILURE_PENALTY; the resulting weight is floored at RL_MIN_WEIGHT
#         and RL_EPISODES_FILE is rewritten as compact JSONL.
# Best-effort: if the file is missing, empty, or unparsable it is left
# untouched and the function still returns 0.
rl_update_weights() {
  # shellcheck disable=SC2034 — kept so the documented signature stays stable
  local actions_json="${1:-"[]"}"
  local success="${2:-false}"

  if [[ ! -f "$RL_EPISODES_FILE" ]]; then
    return 0
  fi

  local delta
  if [[ "$success" == "true" ]]; then
    delta="$RL_SUCCESS_REWARD"
  else
    delta="-${RL_FAILURE_PENALTY}"
  fi

  # Stage the rewrite next to the target so the final mv is a same-filesystem
  # rename rather than a copy across mount points.
  local tmp
  tmp="$(mktemp "${RL_EPISODES_FILE}.XXXXXX" 2>/dev/null)" || tmp=""
  if [[ -z "$tmp" ]]; then
    warn "RL weights not updated: cannot create temp file next to $RL_EPISODES_FILE" >&2
    return 0
  fi

  # Slurp all episodes, adjust the last one's weight, emit one object per line.
  # Replace the store only when jq succeeded AND produced output.
  if jq -c -s --argjson delta "$delta" --argjson min "$RL_MIN_WEIGHT" '
      if length == 0 then empty
      else
        .[-1].weight = (((.[-1].weight // 1) + $delta) | if . < $min then $min else . end)
        | .[]
      end
    ' "$RL_EPISODES_FILE" > "$tmp" 2>/dev/null \
    && [[ -s "$tmp" ]] \
    && mv "$tmp" "$RL_EPISODES_FILE"; then
    emit_event "rl.weights_updated" "success=$success" "delta=$delta"
  else
    rm -f "$tmp"
  fi
  return 0
}
|
|
321
|
+
|
|
322
|
+
# ─── Pipeline Integration Helpers ────────────────────────────────────────────
|
|
323
|
+
|
|
324
|
+
# Record an episode from plain pipeline values (convenience wrapper) and then
# apply the outcome reward/penalty to it.
# Args: $1=success (JSON boolean, default false), $2=iterations (default 0),
#       $3=cost_usd (default 0), $4=language, $5=complexity, $6=issue_type,
#       $7=actions_json (default []), $8=rewards_json (default [])
# Returns: 0 on success; 1 when an argument is not valid JSON or the episode
#          could not be recorded. In that case no weights are touched, so an
#          older, unrelated episode is never re-weighted by mistake.
rl_record_from_pipeline() {
  local success="${1:-false}"
  local iterations="${2:-0}"
  local cost_usd="${3:-0}"
  local language="${4:-}"
  local complexity="${5:-}"
  local issue_type="${6:-}"
  local actions_json="${7:-"[]"}"
  local rewards_json="${8:-"[]"}"

  # Reject unparsable action/reward payloads before anything is written.
  local payload
  for payload in "$actions_json" "$rewards_json"; do
    if ! printf '%s' "$payload" | jq -e -n 'input | true' >/dev/null 2>&1; then
      error "RL episode not recorded: actions and rewards must be valid JSON"
      return 1
    fi
  done

  local context_json
  context_json=$(jq -c -n \
    --arg lang "$language" \
    --arg cplx "$complexity" \
    --arg itype "$issue_type" \
    '{language: $lang, complexity: $cplx, issue_type: $itype}') || context_json=""

  # success / iterations / cost_usd are spliced in as raw JSON values, so jq
  # fails here when any of them is not valid JSON (e.g. success="yes").
  local outcome_json
  outcome_json=$(jq -c -n \
    --argjson success "$success" \
    --argjson iters "$iterations" \
    --argjson cost "$cost_usd" \
    '{success: $success, iterations: $iters, cost_usd: $cost}' 2>/dev/null) || outcome_json=""

  if [[ -z "$context_json" || -z "$outcome_json" ]]; then
    error "RL episode not recorded: success, iterations and cost_usd must be valid JSON values"
    return 1
  fi

  rl_record_episode "$context_json" "$actions_json" "$outcome_json" "$rewards_json" || return 1
  rl_update_weights "$actions_json" "$success"
}
|
|
353
|
+
|
|
354
|
+
# Produce the RL prompt section for compose_prompt.
# Reads the context dimensions from the INTELLIGENCE_LANGUAGE,
# INTELLIGENCE_ISSUE_TYPE and INTELLIGENCE_COMPLEXITY globals (each treated as
# empty when unset) and forwards them to rl_inject_context.
rl_compose_prompt_section() {
  rl_inject_context \
    "${INTELLIGENCE_LANGUAGE:-}" \
    "${INTELLIGENCE_ISSUE_TYPE:-}" \
    "${INTELLIGENCE_COMPLEXITY:-}"
}
|