shipwright-cli 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +22 -8
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +8 -6
- package/dashboard/public/styles.css +176 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +117 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +12 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/metrics.ts +69 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +14 -2
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +25 -4
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +119 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +180 -5
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +101 -3
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +104 -1138
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -711
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +161 -2901
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -8
- package/scripts/sw-adaptive.sh +8 -7
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +15 -6
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +45 -20
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +107 -5
- package/scripts/sw-daemon.sh +71 -11
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +71 -20
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +378 -5
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +12 -7
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +13 -4
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +9 -4
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +18 -4
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +10 -3
- package/scripts/sw-incident.sh +273 -5
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +44 -7
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +436 -1076
- package/scripts/sw-memory.sh +357 -3
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +483 -27
- package/scripts/sw-otel.sh +15 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +7 -1
- package/scripts/sw-pipeline-vitals.sh +12 -6
- package/scripts/sw-pipeline.sh +54 -2653
- package/scripts/sw-pm.sh +16 -8
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +17 -5
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +17 -4
- package/scripts/sw-quality.sh +14 -6
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +14 -5
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +173 -6
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +14 -6
- package/scripts/sw-stream.sh +13 -4
- package/scripts/sw-swarm.sh +20 -7
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +7 -31
- package/scripts/sw-testgen.sh +17 -6
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +37 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +3 -2
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +5 -2
- package/scripts/sw-widgets.sh +9 -4
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Module guard - prevent double-sourcing
# The sentinel is set on first load; any later `source` of this file returns
# immediately so nothing below is evaluated twice.
if [[ -n "${_REWARD_AGGREGATOR_LOADED:-}" ]]; then
  return 0
fi
_REWARD_AGGREGATOR_LOADED=1
|
|
5
|
+
|
|
6
|
+
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
|
7
|
+
# ║ shipwright reward-aggregator — Composite Reward from All Data Signals ║
|
|
8
|
+
# ║ Reads process-rewards, costs, stage-effectiveness, recovery-log, ║
|
|
9
|
+
# ║ quality-scores, memory-outcomes → weighted composite (0.0-1.0) ║
|
|
10
|
+
# ║ Stores history in ~/.shipwright/rewards.jsonl for RL feedback ║
|
|
11
|
+
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
|
12
|
+
|
|
13
|
+
# shellcheck disable=SC2034
VERSION="3.3.0"

# ─── Output Helpers ──────────────────────────────────────────────────────────
# Fallback definitions only: each helper is created when no function of that
# name exists yet, so a sourcing script that already provides its own versions
# keeps them. info/success/warn print to stdout, error prints to stderr.
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
[[ "$(type -t warn 2>/dev/null)" == "function" ]] || warn() { echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"; }
[[ "$(type -t error 2>/dev/null)" == "function" ]] || error() { echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2; }

# Time helpers are guarded one by one, like the helpers above, so that an
# environment providing only now_iso still ends up with now_epoch defined.
# now_iso   → current UTC time as YYYY-MM-DDTHH:MM:SSZ
# now_epoch → current time as seconds since the Unix epoch
[[ "$(type -t now_iso 2>/dev/null)" == "function" ]] || now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
[[ "$(type -t now_epoch 2>/dev/null)" == "function" ]] || now_epoch() { date +%s; }
|
|
25
|
+
|
|
26
|
+
# ─── Configuration ───────────────────────────────────────────────────────────
# Every setting keeps a value that is already set (and non-empty) in the
# environment; otherwise the default shown here is assigned.

# Reward history written by this module
: "${REWARDS_FILE:=${HOME}/.shipwright/rewards.jsonl}"

# Input signal files. The .claude/... paths are relative, so they resolve
# against the current working directory of the calling process.
: "${PROCESS_REWARDS_FILE:=.claude/pipeline-artifacts/process-rewards.jsonl}"
: "${COSTS_FILE:=${HOME}/.shipwright/costs.json}"
: "${STAGE_EFFECTIVENESS_FILE:=.claude/pipeline-artifacts/stage-effectiveness.jsonl}"
: "${RECOVERY_LOG_FILE:=.claude/pipeline-artifacts/recovery-log.jsonl}"
: "${QUALITY_SCORES_FILE:=.claude/pipeline-artifacts/quality-scores.jsonl}"
: "${MEMORY_OUTCOMES_FILE:=.claude/pipeline-artifacts/memory-outcomes.jsonl}"

# Weights for composite reward (must sum to 1.0)
# NOTE: the sum is not validated anywhere in this section; overriding a single
# weight without rebalancing the others changes the scale of the composite.
: "${REWARD_WEIGHT_TEST:=0.30}"
: "${REWARD_WEIGHT_ITERATIONS:=0.20}"
: "${REWARD_WEIGHT_COST:=0.15}"
: "${REWARD_WEIGHT_QUALITY:=0.15}"
: "${REWARD_WEIGHT_CONVERGENCE:=0.10}"
: "${REWARD_WEIGHT_MEMORY:=0.10}"

# Trend analysis: baseline window in days, and how many of the newest records
# count as "recent"
: "${REWARD_BASELINE_DAYS:=30}"
: "${REWARD_RECENT_COUNT:=5}"
|
|
46
|
+
|
|
47
|
+
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
# Clamp a float to [0.0, 1.0]
# Args: [value] — treated as 0 when omitted or empty
# Output: the bounded value with four decimals (no trailing newline)
_reward_clamp() {
  local raw="${1:-0}"
  awk -v v="$raw" '
    BEGIN {
      if (v < 0) {
        v = 0
      } else if (v > 1) {
        v = 1
      }
      printf "%.4f", v
    }'
}
|
|
54
|
+
|
|
55
|
+
# Safe float division with zero guard
# Args: [numerator] [denominator] — default to 0 and 1 when omitted or empty
# Output: numerator / denominator with four decimals (no trailing newline)
# A denominator that is zero, or that has no numeric value at all, is replaced
# by 1 so the function always prints a number.
_reward_div() {
  local num="${1:-0}" den="${2:-1}"
  awk -v n="$num" -v d="$den" '
    BEGIN {
      # Force numeric evaluation: for a non-numeric string a plain `d == 0`
      # is a string comparison that never matches, and the division below
      # would then abort awk with a division-by-zero error.
      if ((d + 0) == 0) d = 1
      printf "%.4f", n / d
    }'
}
|
|
60
|
+
|
|
61
|
+
# Ensure rewards directory exists
# Creates the parent directory of $REWARDS_FILE (including missing ancestors)
# when it is not already a directory.
_reward_ensure_dir() {
  local parent
  parent=$(dirname "$REWARDS_FILE")
  if [[ ! -d "$parent" ]]; then
    mkdir -p "$parent"
  fi
}
|
|
67
|
+
|
|
68
|
+
# ─── Signal Extractors ──────────────────────────────────────────────────────
|
|
69
|
+
|
|
70
|
+
# Extract test outcome score from process-rewards.jsonl
# Args: [pipeline_id]
# Output: score clamped to [0.0, 1.0] — 1.0 if tests passed, 0.0 if failed;
#         a bare "0.5" when the file is missing or no record carries a signal
# Reads .scores.test_outcome first and falls back to .test_passed.
_reward_extract_test_outcome() {
  local pipeline_id="${1:-}"

  if [[ ! -f "$PROCESS_REWARDS_FILE" ]]; then
    echo "0.5"
    return 0
  fi

  # jq's `//` operator treats `false` exactly like `null`, so a literal
  # `test_passed: false` would be skipped as "no data" and a failed run could
  # never score 0. Booleans are therefore mapped to 1/0 before `//` sees them;
  # numeric values pass through unchanged.
  local jq_defs='def as_score: if . == true then 1 elif . == false then 0 else . end;'
  local jq_pick='(.scores.test_outcome | as_score) // (.test_passed | as_score) // empty'

  # Get the last entry for this pipeline (or last overall)
  local result=""
  if [[ -n "$pipeline_id" ]]; then
    result=$(jq -r --arg pid "$pipeline_id" \
      "${jq_defs} select(.pipeline_id == \$pid) | ${jq_pick}" \
      "$PROCESS_REWARDS_FILE" 2>/dev/null | tail -1)
  fi
  if [[ -z "$result" ]]; then
    result=$(jq -r "${jq_defs} ${jq_pick}" \
      "$PROCESS_REWARDS_FILE" 2>/dev/null | tail -1)
  fi

  if [[ -z "$result" ]]; then
    echo "0.5"
  else
    _reward_clamp "$result"
  fi
}
|
|
98
|
+
|
|
99
|
+
# Extract iteration efficiency from process-rewards.jsonl
# Args: [pipeline_id]
# Output: 1.0 - (iteration / max_iterations), clamped to [0.0, 1.0]; a bare
#         "0.5" when the process-rewards file does not exist
_reward_extract_iteration_efficiency() {
  local pipeline_id="${1:-}"
  local used="" limit=""

  [[ -f "$PROCESS_REWARDS_FILE" ]] || { echo "0.5"; return 0; }

  # First choice: the newest record that belongs to the requested pipeline
  if [[ -n "$pipeline_id" ]]; then
    used=$(jq -r --arg pid "$pipeline_id" \
      'select(.pipeline_id == $pid) | .iteration // empty' \
      "$PROCESS_REWARDS_FILE" 2>/dev/null | tail -1)
    limit=$(jq -r --arg pid "$pipeline_id" \
      'select(.pipeline_id == $pid) | .max_iterations // empty' \
      "$PROCESS_REWARDS_FILE" 2>/dev/null | tail -1)
  fi

  # Each field falls back independently to the newest record of any pipeline
  if [[ -z "$used" ]]; then
    used=$(jq -r '.iteration // empty' \
      "$PROCESS_REWARDS_FILE" 2>/dev/null | tail -1)
  fi
  if [[ -z "$limit" ]]; then
    limit=$(jq -r '.max_iterations // empty' \
      "$PROCESS_REWARDS_FILE" 2>/dev/null | tail -1)
  fi

  # Nothing recorded anywhere: assume 5 of 10 iterations
  used="${used:-5}"
  limit="${limit:-10}"

  local fraction remaining
  fraction=$(_reward_div "$used" "$limit")
  remaining=$(awk -v s="$fraction" 'BEGIN { printf "%.4f", 1.0 - s }')
  _reward_clamp "$remaining"
}
|
|
135
|
+
|
|
136
|
+
# Extract cost efficiency from costs.json
# Output: 1.0 - (total_cost / budget), clamped to [0.0, 1.0], so spending
#         less of the budget scores higher. A bare "0.5" when the costs file
#         is missing. With no usable budget: "0.8" when the cost compares
#         below 5, otherwise "0.5".
# Reads .total_cost (or .cost) and .budget (or .daily_budget) from $COSTS_FILE.
_reward_extract_cost_efficiency() {
  if [[ ! -f "$COSTS_FILE" ]]; then
    echo "0.5"
    return 0
  fi

  local total_cost budget
  total_cost=$(jq -r '.total_cost // .cost // 0' "$COSTS_FILE" 2>/dev/null || echo "0")
  budget=$(jq -r '.budget // .daily_budget // 0' "$COSTS_FILE" 2>/dev/null || echo "0")

  # Decide "no budget" numerically. A string comparison against "0" lets
  # values such as 0.0 or "0.00" through, after which the cost would be
  # scored against a budget of 1; empty or non-numeric budgets also evaluate
  # to 0 here and are treated as "not set".
  if awk -v b="$budget" 'BEGIN { exit ((b + 0) == 0) ? 0 : 1 }'; then
    # No budget set — if cost is low, that's good
    if awk -v c="$total_cost" 'BEGIN { exit (c < 5) ? 0 : 1 }'; then
      echo "0.8"
    else
      echo "0.5"
    fi
    return 0
  fi

  local ratio
  ratio=$(_reward_div "$total_cost" "$budget")
  # Efficiency = 1.0 - cost_ratio (spending less of budget = better)
  local score
  score=$(awk -v r="$ratio" 'BEGIN { printf "%.4f", 1.0 - r }')
  _reward_clamp "$score"
}
|
|
165
|
+
|
|
166
|
+
# Extract quality score from quality-scores.jsonl
# Args: [pipeline_id]
# Output: the newest .score (or .quality) value clamped to [0.0, 1.0]; a bare
#         "0.5" when the file is missing or no record carries either field
_reward_extract_quality_score() {
  local pipeline_id="${1:-}"
  local value=""

  [[ -f "$QUALITY_SCORES_FILE" ]] || { echo "0.5"; return 0; }

  # Prefer the newest record of the requested pipeline ...
  if [[ -n "$pipeline_id" ]]; then
    value=$(jq -r --arg pid "$pipeline_id" \
      'select(.pipeline_id == $pid) | .score // .quality // empty' \
      "$QUALITY_SCORES_FILE" 2>/dev/null | tail -1)
  fi
  # ... otherwise take the newest record of any pipeline
  if [[ -z "$value" ]]; then
    value=$(jq -r '.score // .quality // empty' \
      "$QUALITY_SCORES_FILE" 2>/dev/null | tail -1)
  fi

  if [[ -n "$value" ]]; then
    _reward_clamp "$value"
  else
    echo "0.5"
  fi
}
|
|
192
|
+
|
|
193
|
+
# Extract convergence speed from stage-effectiveness.jsonl
# Ratio of passed stages to total stages
# Output: ratio clamped to [0.0, 1.0]; a bare "0.5" when the file is missing
#         or has no lines
# "Total" is the file's line count; "passed" is the number of lines containing
# the literal text "passed" (double quotes included) — the JSON is not parsed.
_reward_extract_convergence_speed() {
  [[ -f "$STAGE_EFFECTIVENESS_FILE" ]] || { echo "0.5"; return 0; }

  local line_count pass_count
  line_count=$(wc -l < "$STAGE_EFFECTIVENESS_FILE" 2>/dev/null || echo "0")
  # Some wc implementations pad the count with spaces
  line_count="${line_count// /}"
  if [[ "$line_count" -eq 0 ]]; then
    echo "0.5"
    return 0
  fi

  # grep -c exits non-zero when nothing matches; `|| true` keeps its "0"
  pass_count=$(grep -c '"passed"' "$STAGE_EFFECTIVENESS_FILE" 2>/dev/null || true)
  pass_count="${pass_count:-0}"

  local ratio
  ratio=$(_reward_div "$pass_count" "$line_count")
  _reward_clamp "$ratio"
}
|
|
214
|
+
|
|
215
|
+
# Extract memory hit rate from memory-outcomes.jsonl
# Output: (records with .hit == true or .useful == true) / (line count),
#         clamped to [0.0, 1.0]; a bare "0.5" when the file is missing or has
#         no lines
_reward_extract_memory_hit_rate() {
  [[ -f "$MEMORY_OUTCOMES_FILE" ]] || { echo "0.5"; return 0; }

  local line_count hit_count
  line_count=$(wc -l < "$MEMORY_OUTCOMES_FILE" 2>/dev/null || echo "0")
  # Some wc implementations pad the count with spaces
  line_count="${line_count// /}"
  if [[ "$line_count" -eq 0 ]]; then
    echo "0.5"
    return 0
  fi

  # jq prints one "1" per qualifying record; wc -l turns that into a count
  hit_count=$(jq -r 'select(.hit == true or .useful == true) | "1"' \
    "$MEMORY_OUTCOMES_FILE" 2>/dev/null | wc -l || true)
  hit_count="${hit_count// /}"
  hit_count="${hit_count:-0}"

  local ratio
  ratio=$(_reward_div "$hit_count" "$line_count")
  _reward_clamp "$ratio"
}
|
|
237
|
+
|
|
238
|
+
# ─── Core Functions ──────────────────────────────────────────────────────────
|
|
239
|
+
|
|
240
|
+
# Aggregate all signals into a composite reward for a pipeline run
# Args: [pipeline_id] [language] [complexity]
#       pipeline_id defaults to the current epoch seconds, language to
#       "unknown", complexity to "medium"
# Output: JSON reward record to stdout, appends to rewards.jsonl
# Returns: non-zero (nothing written or printed) when the record cannot be
#          built, e.g. because a component score is not a valid JSON number
reward_aggregate_pipeline() {
  local pipeline_id="${1:-$(date +%s)}"
  local language="${2:-unknown}"
  local complexity="${3:-medium}"

  _reward_ensure_dir

  # Extract all component scores
  local test_score iteration_score cost_score quality_score convergence_score memory_score
  test_score=$(_reward_extract_test_outcome "$pipeline_id")
  iteration_score=$(_reward_extract_iteration_efficiency "$pipeline_id")
  cost_score=$(_reward_extract_cost_efficiency)
  quality_score=$(_reward_extract_quality_score "$pipeline_id")
  convergence_score=$(_reward_extract_convergence_speed)
  memory_score=$(_reward_extract_memory_hit_rate)

  # Compute weighted composite
  local composite
  composite=$(awk \
    -v t="$test_score" -v wt="$REWARD_WEIGHT_TEST" \
    -v i="$iteration_score" -v wi="$REWARD_WEIGHT_ITERATIONS" \
    -v c="$cost_score" -v wc="$REWARD_WEIGHT_COST" \
    -v q="$quality_score" -v wq="$REWARD_WEIGHT_QUALITY" \
    -v s="$convergence_score" -v ws="$REWARD_WEIGHT_CONVERGENCE" \
    -v m="$memory_score" -v wm="$REWARD_WEIGHT_MEMORY" \
    'BEGIN { printf "%.4f", t*wt + i*wi + c*wc + q*wq + s*ws + m*wm }')
  composite=$(_reward_clamp "$composite")

  # Build JSON record. --argjson requires every score to be valid JSON, so an
  # empty or malformed component makes jq fail here.
  local timestamp record
  timestamp=$(now_iso)
  record=$(jq -c -n \
    --arg ts "$timestamp" \
    --arg pid "$pipeline_id" \
    --argjson reward "$composite" \
    --argjson test "$test_score" \
    --argjson iter "$iteration_score" \
    --argjson cost "$cost_score" \
    --argjson qual "$quality_score" \
    --argjson conv "$convergence_score" \
    --argjson mem "$memory_score" \
    --arg lang "$language" \
    --arg comp "$complexity" \
    '{
      timestamp: $ts,
      epoch: (now | floor),
      pipeline_id: $pid,
      reward: $reward,
      components: {
        test_outcome: $test,
        iteration_efficiency: $iter,
        cost_efficiency: $cost,
        quality_score: $qual,
        convergence_speed: $conv,
        memory_hit_rate: $mem
      },
      context: {
        language: $lang,
        complexity: $comp
      }
    }' 2>/dev/null) || record=""

  # Never append a blank line: readers of rewards.jsonl expect one JSON
  # object per line.
  if [[ -z "$record" ]]; then
    error "reward-aggregator: failed to build reward record for pipeline ${pipeline_id}"
    return 1
  fi

  # Append the record directly. Staging it in a temp file first added no
  # atomicity and relied on a predictable /tmp name when mktemp was missing.
  printf '%s\n' "$record" >> "$REWARDS_FILE"

  # Emit event (only when the sourcing script provides emit_event)
  if type emit_event >/dev/null 2>&1; then
    emit_event "reward_aggregated" \
      "pipeline_id=$pipeline_id" \
      "reward=$composite" \
      "test=$test_score" \
      "cost=$cost_score"
  fi

  echo "$record"
}
|
|
324
|
+
|
|
325
|
+
# Return last N rewards with context
# Args: [count] — number of newest records to return (default 10)
# Output: JSON array; "[]" when the history file is missing, has no lines, or
#         cannot be parsed by jq
reward_get_history() {
  local limit="${1:-10}"

  [[ -f "$REWARDS_FILE" ]] || { echo "[]"; return 0; }

  local line_count
  line_count=$(wc -l < "$REWARDS_FILE" 2>/dev/null || echo "0")
  # Some wc implementations pad the count with spaces
  line_count="${line_count// /}"
  if [[ "$line_count" -eq 0 ]]; then
    echo "[]"
    return 0
  fi

  # Slurp the newest records into a single array
  tail -n "$limit" "$REWARDS_FILE" | jq -s '.' 2>/dev/null || echo "[]"
}
|
|
347
|
+
|
|
348
|
+
# Compute rolling baseline reward (average over N days)
# Args: [days] — window length, defaults to $REWARD_BASELINE_DAYS
# Output: float (0.0-1.0); "0.5000" when the history file is missing or no
#         record falls inside the window
reward_compute_baseline() {
  local window_days="${1:-$REWARD_BASELINE_DAYS}"

  [[ -f "$REWARDS_FILE" ]] || { echo "0.5000"; return 0; }

  # Oldest epoch second that still counts as inside the window
  local now_s oldest_allowed
  now_s=$(date +%s)
  oldest_allowed=$(awk -v now="$now_s" -v d="$window_days" \
    'BEGIN { printf "%d", now - (d * 86400) }')

  # jq emits "<epoch> <reward>" per record; awk averages the rewards whose
  # epoch is inside the window. Records without an epoch get 9999999999 and
  # therefore always count; records without a reward count as 0.
  local mean
  mean=$(jq -r '"\(.epoch // 9999999999) \(.reward // 0)"' "$REWARDS_FILE" 2>/dev/null | \
    awk -v cutoff="$oldest_allowed" '
      BEGIN { total = 0; n = 0 }
      $1 >= cutoff { total += $2; n++ }
      END {
        if (n == 0) { printf "0.5000"; exit }
        printf "%.4f", total / n
      }')

  case "${mean:-}" in
    "" | "0.5000")
      echo "0.5000"
      return 0
      ;;
  esac
  _reward_clamp "$mean"
}
|
|
381
|
+
|
|
382
|
+
# Compare recent reward average vs baseline
# Args: [recent_count] [baseline_days] — default to $REWARD_RECENT_COUNT and
#       $REWARD_BASELINE_DAYS
# Output: JSON with improving (bool), delta, recent_avg, baseline
# "improving" is true only when recent_avg - baseline is strictly positive.
reward_is_improving() {
  local recent_count="${1:-$REWARD_RECENT_COUNT}"
  local baseline_days="${2:-$REWARD_BASELINE_DAYS}"

  # Shape of the emitted object, shared by both exits below
  local shape='{ improving: $improving, delta: $delta, recent_avg: $recent_avg, baseline: $baseline }'

  local baseline
  baseline=$(reward_compute_baseline "$baseline_days")

  # No history yet: report a neutral, non-improving result
  if [[ ! -f "$REWARDS_FILE" ]]; then
    jq -n \
      --argjson improving false \
      --argjson delta 0 \
      --argjson recent_avg 0.5 \
      --argjson baseline "$baseline" \
      "$shape"
    return 0
  fi

  # Mean reward of the newest records
  local recent_avg
  recent_avg=$(tail -n "$recent_count" "$REWARDS_FILE" 2>/dev/null | \
    jq -r '.reward' 2>/dev/null | \
    awk '
      BEGIN { total = 0; n = 0 }
      { total += $1; n++ }
      END {
        if (n == 0) { printf "0.5000"; exit }
        printf "%.4f", total / n
      }')
  recent_avg="${recent_avg:-0.5}"

  local delta improving
  delta=$(awk -v r="$recent_avg" -v b="$baseline" 'BEGIN { printf "%.4f", r - b }')
  improving=$(awk -v d="$delta" 'BEGIN { print (d > 0) ? "true" : "false" }')

  jq -n \
    --argjson improving "$improving" \
    --argjson delta "$delta" \
    --argjson recent_avg "$recent_avg" \
    --argjson baseline "$baseline" \
    "$shape"
}
|
|
425
|
+
|
|
426
|
+
# Format reward trend as markdown for agent prompt injection
# Args: [recent_count] [baseline_days] — both optional and passed through to
#       reward_is_improving; default to $REWARD_RECENT_COUNT (5) and
#       $REWARD_BASELINE_DAYS (30)
# Output: markdown string — one line with the recent average and, when the
#         baseline is non-zero, the percentage change against it
reward_inject_feedback() {
  local recent_count="${1:-${REWARD_RECENT_COUNT:-5}}"
  local baseline_days="${2:-${REWARD_BASELINE_DAYS:-30}}"

  local result
  result=$(reward_is_improving "$recent_count" "$baseline_days")

  local recent_avg baseline delta improving
  recent_avg=$(echo "$result" | jq -r '.recent_avg')
  baseline=$(echo "$result" | jq -r '.baseline')
  delta=$(echo "$result" | jq -r '.delta')
  improving=$(echo "$result" | jq -r '.improving')

  # A zero baseline cannot be used as the denominator of a percentage
  local pct_change arrow
  if awk -v b="$baseline" 'BEGIN { exit (b == 0) ? 0 : 1 }'; then
    pct_change="N/A"
  else
    pct_change=$(awk -v d="$delta" -v b="$baseline" 'BEGIN { printf "%.0f", (d / b) * 100 }')
  fi

  if [[ "$improving" == "true" ]]; then
    arrow="↑"
  else
    arrow="↓"
    # Make pct_change absolute for display (the arrow carries the direction)
    pct_change=$(echo "$pct_change" | tr -d '-')
  fi

  if [[ "$pct_change" == "N/A" ]]; then
    echo "Your pipeline performance: ${recent_avg} (no baseline yet)"
  else
    # Report the window that was actually used rather than a fixed "30-day"
    echo "Your pipeline performance: ${recent_avg} (${arrow}${pct_change}% vs ${baseline_days}-day baseline of ${baseline})"
  fi
}
|