shipwright-cli 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +22 -8
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +8 -6
- package/dashboard/public/styles.css +176 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +117 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +12 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/metrics.ts +69 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +14 -2
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +25 -4
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +119 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +180 -5
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +101 -3
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +104 -1138
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -711
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +161 -2901
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -8
- package/scripts/sw-adaptive.sh +8 -7
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +15 -6
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +45 -20
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +107 -5
- package/scripts/sw-daemon.sh +71 -11
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +71 -20
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +378 -5
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +12 -7
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +13 -4
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +9 -4
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +18 -4
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +10 -3
- package/scripts/sw-incident.sh +273 -5
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +44 -7
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +436 -1076
- package/scripts/sw-memory.sh +357 -3
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +483 -27
- package/scripts/sw-otel.sh +15 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +7 -1
- package/scripts/sw-pipeline-vitals.sh +12 -6
- package/scripts/sw-pipeline.sh +54 -2653
- package/scripts/sw-pm.sh +16 -8
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +17 -5
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +17 -4
- package/scripts/sw-quality.sh +14 -6
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +14 -5
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +173 -6
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +14 -6
- package/scripts/sw-stream.sh +13 -4
- package/scripts/sw-swarm.sh +20 -7
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +7 -31
- package/scripts/sw-testgen.sh +17 -6
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +37 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +3 -2
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +5 -2
- package/scripts/sw-widgets.sh +9 -4
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Sourcing guard: once this module has been loaded, sourcing it again is a
# no-op (`return` is only valid because the file is meant to be sourced).
if [[ -n "${_PROCESS_REWARD_LOADED:-}" ]]; then
    return 0
fi
_PROCESS_REWARD_LOADED=1

# ╔═══════════════════════════════════════════════════════════════════════════╗
# ║  shipwright process-reward — Per-Step Iteration Scoring (Phase 3)         ║
# ║  Score each loop iteration on 5 dimensions for dense learning signals     ║
# ║  Weights: test_progress 30%, code_quality 25%, convergence 20%,           ║
# ║           architecture 15%, security 10%                                  ║
# ╚═══════════════════════════════════════════════════════════════════════════╝

# Module version string. Not read inside this file, hence the SC2034 waiver.
# shellcheck disable=SC2034
VERSION="3.3.0"
|
|
15
|
+
|
|
16
|
+
# ─── Output Helpers ──────────────────────────────────────────────────────────
|
|
17
|
+
# Fallback helpers. Each one is defined only when the sourcing script has not
# already provided a shell function of the same name.
#   info / success / warn  - coloured marker + message on stdout
#   error                  - coloured marker + message on stderr
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
[[ "$(type -t warn 2>/dev/null)" == "function" ]] || warn() { echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"; }
[[ "$(type -t error 2>/dev/null)" == "function" ]] || error() { echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2; }

# Time helpers: UTC ISO-8601 timestamp and seconds since the epoch.
# They are guarded independently so that a caller which supplies only one of
# them still ends up with both defined.
[[ "$(type -t now_iso 2>/dev/null)" == "function" ]] || now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
[[ "$(type -t now_epoch 2>/dev/null)" == "function" ]] || now_epoch() { date +%s; }

# Event emission degrades to a no-op when no emit_event function is loaded.
[[ "$(type -t emit_event 2>/dev/null)" == "function" ]] || emit_event() { true; }
|
|
26
|
+
|
|
27
|
+
# ─── Configuration ───────────────────────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
# JSONL file that receives one reward record per iteration. A value already
# present in the environment wins over the default.
: "${PROCESS_REWARD_FILE:=.claude/pipeline-artifacts/process-rewards.jsonl}"

# Dimension weights (must sum to 100). Each one can be overridden from the
# environment; unset or empty values fall back to the defaults below.
: "${REWARD_WEIGHT_TEST:=30}"
: "${REWARD_WEIGHT_CODE:=25}"
: "${REWARD_WEIGHT_CONVERGENCE:=20}"
: "${REWARD_WEIGHT_ARCH:=15}"
: "${REWARD_WEIGHT_SECURITY:=10}"
|
|
37
|
+
|
|
38
|
+
# ─── Dimension Scorers ──────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
# Score test progress (0-100).
# Arguments:
#   $1 - test_passed: "true", "false", or "" when no test command ran
#   $2 - test_output: raw test runner output (only inspected when tests failed)
#   $3 - prev_passed: result of the previous iteration ("true"/"false"/"")
# Outputs: one integer on stdout
#   50        no result, or a value other than "true"/"false"
#   90 / 100  passing / passing after a previous failure
#   20 / 40   failing / failing but the output mentions passing tests
#   +5        on a failing score when the previous iteration also failed
_reward_score_test_progress() {
    local test_passed="${1:-}"
    local test_output="${2:-}"
    local prev_passed="${3:-}"
    local score=50 # neutral default
    local pass_count=0

    # "pass"/"passed"/"passes"/"passing"/"ok" must stand alone as words, so
    # that text such as "broken pipe" or "invalid token" is not mistaken for a
    # passing test. The check mark needs no delimiters.
    local pass_re='(^|[^[:alnum:]])(pass(ed|es|ing)?|ok)([^[:alnum:]]|$)|✓'

    case "$test_passed" in
        true)
            score=90
            # Bonus for turning a failing suite green.
            if [[ "$prev_passed" == "false" ]]; then
                score=100
            fi
            ;;
        false)
            score=20
            if [[ -n "$test_output" ]]; then
                # grep -c prints the count even when it is 0 (exit status 1).
                pass_count=$(printf '%s\n' "$test_output" | grep -ciE "$pass_re" || true)
                pass_count="${pass_count:-0}"
            fi
            # Some tests passing — partial credit.
            if [[ "$pass_count" -gt 0 ]]; then
                score=40
            fi
            # Previously also failing — at least not regressing.
            if [[ "$prev_passed" == "false" ]]; then
                score=$(( score + 5 ))
            fi
            ;;
    esac

    echo "$score"
}
|
|
80
|
+
|
|
81
|
+
# Score code quality (0-100) from the diff between HEAD~1 and the working tree.
# Arguments:
#   $1 - project_root: directory passed to `git -C` (default ".")
# Outputs: one integer on stdout. Starts at 70 and is adjusted by:
#   -15 / -5  more than 500 / 200 added lines
#   +10       more deleted than added lines (with at least one addition)
#   -10 / -5  more than 3 / at least 1 added TODO/FIXME/HACK/XXX marker
#   -10       any added debug statement (console.log, debugger, print(, echo "DEBUG)
# When git fails or the diff is empty the default 70 is returned unchanged.
_reward_score_code_quality() {
    local project_root="${1:-.}"
    local score=70 # default: decent

    local diff_text
    diff_text=$(git -C "$project_root" diff HEAD~1 --unified=0 2>/dev/null || true)

    if [[ -z "$diff_text" ]]; then
        echo "$score"
        return 0
    fi

    # Added / removed content lines. The second character must differ from the
    # first so that the "+++ b/..." and "--- a/..." file headers are skipped.
    local additions deletions
    additions=$(grep -c '^+[^+]' <<<"$diff_text" || true)
    additions="${additions:-0}"
    deletions=$(grep -c '^-[^-]' <<<"$diff_text" || true)
    deletions="${deletions:-0}"

    # Penalize very large diffs (>500 lines added = likely unfocused).
    if [[ "$additions" -gt 500 ]]; then
        score=$(( score - 15 ))
    elif [[ "$additions" -gt 200 ]]; then
        score=$(( score - 5 ))
    fi

    # Reward cleanup (more deletions than additions).
    if [[ "$deletions" -gt "$additions" ]] && [[ "$additions" -gt 0 ]]; then
        score=$(( score + 10 ))
    fi

    # Marker and debug checks look at added content lines only, so a changed
    # file that is merely *named* e.g. TODO.md does not count.
    local added_lines
    added_lines=$(grep '^+[^+]' <<<"$diff_text" || true)

    # Markers must be whole words: "XXXXXX" in a mktemp template or
    # "HACKATHON" is not a marker. Every added line starts with "+", which
    # serves as the leading delimiter for a marker in column one.
    local hack_count
    hack_count=$(grep -cE '[^[:alnum:]_](TODO|FIXME|HACK|XXX)([^[:alnum:]_]|$)' <<<"$added_lines" || true)
    hack_count="${hack_count:-0}"
    if [[ "$hack_count" -gt 3 ]]; then
        score=$(( score - 10 ))
    elif [[ "$hack_count" -gt 0 ]]; then
        score=$(( score - 5 ))
    fi

    # Debug statements left in. "print(" and "debugger" need a non-word
    # character in front, so "fingerprint(" is not flagged.
    local debug_count
    debug_count=$(grep -cE 'console\.log|[^[:alnum:]_]debugger([^[:alnum:]_]|$)|[^[:alnum:]_]print\(|echo "DEBUG' <<<"$added_lines" || true)
    debug_count="${debug_count:-0}"
    if [[ "$debug_count" -gt 0 ]]; then
        score=$(( score - 10 ))
    fi

    # Clamp 0-100.
    if [[ "$score" -lt 0 ]]; then
        score=0
    elif [[ "$score" -gt 100 ]]; then
        score=100
    fi

    echo "$score"
}
|
|
139
|
+
|
|
140
|
+
# Score convergence (0-100): smaller diffs are treated as "finishing touches".
# Arguments:
#   $1 - iteration: loop iteration number; values <= 1 short-circuit to 60
#   $2 - project_root: directory passed to `git -C` (default ".")
#   $3 - reward_file: JSONL reward history (default $PROCESS_REWARD_FILE)
# Outputs: one integer on stdout
#   changed lines (HEAD~1 vs working tree):  <20→85  <50→70  <100→55  <300→40  else 25
#   +10 when this score and the previous record's convergence are both >= 70
# A failing git (no repository, no HEAD~1) counts as 0 changed lines.
_reward_score_convergence() {
    local iteration="${1:-1}"
    local project_root="${2:-.}"
    local reward_file="${3:-${PROCESS_REWARD_FILE:-}}"
    local score

    # First iteration — no history to compare.
    if [[ "$iteration" -le 1 ]]; then
        echo "60"
        return 0
    fi

    # Insertions + deletions, summed from --numstat. Binary files report "-"
    # in both columns and are skipped. awk always prints a number, so no
    # external calculator is needed.
    local current_diff_lines
    current_diff_lines=$(
        git -C "$project_root" diff HEAD~1 --numstat 2>/dev/null \
            | awk '$1 ~ /^[0-9]+$/ { n += $1 } $2 ~ /^[0-9]+$/ { n += $2 } END { print n + 0 }' \
            || true
    )
    [[ "$current_diff_lines" =~ ^[0-9]+$ ]] || current_diff_lines=0

    # Convergence score of the most recent record.
    # NOTE(review): process_reward_score_iteration emits the dimension scores
    # under a nested "scores" key; if callers store that JSON verbatim the
    # value lives at .scores.scores.convergence. Both layouts are accepted.
    local prev_convergence=""
    if [[ -n "$reward_file" && -f "$reward_file" ]]; then
        prev_convergence=$(
            tail -n 1 "$reward_file" 2>/dev/null \
                | jq -r '.scores.scores.convergence // .scores.convergence // 50' 2>/dev/null \
                || true
        )
    fi
    # Missing file, missing jq or a non-integer value → neutral 50.
    [[ "$prev_convergence" =~ ^[0-9]+$ ]] || prev_convergence=50

    # Smaller diffs = more convergent (likely finishing touches).
    if (( current_diff_lines < 20 )); then
        score=85
    elif (( current_diff_lines < 50 )); then
        score=70
    elif (( current_diff_lines < 100 )); then
        score=55
    elif (( current_diff_lines < 300 )); then
        score=40
    else
        score=25
    fi

    # Bonus for a sustained convergence trend.
    if (( prev_convergence >= 70 && score >= 70 )); then
        score=$(( score + 10 ))
    fi

    # Clamp to the 0-100 range (only the upper bound is reachable).
    if (( score > 100 )); then
        score=100
    fi

    echo "$score"
}
|
|
186
|
+
|
|
187
|
+
# Score architecture adherence (0-100) from the files changed since HEAD~1.
# Arguments:
#   $1 - project_root: directory passed to `git -C` (default ".")
# Outputs: one integer on stdout. Starts at 80 and is adjusted by:
#   +10       any changed path contains "test", "spec" or "_test."
#   -15 / -5  changes spread over more than 10 / 5 distinct directories
#   +5        ~/.shipwright/memory/<hash>/architecture.json exists
# When git fails or nothing changed the default 80 is returned unchanged.
_reward_score_architecture() {
    local project_root="${1:-.}"
    local score=80 # default: good

    # Files changed between HEAD~1 and the working tree.
    local changed_files
    changed_files=$(git -C "$project_root" diff --name-only HEAD~1 2>/dev/null || true)

    if [[ -z "$changed_files" ]]; then
        echo "$score"
        return 0
    fi

    # Test files touched alongside source (good practice).
    if grep -qE '(test|spec|_test\.)' <<<"$changed_files"; then
        score=$(( score + 10 ))
    fi

    # Penalize changes to too many directories (unfocused).
    # Paths without a slash live in the repository root and all map to ".";
    # otherwise every root-level file would count as its own directory.
    local dir_count
    dir_count=$(
        printf '%s\n' "$changed_files" \
            | sed -e '/\//!s/.*/./' -e 's|/[^/]*$||' \
            | sort -u | wc -l | tr -d ' '
    )
    dir_count="${dir_count:-0}"
    if [[ "$dir_count" -gt 10 ]]; then
        score=$(( score - 15 ))
    elif [[ "$dir_count" -gt 5 ]]; then
        score=$(( score - 5 ))
    fi

    # Per-repository memory directory: first 12 hex digits of SHA-256 over the
    # project_root string. sha256sum yields the same digest where shasum is
    # not installed; "unknown" is used only when neither tool exists.
    local repo_hash=""
    if command -v shasum >/dev/null 2>&1; then
        repo_hash=$(printf '%s' "$project_root" | shasum -a 256 2>/dev/null | cut -c1-12)
    elif command -v sha256sum >/dev/null 2>&1; then
        repo_hash=$(printf '%s' "$project_root" | sha256sum 2>/dev/null | cut -c1-12)
    fi
    repo_hash="${repo_hash:-unknown}"

    # Having an architecture rules file is rewarded. The rules themselves are
    # not evaluated against the changed files here.
    local arch_file="${HOME}/.shipwright/memory/${repo_hash}/architecture.json"
    if [[ -f "$arch_file" ]]; then
        score=$(( score + 5 ))
    fi

    # Clamp 0-100.
    if [[ "$score" -lt 0 ]]; then
        score=0
    elif [[ "$score" -gt 100 ]]; then
        score=100
    fi

    echo "$score"
}
|
|
237
|
+
|
|
238
|
+
# Score security (0-100) by pattern-matching lines added since HEAD~1.
# Arguments:
#   $1 - project_root: directory passed to `git -C` (default ".")
# Outputs: one integer on stdout. Starts at 90 and is reduced by:
#   -30  quoted literal assigned to password / api_key / secret / token
#   -15  eval or exec used as a command word
#   -20  string concatenation inside query(...) / execute(...) / sql text
#   -20  variable passed straight to system( / exec( or inside backticks
# These are grep heuristics, not a real scanner. When git fails or nothing
# was added the default 90 is returned unchanged.
_reward_score_security() {
    local project_root="${1:-.}"
    local score=90 # default: no issues

    local diff_text
    diff_text=$(git -C "$project_root" diff HEAD~1 2>/dev/null || true)

    if [[ -z "$diff_text" ]]; then
        echo "$score"
        return 0
    fi

    # Only added content lines are checked ("+++ b/..." headers are skipped).
    # The leading "+" is removed so that patterns anchored at the start of a
    # line can match code written in column one.
    local added_lines
    added_lines=$(printf '%s\n' "$diff_text" | grep '^+[^+]' | sed 's/^+//' || true)

    if [[ -z "$added_lines" ]]; then
        echo "$score"
        return 0
    fi

    # Hardcoded secrets. The quote classes are spelled out literally: an
    # escape such as \x27 is not understood inside a bracket expression, which
    # would leave single-quoted values undetected.
    local quote="[\"']"
    local not_quote="[^\"']"
    local sp='[[:space:]]*'
    local secret_re="(password${sp}=${sp}${quote}${not_quote}+|api_key${sp}=${sp}${quote}|secret${sp}=${sp}${quote}${not_quote}+|token${sp}=${sp}${quote}[A-Za-z0-9])"
    local secret_count
    secret_count=$(grep -ciE "$secret_re" <<<"$added_lines" || true)
    secret_count="${secret_count:-0}"
    if [[ "$secret_count" -gt 0 ]]; then
        score=$(( score - 30 ))
    fi

    # eval/exec usage.
    local eval_count
    eval_count=$(grep -cE '(^|[[:space:]])(eval|exec)[[:space:]]' <<<"$added_lines" || true)
    eval_count="${eval_count:-0}"
    if [[ "$eval_count" -gt 0 ]]; then
        score=$(( score - 15 ))
    fi

    # SQL injection patterns (string concatenation in queries).
    local sql_count
    sql_count=$(grep -ciE '(query\(.*\+|execute\(.*\+|sql.*\+.*\$)' <<<"$added_lines" || true)
    sql_count="${sql_count:-0}"
    if [[ "$sql_count" -gt 0 ]]; then
        score=$(( score - 20 ))
    fi

    # Command injection patterns.
    local cmd_count
    cmd_count=$(grep -cE 'system\([[:space:]]*\$|`\$|exec\([[:space:]]*\$' <<<"$added_lines" || true)
    cmd_count="${cmd_count:-0}"
    if [[ "$cmd_count" -gt 0 ]]; then
        score=$(( score - 20 ))
    fi

    # Clamp 0-100.
    if [[ "$score" -lt 0 ]]; then
        score=0
    elif [[ "$score" -gt 100 ]]; then
        score=100
    fi

    echo "$score"
}
|
|
299
|
+
|
|
300
|
+
# ─── Core Functions ──────────────────────────────────────────────────────────
|
|
301
|
+
|
|
302
|
+
# Score a completed iteration on all 5 dimensions.
# Arguments:
#   $1 - iteration number (default 1)
#   $2 - test_passed: "true"/"false"/"" (forwarded to the test scorer)
#   $3 - test_output: raw test output
#   $4 - prev_test_passed: previous iteration's test result
#   $5 - project_root (default ".")
# Globals read: REWARD_WEIGHT_TEST/CODE/CONVERGENCE/ARCH/SECURITY,
#               PROCESS_REWARD_FILE
# Outputs (stdout, no trailing newline):
#   {"composite":N,"scores":{"test_progress":N,"code_quality":N,
#    "convergence":N,"architecture":N,"security":N}}
process_reward_score_iteration() {
    local iteration="${1:-1}"
    local test_passed="${2:-}"
    local test_output="${3:-}"
    local prev_test_passed="${4:-}"
    local project_root="${5:-.}"

    local test_score code_score conv_score arch_score sec_score

    test_score=$(_reward_score_test_progress "$test_passed" "$test_output" "$prev_test_passed")
    code_score=$(_reward_score_code_quality "$project_root")
    conv_score=$(_reward_score_convergence "$iteration" "$project_root" "${PROCESS_REWARD_FILE:-}")
    arch_score=$(_reward_score_architecture "$project_root")
    sec_score=$(_reward_score_security "$project_root")

    # A scorer that printed nothing (or something non-numeric) would break the
    # arithmetic below; treat it as a neutral 50 instead.
    local var
    for var in test_score code_score conv_score arch_score sec_score; do
        [[ "${!var}" =~ ^[0-9]+$ ]] || printf -v "$var" '%s' 50
    done

    # Weights come from the environment; anything that is not a non-negative
    # integer falls back to the documented default for that dimension.
    local w_test="${REWARD_WEIGHT_TEST:-30}"
    local w_code="${REWARD_WEIGHT_CODE:-25}"
    local w_conv="${REWARD_WEIGHT_CONVERGENCE:-20}"
    local w_arch="${REWARD_WEIGHT_ARCH:-15}"
    local w_sec="${REWARD_WEIGHT_SECURITY:-10}"
    [[ "$w_test" =~ ^[0-9]+$ ]] || w_test=30
    [[ "$w_code" =~ ^[0-9]+$ ]] || w_code=25
    [[ "$w_conv" =~ ^[0-9]+$ ]] || w_conv=20
    [[ "$w_arch" =~ ^[0-9]+$ ]] || w_arch=15
    [[ "$w_sec" =~ ^[0-9]+$ ]] || w_sec=10

    # Normalise by the actual weight sum, so overriding a single weight does
    # not inflate or deflate the composite. With the default weights the sum
    # is 100 and the result is identical to dividing by 100.
    local weight_sum=$(( w_test + w_code + w_conv + w_arch + w_sec ))
    if (( weight_sum <= 0 )); then
        w_test=30 w_code=25 w_conv=20 w_arch=15 w_sec=10
        weight_sum=100
    fi

    # Weighted composite (integer math, truncating division).
    local composite
    composite=$(( (test_score * w_test + code_score * w_code + conv_score * w_conv + arch_score * w_arch + sec_score * w_sec) / weight_sum ))

    # Clamp.
    if (( composite < 0 )); then
        composite=0
    elif (( composite > 100 )); then
        composite=100
    fi

    # Return as JSON.
    printf '{"composite":%d,"scores":{"test_progress":%d,"code_quality":%d,"convergence":%d,"architecture":%d,"security":%d}}' \
        "$composite" "$test_score" "$code_score" "$conv_score" "$arch_score" "$sec_score"
}
|
|
331
|
+
|
|
332
|
+
# Record iteration reward data to the JSONL file.
# Arguments:
#   $1 - iteration: must be valid JSON (a number) for jq --argjson
#   $2 - scores_json: JSON value stored under "scores" (default {})
#   $3 - action_taken: free text (default "unknown")
#   $4 - outcome: free text (default "unknown")
# Globals read: PROCESS_REWARD_FILE
# Calls: now_iso, warn, emit_event (provided by this module or its caller)
# Returns: 0 when the record was stored, 1 when the JSON could not be built
#          or the file could not be written.
process_reward_record() {
    local iteration="${1:-1}"
    local scores_json="${2:-"{}"}"
    local action_taken="${3:-unknown}"
    local outcome="${4:-unknown}"
    local reward_file="${PROCESS_REWARD_FILE:-.claude/pipeline-artifacts/process-rewards.jsonl}"

    # Ensure directory exists.
    local reward_dir
    reward_dir=$(dirname "$reward_file")
    mkdir -p "$reward_dir" 2>/dev/null || true

    local timestamp
    timestamp=$(now_iso)

    # Build record using jq for safe JSON construction. Invalid iteration or
    # scores JSON makes jq fail and leaves the record empty.
    local record
    record=$(jq -c -n \
        --arg ts "$timestamp" \
        --argjson iter "$iteration" \
        --argjson scores "$scores_json" \
        --arg action "$action_taken" \
        --arg outcome "$outcome" \
        '{timestamp: $ts, iteration: $iter, scores: $scores, action: $action, outcome: $outcome}' 2>/dev/null) || record=""

    if [[ -z "$record" ]]; then
        warn "process-reward: failed to build JSON record"
        return 1
    fi

    # Atomic write via temp file + mv. The temp file is created next to the
    # target so the final mv is a same-filesystem rename. It is also
    # independent of $TMPDIR being usable.
    local tmp_file
    if ! tmp_file=$(mktemp "${reward_dir}/.process-rewards.XXXXXX" 2>/dev/null); then
        warn "process-reward: cannot create temp file in ${reward_dir}"
        return 1
    fi

    if { [[ ! -f "$reward_file" ]] || cat "$reward_file" >"$tmp_file"; } \
        && printf '%s\n' "$record" >>"$tmp_file" \
        && mv -f "$tmp_file" "$reward_file"; then
        :
    else
        # Leave the previous history untouched and do not leak the temp file.
        rm -f "$tmp_file"
        warn "process-reward: failed to write ${reward_file}"
        return 1
    fi

    emit_event "process_reward.recorded" "iteration=$iteration" "composite=$(echo "$scores_json" | jq -r '.composite // 0' 2>/dev/null || echo 0)"
}
|
|
373
|
+
|
|
374
|
+
# Suggest next action based on reward trajectory (last 3 iterations).
# Arguments:
#   $1 - reward_file: JSONL reward history (default $PROCESS_REWARD_FILE)
# Outputs: exactly one line of advice on stdout; always returns 0.
# Decision order: failing tests (test_progress <= 30), low code quality
# (<= 40), diverging changes (convergence <= 30), composite dropping by 10 or
# more across the window, strong progress (composite >= 80), else moderate.
process_reward_suggest_action() {
    local reward_file="${1:-${PROCESS_REWARD_FILE:-}}"

    if [[ ! -f "$reward_file" ]]; then
        echo "No reward history yet — proceed with the goal."
        return 0
    fi

    local line_count
    line_count=$(wc -l < "$reward_file" 2>/dev/null | tr -d ' ')
    line_count="${line_count:-0}"

    if [[ "$line_count" -lt 2 ]]; then
        echo "Not enough history for suggestions — keep working on the goal."
        return 0
    fi

    # Last 3 records; the newest one is the last line.
    local recent latest
    recent=$(tail -n 3 "$reward_file")
    latest=$(tail -n 1 <<<"$recent")

    # One composite per record, oldest first.
    local composites
    composites=$(jq -r '.scores.composite // .composite // 0' <<<"$recent" 2>/dev/null || true)

    # Dimension scores of the newest record, read in a single jq call.
    # NOTE(review): process_reward_score_iteration nests the dimension scores
    # under its own "scores" key; if that JSON is stored verbatim they live at
    # .scores.scores.*. Both that layout and a flat .scores.* are accepted.
    local dims test_score code_score conv_score
    dims=$(jq -r '(.scores.scores // .scores // {})
        | [.test_progress // 50, .code_quality // 50, .convergence // 50]
        | @tsv' <<<"$latest" 2>/dev/null) || dims=""
    IFS=$'\t' read -r test_score code_score conv_score <<<"$dims" || true

    # Trend endpoints: oldest and newest composite in the window.
    local first_score last_score
    first_score=$(head -n 1 <<<"$composites")
    last_score=$(tail -n 1 <<<"$composites")

    # Anything that is not an integer (missing jq, malformed record) is
    # replaced by the neutral 50 so the comparisons below cannot fail.
    local int_re='^-?[0-9]+$'
    [[ "$test_score" =~ $int_re ]] || test_score=50
    [[ "$code_score" =~ $int_re ]] || code_score=50
    [[ "$conv_score" =~ $int_re ]] || conv_score=50
    [[ "$first_score" =~ $int_re ]] || first_score=50
    [[ "$last_score" =~ $int_re ]] || last_score=50

    # Decision logic — the first matching rule wins.
    if (( test_score <= 30 )); then
        echo "Tests are failing badly (score: ${test_score}/100). Focus on making tests pass before anything else."
        return 0
    fi

    if (( code_score <= 40 )); then
        echo "Code quality is low (score: ${code_score}/100). Refactor and clean up before adding more features."
        return 0
    fi

    if (( conv_score <= 30 )); then
        echo "Changes are diverging, not converging (score: ${conv_score}/100). Make smaller, more focused changes."
        return 0
    fi

    if (( first_score - last_score >= 10 )); then
        echo "Reward trajectory is declining (${first_score} -> ${last_score}). Try a different approach — current strategy is making things worse."
        return 0
    fi

    if (( last_score >= 80 )); then
        echo "Strong progress (score: ${last_score}/100). Keep the current approach — you're converging well."
        return 0
    fi

    echo "Moderate progress (score: ${last_score}/100). Continue working toward the goal."
}
|
|
443
|
+
|
|
444
|
+
# Format reward history as markdown for injection into iteration prompts.
# Arguments:
#   $1 - reward_file: JSONL reward history (default $PROCESS_REWARD_FILE)
#   $2 - max_entries: number of most recent records to show (default 5)
# Outputs: a markdown table with one row per record, followed by a
#   "**Reward signal:**" line taken from process_reward_suggest_action.
#   Prints nothing when the file is missing or empty. Always returns 0.
process_reward_inject_context() {
    local reward_file="${1:-${PROCESS_REWARD_FILE:-}}"
    local max_entries="${2:-5}"

    # Missing or zero-length history → nothing to inject. Testing the size
    # (not the newline count) also covers a single unterminated record.
    if [[ ! -f "$reward_file" || ! -s "$reward_file" ]]; then
        return 0
    fi

    # A non-numeric or zero entry count falls back to the default window.
    [[ "$max_entries" =~ ^[1-9][0-9]*$ ]] || max_entries=5

    local recent
    recent=$(tail -n "$max_entries" "$reward_file")

    local output="## Iteration Rewards (Process Reward Model)
| Iter | Composite | Test | Quality | Converge | Arch | Security |
|------|-----------|------|---------|----------|------|----------|"

    # One jq call renders a complete table row; missing fields become "?".
    # NOTE(review): process_reward_score_iteration nests the dimension scores
    # under its own "scores" key; if that JSON is stored verbatim they live at
    # .scores.scores.*. Both that layout and a flat .scores.* are accepted.
    local row_filter='(.scores.scores // .scores // {}) as $d
        | "| \(.iteration // "?") | \(.scores.composite // .composite // "?") | \($d.test_progress // "?") | \($d.code_quality // "?") | \($d.convergence // "?") | \($d.architecture // "?") | \($d.security // "?") |"'
    local unknown_row="| ? | ? | ? | ? | ? | ? | ? |"

    local line row
    while IFS= read -r line; do
        [[ -z "$line" ]] && continue
        # Malformed records (or a missing jq) still produce a placeholder row.
        row=$(jq -r "$row_filter" <<<"$line" 2>/dev/null) || row=""
        [[ -n "$row" ]] || row="$unknown_row"
        output="${output}
${row}"
    done <<< "$recent"

    # Add suggestion.
    local suggestion
    suggestion=$(process_reward_suggest_action "$reward_file")
    if [[ -n "$suggestion" ]]; then
        output="${output}

**Reward signal:** ${suggestion}"
    fi

    echo "$output"
}
|