shipwright-cli 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/completions/_shipwright +247 -93
- package/completions/shipwright.bash +69 -15
- package/completions/shipwright.fish +309 -41
- package/config/decision-tiers.json +55 -0
- package/config/event-schema.json +142 -5
- package/config/policy.json +8 -0
- package/package.json +3 -3
- package/scripts/lib/architecture.sh +2 -1
- package/scripts/lib/bootstrap.sh +0 -0
- package/scripts/lib/config.sh +0 -0
- package/scripts/lib/daemon-adaptive.sh +0 -0
- package/scripts/lib/daemon-dispatch.sh +24 -1
- package/scripts/lib/daemon-failure.sh +0 -0
- package/scripts/lib/daemon-health.sh +0 -0
- package/scripts/lib/daemon-patrol.sh +40 -5
- package/scripts/lib/daemon-poll.sh +17 -0
- package/scripts/lib/daemon-state.sh +10 -0
- package/scripts/lib/daemon-triage.sh +1 -1
- package/scripts/lib/decide-autonomy.sh +295 -0
- package/scripts/lib/decide-scoring.sh +228 -0
- package/scripts/lib/decide-signals.sh +462 -0
- package/scripts/lib/fleet-failover.sh +0 -0
- package/scripts/lib/helpers.sh +16 -17
- package/scripts/lib/pipeline-detection.sh +0 -0
- package/scripts/lib/pipeline-github.sh +0 -0
- package/scripts/lib/pipeline-intelligence.sh +20 -3
- package/scripts/lib/pipeline-quality-checks.sh +3 -2
- package/scripts/lib/pipeline-quality.sh +0 -0
- package/scripts/lib/pipeline-stages.sh +199 -32
- package/scripts/lib/pipeline-state.sh +14 -0
- package/scripts/lib/policy.sh +0 -0
- package/scripts/lib/test-helpers.sh +0 -0
- package/scripts/postinstall.mjs +75 -1
- package/scripts/signals/example-collector.sh +36 -0
- package/scripts/sw +8 -4
- package/scripts/sw-activity.sh +1 -1
- package/scripts/sw-adaptive.sh +1 -1
- package/scripts/sw-adversarial.sh +1 -1
- package/scripts/sw-architecture-enforcer.sh +1 -1
- package/scripts/sw-auth.sh +1 -1
- package/scripts/sw-autonomous.sh +1 -1
- package/scripts/sw-changelog.sh +1 -1
- package/scripts/sw-checkpoint.sh +1 -1
- package/scripts/sw-ci.sh +1 -1
- package/scripts/sw-cleanup.sh +1 -1
- package/scripts/sw-code-review.sh +1 -1
- package/scripts/sw-connect.sh +1 -1
- package/scripts/sw-context.sh +1 -1
- package/scripts/sw-cost.sh +12 -3
- package/scripts/sw-daemon.sh +2 -2
- package/scripts/sw-dashboard.sh +1 -1
- package/scripts/sw-db.sh +41 -34
- package/scripts/sw-decide.sh +685 -0
- package/scripts/sw-decompose.sh +1 -1
- package/scripts/sw-deps.sh +1 -1
- package/scripts/sw-developer-simulation.sh +1 -1
- package/scripts/sw-discovery.sh +27 -1
- package/scripts/sw-doc-fleet.sh +1 -1
- package/scripts/sw-docs-agent.sh +1 -1
- package/scripts/sw-docs.sh +1 -1
- package/scripts/sw-doctor.sh +1 -1
- package/scripts/sw-dora.sh +1 -1
- package/scripts/sw-durable.sh +1 -1
- package/scripts/sw-e2e-orchestrator.sh +1 -1
- package/scripts/sw-eventbus.sh +1 -1
- package/scripts/sw-evidence.sh +1 -1
- package/scripts/sw-feedback.sh +1 -1
- package/scripts/sw-fix.sh +1 -1
- package/scripts/sw-fleet-discover.sh +1 -1
- package/scripts/sw-fleet-viz.sh +1 -1
- package/scripts/sw-fleet.sh +1 -1
- package/scripts/sw-github-app.sh +1 -1
- package/scripts/sw-github-checks.sh +1 -1
- package/scripts/sw-github-deploy.sh +1 -1
- package/scripts/sw-github-graphql.sh +1 -1
- package/scripts/sw-guild.sh +1 -1
- package/scripts/sw-heartbeat.sh +1 -1
- package/scripts/sw-hygiene.sh +1 -1
- package/scripts/sw-incident.sh +1 -1
- package/scripts/sw-init.sh +1 -1
- package/scripts/sw-instrument.sh +1 -1
- package/scripts/sw-intelligence.sh +9 -5
- package/scripts/sw-jira.sh +1 -1
- package/scripts/sw-launchd.sh +1 -1
- package/scripts/sw-linear.sh +1 -1
- package/scripts/sw-logs.sh +1 -1
- package/scripts/sw-loop.sh +267 -17
- package/scripts/sw-memory.sh +22 -5
- package/scripts/sw-mission-control.sh +1 -1
- package/scripts/sw-model-router.sh +1 -1
- package/scripts/sw-otel.sh +5 -3
- package/scripts/sw-oversight.sh +1 -1
- package/scripts/sw-pipeline-composer.sh +1 -1
- package/scripts/sw-pipeline-vitals.sh +1 -1
- package/scripts/sw-pipeline.sh +73 -1
- package/scripts/sw-pm.sh +1 -1
- package/scripts/sw-pr-lifecycle.sh +7 -4
- package/scripts/sw-predictive.sh +1 -1
- package/scripts/sw-prep.sh +1 -1
- package/scripts/sw-ps.sh +1 -1
- package/scripts/sw-public-dashboard.sh +1 -1
- package/scripts/sw-quality.sh +9 -5
- package/scripts/sw-reaper.sh +1 -1
- package/scripts/sw-regression.sh +1 -1
- package/scripts/sw-release-manager.sh +1 -1
- package/scripts/sw-release.sh +1 -1
- package/scripts/sw-remote.sh +1 -1
- package/scripts/sw-replay.sh +1 -1
- package/scripts/sw-retro.sh +1 -1
- package/scripts/sw-review-rerun.sh +1 -1
- package/scripts/sw-scale.sh +66 -10
- package/scripts/sw-security-audit.sh +1 -1
- package/scripts/sw-self-optimize.sh +1 -1
- package/scripts/sw-session.sh +3 -3
- package/scripts/sw-setup.sh +1 -1
- package/scripts/sw-standup.sh +1 -1
- package/scripts/sw-status.sh +1 -1
- package/scripts/sw-strategic.sh +1 -1
- package/scripts/sw-stream.sh +1 -1
- package/scripts/sw-swarm.sh +1 -1
- package/scripts/sw-team-stages.sh +1 -1
- package/scripts/sw-templates.sh +1 -1
- package/scripts/sw-testgen.sh +1 -1
- package/scripts/sw-tmux-pipeline.sh +1 -1
- package/scripts/sw-tmux.sh +1 -1
- package/scripts/sw-trace.sh +1 -1
- package/scripts/sw-tracker.sh +1 -1
- package/scripts/sw-triage.sh +6 -6
- package/scripts/sw-upgrade.sh +1 -1
- package/scripts/sw-ux.sh +1 -1
- package/scripts/sw-webhook.sh +1 -1
- package/scripts/sw-widgets.sh +1 -1
- package/scripts/sw-worktree.sh +1 -1
- package/scripts/update-homebrew-sha.sh +21 -15
package/scripts/sw-loop.sh
CHANGED
|
@@ -24,6 +24,12 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
24
24
|
# shellcheck source=lib/helpers.sh
|
|
25
25
|
[[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
|
|
26
26
|
[[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
|
|
27
|
+
# Source DB for dual-write (emit_event → JSONL + SQLite).
|
|
28
|
+
# Note: do NOT call init_schema here — the pipeline (sw-pipeline.sh) owns schema
|
|
29
|
+
# initialization. Calling it here would create an empty DB that shadows JSON cost data.
|
|
30
|
+
if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
|
|
31
|
+
source "$SCRIPT_DIR/sw-db.sh" 2>/dev/null || true
|
|
32
|
+
fi
|
|
27
33
|
# Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
|
|
28
34
|
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
|
|
29
35
|
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
|
|
@@ -63,7 +69,7 @@ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
|
|
|
63
69
|
SESSION_RESTART=false
|
|
64
70
|
RESTART_COUNT=0
|
|
65
71
|
REPO_OVERRIDE=""
|
|
66
|
-
VERSION="3.0.0"
|
|
72
|
+
VERSION="3.1.0"
|
|
67
73
|
|
|
68
74
|
# ─── Token Tracking ─────────────────────────────────────────────────────────
|
|
69
75
|
LOOP_INPUT_TOKENS=0
|
|
@@ -655,6 +661,9 @@ initialize_state() {
|
|
|
655
661
|
STATUS="running"
|
|
656
662
|
LOG_ENTRIES=""
|
|
657
663
|
|
|
664
|
+
# Record starting commit for cumulative diff in quality gates
|
|
665
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
|
|
666
|
+
|
|
658
667
|
write_state
|
|
659
668
|
}
|
|
660
669
|
|
|
@@ -726,6 +735,11 @@ resume_state() {
|
|
|
726
735
|
START_EPOCH="$(now_epoch)"
|
|
727
736
|
STATUS="running"
|
|
728
737
|
|
|
738
|
+
# Set starting commit for cumulative diff (approximate: use earliest tracked commit)
|
|
739
|
+
if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
|
|
740
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
|
|
741
|
+
fi
|
|
742
|
+
|
|
729
743
|
# If we hit max iterations before, warn user to extend
|
|
730
744
|
if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
|
|
731
745
|
warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
|
|
@@ -872,7 +886,8 @@ validate_claude_output() {
|
|
|
872
886
|
|
|
873
887
|
# Check for obviously corrupt output (API errors dumped as code)
|
|
874
888
|
local total_changed
|
|
875
|
-
total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null ||
|
|
889
|
+
total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || true)
|
|
890
|
+
total_changed="${total_changed:-0}"
|
|
876
891
|
if [[ "$total_changed" -eq 0 ]]; then
|
|
877
892
|
warn "Claude iteration produced no file changes"
|
|
878
893
|
issues=$((issues + 1))
|
|
@@ -966,7 +981,8 @@ check_fatal_error() {
|
|
|
966
981
|
# Non-zero exit + tiny output = likely CLI crash
|
|
967
982
|
if [[ "$cli_exit_code" -ne 0 ]]; then
|
|
968
983
|
local line_count
|
|
969
|
-
line_count=$(grep -cv '^$' "$log_file" 2>/dev/null ||
|
|
984
|
+
line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
|
|
985
|
+
line_count="${line_count:-0}"
|
|
970
986
|
if [[ "$line_count" -lt 3 ]]; then
|
|
971
987
|
local content
|
|
972
988
|
content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
|
|
@@ -1140,7 +1156,8 @@ diagnose_failure() {
|
|
|
1140
1156
|
local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
|
|
1141
1157
|
local repeat_count=0
|
|
1142
1158
|
if [[ -f "$diagnosis_file" ]]; then
|
|
1143
|
-
repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null ||
|
|
1159
|
+
repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || true)
|
|
1160
|
+
repeat_count="${repeat_count:-0}"
|
|
1144
1161
|
fi
|
|
1145
1162
|
echo "$diagnosis" >> "$diagnosis_file"
|
|
1146
1163
|
|
|
@@ -1317,33 +1334,60 @@ run_audit_agent() {
|
|
|
1317
1334
|
local log_file="$LOG_DIR/iteration-${ITERATION}.log"
|
|
1318
1335
|
local audit_log="$LOG_DIR/audit-iter-${ITERATION}.log"
|
|
1319
1336
|
|
|
1320
|
-
# Gather context: tail of implementer output +
|
|
1337
|
+
# Gather context: tail of implementer output + cumulative diff
|
|
1321
1338
|
local impl_tail
|
|
1322
1339
|
impl_tail="$(tail -100 "$log_file" 2>/dev/null || echo "(no output)")"
|
|
1323
|
-
|
|
1324
|
-
|
|
1340
|
+
|
|
1341
|
+
# Use cumulative diff from loop start so auditor sees ALL work, not just latest commit
|
|
1342
|
+
local diff_stat cumulative_note=""
|
|
1343
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
|
|
1344
|
+
diff_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no changes)")"
|
|
1345
|
+
cumulative_note="Note: This diff shows ALL changes since the loop started (iteration 1 through ${ITERATION}), not just the latest commit."
|
|
1346
|
+
else
|
|
1347
|
+
diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
|
|
1348
|
+
fi
|
|
1349
|
+
|
|
1350
|
+
# Include verified test status so auditor doesn't have to guess
|
|
1351
|
+
local test_context=""
|
|
1352
|
+
if [[ -n "$TEST_CMD" ]]; then
|
|
1353
|
+
if [[ "${TEST_PASSED:-}" == "true" ]]; then
|
|
1354
|
+
test_context="## Verified Test Status (from harness, not from agent)
|
|
1355
|
+
Tests: ALL PASSING (command: ${TEST_CMD})"
|
|
1356
|
+
else
|
|
1357
|
+
test_context="## Verified Test Status (from harness)
|
|
1358
|
+
Tests: FAILING (command: ${TEST_CMD})
|
|
1359
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1360
|
+
fi
|
|
1361
|
+
fi
|
|
1325
1362
|
|
|
1326
1363
|
local audit_prompt
|
|
1327
1364
|
read -r -d '' audit_prompt <<AUDIT_PROMPT || true
|
|
1328
|
-
You are an independent code auditor reviewing an autonomous coding agent.
|
|
1365
|
+
You are an independent code auditor reviewing an autonomous coding agent's CUMULATIVE work.
|
|
1366
|
+
This is iteration ${ITERATION}. The agent may have done most of the work in earlier iterations.
|
|
1329
1367
|
|
|
1330
1368
|
## Goal the agent was working toward
|
|
1331
1369
|
${GOAL}
|
|
1332
1370
|
|
|
1333
|
-
## Agent Output (last 100 lines)
|
|
1371
|
+
## Agent Output This Iteration (last 100 lines)
|
|
1334
1372
|
${impl_tail}
|
|
1335
1373
|
|
|
1336
|
-
## Changes Made (git diff --stat)
|
|
1374
|
+
## Cumulative Changes Made (git diff --stat)
|
|
1375
|
+
${cumulative_note}
|
|
1337
1376
|
${diff_stat}
|
|
1338
1377
|
|
|
1378
|
+
${test_context}
|
|
1379
|
+
|
|
1339
1380
|
## Your Task
|
|
1340
|
-
Critically review the work:
|
|
1341
|
-
1.
|
|
1342
|
-
2. Are there obvious bugs, logic errors, or security issues?
|
|
1381
|
+
Critically review the CUMULATIVE work (not just the latest iteration):
|
|
1382
|
+
1. Has the agent made meaningful progress toward the goal across all iterations?
|
|
1383
|
+
2. Are there obvious bugs, logic errors, or security issues in the current codebase?
|
|
1343
1384
|
3. Did the agent leave incomplete work (TODOs, placeholder code)?
|
|
1344
1385
|
4. Are there any regressions or broken patterns?
|
|
1345
1386
|
5. Is the code quality acceptable?
|
|
1346
1387
|
|
|
1388
|
+
IMPORTANT: If the current iteration made small or no code changes, that may be acceptable
|
|
1389
|
+
if earlier iterations already completed the substantive work. Judge the whole body of work.
|
|
1390
|
+
|
|
1347
1391
|
If the work is acceptable and moves toward the goal, output exactly: AUDIT_PASS
|
|
1348
1392
|
Otherwise, list the specific issues that need fixing.
|
|
1349
1393
|
AUDIT_PROMPT
|
|
@@ -1429,21 +1473,52 @@ check_definition_of_done() {
|
|
|
1429
1473
|
|
|
1430
1474
|
local dod_content
|
|
1431
1475
|
dod_content="$(cat "$DOD_FILE")"
|
|
1476
|
+
|
|
1477
|
+
# Use cumulative diff from loop start (not just HEAD~1) so the evaluator
|
|
1478
|
+
# can see ALL work done across every iteration, not just the latest commit.
|
|
1432
1479
|
local diff_content
|
|
1433
|
-
|
|
1480
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
|
|
1481
|
+
diff_content="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no diff)")"
|
|
1482
|
+
diff_content="${diff_content}
|
|
1483
|
+
|
|
1484
|
+
## Detailed Changes (cumulative diff, truncated to 200 lines)
|
|
1485
|
+
$(git -C "$PROJECT_ROOT" diff "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -200 || echo "(no diff)")"
|
|
1486
|
+
else
|
|
1487
|
+
diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
|
|
1488
|
+
fi
|
|
1489
|
+
|
|
1490
|
+
# Inject verified runtime facts so the evaluator doesn't have to guess
|
|
1491
|
+
local runtime_facts=""
|
|
1492
|
+
if [[ -n "$TEST_CMD" ]]; then
|
|
1493
|
+
if [[ "${TEST_PASSED:-}" == "true" ]]; then
|
|
1494
|
+
runtime_facts="## Verified Runtime Facts (from the loop harness, not from the agent)
|
|
1495
|
+
- Tests: ALL PASSING (verified by running '${TEST_CMD}' after this iteration)
|
|
1496
|
+
- Test output (last 10 lines):
|
|
1497
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1498
|
+
else
|
|
1499
|
+
runtime_facts="## Verified Runtime Facts
|
|
1500
|
+
- Tests: FAILING (verified by running '${TEST_CMD}')
|
|
1501
|
+
- Test output (last 10 lines):
|
|
1502
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1503
|
+
fi
|
|
1504
|
+
fi
|
|
1434
1505
|
|
|
1435
1506
|
local dod_prompt
|
|
1436
1507
|
read -r -d '' dod_prompt <<DOD_PROMPT || true
|
|
1437
|
-
You are evaluating whether
|
|
1508
|
+
You are evaluating whether a project satisfies a Definition of Done checklist.
|
|
1509
|
+
You are reviewing the CUMULATIVE work across all iterations, not just the latest commit.
|
|
1438
1510
|
|
|
1439
1511
|
## Definition of Done
|
|
1440
1512
|
${dod_content}
|
|
1441
1513
|
|
|
1442
|
-
|
|
1514
|
+
${runtime_facts}
|
|
1515
|
+
|
|
1516
|
+
## Cumulative Changes Made (git diff from start of loop to now)
|
|
1443
1517
|
${diff_content}
|
|
1444
1518
|
|
|
1445
1519
|
## Your Task
|
|
1446
|
-
For each item in the Definition of Done, determine if the
|
|
1520
|
+
For each item in the Definition of Done, determine if the project satisfies it.
|
|
1521
|
+
The runtime facts above are verified by the harness — trust them as ground truth.
|
|
1447
1522
|
If ALL items are satisfied, output exactly: DOD_PASS
|
|
1448
1523
|
Otherwise, list which items are NOT satisfied and why.
|
|
1449
1524
|
DOD_PROMPT
|
|
@@ -1497,6 +1572,14 @@ guard_completion() {
|
|
|
1497
1572
|
rejection_reasons+=("tests failing")
|
|
1498
1573
|
fi
|
|
1499
1574
|
|
|
1575
|
+
# Holistic final gate: when all other gates pass, run a project-level assessment
|
|
1576
|
+
# that evaluates the entire codebase against the goal (not just the latest diff)
|
|
1577
|
+
if [[ ${#rejection_reasons[@]} -eq 0 ]]; then
|
|
1578
|
+
if ! run_holistic_gate; then
|
|
1579
|
+
rejection_reasons+=("holistic project assessment found gaps")
|
|
1580
|
+
fi
|
|
1581
|
+
fi
|
|
1582
|
+
|
|
1500
1583
|
if [[ ${#rejection_reasons[@]} -gt 0 ]]; then
|
|
1501
1584
|
local reasons_str
|
|
1502
1585
|
reasons_str="$(printf ', %s' "${rejection_reasons[@]}")"
|
|
@@ -1510,6 +1593,70 @@ guard_completion() {
|
|
|
1510
1593
|
return 0
|
|
1511
1594
|
}
|
|
1512
1595
|
|
|
1596
|
+
# Holistic gate: evaluates the full project against the original goal.
|
|
1597
|
+
# Only runs when all other gates pass (final checkpoint before acceptance).
|
|
1598
|
+
run_holistic_gate() {
|
|
1599
|
+
# Skip if no starting commit (can't compute cumulative diff)
|
|
1600
|
+
[[ -z "${LOOP_START_COMMIT:-}" ]] && return 0
|
|
1601
|
+
|
|
1602
|
+
local holistic_log="$LOG_DIR/holistic-iter-${ITERATION}.log"
|
|
1603
|
+
|
|
1604
|
+
# Build a project summary: file tree, test count, cumulative diff stats
|
|
1605
|
+
local file_count
|
|
1606
|
+
file_count=$(git -C "$PROJECT_ROOT" ls-files | wc -l | tr -d ' ')
|
|
1607
|
+
local cumulative_stat
|
|
1608
|
+
cumulative_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || echo "(no changes)")"
|
|
1609
|
+
local test_summary=""
|
|
1610
|
+
if [[ -n "${TEST_OUTPUT:-}" ]]; then
|
|
1611
|
+
test_summary="$(echo "$TEST_OUTPUT" | tail -5)"
|
|
1612
|
+
fi
|
|
1613
|
+
|
|
1614
|
+
local holistic_prompt
|
|
1615
|
+
read -r -d '' holistic_prompt <<HOLISTIC_PROMPT || true
|
|
1616
|
+
You are a final quality gate evaluating whether an autonomous coding agent has FULLY achieved its goal.
|
|
1617
|
+
|
|
1618
|
+
## Original Goal
|
|
1619
|
+
${GOAL}
|
|
1620
|
+
|
|
1621
|
+
## Project Stats
|
|
1622
|
+
- Files in repo: ${file_count}
|
|
1623
|
+
- Iterations completed: ${ITERATION}
|
|
1624
|
+
- Cumulative changes: ${cumulative_stat}
|
|
1625
|
+
- Tests: ${TEST_PASSED:-unknown} (command: ${TEST_CMD:-none})
|
|
1626
|
+
${test_summary:+- Test output: ${test_summary}}
|
|
1627
|
+
|
|
1628
|
+
## Cumulative Git Changes (diff --stat from start)
|
|
1629
|
+
$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -40 || echo "(none)")
|
|
1630
|
+
|
|
1631
|
+
## Your Task
|
|
1632
|
+
Based on the goal and the cumulative work done:
|
|
1633
|
+
1. Has the goal been FULLY achieved (not partially)?
|
|
1634
|
+
2. Is there any critical gap that would make this unacceptable for production?
|
|
1635
|
+
|
|
1636
|
+
If the goal is fully achieved, output exactly: HOLISTIC_PASS
|
|
1637
|
+
Otherwise, list the specific gaps remaining.
|
|
1638
|
+
HOLISTIC_PROMPT
|
|
1639
|
+
|
|
1640
|
+
echo -e " ${PURPLE}▸${RESET} Running holistic project assessment..."
|
|
1641
|
+
|
|
1642
|
+
local hol_model
|
|
1643
|
+
hol_model="$(select_audit_model)"
|
|
1644
|
+
local hol_flags=("--model" "$hol_model")
|
|
1645
|
+
if $SKIP_PERMISSIONS; then
|
|
1646
|
+
hol_flags+=("--dangerously-skip-permissions")
|
|
1647
|
+
fi
|
|
1648
|
+
|
|
1649
|
+
claude -p "$holistic_prompt" "${hol_flags[@]}" > "$holistic_log" 2>&1 || true
|
|
1650
|
+
|
|
1651
|
+
if grep -q "HOLISTIC_PASS" "$holistic_log" 2>/dev/null; then
|
|
1652
|
+
echo -e " ${GREEN}✓${RESET} Holistic assessment: passed"
|
|
1653
|
+
return 0
|
|
1654
|
+
else
|
|
1655
|
+
echo -e " ${YELLOW}⚠${RESET} Holistic assessment: gaps found"
|
|
1656
|
+
return 1
|
|
1657
|
+
fi
|
|
1658
|
+
}
|
|
1659
|
+
|
|
1513
1660
|
# ─── Context Window Management ───────────────────────────────────────────────
|
|
1514
1661
|
# Prevents prompt from exceeding Claude's context limit (~200K tokens).
|
|
1515
1662
|
# Trims least-critical sections first when over budget.
|
|
@@ -1810,12 +1957,25 @@ ${_test_tail}
|
|
|
1810
1957
|
RESUMED_TEST_OUTPUT=""
|
|
1811
1958
|
fi
|
|
1812
1959
|
|
|
1960
|
+
# Build cumulative progress summary showing all iterations' work
|
|
1961
|
+
local cumulative_section=""
|
|
1962
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
|
|
1963
|
+
local cum_stat
|
|
1964
|
+
cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
|
|
1965
|
+
if [[ -n "$cum_stat" ]]; then
|
|
1966
|
+
cumulative_section="## Cumulative Progress (all iterations combined)
|
|
1967
|
+
${cum_stat}
|
|
1968
|
+
"
|
|
1969
|
+
fi
|
|
1970
|
+
fi
|
|
1971
|
+
|
|
1813
1972
|
cat <<PROMPT
|
|
1814
1973
|
You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
|
|
1815
1974
|
${resume_section}
|
|
1816
1975
|
## Your Goal
|
|
1817
1976
|
${GOAL}
|
|
1818
1977
|
|
|
1978
|
+
${cumulative_section}
|
|
1819
1979
|
## Current Progress
|
|
1820
1980
|
${recent_log}
|
|
1821
1981
|
|
|
@@ -1861,6 +2021,58 @@ ${stuckness_section}
|
|
|
1861
2021
|
PROMPT
|
|
1862
2022
|
}
|
|
1863
2023
|
|
|
2024
|
+
# ─── Alternative Strategy Exploration ─────────────────────────────────────────
|
|
2025
|
+
# When stuckness is detected, generate a context-aware alternative strategy.
|
|
2026
|
+
# Uses pattern matching on error type + iteration count to suggest different approaches.
|
|
2027
|
+
|
|
2028
|
+
explore_alternative_strategy() {
|
|
2029
|
+
local last_error="${1:-unknown}"
|
|
2030
|
+
local iteration="${2:-0}"
|
|
2031
|
+
local diagnosis="${3:-}"
|
|
2032
|
+
|
|
2033
|
+
# Track attempted strategies to avoid repeating them
|
|
2034
|
+
local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
|
|
2035
|
+
local attempted
|
|
2036
|
+
attempted=$(cat "$strategy_file" 2>/dev/null || true)
|
|
2037
|
+
|
|
2038
|
+
local strategy=""
|
|
2039
|
+
|
|
2040
|
+
# If quality gates are passing but evaluators disagree, suggest focusing on evaluator alignment
|
|
2041
|
+
if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
|
|
2042
|
+
if ! echo "$attempted" | grep -q "evaluator_alignment"; then
|
|
2043
|
+
echo "evaluator_alignment" >> "$strategy_file"
|
|
2044
|
+
strategy="## Alternative Strategy: Evaluator Alignment
|
|
2045
|
+
The code appears functionally complete (tests pass). Focus on satisfying the remaining
|
|
2046
|
+
quality gate evaluators. Check the DoD log and audit log for specific complaints, then
|
|
2047
|
+
address those exact points rather than adding new features."
|
|
2048
|
+
fi
|
|
2049
|
+
fi
|
|
2050
|
+
|
|
2051
|
+
# If no code changes in last iteration, suggest verifying existing work
|
|
2052
|
+
if echo "$last_error" | grep -qi "no code changes" || [[ "$diagnosis" == *"no code"* ]]; then
|
|
2053
|
+
if ! echo "$attempted" | grep -q "verify_existing"; then
|
|
2054
|
+
echo "verify_existing" >> "$strategy_file"
|
|
2055
|
+
strategy="## Alternative Strategy: Verify Existing Work
|
|
2056
|
+
Recent iterations made no code changes. The work may already be complete.
|
|
2057
|
+
Run the full test suite, verify all features work, and if everything passes,
|
|
2058
|
+
commit a verification message and declare LOOP_COMPLETE with evidence."
|
|
2059
|
+
fi
|
|
2060
|
+
fi
|
|
2061
|
+
|
|
2062
|
+
# Generic fallback: break the problem down
|
|
2063
|
+
if [[ -z "$strategy" ]]; then
|
|
2064
|
+
if ! echo "$attempted" | grep -q "decompose"; then
|
|
2065
|
+
echo "decompose" >> "$strategy_file"
|
|
2066
|
+
strategy="## Alternative Strategy: Decompose
|
|
2067
|
+
Break the remaining work into smaller, independent steps. Focus on one specific
|
|
2068
|
+
file or function at a time. Read error messages literally — the root cause may
|
|
2069
|
+
differ from your assumption."
|
|
2070
|
+
fi
|
|
2071
|
+
fi
|
|
2072
|
+
|
|
2073
|
+
echo "$strategy"
|
|
2074
|
+
}
|
|
2075
|
+
|
|
1864
2076
|
# ─── Stuckness Detection ─────────────────────────────────────────────────────
|
|
1865
2077
|
# Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
|
|
1866
2078
|
# Returns 0 when stuck, 1 when not. Outputs stuckness section and sets STUCKNESS_HINT when stuck.
|
|
@@ -1994,6 +2206,17 @@ detect_stuckness() {
|
|
|
1994
2206
|
stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
|
|
1995
2207
|
fi
|
|
1996
2208
|
|
|
2209
|
+
# Gate-aware dampening: if tests pass and the agent has made progress overall,
|
|
2210
|
+
# reduce stuckness signal count. The "no code changes" and "identical diffs" signals
|
|
2211
|
+
# fire when code is already complete and the agent is fighting evaluator quirks —
|
|
2212
|
+
# that's not genuine stuckness, it's "done but gates disagree."
|
|
2213
|
+
if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
|
|
2214
|
+
# If at least one quality signal is positive, dampen by 1
|
|
2215
|
+
if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
|
|
2216
|
+
stuckness_signals=$((stuckness_signals - 1))
|
|
2217
|
+
fi
|
|
2218
|
+
fi
|
|
2219
|
+
|
|
1997
2220
|
# Decision: 2+ signals = stuck
|
|
1998
2221
|
if [[ "$stuckness_signals" -ge 2 ]]; then
|
|
1999
2222
|
STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
|
|
@@ -2719,6 +2942,11 @@ run_single_agent_loop() {
|
|
|
2719
2942
|
initialize_state
|
|
2720
2943
|
fi
|
|
2721
2944
|
|
|
2945
|
+
# Ensure LOOP_START_COMMIT is set (may not be on resume/restart)
|
|
2946
|
+
if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
|
|
2947
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
|
|
2948
|
+
fi
|
|
2949
|
+
|
|
2722
2950
|
# Apply adaptive budget/model before showing banner
|
|
2723
2951
|
apply_adaptive_budget
|
|
2724
2952
|
MODEL="$(select_adaptive_model "build" "$MODEL")"
|
|
@@ -2746,6 +2974,16 @@ run_single_agent_loop() {
|
|
|
2746
2974
|
}
|
|
2747
2975
|
ITERATION=$(( ITERATION + 1 ))
|
|
2748
2976
|
|
|
2977
|
+
# Emit iteration start event for pipeline visibility
|
|
2978
|
+
if type emit_event >/dev/null 2>&1; then
|
|
2979
|
+
emit_event "loop.iteration_start" \
|
|
2980
|
+
"iteration=$ITERATION" \
|
|
2981
|
+
"max=$MAX_ITERATIONS" \
|
|
2982
|
+
"job_id=${PIPELINE_JOB_ID:-loop-$$}" \
|
|
2983
|
+
"agent=${AGENT_NUM:-1}" \
|
|
2984
|
+
"test_passed=${TEST_PASSED:-unknown}"
|
|
2985
|
+
fi
|
|
2986
|
+
|
|
2749
2987
|
# Root-cause diagnosis and memory-based fix on retry after test failure
|
|
2750
2988
|
if [[ "${TEST_PASSED:-}" == "false" ]]; then
|
|
2751
2989
|
# Source memory module for diagnosis and fix lookup
|
|
@@ -2915,6 +3153,18 @@ $summary
|
|
|
2915
3153
|
write_state
|
|
2916
3154
|
write_progress
|
|
2917
3155
|
|
|
3156
|
+
# Emit iteration complete event for pipeline visibility
|
|
3157
|
+
if type emit_event >/dev/null 2>&1; then
|
|
3158
|
+
emit_event "loop.iteration_complete" \
|
|
3159
|
+
"iteration=$ITERATION" \
|
|
3160
|
+
"max=$MAX_ITERATIONS" \
|
|
3161
|
+
"job_id=${PIPELINE_JOB_ID:-loop-$$}" \
|
|
3162
|
+
"agent=${AGENT_NUM:-1}" \
|
|
3163
|
+
"test_passed=${TEST_PASSED:-unknown}" \
|
|
3164
|
+
"commits=$TOTAL_COMMITS" \
|
|
3165
|
+
"status=${STATUS:-running}"
|
|
3166
|
+
fi
|
|
3167
|
+
|
|
2918
3168
|
# Update heartbeat
|
|
2919
3169
|
"$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
|
|
2920
3170
|
--pid $$ \
|
package/scripts/sw-memory.sh
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
set -euo pipefail
|
|
7
7
|
trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
|
|
8
8
|
|
|
9
|
-
VERSION="3.0.0"
|
|
9
|
+
VERSION="3.1.0"
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
REPO_DIR="${REPO_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
|
|
12
12
|
|
|
@@ -88,7 +88,13 @@ memory_ranked_search() {
|
|
|
88
88
|
memory_dir="$(repo_memory_dir)"
|
|
89
89
|
fi
|
|
90
90
|
memory_dir="${memory_dir:-$HOME/.shipwright/memory}"
|
|
91
|
-
[[ ! -d "$memory_dir" ]]
|
|
91
|
+
if [[ ! -d "$memory_dir" ]]; then
|
|
92
|
+
info "Memory dir not found at ${memory_dir} — auto-creating"
|
|
93
|
+
mkdir -p "$memory_dir"
|
|
94
|
+
emit_event "memory.not_available" "path=$memory_dir" "action=auto_created"
|
|
95
|
+
echo "[]"
|
|
96
|
+
return 0
|
|
97
|
+
fi
|
|
92
98
|
|
|
93
99
|
# Extract and expand query keywords
|
|
94
100
|
local keywords
|
|
@@ -372,7 +378,10 @@ memory_capture_failure() {
|
|
|
372
378
|
pattern=$(echo "$error_output" | head -1 | cut -c1-200)
|
|
373
379
|
fi
|
|
374
380
|
|
|
375
|
-
[[ -z "$pattern" ]]
|
|
381
|
+
if [[ -z "$pattern" ]]; then
|
|
382
|
+
warn "Memory capture: empty error pattern — skipping"
|
|
383
|
+
return 0
|
|
384
|
+
fi
|
|
376
385
|
|
|
377
386
|
# Check for duplicate — increment seen_count if pattern already exists
|
|
378
387
|
local existing_idx
|
|
@@ -987,6 +996,7 @@ memory_inject_context() {
|
|
|
987
996
|
done
|
|
988
997
|
|
|
989
998
|
if [[ "$has_memory" == "false" ]]; then
|
|
999
|
+
info "No memory available for repo (${mem_dir}) — first pipeline run will seed it"
|
|
990
1000
|
echo "# No memory available for this repository yet."
|
|
991
1001
|
return 0
|
|
992
1002
|
fi
|
|
@@ -1642,6 +1652,11 @@ memory_export() {
|
|
|
1642
1652
|
local mem_dir
|
|
1643
1653
|
mem_dir="$(repo_memory_dir)"
|
|
1644
1654
|
|
|
1655
|
+
# Ensure all memory files exist (jq --slurpfile fails on missing files)
|
|
1656
|
+
for f in patterns.json failures.json decisions.json metrics.json; do
|
|
1657
|
+
[[ -f "$mem_dir/$f" ]] || echo '{}' > "$mem_dir/$f"
|
|
1658
|
+
done
|
|
1659
|
+
|
|
1645
1660
|
# Merge all memory files into a single JSON export
|
|
1646
1661
|
local export_json
|
|
1647
1662
|
export_json=$(jq -n \
|
|
@@ -1757,8 +1772,10 @@ memory_stats() {
|
|
|
1757
1772
|
# Event-based hit rate
|
|
1758
1773
|
local inject_count capture_count
|
|
1759
1774
|
if [[ -f "$EVENTS_FILE" ]]; then
|
|
1760
|
-
inject_count=$(grep -c '"memory.inject"' "$EVENTS_FILE" 2>/dev/null ||
|
|
1761
|
-
|
|
1775
|
+
inject_count=$(grep -c '"memory.inject"' "$EVENTS_FILE" 2>/dev/null || true)
|
|
1776
|
+
inject_count="${inject_count:-0}"
|
|
1777
|
+
capture_count=$(grep -c '"memory.capture"' "$EVENTS_FILE" 2>/dev/null || true)
|
|
1778
|
+
capture_count="${capture_count:-0}"
|
|
1762
1779
|
echo ""
|
|
1763
1780
|
echo -e " ${BOLD}Usage${RESET}"
|
|
1764
1781
|
printf " %-18s %s\n" "Context injections:" "$inject_count"
|
package/scripts/sw-otel.sh
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
set -euo pipefail
|
|
7
7
|
trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
|
|
8
8
|
|
|
9
|
-
VERSION="3.0.0"
|
|
9
|
+
VERSION="3.1.0"
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
12
12
|
|
|
@@ -463,8 +463,10 @@ cmd_report() {
|
|
|
463
463
|
|
|
464
464
|
if [[ -f "$EVENTS_FILE" ]]; then
|
|
465
465
|
event_count=$(wc -l < "$EVENTS_FILE" || echo "0")
|
|
466
|
-
export_count=$(grep -c '"type":"otel_export"' "$EVENTS_FILE" 2>/dev/null ||
|
|
467
|
-
|
|
466
|
+
export_count=$(grep -c '"type":"otel_export"' "$EVENTS_FILE" 2>/dev/null || true)
|
|
467
|
+
export_count="${export_count:-0}"
|
|
468
|
+
webhook_count=$(grep -c '"type":"webhook_sent"' "$EVENTS_FILE" 2>/dev/null || true)
|
|
469
|
+
webhook_count="${webhook_count:-0}"
|
|
468
470
|
last_event_ts=$(tail -n1 "$EVENTS_FILE" | jq -r '.ts // "unknown"' 2>/dev/null || echo "unknown")
|
|
469
471
|
fi
|
|
470
472
|
|
package/scripts/sw-oversight.sh
CHANGED