shipwright-cli 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/README.md +3 -3
  2. package/completions/_shipwright +247 -93
  3. package/completions/shipwright.bash +69 -15
  4. package/completions/shipwright.fish +309 -41
  5. package/config/decision-tiers.json +55 -0
  6. package/config/event-schema.json +142 -5
  7. package/config/policy.json +8 -0
  8. package/package.json +3 -3
  9. package/scripts/lib/architecture.sh +2 -1
  10. package/scripts/lib/bootstrap.sh +0 -0
  11. package/scripts/lib/config.sh +0 -0
  12. package/scripts/lib/daemon-adaptive.sh +0 -0
  13. package/scripts/lib/daemon-dispatch.sh +24 -1
  14. package/scripts/lib/daemon-failure.sh +0 -0
  15. package/scripts/lib/daemon-health.sh +0 -0
  16. package/scripts/lib/daemon-patrol.sh +40 -5
  17. package/scripts/lib/daemon-poll.sh +17 -0
  18. package/scripts/lib/daemon-state.sh +10 -0
  19. package/scripts/lib/daemon-triage.sh +1 -1
  20. package/scripts/lib/decide-autonomy.sh +295 -0
  21. package/scripts/lib/decide-scoring.sh +228 -0
  22. package/scripts/lib/decide-signals.sh +462 -0
  23. package/scripts/lib/fleet-failover.sh +0 -0
  24. package/scripts/lib/helpers.sh +16 -17
  25. package/scripts/lib/pipeline-detection.sh +0 -0
  26. package/scripts/lib/pipeline-github.sh +0 -0
  27. package/scripts/lib/pipeline-intelligence.sh +20 -3
  28. package/scripts/lib/pipeline-quality-checks.sh +3 -2
  29. package/scripts/lib/pipeline-quality.sh +0 -0
  30. package/scripts/lib/pipeline-stages.sh +199 -32
  31. package/scripts/lib/pipeline-state.sh +14 -0
  32. package/scripts/lib/policy.sh +0 -0
  33. package/scripts/lib/test-helpers.sh +0 -0
  34. package/scripts/postinstall.mjs +75 -1
  35. package/scripts/signals/example-collector.sh +36 -0
  36. package/scripts/sw +8 -4
  37. package/scripts/sw-activity.sh +1 -1
  38. package/scripts/sw-adaptive.sh +1 -1
  39. package/scripts/sw-adversarial.sh +1 -1
  40. package/scripts/sw-architecture-enforcer.sh +1 -1
  41. package/scripts/sw-auth.sh +1 -1
  42. package/scripts/sw-autonomous.sh +1 -1
  43. package/scripts/sw-changelog.sh +1 -1
  44. package/scripts/sw-checkpoint.sh +1 -1
  45. package/scripts/sw-ci.sh +1 -1
  46. package/scripts/sw-cleanup.sh +1 -1
  47. package/scripts/sw-code-review.sh +1 -1
  48. package/scripts/sw-connect.sh +1 -1
  49. package/scripts/sw-context.sh +1 -1
  50. package/scripts/sw-cost.sh +12 -3
  51. package/scripts/sw-daemon.sh +2 -2
  52. package/scripts/sw-dashboard.sh +1 -1
  53. package/scripts/sw-db.sh +41 -34
  54. package/scripts/sw-decide.sh +685 -0
  55. package/scripts/sw-decompose.sh +1 -1
  56. package/scripts/sw-deps.sh +1 -1
  57. package/scripts/sw-developer-simulation.sh +1 -1
  58. package/scripts/sw-discovery.sh +27 -1
  59. package/scripts/sw-doc-fleet.sh +1 -1
  60. package/scripts/sw-docs-agent.sh +1 -1
  61. package/scripts/sw-docs.sh +1 -1
  62. package/scripts/sw-doctor.sh +1 -1
  63. package/scripts/sw-dora.sh +1 -1
  64. package/scripts/sw-durable.sh +1 -1
  65. package/scripts/sw-e2e-orchestrator.sh +1 -1
  66. package/scripts/sw-eventbus.sh +1 -1
  67. package/scripts/sw-evidence.sh +1 -1
  68. package/scripts/sw-feedback.sh +1 -1
  69. package/scripts/sw-fix.sh +1 -1
  70. package/scripts/sw-fleet-discover.sh +1 -1
  71. package/scripts/sw-fleet-viz.sh +1 -1
  72. package/scripts/sw-fleet.sh +1 -1
  73. package/scripts/sw-github-app.sh +1 -1
  74. package/scripts/sw-github-checks.sh +1 -1
  75. package/scripts/sw-github-deploy.sh +1 -1
  76. package/scripts/sw-github-graphql.sh +1 -1
  77. package/scripts/sw-guild.sh +1 -1
  78. package/scripts/sw-heartbeat.sh +1 -1
  79. package/scripts/sw-hygiene.sh +1 -1
  80. package/scripts/sw-incident.sh +1 -1
  81. package/scripts/sw-init.sh +1 -1
  82. package/scripts/sw-instrument.sh +1 -1
  83. package/scripts/sw-intelligence.sh +9 -5
  84. package/scripts/sw-jira.sh +1 -1
  85. package/scripts/sw-launchd.sh +1 -1
  86. package/scripts/sw-linear.sh +1 -1
  87. package/scripts/sw-logs.sh +1 -1
  88. package/scripts/sw-loop.sh +267 -17
  89. package/scripts/sw-memory.sh +22 -5
  90. package/scripts/sw-mission-control.sh +1 -1
  91. package/scripts/sw-model-router.sh +1 -1
  92. package/scripts/sw-otel.sh +5 -3
  93. package/scripts/sw-oversight.sh +1 -1
  94. package/scripts/sw-pipeline-composer.sh +1 -1
  95. package/scripts/sw-pipeline-vitals.sh +1 -1
  96. package/scripts/sw-pipeline.sh +73 -1
  97. package/scripts/sw-pm.sh +1 -1
  98. package/scripts/sw-pr-lifecycle.sh +7 -4
  99. package/scripts/sw-predictive.sh +1 -1
  100. package/scripts/sw-prep.sh +1 -1
  101. package/scripts/sw-ps.sh +1 -1
  102. package/scripts/sw-public-dashboard.sh +1 -1
  103. package/scripts/sw-quality.sh +9 -5
  104. package/scripts/sw-reaper.sh +1 -1
  105. package/scripts/sw-regression.sh +1 -1
  106. package/scripts/sw-release-manager.sh +1 -1
  107. package/scripts/sw-release.sh +1 -1
  108. package/scripts/sw-remote.sh +1 -1
  109. package/scripts/sw-replay.sh +1 -1
  110. package/scripts/sw-retro.sh +1 -1
  111. package/scripts/sw-review-rerun.sh +1 -1
  112. package/scripts/sw-scale.sh +66 -10
  113. package/scripts/sw-security-audit.sh +1 -1
  114. package/scripts/sw-self-optimize.sh +1 -1
  115. package/scripts/sw-session.sh +3 -3
  116. package/scripts/sw-setup.sh +1 -1
  117. package/scripts/sw-standup.sh +1 -1
  118. package/scripts/sw-status.sh +1 -1
  119. package/scripts/sw-strategic.sh +1 -1
  120. package/scripts/sw-stream.sh +1 -1
  121. package/scripts/sw-swarm.sh +1 -1
  122. package/scripts/sw-team-stages.sh +1 -1
  123. package/scripts/sw-templates.sh +1 -1
  124. package/scripts/sw-testgen.sh +1 -1
  125. package/scripts/sw-tmux-pipeline.sh +1 -1
  126. package/scripts/sw-tmux.sh +1 -1
  127. package/scripts/sw-trace.sh +1 -1
  128. package/scripts/sw-tracker.sh +1 -1
  129. package/scripts/sw-triage.sh +6 -6
  130. package/scripts/sw-upgrade.sh +1 -1
  131. package/scripts/sw-ux.sh +1 -1
  132. package/scripts/sw-webhook.sh +1 -1
  133. package/scripts/sw-widgets.sh +1 -1
  134. package/scripts/sw-worktree.sh +1 -1
  135. package/scripts/update-homebrew-sha.sh +21 -15
@@ -24,6 +24,12 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
24
24
  # shellcheck source=lib/helpers.sh
25
25
  [[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
26
26
  [[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
27
+ # Source DB for dual-write (emit_event → JSONL + SQLite).
28
+ # Note: do NOT call init_schema here — the pipeline (sw-pipeline.sh) owns schema
29
+ # initialization. Calling it here would create an empty DB that shadows JSON cost data.
30
+ if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
31
+ source "$SCRIPT_DIR/sw-db.sh" 2>/dev/null || true
32
+ fi
27
33
  # Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
28
34
  [[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
29
35
  [[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
@@ -63,7 +69,7 @@ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
63
69
  SESSION_RESTART=false
64
70
  RESTART_COUNT=0
65
71
  REPO_OVERRIDE=""
66
- VERSION="3.0.0"
72
+ VERSION="3.1.0"
67
73
 
68
74
  # ─── Token Tracking ─────────────────────────────────────────────────────────
69
75
  LOOP_INPUT_TOKENS=0
@@ -655,6 +661,9 @@ initialize_state() {
655
661
  STATUS="running"
656
662
  LOG_ENTRIES=""
657
663
 
664
+ # Record starting commit for cumulative diff in quality gates
665
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
666
+
658
667
  write_state
659
668
  }
660
669
 
@@ -726,6 +735,11 @@ resume_state() {
726
735
  START_EPOCH="$(now_epoch)"
727
736
  STATUS="running"
728
737
 
738
+ # Set starting commit for cumulative diff (approximate: use earliest tracked commit)
739
+ if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
740
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
741
+ fi
742
+
729
743
  # If we hit max iterations before, warn user to extend
730
744
  if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
731
745
  warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
@@ -872,7 +886,8 @@ validate_claude_output() {
872
886
 
873
887
  # Check for obviously corrupt output (API errors dumped as code)
874
888
  local total_changed
875
- total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || echo "0")
889
+ total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || true)
890
+ total_changed="${total_changed:-0}"
876
891
  if [[ "$total_changed" -eq 0 ]]; then
877
892
  warn "Claude iteration produced no file changes"
878
893
  issues=$((issues + 1))
@@ -966,7 +981,8 @@ check_fatal_error() {
966
981
  # Non-zero exit + tiny output = likely CLI crash
967
982
  if [[ "$cli_exit_code" -ne 0 ]]; then
968
983
  local line_count
969
- line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || echo 0)
984
+ line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
985
+ line_count="${line_count:-0}"
970
986
  if [[ "$line_count" -lt 3 ]]; then
971
987
  local content
972
988
  content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
@@ -1140,7 +1156,8 @@ diagnose_failure() {
1140
1156
  local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
1141
1157
  local repeat_count=0
1142
1158
  if [[ -f "$diagnosis_file" ]]; then
1143
- repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || echo "0")
1159
+ repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || true)
1160
+ repeat_count="${repeat_count:-0}"
1144
1161
  fi
1145
1162
  echo "$diagnosis" >> "$diagnosis_file"
1146
1163
 
@@ -1317,33 +1334,60 @@ run_audit_agent() {
1317
1334
  local log_file="$LOG_DIR/iteration-${ITERATION}.log"
1318
1335
  local audit_log="$LOG_DIR/audit-iter-${ITERATION}.log"
1319
1336
 
1320
- # Gather context: tail of implementer output + git diff
1337
+ # Gather context: tail of implementer output + cumulative diff
1321
1338
  local impl_tail
1322
1339
  impl_tail="$(tail -100 "$log_file" 2>/dev/null || echo "(no output)")"
1323
- local diff_stat
1324
- diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
1340
+
1341
+ # Use cumulative diff from loop start so auditor sees ALL work, not just latest commit
1342
+ local diff_stat cumulative_note=""
1343
+ if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
1344
+ diff_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no changes)")"
1345
+ cumulative_note="Note: This diff shows ALL changes since the loop started (iteration 1 through ${ITERATION}), not just the latest commit."
1346
+ else
1347
+ diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
1348
+ fi
1349
+
1350
+ # Include verified test status so auditor doesn't have to guess
1351
+ local test_context=""
1352
+ if [[ -n "$TEST_CMD" ]]; then
1353
+ if [[ "${TEST_PASSED:-}" == "true" ]]; then
1354
+ test_context="## Verified Test Status (from harness, not from agent)
1355
+ Tests: ALL PASSING (command: ${TEST_CMD})"
1356
+ else
1357
+ test_context="## Verified Test Status (from harness)
1358
+ Tests: FAILING (command: ${TEST_CMD})
1359
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1360
+ fi
1361
+ fi
1325
1362
 
1326
1363
  local audit_prompt
1327
1364
  read -r -d '' audit_prompt <<AUDIT_PROMPT || true
1328
- You are an independent code auditor reviewing an autonomous coding agent.
1365
+ You are an independent code auditor reviewing an autonomous coding agent's CUMULATIVE work.
1366
+ This is iteration ${ITERATION}. The agent may have done most of the work in earlier iterations.
1329
1367
 
1330
1368
  ## Goal the agent was working toward
1331
1369
  ${GOAL}
1332
1370
 
1333
- ## Agent Output (last 100 lines)
1371
+ ## Agent Output This Iteration (last 100 lines)
1334
1372
  ${impl_tail}
1335
1373
 
1336
- ## Changes Made (git diff --stat)
1374
+ ## Cumulative Changes Made (git diff --stat)
1375
+ ${cumulative_note}
1337
1376
  ${diff_stat}
1338
1377
 
1378
+ ${test_context}
1379
+
1339
1380
  ## Your Task
1340
- Critically review the work:
1341
- 1. Did the agent make meaningful progress toward the goal?
1342
- 2. Are there obvious bugs, logic errors, or security issues?
1381
+ Critically review the CUMULATIVE work (not just the latest iteration):
1382
+ 1. Has the agent made meaningful progress toward the goal across all iterations?
1383
+ 2. Are there obvious bugs, logic errors, or security issues in the current codebase?
1343
1384
  3. Did the agent leave incomplete work (TODOs, placeholder code)?
1344
1385
  4. Are there any regressions or broken patterns?
1345
1386
  5. Is the code quality acceptable?
1346
1387
 
1388
+ IMPORTANT: If the current iteration made small or no code changes, that may be acceptable
1389
+ if earlier iterations already completed the substantive work. Judge the whole body of work.
1390
+
1347
1391
  If the work is acceptable and moves toward the goal, output exactly: AUDIT_PASS
1348
1392
  Otherwise, list the specific issues that need fixing.
1349
1393
  AUDIT_PROMPT
@@ -1429,21 +1473,52 @@ check_definition_of_done() {
1429
1473
 
1430
1474
  local dod_content
1431
1475
  dod_content="$(cat "$DOD_FILE")"
1476
+
1477
+ # Use cumulative diff from loop start (not just HEAD~1) so the evaluator
1478
+ # can see ALL work done across every iteration, not just the latest commit.
1432
1479
  local diff_content
1433
- diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
1480
+ if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
1481
+ diff_content="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no diff)")"
1482
+ diff_content="${diff_content}
1483
+
1484
+ ## Detailed Changes (cumulative diff, truncated to 200 lines)
1485
+ $(git -C "$PROJECT_ROOT" diff "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -200 || echo "(no diff)")"
1486
+ else
1487
+ diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
1488
+ fi
1489
+
1490
+ # Inject verified runtime facts so the evaluator doesn't have to guess
1491
+ local runtime_facts=""
1492
+ if [[ -n "$TEST_CMD" ]]; then
1493
+ if [[ "${TEST_PASSED:-}" == "true" ]]; then
1494
+ runtime_facts="## Verified Runtime Facts (from the loop harness, not from the agent)
1495
+ - Tests: ALL PASSING (verified by running '${TEST_CMD}' after this iteration)
1496
+ - Test output (last 10 lines):
1497
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1498
+ else
1499
+ runtime_facts="## Verified Runtime Facts
1500
+ - Tests: FAILING (verified by running '${TEST_CMD}')
1501
+ - Test output (last 10 lines):
1502
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1503
+ fi
1504
+ fi
1434
1505
 
1435
1506
  local dod_prompt
1436
1507
  read -r -d '' dod_prompt <<DOD_PROMPT || true
1437
- You are evaluating whether code changes satisfy a Definition of Done checklist.
1508
+ You are evaluating whether a project satisfies a Definition of Done checklist.
1509
+ You are reviewing the CUMULATIVE work across all iterations, not just the latest commit.
1438
1510
 
1439
1511
  ## Definition of Done
1440
1512
  ${dod_content}
1441
1513
 
1442
- ## Changes Made (git diff)
1514
+ ${runtime_facts}
1515
+
1516
+ ## Cumulative Changes Made (git diff from start of loop to now)
1443
1517
  ${diff_content}
1444
1518
 
1445
1519
  ## Your Task
1446
- For each item in the Definition of Done, determine if the changes satisfy it.
1520
+ For each item in the Definition of Done, determine if the project satisfies it.
1521
+ The runtime facts above are verified by the harness — trust them as ground truth.
1447
1522
  If ALL items are satisfied, output exactly: DOD_PASS
1448
1523
  Otherwise, list which items are NOT satisfied and why.
1449
1524
  DOD_PROMPT
@@ -1497,6 +1572,14 @@ guard_completion() {
1497
1572
  rejection_reasons+=("tests failing")
1498
1573
  fi
1499
1574
 
1575
+ # Holistic final gate: when all other gates pass, run a project-level assessment
1576
+ # that evaluates the entire codebase against the goal (not just the latest diff)
1577
+ if [[ ${#rejection_reasons[@]} -eq 0 ]]; then
1578
+ if ! run_holistic_gate; then
1579
+ rejection_reasons+=("holistic project assessment found gaps")
1580
+ fi
1581
+ fi
1582
+
1500
1583
  if [[ ${#rejection_reasons[@]} -gt 0 ]]; then
1501
1584
  local reasons_str
1502
1585
  reasons_str="$(printf ', %s' "${rejection_reasons[@]}")"
@@ -1510,6 +1593,70 @@ guard_completion() {
1510
1593
  return 0
1511
1594
  }
1512
1595
 
1596
+ # Holistic gate: evaluates the full project against the original goal.
1597
+ # Only runs when all other gates pass (final checkpoint before acceptance).
1598
+ run_holistic_gate() {
1599
+ # Skip if no starting commit (can't compute cumulative diff)
1600
+ [[ -z "${LOOP_START_COMMIT:-}" ]] && return 0
1601
+
1602
+ local holistic_log="$LOG_DIR/holistic-iter-${ITERATION}.log"
1603
+
1604
+ # Build a project summary: file tree, test count, cumulative diff stats
1605
+ local file_count
1606
+ file_count=$(git -C "$PROJECT_ROOT" ls-files | wc -l | tr -d ' ')
1607
+ local cumulative_stat
1608
+ cumulative_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || echo "(no changes)")"
1609
+ local test_summary=""
1610
+ if [[ -n "${TEST_OUTPUT:-}" ]]; then
1611
+ test_summary="$(echo "$TEST_OUTPUT" | tail -5)"
1612
+ fi
1613
+
1614
+ local holistic_prompt
1615
+ read -r -d '' holistic_prompt <<HOLISTIC_PROMPT || true
1616
+ You are a final quality gate evaluating whether an autonomous coding agent has FULLY achieved its goal.
1617
+
1618
+ ## Original Goal
1619
+ ${GOAL}
1620
+
1621
+ ## Project Stats
1622
+ - Files in repo: ${file_count}
1623
+ - Iterations completed: ${ITERATION}
1624
+ - Cumulative changes: ${cumulative_stat}
1625
+ - Tests: ${TEST_PASSED:-unknown} (command: ${TEST_CMD:-none})
1626
+ ${test_summary:+- Test output: ${test_summary}}
1627
+
1628
+ ## Cumulative Git Changes (diff --stat from start)
1629
+ $(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -40 || echo "(none)")
1630
+
1631
+ ## Your Task
1632
+ Based on the goal and the cumulative work done:
1633
+ 1. Has the goal been FULLY achieved (not partially)?
1634
+ 2. Is there any critical gap that would make this unacceptable for production?
1635
+
1636
+ If the goal is fully achieved, output exactly: HOLISTIC_PASS
1637
+ Otherwise, list the specific gaps remaining.
1638
+ HOLISTIC_PROMPT
1639
+
1640
+ echo -e " ${PURPLE}▸${RESET} Running holistic project assessment..."
1641
+
1642
+ local hol_model
1643
+ hol_model="$(select_audit_model)"
1644
+ local hol_flags=("--model" "$hol_model")
1645
+ if $SKIP_PERMISSIONS; then
1646
+ hol_flags+=("--dangerously-skip-permissions")
1647
+ fi
1648
+
1649
+ claude -p "$holistic_prompt" "${hol_flags[@]}" > "$holistic_log" 2>&1 || true
1650
+
1651
+ if grep -q "HOLISTIC_PASS" "$holistic_log" 2>/dev/null; then
1652
+ echo -e " ${GREEN}✓${RESET} Holistic assessment: passed"
1653
+ return 0
1654
+ else
1655
+ echo -e " ${YELLOW}⚠${RESET} Holistic assessment: gaps found"
1656
+ return 1
1657
+ fi
1658
+ }
1659
+
1513
1660
  # ─── Context Window Management ───────────────────────────────────────────────
1514
1661
  # Prevents prompt from exceeding Claude's context limit (~200K tokens).
1515
1662
  # Trims least-critical sections first when over budget.
@@ -1810,12 +1957,25 @@ ${_test_tail}
1810
1957
  RESUMED_TEST_OUTPUT=""
1811
1958
  fi
1812
1959
 
1960
+ # Build cumulative progress summary showing all iterations' work
1961
+ local cumulative_section=""
1962
+ if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
1963
+ local cum_stat
1964
+ cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
1965
+ if [[ -n "$cum_stat" ]]; then
1966
+ cumulative_section="## Cumulative Progress (all iterations combined)
1967
+ ${cum_stat}
1968
+ "
1969
+ fi
1970
+ fi
1971
+
1813
1972
  cat <<PROMPT
1814
1973
  You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
1815
1974
  ${resume_section}
1816
1975
  ## Your Goal
1817
1976
  ${GOAL}
1818
1977
 
1978
+ ${cumulative_section}
1819
1979
  ## Current Progress
1820
1980
  ${recent_log}
1821
1981
 
@@ -1861,6 +2021,58 @@ ${stuckness_section}
1861
2021
  PROMPT
1862
2022
  }
1863
2023
 
2024
+ # ─── Alternative Strategy Exploration ─────────────────────────────────────────
2025
+ # When stuckness is detected, generate a context-aware alternative strategy.
2026
+ # Uses pattern matching on error type + iteration count to suggest different approaches.
2027
+
2028
# Suggest one alternative strategy that has not been tried yet.
#
# Arguments:
#   $1 - last error text (default "unknown")
#   $2 - iteration number (accepted for interface compatibility; not used)
#   $3 - diagnosis text (default empty)
# Globals read: LOG_DIR, TEST_PASSED, QUALITY_GATE_PASSED, AUDIT_RESULT
# Side effects: appends the key of the chosen strategy to
#   ${LOG_DIR:-/tmp}/strategy-attempts.txt so it is not suggested again.
# Outputs: the strategy section on stdout, or an empty line when every
#   applicable strategy has already been attempted.
explore_alternative_strategy() {
    local last_error="${1:-unknown}"
    local diagnosis="${3:-}"

    # Strategies already handed out in earlier calls (one key per line).
    local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
    local attempted
    attempted=$(cat "$strategy_file" 2>/dev/null || true)

    # Build the list of applicable strategies, highest priority first.
    # Exactly one is picked per call, so a strategy is never recorded as
    # attempted without its text actually being emitted.
    local -a candidates=()

    # No code changes in the last iteration: suggest verifying existing work.
    if grep -qi "no code changes" <<<"$last_error" || [[ "$diagnosis" == *"no code"* ]]; then
        candidates+=("verify_existing")
    fi

    # Tests pass and at least one quality signal is positive, but the loop is
    # still stuck: suggest focusing on what the evaluators complain about.
    if [[ "${TEST_PASSED:-}" == "true" ]] \
        && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
        candidates+=("evaluator_alignment")
    fi

    # Generic fallback: break the problem down.
    candidates+=("decompose")

    local key chosen=""
    for key in "${candidates[@]}"; do
        # Whole-line, fixed-string match against the attempts file content.
        if ! grep -qxF -- "$key" <<<"$attempted"; then
            chosen="$key"
            break
        fi
    done

    local strategy=""
    if [[ -n "$chosen" ]]; then
        printf '%s\n' "$chosen" >>"$strategy_file"
        case "$chosen" in
            evaluator_alignment)
                strategy="## Alternative Strategy: Evaluator Alignment
The code appears functionally complete (tests pass). Focus on satisfying the remaining
quality gate evaluators. Check the DoD log and audit log for specific complaints, then
address those exact points rather than adding new features."
                ;;
            verify_existing)
                strategy="## Alternative Strategy: Verify Existing Work
Recent iterations made no code changes. The work may already be complete.
Run the full test suite, verify all features work, and if everything passes,
commit a verification message and declare LOOP_COMPLETE with evidence."
                ;;
            decompose)
                strategy="## Alternative Strategy: Decompose
Break the remaining work into smaller, independent steps. Focus on one specific
file or function at a time. Read error messages literally — the root cause may
differ from your assumption."
                ;;
        esac
    fi

    echo "$strategy"
}
2075
+
1864
2076
  # ─── Stuckness Detection ─────────────────────────────────────────────────────
1865
2077
  # Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
1866
2078
  # Returns 0 when stuck, 1 when not. Outputs stuckness section and sets STUCKNESS_HINT when stuck.
@@ -1994,6 +2206,17 @@ detect_stuckness() {
1994
2206
  stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
1995
2207
  fi
1996
2208
 
2209
+ # Gate-aware dampening: if tests pass and the agent has made progress overall,
2210
+ # reduce stuckness signal count. The "no code changes" and "identical diffs" signals
2211
+ # fire when code is already complete and the agent is fighting evaluator quirks —
2212
+ # that's not genuine stuckness, it's "done but gates disagree."
2213
+ if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
2214
+ # If at least one quality signal is positive, dampen by 1
2215
+ if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
2216
+ stuckness_signals=$((stuckness_signals - 1))
2217
+ fi
2218
+ fi
2219
+
1997
2220
  # Decision: 2+ signals = stuck
1998
2221
  if [[ "$stuckness_signals" -ge 2 ]]; then
1999
2222
  STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
@@ -2719,6 +2942,11 @@ run_single_agent_loop() {
2719
2942
  initialize_state
2720
2943
  fi
2721
2944
 
2945
+ # Ensure LOOP_START_COMMIT is set (may not be on resume/restart)
2946
+ if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
2947
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
2948
+ fi
2949
+
2722
2950
  # Apply adaptive budget/model before showing banner
2723
2951
  apply_adaptive_budget
2724
2952
  MODEL="$(select_adaptive_model "build" "$MODEL")"
@@ -2746,6 +2974,16 @@ run_single_agent_loop() {
2746
2974
  }
2747
2975
  ITERATION=$(( ITERATION + 1 ))
2748
2976
 
2977
+ # Emit iteration start event for pipeline visibility
2978
+ if type emit_event >/dev/null 2>&1; then
2979
+ emit_event "loop.iteration_start" \
2980
+ "iteration=$ITERATION" \
2981
+ "max=$MAX_ITERATIONS" \
2982
+ "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
2983
+ "agent=${AGENT_NUM:-1}" \
2984
+ "test_passed=${TEST_PASSED:-unknown}"
2985
+ fi
2986
+
2749
2987
  # Root-cause diagnosis and memory-based fix on retry after test failure
2750
2988
  if [[ "${TEST_PASSED:-}" == "false" ]]; then
2751
2989
  # Source memory module for diagnosis and fix lookup
@@ -2915,6 +3153,18 @@ $summary
2915
3153
  write_state
2916
3154
  write_progress
2917
3155
 
3156
+ # Emit iteration complete event for pipeline visibility
3157
+ if type emit_event >/dev/null 2>&1; then
3158
+ emit_event "loop.iteration_complete" \
3159
+ "iteration=$ITERATION" \
3160
+ "max=$MAX_ITERATIONS" \
3161
+ "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
3162
+ "agent=${AGENT_NUM:-1}" \
3163
+ "test_passed=${TEST_PASSED:-unknown}" \
3164
+ "commits=$TOTAL_COMMITS" \
3165
+ "status=${STATUS:-running}"
3166
+ fi
3167
+
2918
3168
  # Update heartbeat
2919
3169
  "$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
2920
3170
  --pid $$ \
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="3.0.0"
9
+ VERSION="3.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="${REPO_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
12
12
 
@@ -88,7 +88,13 @@ memory_ranked_search() {
88
88
  memory_dir="$(repo_memory_dir)"
89
89
  fi
90
90
  memory_dir="${memory_dir:-$HOME/.shipwright/memory}"
91
- [[ ! -d "$memory_dir" ]] && echo "[]" && return 0
91
+ if [[ ! -d "$memory_dir" ]]; then
92
+ info "Memory dir not found at ${memory_dir} — auto-creating"
93
+ mkdir -p "$memory_dir"
94
+ emit_event "memory.not_available" "path=$memory_dir" "action=auto_created"
95
+ echo "[]"
96
+ return 0
97
+ fi
92
98
 
93
99
  # Extract and expand query keywords
94
100
  local keywords
@@ -372,7 +378,10 @@ memory_capture_failure() {
372
378
  pattern=$(echo "$error_output" | head -1 | cut -c1-200)
373
379
  fi
374
380
 
375
- [[ -z "$pattern" ]] && return 0
381
+ if [[ -z "$pattern" ]]; then
382
+ warn "Memory capture: empty error pattern — skipping"
383
+ return 0
384
+ fi
376
385
 
377
386
  # Check for duplicate — increment seen_count if pattern already exists
378
387
  local existing_idx
@@ -987,6 +996,7 @@ memory_inject_context() {
987
996
  done
988
997
 
989
998
  if [[ "$has_memory" == "false" ]]; then
999
+ info "No memory available for repo (${mem_dir}) — first pipeline run will seed it"
990
1000
  echo "# No memory available for this repository yet."
991
1001
  return 0
992
1002
  fi
@@ -1642,6 +1652,11 @@ memory_export() {
1642
1652
  local mem_dir
1643
1653
  mem_dir="$(repo_memory_dir)"
1644
1654
 
1655
+ # Ensure all memory files exist (jq --slurpfile fails on missing files)
1656
+ for f in patterns.json failures.json decisions.json metrics.json; do
1657
+ [[ -f "$mem_dir/$f" ]] || echo '{}' > "$mem_dir/$f"
1658
+ done
1659
+
1645
1660
  # Merge all memory files into a single JSON export
1646
1661
  local export_json
1647
1662
  export_json=$(jq -n \
@@ -1757,8 +1772,10 @@ memory_stats() {
1757
1772
  # Event-based hit rate
1758
1773
  local inject_count capture_count
1759
1774
  if [[ -f "$EVENTS_FILE" ]]; then
1760
- inject_count=$(grep -c '"memory.inject"' "$EVENTS_FILE" 2>/dev/null || echo 0)
1761
- capture_count=$(grep -c '"memory.capture"' "$EVENTS_FILE" 2>/dev/null || echo 0)
1775
+ inject_count=$(grep -c '"memory.inject"' "$EVENTS_FILE" 2>/dev/null || true)
1776
+ inject_count="${inject_count:-0}"
1777
+ capture_count=$(grep -c '"memory.capture"' "$EVENTS_FILE" 2>/dev/null || true)
1778
+ capture_count="${capture_count:-0}"
1762
1779
  echo ""
1763
1780
  echo -e " ${BOLD}Usage${RESET}"
1764
1781
  printf " %-18s %s\n" "Context injections:" "$inject_count"
@@ -7,7 +7,7 @@
7
7
  set -euo pipefail
8
8
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
9
9
 
10
- VERSION="3.0.0"
10
+ VERSION="3.1.0"
11
11
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
12
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
13
13
 
@@ -7,7 +7,7 @@
7
7
  set -euo pipefail
8
8
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
9
9
 
10
- VERSION="3.0.0"
10
+ VERSION="3.1.0"
11
11
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
12
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
13
13
 
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="3.0.0"
9
+ VERSION="3.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12
 
@@ -463,8 +463,10 @@ cmd_report() {
463
463
 
464
464
  if [[ -f "$EVENTS_FILE" ]]; then
465
465
  event_count=$(wc -l < "$EVENTS_FILE" || echo "0")
466
- export_count=$(grep -c '"type":"otel_export"' "$EVENTS_FILE" 2>/dev/null || echo "0")
467
- webhook_count=$(grep -c '"type":"webhook_sent"' "$EVENTS_FILE" 2>/dev/null || echo "0")
466
+ export_count=$(grep -c '"type":"otel_export"' "$EVENTS_FILE" 2>/dev/null || true)
467
+ export_count="${export_count:-0}"
468
+ webhook_count=$(grep -c '"type":"webhook_sent"' "$EVENTS_FILE" 2>/dev/null || true)
469
+ webhook_count="${webhook_count:-0}"
468
470
  last_event_ts=$(tail -n1 "$EVENTS_FILE" | jq -r '.ts // "unknown"' 2>/dev/null || echo "unknown")
469
471
  fi
470
472
 
@@ -7,7 +7,7 @@
7
7
  set -euo pipefail
8
8
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
9
9
 
10
- VERSION="3.0.0"
10
+ VERSION="3.1.0"
11
11
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
12
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
13
13
 
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="3.0.0"
9
+ VERSION="3.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12
 
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="3.0.0"
9
+ VERSION="3.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12