shipwright-cli 3.0.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/README.md +21 -7
  2. package/completions/_shipwright +247 -93
  3. package/completions/shipwright.bash +69 -15
  4. package/completions/shipwright.fish +309 -41
  5. package/config/decision-tiers.json +55 -0
  6. package/config/defaults.json +25 -2
  7. package/config/event-schema.json +142 -5
  8. package/config/policy.json +8 -0
  9. package/dashboard/public/index.html +6 -0
  10. package/dashboard/public/styles.css +76 -0
  11. package/dashboard/server.ts +51 -0
  12. package/dashboard/src/core/api.ts +5 -0
  13. package/dashboard/src/types/api.ts +10 -0
  14. package/dashboard/src/views/metrics.ts +69 -1
  15. package/package.json +3 -3
  16. package/scripts/lib/architecture.sh +2 -1
  17. package/scripts/lib/bootstrap.sh +0 -0
  18. package/scripts/lib/config.sh +0 -0
  19. package/scripts/lib/daemon-adaptive.sh +4 -2
  20. package/scripts/lib/daemon-dispatch.sh +24 -1
  21. package/scripts/lib/daemon-failure.sh +0 -0
  22. package/scripts/lib/daemon-health.sh +0 -0
  23. package/scripts/lib/daemon-patrol.sh +42 -7
  24. package/scripts/lib/daemon-poll.sh +17 -0
  25. package/scripts/lib/daemon-state.sh +17 -0
  26. package/scripts/lib/daemon-triage.sh +1 -1
  27. package/scripts/lib/decide-autonomy.sh +295 -0
  28. package/scripts/lib/decide-scoring.sh +228 -0
  29. package/scripts/lib/decide-signals.sh +462 -0
  30. package/scripts/lib/fleet-failover.sh +0 -0
  31. package/scripts/lib/helpers.sh +19 -18
  32. package/scripts/lib/pipeline-detection.sh +1 -1
  33. package/scripts/lib/pipeline-github.sh +0 -0
  34. package/scripts/lib/pipeline-intelligence.sh +23 -4
  35. package/scripts/lib/pipeline-quality-checks.sh +11 -6
  36. package/scripts/lib/pipeline-quality.sh +0 -0
  37. package/scripts/lib/pipeline-stages.sh +330 -33
  38. package/scripts/lib/pipeline-state.sh +14 -0
  39. package/scripts/lib/policy.sh +0 -0
  40. package/scripts/lib/test-helpers.sh +0 -0
  41. package/scripts/postinstall.mjs +75 -1
  42. package/scripts/signals/example-collector.sh +36 -0
  43. package/scripts/sw +8 -4
  44. package/scripts/sw-activity.sh +1 -7
  45. package/scripts/sw-adaptive.sh +7 -7
  46. package/scripts/sw-adversarial.sh +1 -1
  47. package/scripts/sw-architecture-enforcer.sh +1 -1
  48. package/scripts/sw-auth.sh +1 -1
  49. package/scripts/sw-autonomous.sh +1 -1
  50. package/scripts/sw-changelog.sh +1 -1
  51. package/scripts/sw-checkpoint.sh +1 -1
  52. package/scripts/sw-ci.sh +11 -6
  53. package/scripts/sw-cleanup.sh +1 -1
  54. package/scripts/sw-code-review.sh +36 -17
  55. package/scripts/sw-connect.sh +1 -1
  56. package/scripts/sw-context.sh +1 -1
  57. package/scripts/sw-cost.sh +71 -5
  58. package/scripts/sw-daemon.sh +6 -3
  59. package/scripts/sw-dashboard.sh +1 -1
  60. package/scripts/sw-db.sh +53 -38
  61. package/scripts/sw-decide.sh +685 -0
  62. package/scripts/sw-decompose.sh +1 -1
  63. package/scripts/sw-deps.sh +1 -1
  64. package/scripts/sw-developer-simulation.sh +1 -1
  65. package/scripts/sw-discovery.sh +80 -4
  66. package/scripts/sw-doc-fleet.sh +1 -1
  67. package/scripts/sw-docs-agent.sh +1 -1
  68. package/scripts/sw-docs.sh +1 -1
  69. package/scripts/sw-doctor.sh +1 -1
  70. package/scripts/sw-dora.sh +1 -1
  71. package/scripts/sw-durable.sh +9 -5
  72. package/scripts/sw-e2e-orchestrator.sh +1 -1
  73. package/scripts/sw-eventbus.sh +7 -4
  74. package/scripts/sw-evidence.sh +1 -1
  75. package/scripts/sw-feedback.sh +1 -1
  76. package/scripts/sw-fix.sh +1 -1
  77. package/scripts/sw-fleet-discover.sh +1 -1
  78. package/scripts/sw-fleet-viz.sh +6 -4
  79. package/scripts/sw-fleet.sh +1 -1
  80. package/scripts/sw-github-app.sh +3 -2
  81. package/scripts/sw-github-checks.sh +1 -1
  82. package/scripts/sw-github-deploy.sh +1 -1
  83. package/scripts/sw-github-graphql.sh +1 -1
  84. package/scripts/sw-guild.sh +1 -1
  85. package/scripts/sw-heartbeat.sh +1 -1
  86. package/scripts/sw-hygiene.sh +5 -3
  87. package/scripts/sw-incident.sh +9 -5
  88. package/scripts/sw-init.sh +1 -1
  89. package/scripts/sw-instrument.sh +1 -1
  90. package/scripts/sw-intelligence.sh +11 -6
  91. package/scripts/sw-jira.sh +1 -1
  92. package/scripts/sw-launchd.sh +1 -1
  93. package/scripts/sw-linear.sh +1 -1
  94. package/scripts/sw-logs.sh +1 -1
  95. package/scripts/sw-loop.sh +338 -32
  96. package/scripts/sw-memory.sh +23 -6
  97. package/scripts/sw-mission-control.sh +1 -1
  98. package/scripts/sw-model-router.sh +3 -2
  99. package/scripts/sw-otel.sh +8 -4
  100. package/scripts/sw-oversight.sh +1 -1
  101. package/scripts/sw-pipeline-composer.sh +3 -1
  102. package/scripts/sw-pipeline-vitals.sh +11 -6
  103. package/scripts/sw-pipeline.sh +92 -8
  104. package/scripts/sw-pm.sh +5 -4
  105. package/scripts/sw-pr-lifecycle.sh +7 -4
  106. package/scripts/sw-predictive.sh +11 -5
  107. package/scripts/sw-prep.sh +1 -1
  108. package/scripts/sw-ps.sh +1 -1
  109. package/scripts/sw-public-dashboard.sh +3 -2
  110. package/scripts/sw-quality.sh +21 -10
  111. package/scripts/sw-reaper.sh +1 -1
  112. package/scripts/sw-recruit.sh +1 -1
  113. package/scripts/sw-regression.sh +1 -1
  114. package/scripts/sw-release-manager.sh +1 -1
  115. package/scripts/sw-release.sh +1 -1
  116. package/scripts/sw-remote.sh +1 -1
  117. package/scripts/sw-replay.sh +1 -1
  118. package/scripts/sw-retro.sh +1 -1
  119. package/scripts/sw-review-rerun.sh +1 -1
  120. package/scripts/sw-scale.sh +69 -11
  121. package/scripts/sw-security-audit.sh +1 -1
  122. package/scripts/sw-self-optimize.sh +168 -4
  123. package/scripts/sw-session.sh +3 -3
  124. package/scripts/sw-setup.sh +1 -1
  125. package/scripts/sw-standup.sh +1 -1
  126. package/scripts/sw-status.sh +1 -1
  127. package/scripts/sw-strategic.sh +11 -6
  128. package/scripts/sw-stream.sh +7 -4
  129. package/scripts/sw-swarm.sh +3 -2
  130. package/scripts/sw-team-stages.sh +1 -1
  131. package/scripts/sw-templates.sh +3 -3
  132. package/scripts/sw-testgen.sh +11 -6
  133. package/scripts/sw-tmux-pipeline.sh +1 -1
  134. package/scripts/sw-tmux.sh +35 -1
  135. package/scripts/sw-trace.sh +1 -1
  136. package/scripts/sw-tracker.sh +1 -1
  137. package/scripts/sw-triage.sh +7 -7
  138. package/scripts/sw-upgrade.sh +1 -1
  139. package/scripts/sw-ux.sh +1 -1
  140. package/scripts/sw-webhook.sh +3 -2
  141. package/scripts/sw-widgets.sh +7 -4
  142. package/scripts/sw-worktree.sh +1 -1
  143. package/scripts/update-homebrew-sha.sh +21 -15
@@ -14,6 +14,7 @@ trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
14
14
  unset CLAUDECODE 2>/dev/null || true
15
15
  # Ignore SIGHUP so tmux attach/detach doesn't kill long-running agent sessions
16
16
  trap '' HUP
17
+ trap '' SIGPIPE
17
18
 
18
19
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
19
20
 
@@ -24,6 +25,14 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
24
25
  # shellcheck source=lib/helpers.sh
25
26
  [[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
26
27
  [[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
28
+ # Source DB for dual-write (emit_event → JSONL + SQLite).
29
+ # Note: do NOT call init_schema here — the pipeline (sw-pipeline.sh) owns schema
30
+ # initialization. Calling it here would create an empty DB that shadows JSON cost data.
31
+ if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
32
+ source "$SCRIPT_DIR/sw-db.sh" 2>/dev/null || true
33
+ fi
34
+ # Cross-pipeline discovery (learnings from other pipeline runs)
35
+ [[ -f "$SCRIPT_DIR/sw-discovery.sh" ]] && source "$SCRIPT_DIR/sw-discovery.sh" 2>/dev/null || true
27
36
  # Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
28
37
  [[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
29
38
  [[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
@@ -63,7 +72,7 @@ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
63
72
  SESSION_RESTART=false
64
73
  RESTART_COUNT=0
65
74
  REPO_OVERRIDE=""
66
- VERSION="3.0.0"
75
+ VERSION="3.2.0"
67
76
 
68
77
  # ─── Token Tracking ─────────────────────────────────────────────────────────
69
78
  LOOP_INPUT_TOKENS=0
@@ -655,6 +664,9 @@ initialize_state() {
655
664
  STATUS="running"
656
665
  LOG_ENTRIES=""
657
666
 
667
+ # Record starting commit for cumulative diff in quality gates
668
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
669
+
658
670
  write_state
659
671
  }
660
672
 
@@ -726,6 +738,11 @@ resume_state() {
726
738
  START_EPOCH="$(now_epoch)"
727
739
  STATUS="running"
728
740
 
741
+ # Set starting commit for cumulative diff (approximate: use earliest tracked commit)
742
+ if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
743
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
744
+ fi
745
+
729
746
  # If we hit max iterations before, warn user to extend
730
747
  if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
731
748
  warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
@@ -872,7 +889,8 @@ validate_claude_output() {
872
889
 
873
890
  # Check for obviously corrupt output (API errors dumped as code)
874
891
  local total_changed
875
- total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || echo "0")
892
+ total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || true)
893
+ total_changed="${total_changed:-0}"
876
894
  if [[ "$total_changed" -eq 0 ]]; then
877
895
  warn "Claude iteration produced no file changes"
878
896
  issues=$((issues + 1))
@@ -960,13 +978,14 @@ check_fatal_error() {
960
978
  local match
961
979
  match=$(grep -iE "$fatal_patterns" "$log_file" 2>/dev/null | head -1 | cut -c1-120)
962
980
  error "Fatal CLI error: $match"
963
- return 0 # fatal error detected
981
+ return 1 # fatal error detected
964
982
  fi
965
983
 
966
984
  # Non-zero exit + tiny output = likely CLI crash
967
985
  if [[ "$cli_exit_code" -ne 0 ]]; then
968
986
  local line_count
969
- line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || echo 0)
987
+ line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
988
+ line_count="${line_count:-0}"
970
989
  if [[ "$line_count" -lt 3 ]]; then
971
990
  local content
972
991
  content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
@@ -1140,7 +1159,8 @@ diagnose_failure() {
1140
1159
  local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
1141
1160
  local repeat_count=0
1142
1161
  if [[ -f "$diagnosis_file" ]]; then
1143
- repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || echo "0")
1162
+ repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || true)
1163
+ repeat_count="${repeat_count:-0}"
1144
1164
  fi
1145
1165
  echo "$diagnosis" >> "$diagnosis_file"
1146
1166
 
@@ -1317,33 +1337,60 @@ run_audit_agent() {
1317
1337
  local log_file="$LOG_DIR/iteration-${ITERATION}.log"
1318
1338
  local audit_log="$LOG_DIR/audit-iter-${ITERATION}.log"
1319
1339
 
1320
- # Gather context: tail of implementer output + git diff
1340
+ # Gather context: tail of implementer output + cumulative diff
1321
1341
  local impl_tail
1322
1342
  impl_tail="$(tail -100 "$log_file" 2>/dev/null || echo "(no output)")"
1323
- local diff_stat
1324
- diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
1343
+
1344
+ # Use cumulative diff from loop start so auditor sees ALL work, not just latest commit
1345
+ local diff_stat cumulative_note=""
1346
+ if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
1347
+ diff_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no changes)")"
1348
+ cumulative_note="Note: This diff shows ALL changes since the loop started (iteration 1 through ${ITERATION}), not just the latest commit."
1349
+ else
1350
+ diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
1351
+ fi
1352
+
1353
+ # Include verified test status so auditor doesn't have to guess
1354
+ local test_context=""
1355
+ if [[ -n "$TEST_CMD" ]]; then
1356
+ if [[ "${TEST_PASSED:-}" == "true" ]]; then
1357
+ test_context="## Verified Test Status (from harness, not from agent)
1358
+ Tests: ALL PASSING (command: ${TEST_CMD})"
1359
+ else
1360
+ test_context="## Verified Test Status (from harness)
1361
+ Tests: FAILING (command: ${TEST_CMD})
1362
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1363
+ fi
1364
+ fi
1325
1365
 
1326
1366
  local audit_prompt
1327
1367
  read -r -d '' audit_prompt <<AUDIT_PROMPT || true
1328
- You are an independent code auditor reviewing an autonomous coding agent.
1368
+ You are an independent code auditor reviewing an autonomous coding agent's CUMULATIVE work.
1369
+ This is iteration ${ITERATION}. The agent may have done most of the work in earlier iterations.
1329
1370
 
1330
1371
  ## Goal the agent was working toward
1331
1372
  ${GOAL}
1332
1373
 
1333
- ## Agent Output (last 100 lines)
1374
+ ## Agent Output This Iteration (last 100 lines)
1334
1375
  ${impl_tail}
1335
1376
 
1336
- ## Changes Made (git diff --stat)
1377
+ ## Cumulative Changes Made (git diff --stat)
1378
+ ${cumulative_note}
1337
1379
  ${diff_stat}
1338
1380
 
1381
+ ${test_context}
1382
+
1339
1383
  ## Your Task
1340
- Critically review the work:
1341
- 1. Did the agent make meaningful progress toward the goal?
1342
- 2. Are there obvious bugs, logic errors, or security issues?
1384
+ Critically review the CUMULATIVE work (not just the latest iteration):
1385
+ 1. Has the agent made meaningful progress toward the goal across all iterations?
1386
+ 2. Are there obvious bugs, logic errors, or security issues in the current codebase?
1343
1387
  3. Did the agent leave incomplete work (TODOs, placeholder code)?
1344
1388
  4. Are there any regressions or broken patterns?
1345
1389
  5. Is the code quality acceptable?
1346
1390
 
1391
+ IMPORTANT: If the current iteration made small or no code changes, that may be acceptable
1392
+ if earlier iterations already completed the substantive work. Judge the whole body of work.
1393
+
1347
1394
  If the work is acceptable and moves toward the goal, output exactly: AUDIT_PASS
1348
1395
  Otherwise, list the specific issues that need fixing.
1349
1396
  AUDIT_PROMPT
@@ -1429,21 +1476,52 @@ check_definition_of_done() {
1429
1476
 
1430
1477
  local dod_content
1431
1478
  dod_content="$(cat "$DOD_FILE")"
1479
+
1480
+ # Use cumulative diff from loop start (not just HEAD~1) so the evaluator
1481
+ # can see ALL work done across every iteration, not just the latest commit.
1432
1482
  local diff_content
1433
- diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
1483
+ if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
1484
+ diff_content="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no diff)")"
1485
+ diff_content="${diff_content}
1486
+
1487
+ ## Detailed Changes (cumulative diff, truncated to 200 lines)
1488
+ $(git -C "$PROJECT_ROOT" diff "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -200 || echo "(no diff)")"
1489
+ else
1490
+ diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
1491
+ fi
1492
+
1493
+ # Inject verified runtime facts so the evaluator doesn't have to guess
1494
+ local runtime_facts=""
1495
+ if [[ -n "$TEST_CMD" ]]; then
1496
+ if [[ "${TEST_PASSED:-}" == "true" ]]; then
1497
+ runtime_facts="## Verified Runtime Facts (from the loop harness, not from the agent)
1498
+ - Tests: ALL PASSING (verified by running '${TEST_CMD}' after this iteration)
1499
+ - Test output (last 10 lines):
1500
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1501
+ else
1502
+ runtime_facts="## Verified Runtime Facts
1503
+ - Tests: FAILING (verified by running '${TEST_CMD}')
1504
+ - Test output (last 10 lines):
1505
+ $(echo "${TEST_OUTPUT:-}" | tail -10)"
1506
+ fi
1507
+ fi
1434
1508
 
1435
1509
  local dod_prompt
1436
1510
  read -r -d '' dod_prompt <<DOD_PROMPT || true
1437
- You are evaluating whether code changes satisfy a Definition of Done checklist.
1511
+ You are evaluating whether a project satisfies a Definition of Done checklist.
1512
+ You are reviewing the CUMULATIVE work across all iterations, not just the latest commit.
1438
1513
 
1439
1514
  ## Definition of Done
1440
1515
  ${dod_content}
1441
1516
 
1442
- ## Changes Made (git diff)
1517
+ ${runtime_facts}
1518
+
1519
+ ## Cumulative Changes Made (git diff from start of loop to now)
1443
1520
  ${diff_content}
1444
1521
 
1445
1522
  ## Your Task
1446
- For each item in the Definition of Done, determine if the changes satisfy it.
1523
+ For each item in the Definition of Done, determine if the project satisfies it.
1524
+ The runtime facts above are verified by the harness — trust them as ground truth.
1447
1525
  If ALL items are satisfied, output exactly: DOD_PASS
1448
1526
  Otherwise, list which items are NOT satisfied and why.
1449
1527
  DOD_PROMPT
@@ -1497,6 +1575,14 @@ guard_completion() {
1497
1575
  rejection_reasons+=("tests failing")
1498
1576
  fi
1499
1577
 
1578
+ # Holistic final gate: when all other gates pass, run a project-level assessment
1579
+ # that evaluates the entire codebase against the goal (not just the latest diff)
1580
+ if [[ ${#rejection_reasons[@]} -eq 0 ]]; then
1581
+ if ! run_holistic_gate; then
1582
+ rejection_reasons+=("holistic project assessment found gaps")
1583
+ fi
1584
+ fi
1585
+
1500
1586
  if [[ ${#rejection_reasons[@]} -gt 0 ]]; then
1501
1587
  local reasons_str
1502
1588
  reasons_str="$(printf ', %s' "${rejection_reasons[@]}")"
@@ -1510,17 +1596,88 @@ guard_completion() {
1510
1596
  return 0
1511
1597
  }
1512
1598
 
1599
+ # Holistic gate: evaluates the full project against the original goal.
1600
+ # Only runs when all other gates pass (final checkpoint before acceptance).
1601
+ run_holistic_gate() {
1602
+ # Skip if no starting commit (can't compute cumulative diff)
1603
+ [[ -z "${LOOP_START_COMMIT:-}" ]] && return 0
1604
+
1605
+ local holistic_log="$LOG_DIR/holistic-iter-${ITERATION}.log"
1606
+
1607
+ # Build a project summary: file tree, test count, cumulative diff stats
1608
+ local file_count
1609
+ file_count=$(git -C "$PROJECT_ROOT" ls-files | wc -l | tr -d ' ')
1610
+ local cumulative_stat
1611
+ cumulative_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || echo "(no changes)")"
1612
+ local test_summary=""
1613
+ if [[ -n "${TEST_OUTPUT:-}" ]]; then
1614
+ test_summary="$(echo "$TEST_OUTPUT" | tail -5)"
1615
+ fi
1616
+
1617
+ local holistic_prompt
1618
+ read -r -d '' holistic_prompt <<HOLISTIC_PROMPT || true
1619
+ You are a final quality gate evaluating whether an autonomous coding agent has FULLY achieved its goal.
1620
+
1621
+ ## Original Goal
1622
+ ${GOAL}
1623
+
1624
+ ## Project Stats
1625
+ - Files in repo: ${file_count}
1626
+ - Iterations completed: ${ITERATION}
1627
+ - Cumulative changes: ${cumulative_stat}
1628
+ - Tests: ${TEST_PASSED:-unknown} (command: ${TEST_CMD:-none})
1629
+ ${test_summary:+- Test output: ${test_summary}}
1630
+
1631
+ ## Cumulative Git Changes (diff --stat from start)
1632
+ $(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -40 || echo "(none)")
1633
+
1634
+ ## Your Task
1635
+ Based on the goal and the cumulative work done:
1636
+ 1. Has the goal been FULLY achieved (not partially)?
1637
+ 2. Is there any critical gap that would make this unacceptable for production?
1638
+
1639
+ If the goal is fully achieved, output exactly: HOLISTIC_PASS
1640
+ Otherwise, list the specific gaps remaining.
1641
+ HOLISTIC_PROMPT
1642
+
1643
+ echo -e " ${PURPLE}▸${RESET} Running holistic project assessment..."
1644
+
1645
+ local hol_model
1646
+ hol_model="$(select_audit_model)"
1647
+ local hol_flags=("--model" "$hol_model")
1648
+ if $SKIP_PERMISSIONS; then
1649
+ hol_flags+=("--dangerously-skip-permissions")
1650
+ fi
1651
+
1652
+ claude -p "$holistic_prompt" "${hol_flags[@]}" > "$holistic_log" 2>&1 || true
1653
+
1654
+ if grep -q "HOLISTIC_PASS" "$holistic_log" 2>/dev/null; then
1655
+ echo -e " ${GREEN}✓${RESET} Holistic assessment: passed"
1656
+ return 0
1657
+ else
1658
+ echo -e " ${YELLOW}⚠${RESET} Holistic assessment: gaps found"
1659
+ return 1
1660
+ fi
1661
+ }
1662
+
1513
1663
  # ─── Context Window Management ───────────────────────────────────────────────
1514
1664
  # Prevents prompt from exceeding Claude's context limit (~200K tokens).
1515
1665
  # Trims least-critical sections first when over budget.
1516
1666
 
1517
- CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-180000}" # ~45K tokens at 4 chars/token
1667
+ CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-$(_config_get_int "loop.context_budget_chars" 180000 2>/dev/null || echo 180000)}" # ~45K tokens at 4 chars/token
1518
1668
 
1519
1669
  manage_context_window() {
1520
1670
  local prompt="$1"
1521
1671
  local budget="${CONTEXT_BUDGET_CHARS}"
1522
1672
  local current_len=${#prompt}
1523
1673
 
1674
+ # Read trimming tunables from config (env > daemon-config > policy > defaults.json)
1675
+ local trim_memory_chars trim_git_entries trim_hotspot_files trim_test_lines
1676
+ trim_memory_chars=$(_config_get_int "loop.context_trim_memory_chars" 20000 2>/dev/null || echo 20000)
1677
+ trim_git_entries=$(_config_get_int "loop.context_trim_git_entries" 10 2>/dev/null || echo 10)
1678
+ trim_hotspot_files=$(_config_get_int "loop.context_trim_hotspot_files" 5 2>/dev/null || echo 5)
1679
+ trim_test_lines=$(_config_get_int "loop.context_trim_test_lines" 50 2>/dev/null || echo 50)
1680
+
1524
1681
  if [[ "$current_len" -le "$budget" ]]; then
1525
1682
  echo "$prompt"
1526
1683
  return
@@ -1534,19 +1691,19 @@ manage_context_window() {
1534
1691
  trimmed=$(echo "$trimmed" | awk '/^## Performance Baselines/{skip=1; next} skip && /^## [^#]/{skip=0} !skip{print}')
1535
1692
  fi
1536
1693
 
1537
- # 2. Trim file hotspots to top 5
1694
+ # 2. Trim file hotspots to top N
1538
1695
  if [[ "${#trimmed}" -gt "$budget" ]]; then
1539
- trimmed=$(echo "$trimmed" | awk '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>5) next} {print}')
1696
+ trimmed=$(echo "$trimmed" | awk -v max="$trim_hotspot_files" '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>max) next} {print}')
1540
1697
  fi
1541
1698
 
1542
- # 3. Trim git log to last 10 entries
1699
+ # 3. Trim git log to last N entries
1543
1700
  if [[ "${#trimmed}" -gt "$budget" ]]; then
1544
- trimmed=$(echo "$trimmed" | awk '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>10) next} {print}')
1701
+ trimmed=$(echo "$trimmed" | awk -v max="$trim_git_entries" '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>max) next} {print}')
1545
1702
  fi
1546
1703
 
1547
- # 4. Truncate memory context to first 20K chars
1704
+ # 4. Truncate memory context to first N chars
1548
1705
  if [[ "${#trimmed}" -gt "$budget" ]]; then
1549
- trimmed=$(echo "$trimmed" | awk -v max=20000 '
1706
+ trimmed=$(echo "$trimmed" | awk -v max="$trim_memory_chars" '
1550
1707
  /## Memory Context/{mem=1; skip_rest=0; chars=0; print; next}
1551
1708
  mem && /^## [^#]/{mem=0; print; next}
1552
1709
  mem{chars+=length($0)+1; if(chars>max){print "... (memory truncated for context budget)"; skip_rest=1; mem=0; next}}
@@ -1556,11 +1713,11 @@ manage_context_window() {
1556
1713
  ')
1557
1714
  fi
1558
1715
 
1559
- # 5. Truncate test output to last 50 lines
1716
+ # 5. Truncate test output to last N lines
1560
1717
  if [[ "${#trimmed}" -gt "$budget" ]]; then
1561
- trimmed=$(echo "$trimmed" | awk '
1718
+ trimmed=$(echo "$trimmed" | awk -v max="$trim_test_lines" '
1562
1719
  /## Test Results/{found=1; buf=""; print; next}
1563
- found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>50)?(n-49):1; for(i=start;i<=n;i++) if(arr[i]!="") print arr[i]; print; next}
1720
+ found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>max)?(n-max+1):1; for(i=start;i<=n;i++) if(arr[i]!="") print arr[i]; print; next}
1564
1721
  found{buf=buf $0 "\n"; next}
1565
1722
  {print}
1566
1723
  ')
@@ -1639,6 +1796,16 @@ Fix these specific errors. Each line above is one distinct error from the test o
1639
1796
  memory_section="$("$SCRIPT_DIR/sw-memory.sh" inject build 2>/dev/null || true)"
1640
1797
  fi
1641
1798
 
1799
+ # Cross-pipeline discovery injection (learnings from other pipeline runs)
1800
+ local discovery_section=""
1801
+ if type inject_discoveries >/dev/null 2>&1; then
1802
+ local disc_output
1803
+ disc_output="$(inject_discoveries "${GOAL:-}" 2>/dev/null || true)"
1804
+ if [[ -n "$disc_output" ]]; then
1805
+ discovery_section="$disc_output"
1806
+ fi
1807
+ fi
1808
+
1642
1809
  # DORA baselines for context
1643
1810
  local dora_section=""
1644
1811
  if type memory_get_dora_baseline >/dev/null 2>&1; then
@@ -1810,12 +1977,25 @@ ${_test_tail}
1810
1977
  RESUMED_TEST_OUTPUT=""
1811
1978
  fi
1812
1979
 
1980
+ # Build cumulative progress summary showing all iterations' work
1981
+ local cumulative_section=""
1982
+ if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
1983
+ local cum_stat
1984
+ cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
1985
+ if [[ -n "$cum_stat" ]]; then
1986
+ cumulative_section="## Cumulative Progress (all iterations combined)
1987
+ ${cum_stat}
1988
+ "
1989
+ fi
1990
+ fi
1991
+
1813
1992
  cat <<PROMPT
1814
1993
  You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
1815
1994
  ${resume_section}
1816
1995
  ## Your Goal
1817
1996
  ${GOAL}
1818
1997
 
1998
+ ${cumulative_section}
1819
1999
  ## Current Progress
1820
2000
  ${recent_log}
1821
2001
 
@@ -1830,6 +2010,9 @@ ${error_summary_section:+$error_summary_section
1830
2010
  ${memory_section:+## Memory Context
1831
2011
  $memory_section
1832
2012
  }
2013
+ ${discovery_section:+## Cross-Pipeline Learnings
2014
+ $discovery_section
2015
+ }
1833
2016
  ${dora_section:+$dora_section
1834
2017
  }
1835
2018
  ${intelligence_section:+$intelligence_section
@@ -1844,6 +2027,13 @@ ${restart_section:+$restart_section
1844
2027
  5. Commit your work with a descriptive message
1845
2028
  6. When the goal is FULLY achieved, output exactly: LOOP_COMPLETE
1846
2029
 
2030
+ ## Context Efficiency
2031
+ - Batch independent tool calls in parallel — avoid sequential round-trips
2032
+ - Use targeted file reads (offset/limit) instead of reading entire large files
2033
+ - Delegate large searches to subagents — only import the summary
2034
+ - Filter tool results with grep/jq before reasoning over them
2035
+ - Keep working memory lean — summarize completed steps, don't preserve full outputs
2036
+
1847
2037
  ${audit_section}
1848
2038
 
1849
2039
  ${audit_feedback_section}
@@ -1861,6 +2051,58 @@ ${stuckness_section}
1861
2051
  PROMPT
1862
2052
  }
1863
2053
 
2054
+ # ─── Alternative Strategy Exploration ─────────────────────────────────────────
2055
+ # When stuckness is detected, generate a context-aware alternative strategy.
2056
+ # Uses pattern matching on error type + iteration count to suggest different approaches.
2057
+
2058
+ explore_alternative_strategy() {
2059
+ local last_error="${1:-unknown}"
2060
+ local iteration="${2:-0}"
2061
+ local diagnosis="${3:-}"
2062
+
2063
+ # Track attempted strategies to avoid repeating them
2064
+ local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
2065
+ local attempted
2066
+ attempted=$(cat "$strategy_file" 2>/dev/null || true)
2067
+
2068
+ local strategy=""
2069
+
2070
+ # If quality gates are passing but evaluators disagree, suggest focusing on evaluator alignment
2071
+ if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
2072
+ if ! echo "$attempted" | grep -q "evaluator_alignment"; then
2073
+ echo "evaluator_alignment" >> "$strategy_file"
2074
+ strategy="## Alternative Strategy: Evaluator Alignment
2075
+ The code appears functionally complete (tests pass). Focus on satisfying the remaining
2076
+ quality gate evaluators. Check the DoD log and audit log for specific complaints, then
2077
+ address those exact points rather than adding new features."
2078
+ fi
2079
+ fi
2080
+
2081
+ # If no code changes in last iteration, suggest verifying existing work
2082
+ if echo "$last_error" | grep -qi "no code changes" || [[ "$diagnosis" == *"no code"* ]]; then
2083
+ if ! echo "$attempted" | grep -q "verify_existing"; then
2084
+ echo "verify_existing" >> "$strategy_file"
2085
+ strategy="## Alternative Strategy: Verify Existing Work
2086
+ Recent iterations made no code changes. The work may already be complete.
2087
+ Run the full test suite, verify all features work, and if everything passes,
2088
+ commit a verification message and declare LOOP_COMPLETE with evidence."
2089
+ fi
2090
+ fi
2091
+
2092
+ # Generic fallback: break the problem down
2093
+ if [[ -z "$strategy" ]]; then
2094
+ if ! echo "$attempted" | grep -q "decompose"; then
2095
+ echo "decompose" >> "$strategy_file"
2096
+ strategy="## Alternative Strategy: Decompose
2097
+ Break the remaining work into smaller, independent steps. Focus on one specific
2098
+ file or function at a time. Read error messages literally — the root cause may
2099
+ differ from your assumption."
2100
+ fi
2101
+ fi
2102
+
2103
+ echo "$strategy"
2104
+ }
2105
+
1864
2106
  # ─── Stuckness Detection ─────────────────────────────────────────────────────
1865
2107
  # Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
1866
2108
  # Returns 0 when stuck, 1 when not. Outputs stuckness section and sets STUCKNESS_HINT when stuck.
@@ -1890,7 +2132,8 @@ detect_stuckness() {
1890
2132
  local stuckness_reasons=()
1891
2133
  local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
1892
2134
  local tracking_lines
1893
- tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || echo "0")
2135
+ tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || true)
2136
+ tracking_lines="${tracking_lines:-0}"
1894
2137
 
1895
2138
  # Signal 1: Text overlap (existing logic) — compare last 2 iteration logs
1896
2139
  if [[ "$iteration" -ge 3 ]]; then
@@ -1905,7 +2148,8 @@ detect_stuckness() {
1905
2148
 
1906
2149
  if [[ -n "$lines1" && -n "$lines2" ]]; then
1907
2150
  total=$(echo "$lines1" | wc -l | tr -d ' ')
1908
- common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || echo "0")
2151
+ common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || true)
2152
+ common="${common:-0}"
1909
2153
  if [[ "$total" -gt 0 ]]; then
1910
2154
  overlap_pct=$(( common * 100 / total ))
1911
2155
  else
@@ -1977,7 +2221,8 @@ detect_stuckness() {
1977
2221
 
1978
2222
  # Signal 6: Git diff size — no or minimal code changes (existing)
1979
2223
  local diff_lines
1980
- diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || echo "0")
2224
+ diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || true)
2225
+ diff_lines="${diff_lines:-0}"
1981
2226
  if [[ "${diff_lines:-0}" -lt 5 ]] && [[ "$iteration" -gt 2 ]]; then
1982
2227
  stuckness_signals=$((stuckness_signals + 1))
1983
2228
  stuckness_reasons+=("no code changes in last iteration")
@@ -1994,6 +2239,17 @@ detect_stuckness() {
1994
2239
  stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
1995
2240
  fi
1996
2241
 
2242
+ # Gate-aware dampening: if tests pass and the agent has made progress overall,
2243
+ # reduce stuckness signal count. The "no code changes" and "identical diffs" signals
2244
+ # fire when code is already complete and the agent is fighting evaluator quirks —
2245
+ # that's not genuine stuckness, it's "done but gates disagree."
2246
+ if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
2247
+ # If at least one quality signal is positive, dampen by 1
2248
+ if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
2249
+ stuckness_signals=$((stuckness_signals - 1))
2250
+ fi
2251
+ fi
2252
+
1997
2253
  # Decision: 2+ signals = stuck
1998
2254
  if [[ "$stuckness_signals" -ge 2 ]]; then
1999
2255
  STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
@@ -2133,7 +2389,7 @@ compose_worker_prompt() {
2133
2389
  role_desc="$recruit_desc"
2134
2390
  fi
2135
2391
  fi
2136
- # Fallback to hardcoded descriptions
2392
+ # Fallback to built-in role descriptions
2137
2393
  if [[ -z "$role_desc" ]]; then
2138
2394
  case "$role" in
2139
2395
  builder) role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
@@ -2189,10 +2445,33 @@ run_claude_iteration() {
2189
2445
  local final_prompt
2190
2446
  final_prompt=$(manage_context_window "$prompt")
2191
2447
 
2448
+ local raw_prompt_chars=${#prompt}
2192
2449
  local prompt_chars=${#final_prompt}
2193
2450
  local approx_tokens=$((prompt_chars / 4))
2194
2451
  info "Prompt: ~${approx_tokens} tokens (${prompt_chars} chars)"
2195
2452
 
2453
+ # Emit context efficiency metrics
2454
+ if type emit_event >/dev/null 2>&1; then
2455
+ local trim_ratio=0
2456
+ local budget_utilization=0
2457
+ if [[ "$raw_prompt_chars" -gt 0 ]]; then
2458
+ trim_ratio=$(awk -v raw="$raw_prompt_chars" -v trimmed="$prompt_chars" \
2459
+ 'BEGIN { printf "%.1f", ((raw - trimmed) / raw) * 100 }')
2460
+ fi
2461
+ if [[ "${CONTEXT_BUDGET_CHARS:-0}" -gt 0 ]]; then
2462
+ budget_utilization=$(awk -v used="$prompt_chars" -v budget="${CONTEXT_BUDGET_CHARS}" \
2463
+ 'BEGIN { printf "%.1f", (used / budget) * 100 }')
2464
+ fi
2465
+ emit_event "loop.context_efficiency" \
2466
+ "iteration=$ITERATION" \
2467
+ "raw_prompt_chars=$raw_prompt_chars" \
2468
+ "trimmed_prompt_chars=$prompt_chars" \
2469
+ "trim_ratio=$trim_ratio" \
2470
+ "budget_utilization=$budget_utilization" \
2471
+ "budget_chars=${CONTEXT_BUDGET_CHARS:-0}" \
2472
+ "job_id=${PIPELINE_JOB_ID:-loop-$$}" 2>/dev/null || true
2473
+ fi
2474
+
2196
2475
  local flags
2197
2476
  flags="$(build_claude_flags)"
2198
2477
 
@@ -2719,6 +2998,11 @@ run_single_agent_loop() {
2719
2998
  initialize_state
2720
2999
  fi
2721
3000
 
3001
+ # Ensure LOOP_START_COMMIT is set (it may be unset on resume/restart)
3002
+ if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
3003
+ LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
3004
+ fi
3005
+
2722
3006
  # Apply adaptive budget/model before showing banner
2723
3007
  apply_adaptive_budget
2724
3008
  MODEL="$(select_adaptive_model "build" "$MODEL")"
@@ -2746,6 +3030,16 @@ run_single_agent_loop() {
2746
3030
  }
2747
3031
  ITERATION=$(( ITERATION + 1 ))
2748
3032
 
3033
+ # Emit iteration start event for pipeline visibility
3034
+ if type emit_event >/dev/null 2>&1; then
3035
+ emit_event "loop.iteration_start" \
3036
+ "iteration=$ITERATION" \
3037
+ "max=$MAX_ITERATIONS" \
3038
+ "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
3039
+ "agent=${AGENT_NUM:-1}" \
3040
+ "test_passed=${TEST_PASSED:-unknown}"
3041
+ fi
3042
+
2749
3043
  # Root-cause diagnosis and memory-based fix on retry after test failure
2750
3044
  if [[ "${TEST_PASSED:-}" == "false" ]]; then
2751
3045
  # Source memory module for diagnosis and fix lookup
@@ -2915,6 +3209,18 @@ $summary
2915
3209
  write_state
2916
3210
  write_progress
2917
3211
 
3212
+ # Emit iteration complete event for pipeline visibility
3213
+ if type emit_event >/dev/null 2>&1; then
3214
+ emit_event "loop.iteration_complete" \
3215
+ "iteration=$ITERATION" \
3216
+ "max=$MAX_ITERATIONS" \
3217
+ "job_id=${PIPELINE_JOB_ID:-loop-$$}" \
3218
+ "agent=${AGENT_NUM:-1}" \
3219
+ "test_passed=${TEST_PASSED:-unknown}" \
3220
+ "commits=$TOTAL_COMMITS" \
3221
+ "status=${STATUS:-running}"
3222
+ fi
3223
+
2918
3224
  # Update heartbeat
2919
3225
  "$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
2920
3226
  --pid $$ \