shipwright-cli 3.0.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/completions/_shipwright +247 -93
- package/completions/shipwright.bash +69 -15
- package/completions/shipwright.fish +309 -41
- package/config/decision-tiers.json +55 -0
- package/config/defaults.json +25 -2
- package/config/event-schema.json +142 -5
- package/config/policy.json +8 -0
- package/dashboard/public/index.html +6 -0
- package/dashboard/public/styles.css +76 -0
- package/dashboard/server.ts +51 -0
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/types/api.ts +10 -0
- package/dashboard/src/views/metrics.ts +69 -1
- package/package.json +3 -3
- package/scripts/lib/architecture.sh +2 -1
- package/scripts/lib/bootstrap.sh +0 -0
- package/scripts/lib/config.sh +0 -0
- package/scripts/lib/daemon-adaptive.sh +4 -2
- package/scripts/lib/daemon-dispatch.sh +24 -1
- package/scripts/lib/daemon-failure.sh +0 -0
- package/scripts/lib/daemon-health.sh +0 -0
- package/scripts/lib/daemon-patrol.sh +42 -7
- package/scripts/lib/daemon-poll.sh +17 -0
- package/scripts/lib/daemon-state.sh +17 -0
- package/scripts/lib/daemon-triage.sh +1 -1
- package/scripts/lib/decide-autonomy.sh +295 -0
- package/scripts/lib/decide-scoring.sh +228 -0
- package/scripts/lib/decide-signals.sh +462 -0
- package/scripts/lib/fleet-failover.sh +0 -0
- package/scripts/lib/helpers.sh +19 -18
- package/scripts/lib/pipeline-detection.sh +1 -1
- package/scripts/lib/pipeline-github.sh +0 -0
- package/scripts/lib/pipeline-intelligence.sh +23 -4
- package/scripts/lib/pipeline-quality-checks.sh +11 -6
- package/scripts/lib/pipeline-quality.sh +0 -0
- package/scripts/lib/pipeline-stages.sh +330 -33
- package/scripts/lib/pipeline-state.sh +14 -0
- package/scripts/lib/policy.sh +0 -0
- package/scripts/lib/test-helpers.sh +0 -0
- package/scripts/postinstall.mjs +75 -1
- package/scripts/signals/example-collector.sh +36 -0
- package/scripts/sw +8 -4
- package/scripts/sw-activity.sh +1 -7
- package/scripts/sw-adaptive.sh +7 -7
- package/scripts/sw-adversarial.sh +1 -1
- package/scripts/sw-architecture-enforcer.sh +1 -1
- package/scripts/sw-auth.sh +1 -1
- package/scripts/sw-autonomous.sh +1 -1
- package/scripts/sw-changelog.sh +1 -1
- package/scripts/sw-checkpoint.sh +1 -1
- package/scripts/sw-ci.sh +11 -6
- package/scripts/sw-cleanup.sh +1 -1
- package/scripts/sw-code-review.sh +36 -17
- package/scripts/sw-connect.sh +1 -1
- package/scripts/sw-context.sh +1 -1
- package/scripts/sw-cost.sh +71 -5
- package/scripts/sw-daemon.sh +6 -3
- package/scripts/sw-dashboard.sh +1 -1
- package/scripts/sw-db.sh +53 -38
- package/scripts/sw-decide.sh +685 -0
- package/scripts/sw-decompose.sh +1 -1
- package/scripts/sw-deps.sh +1 -1
- package/scripts/sw-developer-simulation.sh +1 -1
- package/scripts/sw-discovery.sh +80 -4
- package/scripts/sw-doc-fleet.sh +1 -1
- package/scripts/sw-docs-agent.sh +1 -1
- package/scripts/sw-docs.sh +1 -1
- package/scripts/sw-doctor.sh +1 -1
- package/scripts/sw-dora.sh +1 -1
- package/scripts/sw-durable.sh +9 -5
- package/scripts/sw-e2e-orchestrator.sh +1 -1
- package/scripts/sw-eventbus.sh +7 -4
- package/scripts/sw-evidence.sh +1 -1
- package/scripts/sw-feedback.sh +1 -1
- package/scripts/sw-fix.sh +1 -1
- package/scripts/sw-fleet-discover.sh +1 -1
- package/scripts/sw-fleet-viz.sh +6 -4
- package/scripts/sw-fleet.sh +1 -1
- package/scripts/sw-github-app.sh +3 -2
- package/scripts/sw-github-checks.sh +1 -1
- package/scripts/sw-github-deploy.sh +1 -1
- package/scripts/sw-github-graphql.sh +1 -1
- package/scripts/sw-guild.sh +1 -1
- package/scripts/sw-heartbeat.sh +1 -1
- package/scripts/sw-hygiene.sh +5 -3
- package/scripts/sw-incident.sh +9 -5
- package/scripts/sw-init.sh +1 -1
- package/scripts/sw-instrument.sh +1 -1
- package/scripts/sw-intelligence.sh +11 -6
- package/scripts/sw-jira.sh +1 -1
- package/scripts/sw-launchd.sh +1 -1
- package/scripts/sw-linear.sh +1 -1
- package/scripts/sw-logs.sh +1 -1
- package/scripts/sw-loop.sh +338 -32
- package/scripts/sw-memory.sh +23 -6
- package/scripts/sw-mission-control.sh +1 -1
- package/scripts/sw-model-router.sh +3 -2
- package/scripts/sw-otel.sh +8 -4
- package/scripts/sw-oversight.sh +1 -1
- package/scripts/sw-pipeline-composer.sh +3 -1
- package/scripts/sw-pipeline-vitals.sh +11 -6
- package/scripts/sw-pipeline.sh +92 -8
- package/scripts/sw-pm.sh +5 -4
- package/scripts/sw-pr-lifecycle.sh +7 -4
- package/scripts/sw-predictive.sh +11 -5
- package/scripts/sw-prep.sh +1 -1
- package/scripts/sw-ps.sh +1 -1
- package/scripts/sw-public-dashboard.sh +3 -2
- package/scripts/sw-quality.sh +21 -10
- package/scripts/sw-reaper.sh +1 -1
- package/scripts/sw-recruit.sh +1 -1
- package/scripts/sw-regression.sh +1 -1
- package/scripts/sw-release-manager.sh +1 -1
- package/scripts/sw-release.sh +1 -1
- package/scripts/sw-remote.sh +1 -1
- package/scripts/sw-replay.sh +1 -1
- package/scripts/sw-retro.sh +1 -1
- package/scripts/sw-review-rerun.sh +1 -1
- package/scripts/sw-scale.sh +69 -11
- package/scripts/sw-security-audit.sh +1 -1
- package/scripts/sw-self-optimize.sh +168 -4
- package/scripts/sw-session.sh +3 -3
- package/scripts/sw-setup.sh +1 -1
- package/scripts/sw-standup.sh +1 -1
- package/scripts/sw-status.sh +1 -1
- package/scripts/sw-strategic.sh +11 -6
- package/scripts/sw-stream.sh +7 -4
- package/scripts/sw-swarm.sh +3 -2
- package/scripts/sw-team-stages.sh +1 -1
- package/scripts/sw-templates.sh +3 -3
- package/scripts/sw-testgen.sh +11 -6
- package/scripts/sw-tmux-pipeline.sh +1 -1
- package/scripts/sw-tmux.sh +35 -1
- package/scripts/sw-trace.sh +1 -1
- package/scripts/sw-tracker.sh +1 -1
- package/scripts/sw-triage.sh +7 -7
- package/scripts/sw-upgrade.sh +1 -1
- package/scripts/sw-ux.sh +1 -1
- package/scripts/sw-webhook.sh +3 -2
- package/scripts/sw-widgets.sh +7 -4
- package/scripts/sw-worktree.sh +1 -1
- package/scripts/update-homebrew-sha.sh +21 -15
package/scripts/sw-loop.sh
CHANGED
|
@@ -14,6 +14,7 @@ trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
|
|
|
14
14
|
unset CLAUDECODE 2>/dev/null || true
|
|
15
15
|
# Ignore SIGHUP so tmux attach/detach doesn't kill long-running agent sessions
|
|
16
16
|
trap '' HUP
|
|
17
|
+
trap '' SIGPIPE
|
|
17
18
|
|
|
18
19
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
19
20
|
|
|
@@ -24,6 +25,14 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
24
25
|
# shellcheck source=lib/helpers.sh
|
|
25
26
|
[[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
|
|
26
27
|
[[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
|
|
28
|
+
# Source DB for dual-write (emit_event → JSONL + SQLite).
|
|
29
|
+
# Note: do NOT call init_schema here — the pipeline (sw-pipeline.sh) owns schema
|
|
30
|
+
# initialization. Calling it here would create an empty DB that shadows JSON cost data.
|
|
31
|
+
if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
|
|
32
|
+
source "$SCRIPT_DIR/sw-db.sh" 2>/dev/null || true
|
|
33
|
+
fi
|
|
34
|
+
# Cross-pipeline discovery (learnings from other pipeline runs)
|
|
35
|
+
[[ -f "$SCRIPT_DIR/sw-discovery.sh" ]] && source "$SCRIPT_DIR/sw-discovery.sh" 2>/dev/null || true
|
|
27
36
|
# Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
|
|
28
37
|
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
|
|
29
38
|
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
|
|
@@ -63,7 +72,7 @@ MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
|
|
|
63
72
|
SESSION_RESTART=false
|
|
64
73
|
RESTART_COUNT=0
|
|
65
74
|
REPO_OVERRIDE=""
|
|
66
|
-
VERSION="3.
|
|
75
|
+
VERSION="3.2.0"
|
|
67
76
|
|
|
68
77
|
# ─── Token Tracking ─────────────────────────────────────────────────────────
|
|
69
78
|
LOOP_INPUT_TOKENS=0
|
|
@@ -655,6 +664,9 @@ initialize_state() {
|
|
|
655
664
|
STATUS="running"
|
|
656
665
|
LOG_ENTRIES=""
|
|
657
666
|
|
|
667
|
+
# Record starting commit for cumulative diff in quality gates
|
|
668
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
|
|
669
|
+
|
|
658
670
|
write_state
|
|
659
671
|
}
|
|
660
672
|
|
|
@@ -726,6 +738,11 @@ resume_state() {
|
|
|
726
738
|
START_EPOCH="$(now_epoch)"
|
|
727
739
|
STATUS="running"
|
|
728
740
|
|
|
741
|
+
# Set starting commit for cumulative diff (approximate: use earliest tracked commit)
|
|
742
|
+
if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
|
|
743
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
|
|
744
|
+
fi
|
|
745
|
+
|
|
729
746
|
# If we hit max iterations before, warn user to extend
|
|
730
747
|
if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
|
|
731
748
|
warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
|
|
@@ -872,7 +889,8 @@ validate_claude_output() {
|
|
|
872
889
|
|
|
873
890
|
# Check for obviously corrupt output (API errors dumped as code)
|
|
874
891
|
local total_changed
|
|
875
|
-
total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null ||
|
|
892
|
+
total_changed=$(echo "$changed_files" | grep -c '.' 2>/dev/null || true)
|
|
893
|
+
total_changed="${total_changed:-0}"
|
|
876
894
|
if [[ "$total_changed" -eq 0 ]]; then
|
|
877
895
|
warn "Claude iteration produced no file changes"
|
|
878
896
|
issues=$((issues + 1))
|
|
@@ -960,13 +978,14 @@ check_fatal_error() {
|
|
|
960
978
|
local match
|
|
961
979
|
match=$(grep -iE "$fatal_patterns" "$log_file" 2>/dev/null | head -1 | cut -c1-120)
|
|
962
980
|
error "Fatal CLI error: $match"
|
|
963
|
-
return
|
|
981
|
+
return 1 # fatal error detected
|
|
964
982
|
fi
|
|
965
983
|
|
|
966
984
|
# Non-zero exit + tiny output = likely CLI crash
|
|
967
985
|
if [[ "$cli_exit_code" -ne 0 ]]; then
|
|
968
986
|
local line_count
|
|
969
|
-
line_count=$(grep -cv '^$' "$log_file" 2>/dev/null ||
|
|
987
|
+
line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
|
|
988
|
+
line_count="${line_count:-0}"
|
|
970
989
|
if [[ "$line_count" -lt 3 ]]; then
|
|
971
990
|
local content
|
|
972
991
|
content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
|
|
@@ -1140,7 +1159,8 @@ diagnose_failure() {
|
|
|
1140
1159
|
local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
|
|
1141
1160
|
local repeat_count=0
|
|
1142
1161
|
if [[ -f "$diagnosis_file" ]]; then
|
|
1143
|
-
repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null ||
|
|
1162
|
+
repeat_count=$(grep -c "^${diagnosis}$" "$diagnosis_file" 2>/dev/null || true)
|
|
1163
|
+
repeat_count="${repeat_count:-0}"
|
|
1144
1164
|
fi
|
|
1145
1165
|
echo "$diagnosis" >> "$diagnosis_file"
|
|
1146
1166
|
|
|
@@ -1317,33 +1337,60 @@ run_audit_agent() {
|
|
|
1317
1337
|
local log_file="$LOG_DIR/iteration-${ITERATION}.log"
|
|
1318
1338
|
local audit_log="$LOG_DIR/audit-iter-${ITERATION}.log"
|
|
1319
1339
|
|
|
1320
|
-
# Gather context: tail of implementer output +
|
|
1340
|
+
# Gather context: tail of implementer output + cumulative diff
|
|
1321
1341
|
local impl_tail
|
|
1322
1342
|
impl_tail="$(tail -100 "$log_file" 2>/dev/null || echo "(no output)")"
|
|
1323
|
-
|
|
1324
|
-
|
|
1343
|
+
|
|
1344
|
+
# Use cumulative diff from loop start so auditor sees ALL work, not just latest commit
|
|
1345
|
+
local diff_stat cumulative_note=""
|
|
1346
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
|
|
1347
|
+
diff_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no changes)")"
|
|
1348
|
+
cumulative_note="Note: This diff shows ALL changes since the loop started (iteration 1 through ${ITERATION}), not just the latest commit."
|
|
1349
|
+
else
|
|
1350
|
+
diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
|
|
1351
|
+
fi
|
|
1352
|
+
|
|
1353
|
+
# Include verified test status so auditor doesn't have to guess
|
|
1354
|
+
local test_context=""
|
|
1355
|
+
if [[ -n "$TEST_CMD" ]]; then
|
|
1356
|
+
if [[ "${TEST_PASSED:-}" == "true" ]]; then
|
|
1357
|
+
test_context="## Verified Test Status (from harness, not from agent)
|
|
1358
|
+
Tests: ALL PASSING (command: ${TEST_CMD})"
|
|
1359
|
+
else
|
|
1360
|
+
test_context="## Verified Test Status (from harness)
|
|
1361
|
+
Tests: FAILING (command: ${TEST_CMD})
|
|
1362
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1363
|
+
fi
|
|
1364
|
+
fi
|
|
1325
1365
|
|
|
1326
1366
|
local audit_prompt
|
|
1327
1367
|
read -r -d '' audit_prompt <<AUDIT_PROMPT || true
|
|
1328
|
-
You are an independent code auditor reviewing an autonomous coding agent.
|
|
1368
|
+
You are an independent code auditor reviewing an autonomous coding agent's CUMULATIVE work.
|
|
1369
|
+
This is iteration ${ITERATION}. The agent may have done most of the work in earlier iterations.
|
|
1329
1370
|
|
|
1330
1371
|
## Goal the agent was working toward
|
|
1331
1372
|
${GOAL}
|
|
1332
1373
|
|
|
1333
|
-
## Agent Output (last 100 lines)
|
|
1374
|
+
## Agent Output This Iteration (last 100 lines)
|
|
1334
1375
|
${impl_tail}
|
|
1335
1376
|
|
|
1336
|
-
## Changes Made (git diff --stat)
|
|
1377
|
+
## Cumulative Changes Made (git diff --stat)
|
|
1378
|
+
${cumulative_note}
|
|
1337
1379
|
${diff_stat}
|
|
1338
1380
|
|
|
1381
|
+
${test_context}
|
|
1382
|
+
|
|
1339
1383
|
## Your Task
|
|
1340
|
-
Critically review the work:
|
|
1341
|
-
1.
|
|
1342
|
-
2. Are there obvious bugs, logic errors, or security issues?
|
|
1384
|
+
Critically review the CUMULATIVE work (not just the latest iteration):
|
|
1385
|
+
1. Has the agent made meaningful progress toward the goal across all iterations?
|
|
1386
|
+
2. Are there obvious bugs, logic errors, or security issues in the current codebase?
|
|
1343
1387
|
3. Did the agent leave incomplete work (TODOs, placeholder code)?
|
|
1344
1388
|
4. Are there any regressions or broken patterns?
|
|
1345
1389
|
5. Is the code quality acceptable?
|
|
1346
1390
|
|
|
1391
|
+
IMPORTANT: If the current iteration made small or no code changes, that may be acceptable
|
|
1392
|
+
if earlier iterations already completed the substantive work. Judge the whole body of work.
|
|
1393
|
+
|
|
1347
1394
|
If the work is acceptable and moves toward the goal, output exactly: AUDIT_PASS
|
|
1348
1395
|
Otherwise, list the specific issues that need fixing.
|
|
1349
1396
|
AUDIT_PROMPT
|
|
@@ -1429,21 +1476,52 @@ check_definition_of_done() {
|
|
|
1429
1476
|
|
|
1430
1477
|
local dod_content
|
|
1431
1478
|
dod_content="$(cat "$DOD_FILE")"
|
|
1479
|
+
|
|
1480
|
+
# Use cumulative diff from loop start (not just HEAD~1) so the evaluator
|
|
1481
|
+
# can see ALL work done across every iteration, not just the latest commit.
|
|
1432
1482
|
local diff_content
|
|
1433
|
-
|
|
1483
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
|
|
1484
|
+
diff_content="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no diff)")"
|
|
1485
|
+
diff_content="${diff_content}
|
|
1486
|
+
|
|
1487
|
+
## Detailed Changes (cumulative diff, truncated to 200 lines)
|
|
1488
|
+
$(git -C "$PROJECT_ROOT" diff "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -200 || echo "(no diff)")"
|
|
1489
|
+
else
|
|
1490
|
+
diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
|
|
1491
|
+
fi
|
|
1492
|
+
|
|
1493
|
+
# Inject verified runtime facts so the evaluator doesn't have to guess
|
|
1494
|
+
local runtime_facts=""
|
|
1495
|
+
if [[ -n "$TEST_CMD" ]]; then
|
|
1496
|
+
if [[ "${TEST_PASSED:-}" == "true" ]]; then
|
|
1497
|
+
runtime_facts="## Verified Runtime Facts (from the loop harness, not from the agent)
|
|
1498
|
+
- Tests: ALL PASSING (verified by running '${TEST_CMD}' after this iteration)
|
|
1499
|
+
- Test output (last 10 lines):
|
|
1500
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1501
|
+
else
|
|
1502
|
+
runtime_facts="## Verified Runtime Facts
|
|
1503
|
+
- Tests: FAILING (verified by running '${TEST_CMD}')
|
|
1504
|
+
- Test output (last 10 lines):
|
|
1505
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1506
|
+
fi
|
|
1507
|
+
fi
|
|
1434
1508
|
|
|
1435
1509
|
local dod_prompt
|
|
1436
1510
|
read -r -d '' dod_prompt <<DOD_PROMPT || true
|
|
1437
|
-
You are evaluating whether
|
|
1511
|
+
You are evaluating whether a project satisfies a Definition of Done checklist.
|
|
1512
|
+
You are reviewing the CUMULATIVE work across all iterations, not just the latest commit.
|
|
1438
1513
|
|
|
1439
1514
|
## Definition of Done
|
|
1440
1515
|
${dod_content}
|
|
1441
1516
|
|
|
1442
|
-
|
|
1517
|
+
${runtime_facts}
|
|
1518
|
+
|
|
1519
|
+
## Cumulative Changes Made (git diff from start of loop to now)
|
|
1443
1520
|
${diff_content}
|
|
1444
1521
|
|
|
1445
1522
|
## Your Task
|
|
1446
|
-
For each item in the Definition of Done, determine if the
|
|
1523
|
+
For each item in the Definition of Done, determine if the project satisfies it.
|
|
1524
|
+
The runtime facts above are verified by the harness — trust them as ground truth.
|
|
1447
1525
|
If ALL items are satisfied, output exactly: DOD_PASS
|
|
1448
1526
|
Otherwise, list which items are NOT satisfied and why.
|
|
1449
1527
|
DOD_PROMPT
|
|
@@ -1497,6 +1575,14 @@ guard_completion() {
|
|
|
1497
1575
|
rejection_reasons+=("tests failing")
|
|
1498
1576
|
fi
|
|
1499
1577
|
|
|
1578
|
+
# Holistic final gate: when all other gates pass, run a project-level assessment
|
|
1579
|
+
# that evaluates the entire codebase against the goal (not just the latest diff)
|
|
1580
|
+
if [[ ${#rejection_reasons[@]} -eq 0 ]]; then
|
|
1581
|
+
if ! run_holistic_gate; then
|
|
1582
|
+
rejection_reasons+=("holistic project assessment found gaps")
|
|
1583
|
+
fi
|
|
1584
|
+
fi
|
|
1585
|
+
|
|
1500
1586
|
if [[ ${#rejection_reasons[@]} -gt 0 ]]; then
|
|
1501
1587
|
local reasons_str
|
|
1502
1588
|
reasons_str="$(printf ', %s' "${rejection_reasons[@]}")"
|
|
@@ -1510,17 +1596,88 @@ guard_completion() {
|
|
|
1510
1596
|
return 0
|
|
1511
1597
|
}
|
|
1512
1598
|
|
|
1599
|
+
# Holistic gate: evaluates the full project against the original goal.
|
|
1600
|
+
# Only runs when all other gates pass (final checkpoint before acceptance).
|
|
1601
|
+
#######################################
# Holistic final gate: asks the audit model whether the ORIGINAL goal has been
# fully achieved, judging the cumulative work since the loop started.
# Globals (read): LOOP_START_COMMIT, LOG_DIR, ITERATION, PROJECT_ROOT, GOAL,
#                 TEST_OUTPUT, TEST_PASSED, TEST_CMD, SKIP_PERMISSIONS,
#                 PURPLE, GREEN, YELLOW, RESET
# Calls:          select_audit_model, claude, git
# Outputs:        progress lines on stdout; the model's reply is written to
#                 $LOG_DIR/holistic-iter-<ITERATION>.log
# Returns:        0 if the gate passes (or is skipped because no start commit
#                 is known), 1 if the reply does not contain HOLISTIC_PASS
#######################################
run_holistic_gate() {
    # Skip if no starting commit (can't compute cumulative diff)
    [[ -z "${LOOP_START_COMMIT:-}" ]] && return 0

    local holistic_log="$LOG_DIR/holistic-iter-${ITERATION}.log"

    # Build a project summary: file count, cumulative diff stats, test tail.
    # git failures are tolerated (best effort) and fall back to neutral values.
    local file_count
    file_count="$(git -C "$PROJECT_ROOT" ls-files 2>/dev/null | wc -l | tr -d ' ' || true)"
    file_count="${file_count:-0}"

    # Run the cumulative --stat once and slice it with here-strings. Piping git
    # straight into head/tail makes the `|| echo` fallback unreliable: without
    # pipefail it never fires, and with pipefail it can fire after real output
    # when head closes the pipe early.
    local full_stat cumulative_stat stat_preview
    full_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || true)"
    if [[ -n "$full_stat" ]]; then
        cumulative_stat="$(tail -n 1 <<<"$full_stat")"
        stat_preview="$(head -n 40 <<<"$full_stat")"
    else
        cumulative_stat="(no changes)"
        stat_preview="(none)"
    fi

    local test_summary=""
    if [[ -n "${TEST_OUTPUT:-}" ]]; then
        test_summary="$(tail -n 5 <<<"$TEST_OUTPUT")"
    fi

    local holistic_prompt
    # read returns non-zero at end of the here-doc; that is expected.
    read -r -d '' holistic_prompt <<HOLISTIC_PROMPT || true
You are a final quality gate evaluating whether an autonomous coding agent has FULLY achieved its goal.

## Original Goal
${GOAL}

## Project Stats
- Files in repo: ${file_count}
- Iterations completed: ${ITERATION}
- Cumulative changes: ${cumulative_stat}
- Tests: ${TEST_PASSED:-unknown} (command: ${TEST_CMD:-none})
${test_summary:+- Test output: ${test_summary}}

## Cumulative Git Changes (diff --stat from start)
${stat_preview}

## Your Task
Based on the goal and the cumulative work done:
1. Has the goal been FULLY achieved (not partially)?
2. Is there any critical gap that would make this unacceptable for production?

If the goal is fully achieved, output exactly: HOLISTIC_PASS
Otherwise, list the specific gaps remaining.
HOLISTIC_PROMPT

    echo -e "    ${PURPLE}▸${RESET} Running holistic project assessment..."

    local hol_model
    hol_model="$(select_audit_model)"
    local hol_flags=("--model" "$hol_model")
    # Compare as a string instead of executing the variable: an empty or unset
    # SKIP_PERMISSIONS must never enable the dangerous flag.
    if [[ "${SKIP_PERMISSIONS:-false}" == "true" ]]; then
        hol_flags+=("--dangerously-skip-permissions")
    fi

    # Best effort: a CLI failure leaves no HOLISTIC_PASS in the log and is
    # therefore reported as "gaps found" below rather than aborting the loop.
    claude -p "$holistic_prompt" "${hol_flags[@]}" > "$holistic_log" 2>&1 || true

    if grep -q "HOLISTIC_PASS" "$holistic_log" 2>/dev/null; then
        echo -e "    ${GREEN}✓${RESET} Holistic assessment: passed"
        return 0
    else
        echo -e "    ${YELLOW}⚠${RESET} Holistic assessment: gaps found"
        return 1
    fi
}
|
|
1662
|
+
|
|
1513
1663
|
# ─── Context Window Management ───────────────────────────────────────────────
|
|
1514
1664
|
# Prevents prompt from exceeding Claude's context limit (~200K tokens).
|
|
1515
1665
|
# Trims least-critical sections first when over budget.
|
|
1516
1666
|
|
|
1517
|
-
CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS
|
|
1667
|
+
CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-$(_config_get_int "loop.context_budget_chars" 180000 2>/dev/null || echo 180000)}" # ~45K tokens at 4 chars/token
|
|
1518
1668
|
|
|
1519
1669
|
manage_context_window() {
|
|
1520
1670
|
local prompt="$1"
|
|
1521
1671
|
local budget="${CONTEXT_BUDGET_CHARS}"
|
|
1522
1672
|
local current_len=${#prompt}
|
|
1523
1673
|
|
|
1674
|
+
# Read trimming tunables from config (env > daemon-config > policy > defaults.json)
|
|
1675
|
+
local trim_memory_chars trim_git_entries trim_hotspot_files trim_test_lines
|
|
1676
|
+
trim_memory_chars=$(_config_get_int "loop.context_trim_memory_chars" 20000 2>/dev/null || echo 20000)
|
|
1677
|
+
trim_git_entries=$(_config_get_int "loop.context_trim_git_entries" 10 2>/dev/null || echo 10)
|
|
1678
|
+
trim_hotspot_files=$(_config_get_int "loop.context_trim_hotspot_files" 5 2>/dev/null || echo 5)
|
|
1679
|
+
trim_test_lines=$(_config_get_int "loop.context_trim_test_lines" 50 2>/dev/null || echo 50)
|
|
1680
|
+
|
|
1524
1681
|
if [[ "$current_len" -le "$budget" ]]; then
|
|
1525
1682
|
echo "$prompt"
|
|
1526
1683
|
return
|
|
@@ -1534,19 +1691,19 @@ manage_context_window() {
|
|
|
1534
1691
|
trimmed=$(echo "$trimmed" | awk '/^## Performance Baselines/{skip=1; next} skip && /^## [^#]/{skip=0} !skip{print}')
|
|
1535
1692
|
fi
|
|
1536
1693
|
|
|
1537
|
-
# 2. Trim file hotspots to top
|
|
1694
|
+
# 2. Trim file hotspots to top N
|
|
1538
1695
|
if [[ "${#trimmed}" -gt "$budget" ]]; then
|
|
1539
|
-
trimmed=$(echo "$trimmed" | awk '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>
|
|
1696
|
+
trimmed=$(echo "$trimmed" | awk -v max="$trim_hotspot_files" '/## File Hotspots/{p=1; c=0} p && /^- /{c++; if(c>max) next} {print}')
|
|
1540
1697
|
fi
|
|
1541
1698
|
|
|
1542
|
-
# 3. Trim git log to last
|
|
1699
|
+
# 3. Trim git log to last N entries
|
|
1543
1700
|
if [[ "${#trimmed}" -gt "$budget" ]]; then
|
|
1544
|
-
trimmed=$(echo "$trimmed" | awk '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>
|
|
1701
|
+
trimmed=$(echo "$trimmed" | awk -v max="$trim_git_entries" '/## Recent Git Activity/{p=1; c=0} p && /^[a-f0-9]/{c++; if(c>max) next} {print}')
|
|
1545
1702
|
fi
|
|
1546
1703
|
|
|
1547
|
-
# 4. Truncate memory context to first
|
|
1704
|
+
# 4. Truncate memory context to first N chars
|
|
1548
1705
|
if [[ "${#trimmed}" -gt "$budget" ]]; then
|
|
1549
|
-
trimmed=$(echo "$trimmed" | awk -v max=
|
|
1706
|
+
trimmed=$(echo "$trimmed" | awk -v max="$trim_memory_chars" '
|
|
1550
1707
|
/## Memory Context/{mem=1; skip_rest=0; chars=0; print; next}
|
|
1551
1708
|
mem && /^## [^#]/{mem=0; print; next}
|
|
1552
1709
|
mem{chars+=length($0)+1; if(chars>max){print "... (memory truncated for context budget)"; skip_rest=1; mem=0; next}}
|
|
@@ -1556,11 +1713,11 @@ manage_context_window() {
|
|
|
1556
1713
|
')
|
|
1557
1714
|
fi
|
|
1558
1715
|
|
|
1559
|
-
# 5. Truncate test output to last
|
|
1716
|
+
# 5. Truncate test output to last N lines
|
|
1560
1717
|
if [[ "${#trimmed}" -gt "$budget" ]]; then
|
|
1561
|
-
trimmed=$(echo "$trimmed" | awk '
|
|
1718
|
+
trimmed=$(echo "$trimmed" | awk -v max="$trim_test_lines" '
|
|
1562
1719
|
/## Test Results/{found=1; buf=""; print; next}
|
|
1563
|
-
found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>
|
|
1720
|
+
found && /^## [^#]/{found=0; n=split(buf,arr,"\n"); start=(n>max)?(n-max+1):1; for(i=start;i<=n;i++) if(arr[i]!="") print arr[i]; print; next}
|
|
1564
1721
|
found{buf=buf $0 "\n"; next}
|
|
1565
1722
|
{print}
|
|
1566
1723
|
')
|
|
@@ -1639,6 +1796,16 @@ Fix these specific errors. Each line above is one distinct error from the test o
|
|
|
1639
1796
|
memory_section="$("$SCRIPT_DIR/sw-memory.sh" inject build 2>/dev/null || true)"
|
|
1640
1797
|
fi
|
|
1641
1798
|
|
|
1799
|
+
# Cross-pipeline discovery injection (learnings from other pipeline runs)
|
|
1800
|
+
local discovery_section=""
|
|
1801
|
+
if type inject_discoveries >/dev/null 2>&1; then
|
|
1802
|
+
local disc_output
|
|
1803
|
+
disc_output="$(inject_discoveries "${GOAL:-}" 2>/dev/null || true)"
|
|
1804
|
+
if [[ -n "$disc_output" ]]; then
|
|
1805
|
+
discovery_section="$disc_output"
|
|
1806
|
+
fi
|
|
1807
|
+
fi
|
|
1808
|
+
|
|
1642
1809
|
# DORA baselines for context
|
|
1643
1810
|
local dora_section=""
|
|
1644
1811
|
if type memory_get_dora_baseline >/dev/null 2>&1; then
|
|
@@ -1810,12 +1977,25 @@ ${_test_tail}
|
|
|
1810
1977
|
RESUMED_TEST_OUTPUT=""
|
|
1811
1978
|
fi
|
|
1812
1979
|
|
|
1980
|
+
# Build cumulative progress summary showing all iterations' work
|
|
1981
|
+
local cumulative_section=""
|
|
1982
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
|
|
1983
|
+
local cum_stat
|
|
1984
|
+
cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
|
|
1985
|
+
if [[ -n "$cum_stat" ]]; then
|
|
1986
|
+
cumulative_section="## Cumulative Progress (all iterations combined)
|
|
1987
|
+
${cum_stat}
|
|
1988
|
+
"
|
|
1989
|
+
fi
|
|
1990
|
+
fi
|
|
1991
|
+
|
|
1813
1992
|
cat <<PROMPT
|
|
1814
1993
|
You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
|
|
1815
1994
|
${resume_section}
|
|
1816
1995
|
## Your Goal
|
|
1817
1996
|
${GOAL}
|
|
1818
1997
|
|
|
1998
|
+
${cumulative_section}
|
|
1819
1999
|
## Current Progress
|
|
1820
2000
|
${recent_log}
|
|
1821
2001
|
|
|
@@ -1830,6 +2010,9 @@ ${error_summary_section:+$error_summary_section
|
|
|
1830
2010
|
${memory_section:+## Memory Context
|
|
1831
2011
|
$memory_section
|
|
1832
2012
|
}
|
|
2013
|
+
${discovery_section:+## Cross-Pipeline Learnings
|
|
2014
|
+
$discovery_section
|
|
2015
|
+
}
|
|
1833
2016
|
${dora_section:+$dora_section
|
|
1834
2017
|
}
|
|
1835
2018
|
${intelligence_section:+$intelligence_section
|
|
@@ -1844,6 +2027,13 @@ ${restart_section:+$restart_section
|
|
|
1844
2027
|
5. Commit your work with a descriptive message
|
|
1845
2028
|
6. When the goal is FULLY achieved, output exactly: LOOP_COMPLETE
|
|
1846
2029
|
|
|
2030
|
+
## Context Efficiency
|
|
2031
|
+
- Batch independent tool calls in parallel — avoid sequential round-trips
|
|
2032
|
+
- Use targeted file reads (offset/limit) instead of reading entire large files
|
|
2033
|
+
- Delegate large searches to subagents — only import the summary
|
|
2034
|
+
- Filter tool results with grep/jq before reasoning over them
|
|
2035
|
+
- Keep working memory lean — summarize completed steps, don't preserve full outputs
|
|
2036
|
+
|
|
1847
2037
|
${audit_section}
|
|
1848
2038
|
|
|
1849
2039
|
${audit_feedback_section}
|
|
@@ -1861,6 +2051,58 @@ ${stuckness_section}
|
|
|
1861
2051
|
PROMPT
|
|
1862
2052
|
}
|
|
1863
2053
|
|
|
2054
|
+
# ─── Alternative Strategy Exploration ─────────────────────────────────────────
|
|
2055
|
+
# When stuckness is detected, generate a context-aware alternative strategy.
|
|
2056
|
+
# Uses pattern matching on error type + iteration count to suggest different approaches.
|
|
2057
|
+
|
|
2058
|
+
#######################################
# Suggest one not-yet-tried alternative strategy when the loop looks stuck.
# Strategies are considered in priority order (evaluator alignment, verify
# existing work, decompose). Exactly one strategy is selected per call, and
# only the selected one is recorded as attempted, so no strategy is marked as
# used without actually being shown to the agent.
# Globals (read): LOG_DIR, TEST_PASSED, QUALITY_GATE_PASSED, AUDIT_RESULT
# Arguments:
#   $1 - last error / stuckness reason text (default "unknown")
#   $2 - iteration number (accepted for interface compatibility; unused)
#   $3 - diagnosis text (default empty)
# Outputs: the strategy section on stdout (an empty line when every
#          applicable strategy has already been attempted)
# Side effects: appends the chosen strategy name to
#          ${LOG_DIR:-/tmp}/strategy-attempts.txt
#######################################
explore_alternative_strategy() {
    local last_error="${1:-unknown}"
    local diagnosis="${3:-}"

    # Track attempted strategies to avoid repeating them
    local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
    local attempted
    attempted=$(cat "$strategy_file" 2>/dev/null || true)

    local strategy=""

    # If quality gates are passing but evaluators disagree, suggest focusing on evaluator alignment
    if [[ "${TEST_PASSED:-}" == "true" ]] \
        && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]] \
        && ! grep -qxF "evaluator_alignment" <<<"$attempted"; then
        echo "evaluator_alignment" >> "$strategy_file"
        strategy="## Alternative Strategy: Evaluator Alignment
The code appears functionally complete (tests pass). Focus on satisfying the remaining
quality gate evaluators. Check the DoD log and audit log for specific complaints, then
address those exact points rather than adding new features."
    fi

    # If no code changes in last iteration, suggest verifying existing work.
    # Only considered when no higher-priority strategy was picked above, so an
    # earlier choice is never overwritten after being recorded.
    if [[ -z "$strategy" ]] \
        && { grep -qi "no code changes" <<<"$last_error" || [[ "$diagnosis" == *"no code"* ]]; } \
        && ! grep -qxF "verify_existing" <<<"$attempted"; then
        echo "verify_existing" >> "$strategy_file"
        strategy="## Alternative Strategy: Verify Existing Work
Recent iterations made no code changes. The work may already be complete.
Run the full test suite, verify all features work, and if everything passes,
commit a verification message and declare LOOP_COMPLETE with evidence."
    fi

    # Generic fallback: break the problem down
    if [[ -z "$strategy" ]] && ! grep -qxF "decompose" <<<"$attempted"; then
        echo "decompose" >> "$strategy_file"
        strategy="## Alternative Strategy: Decompose
Break the remaining work into smaller, independent steps. Focus on one specific
file or function at a time. Read error messages literally — the root cause may
differ from your assumption."
    fi

    printf '%s\n' "$strategy"
}
|
|
2105
|
+
|
|
1864
2106
|
# ─── Stuckness Detection ─────────────────────────────────────────────────────
|
|
1865
2107
|
# Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
|
|
1866
2108
|
# Returns 0 when stuck, 1 when not. Outputs stuckness section and sets STUCKNESS_HINT when stuck.
|
|
@@ -1890,7 +2132,8 @@ detect_stuckness() {
|
|
|
1890
2132
|
local stuckness_reasons=()
|
|
1891
2133
|
local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
|
|
1892
2134
|
local tracking_lines
|
|
1893
|
-
tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null ||
|
|
2135
|
+
tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || true)
|
|
2136
|
+
tracking_lines="${tracking_lines:-0}"
|
|
1894
2137
|
|
|
1895
2138
|
# Signal 1: Text overlap (existing logic) — compare last 2 iteration logs
|
|
1896
2139
|
if [[ "$iteration" -ge 3 ]]; then
|
|
@@ -1905,7 +2148,8 @@ detect_stuckness() {
|
|
|
1905
2148
|
|
|
1906
2149
|
if [[ -n "$lines1" && -n "$lines2" ]]; then
|
|
1907
2150
|
total=$(echo "$lines1" | wc -l | tr -d ' ')
|
|
1908
|
-
common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' ||
|
|
2151
|
+
common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || true)
|
|
2152
|
+
common="${common:-0}"
|
|
1909
2153
|
if [[ "$total" -gt 0 ]]; then
|
|
1910
2154
|
overlap_pct=$(( common * 100 / total ))
|
|
1911
2155
|
else
|
|
@@ -1977,7 +2221,8 @@ detect_stuckness() {
|
|
|
1977
2221
|
|
|
1978
2222
|
# Signal 6: Git diff size — no or minimal code changes (existing)
|
|
1979
2223
|
local diff_lines
|
|
1980
|
-
diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' ||
|
|
2224
|
+
diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || true)
|
|
2225
|
+
diff_lines="${diff_lines:-0}"
|
|
1981
2226
|
if [[ "${diff_lines:-0}" -lt 5 ]] && [[ "$iteration" -gt 2 ]]; then
|
|
1982
2227
|
stuckness_signals=$((stuckness_signals + 1))
|
|
1983
2228
|
stuckness_reasons+=("no code changes in last iteration")
|
|
@@ -1994,6 +2239,17 @@ detect_stuckness() {
|
|
|
1994
2239
|
stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
|
|
1995
2240
|
fi
|
|
1996
2241
|
|
|
2242
|
+
# Gate-aware dampening: if tests pass and the agent has made progress overall,
|
|
2243
|
+
# reduce stuckness signal count. The "no code changes" and "identical diffs" signals
|
|
2244
|
+
# fire when code is already complete and the agent is fighting evaluator quirks —
|
|
2245
|
+
# that's not genuine stuckness, it's "done but gates disagree."
|
|
2246
|
+
if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
|
|
2247
|
+
# If at least one quality signal is positive, dampen by 1
|
|
2248
|
+
# Compare QUALITY_GATE_PASSED as a string rather than executing it as a command:
# an unset/empty value would expand to a bare redirection, which exits 0 and would
# wrongly count as a positive quality signal. Only the literal "true" dampens.
if [[ "${AUDIT_RESULT:-}" == "pass" ]] || [[ "${QUALITY_GATE_PASSED:-false}" == "true" ]]; then
|
|
2249
|
+
stuckness_signals=$((stuckness_signals - 1))
|
|
2250
|
+
fi
|
|
2251
|
+
fi
|
|
2252
|
+
|
|
1997
2253
|
# Decision: 2+ signals = stuck
|
|
1998
2254
|
if [[ "$stuckness_signals" -ge 2 ]]; then
|
|
1999
2255
|
STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
|
|
@@ -2133,7 +2389,7 @@ compose_worker_prompt() {
|
|
|
2133
2389
|
role_desc="$recruit_desc"
|
|
2134
2390
|
fi
|
|
2135
2391
|
fi
|
|
2136
|
-
# Fallback to
|
|
2392
|
+
# Fallback to built-in role descriptions
|
|
2137
2393
|
if [[ -z "$role_desc" ]]; then
|
|
2138
2394
|
case "$role" in
|
|
2139
2395
|
builder) role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
|
|
@@ -2189,10 +2445,33 @@ run_claude_iteration() {
|
|
|
2189
2445
|
local final_prompt
|
|
2190
2446
|
final_prompt=$(manage_context_window "$prompt")
|
|
2191
2447
|
|
|
2448
|
+
local raw_prompt_chars=${#prompt}
|
|
2192
2449
|
local prompt_chars=${#final_prompt}
|
|
2193
2450
|
local approx_tokens=$((prompt_chars / 4))
|
|
2194
2451
|
info "Prompt: ~${approx_tokens} tokens (${prompt_chars} chars)"
|
|
2195
2452
|
|
|
2453
|
+
# Emit context efficiency metrics
|
|
2454
|
+
if type emit_event >/dev/null 2>&1; then
|
|
2455
|
+
local trim_ratio=0
|
|
2456
|
+
local budget_utilization=0
|
|
2457
|
+
if [[ "$raw_prompt_chars" -gt 0 ]]; then
|
|
2458
|
+
trim_ratio=$(awk -v raw="$raw_prompt_chars" -v trimmed="$prompt_chars" \
|
|
2459
|
+
'BEGIN { printf "%.1f", ((raw - trimmed) / raw) * 100 }')
|
|
2460
|
+
fi
|
|
2461
|
+
if [[ "${CONTEXT_BUDGET_CHARS:-0}" -gt 0 ]]; then
|
|
2462
|
+
budget_utilization=$(awk -v used="$prompt_chars" -v budget="${CONTEXT_BUDGET_CHARS}" \
|
|
2463
|
+
'BEGIN { printf "%.1f", (used / budget) * 100 }')
|
|
2464
|
+
fi
|
|
2465
|
+
emit_event "loop.context_efficiency" \
|
|
2466
|
+
"iteration=$ITERATION" \
|
|
2467
|
+
"raw_prompt_chars=$raw_prompt_chars" \
|
|
2468
|
+
"trimmed_prompt_chars=$prompt_chars" \
|
|
2469
|
+
"trim_ratio=$trim_ratio" \
|
|
2470
|
+
"budget_utilization=$budget_utilization" \
|
|
2471
|
+
"budget_chars=${CONTEXT_BUDGET_CHARS:-0}" \
|
|
2472
|
+
"job_id=${PIPELINE_JOB_ID:-loop-$$}" 2>/dev/null || true
|
|
2473
|
+
fi
|
|
2474
|
+
|
|
2196
2475
|
local flags
|
|
2197
2476
|
flags="$(build_claude_flags)"
|
|
2198
2477
|
|
|
@@ -2719,6 +2998,11 @@ run_single_agent_loop() {
|
|
|
2719
2998
|
initialize_state
|
|
2720
2999
|
fi
|
|
2721
3000
|
|
|
3001
|
+
# Ensure LOOP_START_COMMIT is set (may not be on resume/restart)
|
|
3002
|
+
if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
|
|
3003
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
|
|
3004
|
+
fi
|
|
3005
|
+
|
|
2722
3006
|
# Apply adaptive budget/model before showing banner
|
|
2723
3007
|
apply_adaptive_budget
|
|
2724
3008
|
MODEL="$(select_adaptive_model "build" "$MODEL")"
|
|
@@ -2746,6 +3030,16 @@ run_single_agent_loop() {
|
|
|
2746
3030
|
}
|
|
2747
3031
|
ITERATION=$(( ITERATION + 1 ))
|
|
2748
3032
|
|
|
3033
|
+
# Emit iteration start event for pipeline visibility
|
|
3034
|
+
if type emit_event >/dev/null 2>&1; then
|
|
3035
|
+
emit_event "loop.iteration_start" \
|
|
3036
|
+
"iteration=$ITERATION" \
|
|
3037
|
+
"max=$MAX_ITERATIONS" \
|
|
3038
|
+
"job_id=${PIPELINE_JOB_ID:-loop-$$}" \
|
|
3039
|
+
"agent=${AGENT_NUM:-1}" \
|
|
3040
|
+
"test_passed=${TEST_PASSED:-unknown}"
|
|
3041
|
+
fi
|
|
3042
|
+
|
|
2749
3043
|
# Root-cause diagnosis and memory-based fix on retry after test failure
|
|
2750
3044
|
if [[ "${TEST_PASSED:-}" == "false" ]]; then
|
|
2751
3045
|
# Source memory module for diagnosis and fix lookup
|
|
@@ -2915,6 +3209,18 @@ $summary
|
|
|
2915
3209
|
write_state
|
|
2916
3210
|
write_progress
|
|
2917
3211
|
|
|
3212
|
+
# Emit iteration complete event for pipeline visibility
|
|
3213
|
+
if type emit_event >/dev/null 2>&1; then
|
|
3214
|
+
emit_event "loop.iteration_complete" \
|
|
3215
|
+
"iteration=$ITERATION" \
|
|
3216
|
+
"max=$MAX_ITERATIONS" \
|
|
3217
|
+
"job_id=${PIPELINE_JOB_ID:-loop-$$}" \
|
|
3218
|
+
"agent=${AGENT_NUM:-1}" \
|
|
3219
|
+
"test_passed=${TEST_PASSED:-unknown}" \
|
|
3220
|
+
"commits=$TOTAL_COMMITS" \
|
|
3221
|
+
"status=${STATUS:-running}"
|
|
3222
|
+
fi
|
|
3223
|
+
|
|
2918
3224
|
# Update heartbeat
|
|
2919
3225
|
"$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
|
|
2920
3226
|
--pid $$ \
|