shipwright-cli 2.4.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -11
- package/completions/_shipwright +248 -94
- package/completions/shipwright.bash +68 -19
- package/completions/shipwright.fish +310 -42
- package/config/decision-tiers.json +55 -0
- package/config/defaults.json +111 -0
- package/config/event-schema.json +218 -0
- package/config/policy.json +21 -18
- package/dashboard/coverage/coverage-summary.json +14 -0
- package/dashboard/public/index.html +1 -1
- package/dashboard/server.ts +306 -17
- package/dashboard/src/components/charts/bar.test.ts +79 -0
- package/dashboard/src/components/charts/donut.test.ts +68 -0
- package/dashboard/src/components/charts/pipeline-rail.test.ts +117 -0
- package/dashboard/src/components/charts/sparkline.test.ts +125 -0
- package/dashboard/src/core/api.test.ts +309 -0
- package/dashboard/src/core/helpers.test.ts +301 -0
- package/dashboard/src/core/router.test.ts +307 -0
- package/dashboard/src/core/router.ts +7 -0
- package/dashboard/src/core/sse.test.ts +144 -0
- package/dashboard/src/views/metrics.test.ts +186 -0
- package/dashboard/src/views/overview.test.ts +173 -0
- package/dashboard/src/views/pipelines.test.ts +183 -0
- package/dashboard/src/views/team.test.ts +253 -0
- package/dashboard/vitest.config.ts +14 -5
- package/docs/TIPS.md +1 -1
- package/docs/patterns/README.md +1 -1
- package/package.json +7 -9
- package/scripts/adapters/docker-deploy.sh +1 -1
- package/scripts/adapters/tmux-adapter.sh +11 -1
- package/scripts/adapters/wezterm-adapter.sh +1 -1
- package/scripts/check-version-consistency.sh +1 -1
- package/scripts/lib/architecture.sh +127 -0
- package/scripts/lib/bootstrap.sh +75 -0
- package/scripts/lib/compat.sh +89 -6
- package/scripts/lib/config.sh +91 -0
- package/scripts/lib/daemon-adaptive.sh +3 -3
- package/scripts/lib/daemon-dispatch.sh +63 -17
- package/scripts/lib/daemon-failure.sh +0 -0
- package/scripts/lib/daemon-health.sh +1 -1
- package/scripts/lib/daemon-patrol.sh +64 -17
- package/scripts/lib/daemon-poll.sh +54 -25
- package/scripts/lib/daemon-state.sh +125 -23
- package/scripts/lib/daemon-triage.sh +31 -9
- package/scripts/lib/decide-autonomy.sh +295 -0
- package/scripts/lib/decide-scoring.sh +228 -0
- package/scripts/lib/decide-signals.sh +462 -0
- package/scripts/lib/fleet-failover.sh +63 -0
- package/scripts/lib/helpers.sh +29 -6
- package/scripts/lib/pipeline-detection.sh +2 -2
- package/scripts/lib/pipeline-github.sh +9 -9
- package/scripts/lib/pipeline-intelligence.sh +105 -38
- package/scripts/lib/pipeline-quality-checks.sh +17 -16
- package/scripts/lib/pipeline-quality.sh +1 -1
- package/scripts/lib/pipeline-stages.sh +440 -59
- package/scripts/lib/pipeline-state.sh +54 -4
- package/scripts/lib/policy.sh +0 -0
- package/scripts/lib/test-helpers.sh +247 -0
- package/scripts/postinstall.mjs +78 -12
- package/scripts/signals/example-collector.sh +36 -0
- package/scripts/sw +17 -7
- package/scripts/sw-activity.sh +1 -11
- package/scripts/sw-adaptive.sh +109 -85
- package/scripts/sw-adversarial.sh +4 -14
- package/scripts/sw-architecture-enforcer.sh +1 -11
- package/scripts/sw-auth.sh +8 -17
- package/scripts/sw-autonomous.sh +111 -49
- package/scripts/sw-changelog.sh +1 -11
- package/scripts/sw-checkpoint.sh +144 -20
- package/scripts/sw-ci.sh +2 -12
- package/scripts/sw-cleanup.sh +13 -17
- package/scripts/sw-code-review.sh +16 -36
- package/scripts/sw-connect.sh +5 -12
- package/scripts/sw-context.sh +9 -26
- package/scripts/sw-cost.sh +17 -18
- package/scripts/sw-daemon.sh +76 -71
- package/scripts/sw-dashboard.sh +57 -17
- package/scripts/sw-db.sh +524 -26
- package/scripts/sw-decide.sh +685 -0
- package/scripts/sw-decompose.sh +1 -11
- package/scripts/sw-deps.sh +15 -25
- package/scripts/sw-developer-simulation.sh +1 -11
- package/scripts/sw-discovery.sh +138 -30
- package/scripts/sw-doc-fleet.sh +7 -17
- package/scripts/sw-docs-agent.sh +6 -16
- package/scripts/sw-docs.sh +4 -12
- package/scripts/sw-doctor.sh +134 -43
- package/scripts/sw-dora.sh +11 -19
- package/scripts/sw-durable.sh +35 -52
- package/scripts/sw-e2e-orchestrator.sh +11 -27
- package/scripts/sw-eventbus.sh +115 -115
- package/scripts/sw-evidence.sh +114 -30
- package/scripts/sw-feedback.sh +3 -13
- package/scripts/sw-fix.sh +2 -20
- package/scripts/sw-fleet-discover.sh +1 -11
- package/scripts/sw-fleet-viz.sh +10 -18
- package/scripts/sw-fleet.sh +13 -17
- package/scripts/sw-github-app.sh +6 -16
- package/scripts/sw-github-checks.sh +1 -11
- package/scripts/sw-github-deploy.sh +1 -11
- package/scripts/sw-github-graphql.sh +2 -12
- package/scripts/sw-guild.sh +1 -11
- package/scripts/sw-heartbeat.sh +49 -12
- package/scripts/sw-hygiene.sh +45 -43
- package/scripts/sw-incident.sh +48 -74
- package/scripts/sw-init.sh +35 -37
- package/scripts/sw-instrument.sh +1 -11
- package/scripts/sw-intelligence.sh +368 -53
- package/scripts/sw-jira.sh +5 -14
- package/scripts/sw-launchd.sh +2 -12
- package/scripts/sw-linear.sh +8 -17
- package/scripts/sw-logs.sh +4 -12
- package/scripts/sw-loop.sh +905 -104
- package/scripts/sw-memory.sh +263 -20
- package/scripts/sw-mission-control.sh +2 -12
- package/scripts/sw-model-router.sh +73 -34
- package/scripts/sw-otel.sh +15 -23
- package/scripts/sw-oversight.sh +1 -11
- package/scripts/sw-patrol-meta.sh +5 -11
- package/scripts/sw-pipeline-composer.sh +7 -17
- package/scripts/sw-pipeline-vitals.sh +1 -11
- package/scripts/sw-pipeline.sh +550 -122
- package/scripts/sw-pm.sh +2 -12
- package/scripts/sw-pr-lifecycle.sh +33 -28
- package/scripts/sw-predictive.sh +16 -22
- package/scripts/sw-prep.sh +6 -16
- package/scripts/sw-ps.sh +1 -11
- package/scripts/sw-public-dashboard.sh +2 -12
- package/scripts/sw-quality.sh +85 -14
- package/scripts/sw-reaper.sh +1 -11
- package/scripts/sw-recruit.sh +15 -25
- package/scripts/sw-regression.sh +11 -21
- package/scripts/sw-release-manager.sh +19 -28
- package/scripts/sw-release.sh +8 -16
- package/scripts/sw-remote.sh +1 -11
- package/scripts/sw-replay.sh +48 -44
- package/scripts/sw-retro.sh +70 -92
- package/scripts/sw-review-rerun.sh +1 -1
- package/scripts/sw-scale.sh +174 -41
- package/scripts/sw-security-audit.sh +12 -22
- package/scripts/sw-self-optimize.sh +239 -23
- package/scripts/sw-session.sh +5 -15
- package/scripts/sw-setup.sh +8 -18
- package/scripts/sw-standup.sh +5 -15
- package/scripts/sw-status.sh +32 -23
- package/scripts/sw-strategic.sh +129 -13
- package/scripts/sw-stream.sh +1 -11
- package/scripts/sw-swarm.sh +76 -36
- package/scripts/sw-team-stages.sh +10 -20
- package/scripts/sw-templates.sh +4 -14
- package/scripts/sw-testgen.sh +3 -13
- package/scripts/sw-tmux-pipeline.sh +1 -19
- package/scripts/sw-tmux-role-color.sh +0 -10
- package/scripts/sw-tmux-status.sh +3 -11
- package/scripts/sw-tmux.sh +2 -20
- package/scripts/sw-trace.sh +1 -19
- package/scripts/sw-tracker-github.sh +0 -10
- package/scripts/sw-tracker-jira.sh +1 -11
- package/scripts/sw-tracker-linear.sh +1 -11
- package/scripts/sw-tracker.sh +7 -24
- package/scripts/sw-triage.sh +29 -39
- package/scripts/sw-upgrade.sh +5 -23
- package/scripts/sw-ux.sh +1 -19
- package/scripts/sw-webhook.sh +18 -32
- package/scripts/sw-widgets.sh +3 -21
- package/scripts/sw-worktree.sh +11 -27
- package/scripts/update-homebrew-sha.sh +73 -0
- package/templates/pipelines/tdd.json +72 -0
- package/scripts/sw-pipeline.sh.mock +0 -7
package/scripts/sw-loop.sh
CHANGED
|
@@ -23,6 +23,13 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
23
23
|
# Canonical helpers (colors, output, events)
|
|
24
24
|
# shellcheck source=lib/helpers.sh
|
|
25
25
|
[[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh"
|
|
26
|
+
[[ -f "$SCRIPT_DIR/lib/config.sh" ]] && source "$SCRIPT_DIR/lib/config.sh"
|
|
27
|
+
# Source DB for dual-write (emit_event → JSONL + SQLite).
|
|
28
|
+
# Note: do NOT call init_schema here — the pipeline (sw-pipeline.sh) owns schema
|
|
29
|
+
# initialization. Calling it here would create an empty DB that shadows JSON cost data.
|
|
30
|
+
if [[ -f "$SCRIPT_DIR/sw-db.sh" ]]; then
|
|
31
|
+
source "$SCRIPT_DIR/sw-db.sh" 2>/dev/null || true
|
|
32
|
+
fi
|
|
26
33
|
# Fallbacks when helpers not loaded (e.g. test env with overridden SCRIPT_DIR)
|
|
27
34
|
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
|
|
28
35
|
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
|
|
@@ -40,15 +47,6 @@ if [[ "$(type -t emit_event 2>/dev/null)" != "function" ]]; then
|
|
|
40
47
|
echo "${payload}}" >> "${HOME}/.shipwright/events.jsonl"
|
|
41
48
|
}
|
|
42
49
|
fi
|
|
43
|
-
CYAN="${CYAN:-\033[38;2;0;212;255m}"
|
|
44
|
-
PURPLE="${PURPLE:-\033[38;2;124;58;237m}"
|
|
45
|
-
BLUE="${BLUE:-\033[38;2;0;102;255m}"
|
|
46
|
-
GREEN="${GREEN:-\033[38;2;74;222;128m}"
|
|
47
|
-
YELLOW="${YELLOW:-\033[38;2;250;204;21m}"
|
|
48
|
-
RED="${RED:-\033[38;2;248;113;113m}"
|
|
49
|
-
DIM="${DIM:-\033[2m}"
|
|
50
|
-
BOLD="${BOLD:-\033[1m}"
|
|
51
|
-
RESET="${RESET:-\033[0m}"
|
|
52
50
|
|
|
53
51
|
# ─── Defaults ─────────────────────────────────────────────────────────────────
|
|
54
52
|
GOAL=""
|
|
@@ -67,11 +65,11 @@ MAX_TURNS=""
|
|
|
67
65
|
RESUME=false
|
|
68
66
|
VERBOSE=false
|
|
69
67
|
MAX_ITERATIONS_EXPLICIT=false
|
|
70
|
-
MAX_RESTARTS
|
|
68
|
+
MAX_RESTARTS=$(_config_get_int "loop.max_restarts" 0 2>/dev/null || echo 0)
|
|
71
69
|
SESSION_RESTART=false
|
|
72
70
|
RESTART_COUNT=0
|
|
73
71
|
REPO_OVERRIDE=""
|
|
74
|
-
VERSION="
|
|
72
|
+
VERSION="3.1.0"
|
|
75
73
|
|
|
76
74
|
# ─── Token Tracking ─────────────────────────────────────────────────────────
|
|
77
75
|
LOOP_INPUT_TOKENS=0
|
|
@@ -335,13 +333,13 @@ if [[ -n "$REPO_OVERRIDE" ]]; then
|
|
|
335
333
|
info "Using repository: $(pwd)"
|
|
336
334
|
fi
|
|
337
335
|
|
|
338
|
-
if ! command -v claude
|
|
336
|
+
if ! command -v claude >/dev/null 2>&1; then
|
|
339
337
|
error "Claude Code CLI not found. Install it first:"
|
|
340
338
|
echo -e " ${DIM}npm install -g @anthropic-ai/claude-code${RESET}"
|
|
341
339
|
exit 1
|
|
342
340
|
fi
|
|
343
341
|
|
|
344
|
-
if ! git rev-parse --is-inside-work-tree
|
|
342
|
+
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
|
|
345
343
|
error "Not inside a git repository. The loop requires git for progress tracking."
|
|
346
344
|
exit 1
|
|
347
345
|
fi
|
|
@@ -351,15 +349,15 @@ ORIGINAL_GOAL="$GOAL"
|
|
|
351
349
|
|
|
352
350
|
# ─── Timeout Detection ────────────────────────────────────────────────────────
|
|
353
351
|
TIMEOUT_CMD=""
|
|
354
|
-
if command -v timeout
|
|
352
|
+
if command -v timeout >/dev/null 2>&1; then
|
|
355
353
|
TIMEOUT_CMD="timeout"
|
|
356
|
-
elif command -v gtimeout
|
|
354
|
+
elif command -v gtimeout >/dev/null 2>&1; then
|
|
357
355
|
TIMEOUT_CMD="gtimeout"
|
|
358
356
|
fi
|
|
359
|
-
CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT
|
|
357
|
+
CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-$(_config_get_int "loop.claude_timeout" 1800 2>/dev/null || echo 1800)}" # 30 min default
|
|
360
358
|
|
|
361
359
|
if [[ "$AGENTS" -gt 1 ]]; then
|
|
362
|
-
if ! command -v tmux
|
|
360
|
+
if ! command -v tmux >/dev/null 2>&1; then
|
|
363
361
|
error "tmux is required for multi-agent mode."
|
|
364
362
|
echo -e " ${DIM}brew install tmux${RESET} (macOS)"
|
|
365
363
|
exit 1
|
|
@@ -393,7 +391,7 @@ select_adaptive_model() {
|
|
|
393
391
|
fi
|
|
394
392
|
# Read learned model routing
|
|
395
393
|
local _routing_file="${HOME}/.shipwright/optimization/model-routing.json"
|
|
396
|
-
if [[ -f "$_routing_file" ]] && command -v jq
|
|
394
|
+
if [[ -f "$_routing_file" ]] && command -v jq >/dev/null 2>&1; then
|
|
397
395
|
local _routed_model
|
|
398
396
|
_routed_model=$(jq -r --arg r "$role" '.routes[$r].model // ""' "$_routing_file" 2>/dev/null) || true
|
|
399
397
|
if [[ -n "${_routed_model:-}" && "${_routed_model:-}" != "null" ]]; then
|
|
@@ -403,7 +401,7 @@ select_adaptive_model() {
|
|
|
403
401
|
fi
|
|
404
402
|
|
|
405
403
|
# Try intelligence-based recommendation
|
|
406
|
-
if type intelligence_recommend_model
|
|
404
|
+
if type intelligence_recommend_model >/dev/null 2>&1; then
|
|
407
405
|
local rec
|
|
408
406
|
rec=$(intelligence_recommend_model "$role" "${COMPLEXITY:-5}" "${BUDGET:-0}" 2>/dev/null || echo "")
|
|
409
407
|
if [[ -n "$rec" ]]; then
|
|
@@ -422,7 +420,7 @@ select_adaptive_model() {
|
|
|
422
420
|
select_audit_model() {
|
|
423
421
|
local default_model="haiku"
|
|
424
422
|
local opt_file="$HOME/.shipwright/optimization/audit-tuning.json"
|
|
425
|
-
if [[ -f "$opt_file" ]] && command -v jq
|
|
423
|
+
if [[ -f "$opt_file" ]] && command -v jq >/dev/null 2>&1; then
|
|
426
424
|
local success_rate
|
|
427
425
|
success_rate=$(jq -r '.haiku_success_rate // 100' "$opt_file" 2>/dev/null || echo "100")
|
|
428
426
|
if [[ "${success_rate%%.*}" -lt 90 ]]; then
|
|
@@ -442,7 +440,7 @@ accumulate_loop_tokens() {
|
|
|
442
440
|
[[ ! -f "$log_file" ]] && return 0
|
|
443
441
|
|
|
444
442
|
# If jq is available and the file looks like JSON, parse structured output
|
|
445
|
-
if command -v jq
|
|
443
|
+
if command -v jq >/dev/null 2>&1 && head -c1 "$log_file" 2>/dev/null | grep -q '\['; then
|
|
446
444
|
local input_tok output_tok cache_read cache_create cost_usd
|
|
447
445
|
# The result object is the last element in the JSON array
|
|
448
446
|
input_tok=$(jq -r '.[-1].usage.input_tokens // 0' "$log_file" 2>/dev/null || echo "0")
|
|
@@ -458,6 +456,20 @@ accumulate_loop_tokens() {
|
|
|
458
456
|
local cost_millicents
|
|
459
457
|
cost_millicents=$(echo "$cost_usd" | awk '{printf "%.0f", $1 * 100000}' 2>/dev/null || echo "0")
|
|
460
458
|
LOOP_COST_MILLICENTS=$(( ${LOOP_COST_MILLICENTS:-0} + ${cost_millicents:-0} ))
|
|
459
|
+
else
|
|
460
|
+
# Estimate cost from tokens when Claude doesn't provide it (rates per million tokens)
|
|
461
|
+
local total_in total_out
|
|
462
|
+
total_in=$(( ${input_tok:-0} + ${cache_read:-0} + ${cache_create:-0} ))
|
|
463
|
+
total_out=${output_tok:-0}
|
|
464
|
+
local cost=0
|
|
465
|
+
case "${MODEL:-${CLAUDE_MODEL:-sonnet}}" in
|
|
466
|
+
*opus*) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 15 + o * 75) / 1000000}') ;;
|
|
467
|
+
*sonnet*) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 3 + o * 15) / 1000000}') ;;
|
|
468
|
+
*haiku*) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 0.25 + o * 1.25) / 1000000}') ;;
|
|
469
|
+
*) cost=$(awk -v i="$total_in" -v o="$total_out" 'BEGIN{printf "%.6f", (i * 3 + o * 15) / 1000000}') ;;
|
|
470
|
+
esac
|
|
471
|
+
cost_millicents=$(echo "$cost" | awk '{printf "%.0f", $1 * 100000}' 2>/dev/null || echo "0")
|
|
472
|
+
LOOP_COST_MILLICENTS=$(( ${LOOP_COST_MILLICENTS:-0} + ${cost_millicents:-0} ))
|
|
461
473
|
fi
|
|
462
474
|
else
|
|
463
475
|
# Fallback: regex-based parsing for non-JSON output
|
|
@@ -491,7 +503,7 @@ _extract_text_from_json() {
|
|
|
491
503
|
first_char=$(head -c1 "$json_file" 2>/dev/null || true)
|
|
492
504
|
|
|
493
505
|
# Case 2: Valid JSON array — extract .result from last element
|
|
494
|
-
if [[ "$first_char" == "[" ]] && command -v jq
|
|
506
|
+
if [[ "$first_char" == "[" ]] && command -v jq >/dev/null 2>&1; then
|
|
495
507
|
local extracted
|
|
496
508
|
extracted=$(jq -r '.[-1].result // empty' "$json_file" 2>/dev/null) || true
|
|
497
509
|
if [[ -n "$extracted" ]]; then
|
|
@@ -542,7 +554,7 @@ TOKJSON
|
|
|
542
554
|
# Reads tuning config for smarter iteration/circuit-breaker thresholds.
|
|
543
555
|
apply_adaptive_budget() {
|
|
544
556
|
local tuning_file="$HOME/.shipwright/optimization/loop-tuning.json"
|
|
545
|
-
if [[ -f "$tuning_file" ]] && command -v jq
|
|
557
|
+
if [[ -f "$tuning_file" ]] && command -v jq >/dev/null 2>&1; then
|
|
546
558
|
local tuned_max tuned_ext tuned_ext_count tuned_cb
|
|
547
559
|
tuned_max=$(jq -r '.max_iterations // ""' "$tuning_file" 2>/dev/null || echo "")
|
|
548
560
|
tuned_ext=$(jq -r '.extension_size // ""' "$tuning_file" 2>/dev/null || echo "")
|
|
@@ -560,7 +572,7 @@ apply_adaptive_budget() {
|
|
|
560
572
|
|
|
561
573
|
# Read learned iteration model
|
|
562
574
|
local _iter_model="${HOME}/.shipwright/optimization/iteration-model.json"
|
|
563
|
-
if [[ -f "$_iter_model" ]] && ! $MAX_ITERATIONS_EXPLICIT && command -v jq
|
|
575
|
+
if [[ -f "$_iter_model" ]] && ! $MAX_ITERATIONS_EXPLICIT && command -v jq >/dev/null 2>&1; then
|
|
564
576
|
local _complexity="${ISSUE_COMPLEXITY:-${COMPLEXITY:-medium}}"
|
|
565
577
|
local _predicted_max
|
|
566
578
|
_predicted_max=$(jq -r --arg c "$_complexity" '.predictions[$c].max_iterations // ""' "$_iter_model" 2>/dev/null) || true
|
|
@@ -571,7 +583,7 @@ apply_adaptive_budget() {
|
|
|
571
583
|
fi
|
|
572
584
|
|
|
573
585
|
# Try intelligence-based iteration estimate
|
|
574
|
-
if type intelligence_estimate_iterations
|
|
586
|
+
if type intelligence_estimate_iterations >/dev/null 2>&1 && ! $MAX_ITERATIONS_EXPLICIT; then
|
|
575
587
|
local est
|
|
576
588
|
est=$(intelligence_estimate_iterations "${GOAL:-}" "${COMPLEXITY:-5}" 2>/dev/null || echo "")
|
|
577
589
|
if [[ -n "$est" && "$est" =~ ^[0-9]+$ ]]; then
|
|
@@ -619,9 +631,6 @@ compute_velocity_avg() {
|
|
|
619
631
|
|
|
620
632
|
# ─── Timing Helpers ───────────────────────────────────────────────────────────
|
|
621
633
|
|
|
622
|
-
now_iso() { date -u +%Y-%m-%dT%H:%M:%SZ; }
|
|
623
|
-
now_epoch() { date +%s; }
|
|
624
|
-
|
|
625
634
|
format_duration() {
|
|
626
635
|
local secs="$1"
|
|
627
636
|
local mins=$(( secs / 60 ))
|
|
@@ -652,6 +661,9 @@ initialize_state() {
|
|
|
652
661
|
STATUS="running"
|
|
653
662
|
LOG_ENTRIES=""
|
|
654
663
|
|
|
664
|
+
# Record starting commit for cumulative diff in quality gates
|
|
665
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
|
|
666
|
+
|
|
655
667
|
write_state
|
|
656
668
|
}
|
|
657
669
|
|
|
@@ -723,6 +735,11 @@ resume_state() {
|
|
|
723
735
|
START_EPOCH="$(now_epoch)"
|
|
724
736
|
STATUS="running"
|
|
725
737
|
|
|
738
|
+
# Set starting commit for cumulative diff (approximate: use earliest tracked commit)
|
|
739
|
+
if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
|
|
740
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-list --max-parents=0 HEAD 2>/dev/null | tail -1 || echo "")"
|
|
741
|
+
fi
|
|
742
|
+
|
|
726
743
|
# If we hit max iterations before, warn user to extend
|
|
727
744
|
if [[ "$ITERATION" -ge "$MAX_ITERATIONS" ]] && ! $MAX_ITERATIONS_EXPLICIT; then
|
|
728
745
|
warn "Previous run stopped at iteration $ITERATION/$MAX_ITERATIONS."
|
|
@@ -730,6 +747,21 @@ resume_state() {
|
|
|
730
747
|
exit 0
|
|
731
748
|
fi
|
|
732
749
|
|
|
750
|
+
# Restore Claude context for meaningful resume (source so exports persist to this shell)
|
|
751
|
+
if [[ -f "$SCRIPT_DIR/sw-checkpoint.sh" ]] && [[ -d "${PROJECT_ROOT:-}" ]]; then
|
|
752
|
+
source "$SCRIPT_DIR/sw-checkpoint.sh"
|
|
753
|
+
local _orig_pwd="$PWD"
|
|
754
|
+
cd "$PROJECT_ROOT" 2>/dev/null || true
|
|
755
|
+
if checkpoint_restore_context "build" 2>/dev/null; then
|
|
756
|
+
RESUMED_FROM_ITERATION="${RESTORED_ITERATION:-}"
|
|
757
|
+
RESUMED_MODIFIED="${RESTORED_MODIFIED:-}"
|
|
758
|
+
RESUMED_FINDINGS="${RESTORED_FINDINGS:-}"
|
|
759
|
+
RESUMED_TEST_OUTPUT="${RESTORED_TEST_OUTPUT:-}"
|
|
760
|
+
[[ -n "${RESTORED_ITERATION:-}" && "${RESTORED_ITERATION:-0}" -gt 0 ]] && info "Restored context from iteration ${RESTORED_ITERATION}"
|
|
761
|
+
fi
|
|
762
|
+
cd "$_orig_pwd" 2>/dev/null || true
|
|
763
|
+
fi
|
|
764
|
+
|
|
733
765
|
success "Resumed: iteration $ITERATION/$MAX_ITERATIONS"
|
|
734
766
|
}
|
|
735
767
|
|
|
@@ -807,6 +839,86 @@ ${entry}"
|
|
|
807
839
|
fi
|
|
808
840
|
}
|
|
809
841
|
|
|
842
|
+
# ─── Semantic Validation for Claude Output ─────────────────────────────────────
|
|
843
|
+
# Validates changed files before commit to catch syntax errors and API error leakage.
|
|
844
|
+
#######################################
# Sanity-check the files changed by the current iteration before committing.
#
# Checks performed per changed file (by extension):
#   *.sh               bash -n syntax check
#   *.py               python3 ast.parse (skipped when python3 is absent)
#   *.json             jq empty (skipped when jq is absent)
#   *.ts/js/tsx/jsx    grep for API-error text that leaked into the source
# An iteration that changed no files at all also counts as one problem.
#
# Arguments: $1 - working tree to inspect (default: current directory)
# Outputs:   one warn message per problem found
# Returns:   number of problems found, capped at 255; 0 means clean
#######################################
validate_claude_output() {
  local workdir="${1:-.}"
  local issues=0
  local total_changed=0

  # Prefer the staged file list. `git diff --cached` exits 0 even when nothing
  # is staged, so the fallback has to key off empty output, not exit status.
  local changed_files
  changed_files=$(git -C "$workdir" diff --cached --name-only 2>/dev/null || true)
  if [[ -z "$changed_files" ]]; then
    changed_files=$(git -C "$workdir" diff --name-only 2>/dev/null || true)
  fi

  local file path
  while IFS= read -r file; do
    [[ -z "$file" ]] && continue
    total_changed=$((total_changed + 1))

    path="$workdir/$file"
    # Deleted or renamed-away paths have nothing to validate.
    [[ ! -f "$path" ]] && continue

    case "$file" in
      *.sh)
        if ! bash -n "$path" 2>/dev/null; then
          warn "Syntax error in shell script: $file"
          issues=$((issues + 1))
        fi
        ;;
      *.py)
        if command -v python3 >/dev/null 2>&1; then
          if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$path" 2>/dev/null; then
            warn "Syntax error in Python file: $file"
            issues=$((issues + 1))
          fi
        fi
        ;;
      *.json)
        if command -v jq >/dev/null 2>&1 && ! jq empty "$path" 2>/dev/null; then
          warn "Invalid JSON: $file"
          issues=$((issues + 1))
        fi
        ;;
      *.ts|*.js|*.tsx|*.jsx)
        # Obvious corruption: API error text written into a source file.
        if grep -qE '(CLAUDE_CODE_OAUTH_TOKEN|api key|rate limit|503 Service|DOCTYPE html)' "$path" 2>/dev/null; then
          warn "Claude API error leaked into source file: $file"
          issues=$((issues + 1))
        fi
        ;;
    esac
  done <<< "$changed_files"

  if [[ "$total_changed" -eq 0 ]]; then
    warn "Claude iteration produced no file changes"
    issues=$((issues + 1))
  fi

  # Exit statuses wrap modulo 256; without the cap, exactly 256 problems
  # would be reported to the caller as success.
  if (( issues > 255 )); then
    issues=255
  fi
  return "$issues"
}
|
|
898
|
+
|
|
899
|
+
# ─── Budget Gate (hard stop when exhausted) ───────────────────────────────────
|
|
900
|
+
#######################################
# Hard stop when the cost budget is exhausted.
#
# Asks "$SCRIPT_DIR/sw-cost.sh remaining-budget" for the remaining budget.
# The gate stays open (returns 0) when the cost script is missing or not
# executable, prints nothing, prints "unlimited", or prints something that
# is not a plain decimal number.
#
# Globals:   SCRIPT_DIR (read)
# Outputs:   error/warn messages; emits a pipeline.budget_exhausted event
# Returns:   1 when the remaining budget is <= 0, otherwise 0
#######################################
check_budget_gate() {
  [[ ! -x "$SCRIPT_DIR/sw-cost.sh" ]] && return 0

  local remaining
  remaining=$(bash "$SCRIPT_DIR/sw-cost.sh" remaining-budget 2>/dev/null || echo "")
  [[ -z "$remaining" ]] && return 0
  [[ "$remaining" == "unlimited" ]] && return 0

  # Only compare values that really are numbers. awk compares a non-numeric
  # -v value as a string, so e.g. "$3.00" can sort below "0" and would be
  # reported as an exhausted budget.
  local numeric_re='^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  if [[ ! "$remaining" =~ $numeric_re ]]; then
    warn "Could not parse remaining budget '${remaining}' — skipping budget gate"
    return 0
  fi

  # awk handles the float comparison; exit status 0 means the test is true.
  if awk -v r="$remaining" 'BEGIN { exit !(r + 0 <= 0) }' 2>/dev/null; then
    error "Budget exhausted (remaining: \$${remaining}) — stopping pipeline"
    emit_event "pipeline.budget_exhausted" "remaining=$remaining"
    return 1
  fi

  # Low-budget warning: fires below an absolute 1.0 remaining.
  if awk -v r="$remaining" 'BEGIN { exit !(r + 0 < 1.0) }' 2>/dev/null; then
    warn "Budget low: \$${remaining} remaining"
  fi

  return 0
}
|
|
921
|
+
|
|
810
922
|
# ─── Git Helpers ──────────────────────────────────────────────────────────────
|
|
811
923
|
|
|
812
924
|
git_commit_count() {
|
|
@@ -834,6 +946,14 @@ git_auto_commit() {
|
|
|
834
946
|
fi
|
|
835
947
|
|
|
836
948
|
git -C "$work_dir" add -A 2>/dev/null || true
|
|
949
|
+
|
|
950
|
+
# Semantic validation before commit — skip commit if validation fails
|
|
951
|
+
if ! validate_claude_output "$work_dir"; then
|
|
952
|
+
warn "Validation failed — skipping commit for this iteration"
|
|
953
|
+
git -C "$work_dir" reset --hard HEAD 2>/dev/null || true
|
|
954
|
+
return 1
|
|
955
|
+
fi
|
|
956
|
+
|
|
837
957
|
git -C "$work_dir" commit -m "loop: iteration $ITERATION — autonomous progress" --no-verify 2>/dev/null || return 1
|
|
838
958
|
return 0
|
|
839
959
|
}
|
|
@@ -861,7 +981,8 @@ check_fatal_error() {
|
|
|
861
981
|
# Non-zero exit + tiny output = likely CLI crash
|
|
862
982
|
if [[ "$cli_exit_code" -ne 0 ]]; then
|
|
863
983
|
local line_count
|
|
864
|
-
line_count=$(grep -cv '^$' "$log_file" 2>/dev/null ||
|
|
984
|
+
line_count=$(grep -cv '^$' "$log_file" 2>/dev/null || true)
|
|
985
|
+
line_count="${line_count:-0}"
|
|
865
986
|
if [[ "$line_count" -lt 3 ]]; then
|
|
866
987
|
local content
|
|
867
988
|
content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120)
|
|
@@ -897,7 +1018,7 @@ check_completion() {
|
|
|
897
1018
|
|
|
898
1019
|
check_circuit_breaker() {
|
|
899
1020
|
# Vitals-driven circuit breaker (preferred over static threshold)
|
|
900
|
-
if type pipeline_compute_vitals
|
|
1021
|
+
if type pipeline_compute_vitals >/dev/null 2>&1 && type pipeline_health_verdict >/dev/null 2>&1; then
|
|
901
1022
|
local _vitals_json _verdict
|
|
902
1023
|
local _loop_state="${STATE_FILE:-}"
|
|
903
1024
|
local _loop_artifacts="${ARTIFACTS_DIR:-}"
|
|
@@ -989,6 +1110,114 @@ check_max_iterations() {
|
|
|
989
1110
|
return 1
|
|
990
1111
|
}
|
|
991
1112
|
|
|
1113
|
+
# ─── Failure Diagnosis ─────────────────────────────────────────────────────────
|
|
1114
|
+
# Pattern-based root-cause classification for smarter retries (no Claude needed).
|
|
1115
|
+
# Returns markdown context to inject into the next iteration's goal.
|
|
1116
|
+
|
|
1117
|
+
#######################################
# Classify a failure from its error text and build retry guidance.
#
# Classification is pattern-based (case-insensitive extended regexes); the
# first matching pattern wins. Each diagnosis is appended to
# "${LOG_DIR:-/tmp}/diagnoses.txt"; once the same diagnosis has already been
# recorded twice, the strategy escalates to "alternative_approach".
#
# Globals:   LOG_DIR (read)
# Arguments: $1 - error output to classify
#            $2 - changed files (accepted for interface compatibility; unused)
#            $3 - iteration number, used in the heading
# Outputs:   markdown diagnosis context on stdout
#######################################
diagnose_failure() {
  local error_output="$1"
  local iteration="$3"

  # Parallel tables: pattern -> diagnosis label -> retry strategy.
  local -a patterns=(
    'import.*not found|cannot find module|no module named'
    'syntax error|unexpected token|parse error'
    'type.*not assignable|type error|TypeError'
    'undefined.*variable|not defined|ReferenceError'
    'timeout|timed out|ETIMEDOUT'
    'assertion.*fail|expect.*to|AssertionError'
    'permission denied|EACCES|forbidden'
    'out of memory|heap|OOM|ENOMEM'
  )
  local -a labels=(
    missing_import
    syntax_error
    type_error
    undefined_reference
    timeout
    test_assertion
    permission_error
    resource_error
  )
  local -a strategies=(
    fix_imports
    fix_syntax
    fix_types
    fix_references
    optimize_performance
    fix_logic
    fix_permissions
    reduce_resource_usage
  )

  local diagnosis="unknown"
  local strategy="retry_with_context"
  local i
  for (( i = 0; i < ${#patterns[@]}; i++ )); do
    # A here-string instead of `echo | grep -q`: when grep -q exits on an
    # early match, a piped echo of large output can die with SIGPIPE, which
    # turns a real match into a failed pipeline if pipefail is enabled.
    if grep -qiE -- "${patterns[i]}" <<< "$error_output"; then
      diagnosis="${labels[i]}"
      strategy="${strategies[i]}"
      break
    fi
  done

  # How many times has this diagnosis already been recorded?
  local diagnosis_file="${LOG_DIR:-/tmp}/diagnoses.txt"
  local repeat_count=0
  if [[ -f "$diagnosis_file" ]]; then
    repeat_count=$(grep -cFx -- "$diagnosis" "$diagnosis_file" 2>/dev/null || true)
    repeat_count="${repeat_count:-0}"
  fi
  # Best effort: an unwritable history file must not abort the diagnosis.
  { printf '%s\n' "$diagnosis" >> "$diagnosis_file"; } 2>/dev/null || true

  # Escalate when the same diagnosis keeps coming back.
  if [[ "$repeat_count" -ge 2 ]]; then
    strategy="alternative_approach"
  fi

  # Optional lookup of a previously recorded fix (only if the helper exists).
  local known_fix=""
  if type memory_query_fix_for_error &>/dev/null; then
    local fix_json
    fix_json=$(memory_query_fix_for_error "$error_output" 2>/dev/null || true)
    if [[ -n "$fix_json" && "$fix_json" != "null" ]]; then
      known_fix=$(jq -r '.fix // ""' 2>/dev/null <<< "$fix_json" | head -5) || true
    fi
  fi

  local nl=$'\n'
  local diagnosis_context
  printf -v diagnosis_context \
    '## Failure Diagnosis (Iteration %s)\nClassification: %s\nStrategy: %s\nRepeat count: %s' \
    "$iteration" "$diagnosis" "$strategy" "$repeat_count"

  if [[ -n "$known_fix" ]]; then
    diagnosis_context+="${nl}Known fix from memory: $known_fix"
  fi

  # Strategy-specific guidance; strategies not listed add no extra text.
  case "$strategy" in
    fix_imports)
      diagnosis_context+="${nl}INSTRUCTION: The error is about missing imports/modules. Check that all imports are correct, packages are installed, and paths are right. Do NOT change the logic - just fix the imports."
      ;;
    fix_syntax)
      diagnosis_context+="${nl}INSTRUCTION: This is a syntax error. Carefully check the exact line mentioned in the error. Look for missing brackets, semicolons, commas, or mismatched quotes."
      ;;
    fix_types)
      diagnosis_context+="${nl}INSTRUCTION: Type mismatch error. Check the types at the error location. Ensure function signatures match their usage."
      ;;
    fix_logic)
      diagnosis_context+="${nl}INSTRUCTION: Test assertion failure. The code logic is wrong, not the syntax. Re-read the test expectations and fix the implementation to match."
      ;;
    alternative_approach)
      diagnosis_context+="${nl}INSTRUCTION: This error has occurred $repeat_count times. The previous approach is not working. Try a FUNDAMENTALLY DIFFERENT approach:"
      diagnosis_context+="${nl}- If you were modifying existing code, try rewriting the function from scratch"
      diagnosis_context+="${nl}- If you were using one library, try a different one"
      diagnosis_context+="${nl}- If you were adding to a file, try creating a new file instead"
      diagnosis_context+="${nl}- Step back and reconsider the requirements"
      ;;
  esac

  printf '%s\n' "$diagnosis_context"
}
|
|
1220
|
+
|
|
992
1221
|
# ─── Test Gate ────────────────────────────────────────────────────────────────
|
|
993
1222
|
|
|
994
1223
|
run_test_gate() {
|
|
@@ -1018,9 +1247,9 @@ run_test_gate() {
|
|
|
1018
1247
|
# Wrap test command with timeout (5 min default) to prevent hanging
|
|
1019
1248
|
local test_timeout="${SW_TEST_TIMEOUT:-300}"
|
|
1020
1249
|
local test_wrapper="$active_test_cmd"
|
|
1021
|
-
if command -v timeout
|
|
1250
|
+
if command -v timeout >/dev/null 2>&1; then
|
|
1022
1251
|
test_wrapper="timeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
|
|
1023
|
-
elif command -v gtimeout
|
|
1252
|
+
elif command -v gtimeout >/dev/null 2>&1; then
|
|
1024
1253
|
test_wrapper="gtimeout ${test_timeout} bash -c $(printf '%q' "$active_test_cmd")"
|
|
1025
1254
|
fi
|
|
1026
1255
|
if bash -c "$test_wrapper" > "$test_log" 2>&1; then
|
|
@@ -1072,7 +1301,7 @@ write_error_summary() {
|
|
|
1072
1301
|
local tmp_json="${error_json}.tmp.$$"
|
|
1073
1302
|
|
|
1074
1303
|
# Build JSON with jq (preferred) or plain-text fallback
|
|
1075
|
-
if command -v jq
|
|
1304
|
+
if command -v jq >/dev/null 2>&1; then
|
|
1076
1305
|
jq -n \
|
|
1077
1306
|
--argjson iteration "${ITERATION:-0}" \
|
|
1078
1307
|
--arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
|
|
@@ -1105,33 +1334,60 @@ run_audit_agent() {
|
|
|
1105
1334
|
local log_file="$LOG_DIR/iteration-${ITERATION}.log"
|
|
1106
1335
|
local audit_log="$LOG_DIR/audit-iter-${ITERATION}.log"
|
|
1107
1336
|
|
|
1108
|
-
# Gather context: tail of implementer output +
|
|
1337
|
+
# Gather context: tail of implementer output + cumulative diff
|
|
1109
1338
|
local impl_tail
|
|
1110
1339
|
impl_tail="$(tail -100 "$log_file" 2>/dev/null || echo "(no output)")"
|
|
1111
|
-
|
|
1112
|
-
|
|
1340
|
+
|
|
1341
|
+
# Use cumulative diff from loop start so auditor sees ALL work, not just latest commit
|
|
1342
|
+
local diff_stat cumulative_note=""
|
|
1343
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
|
|
1344
|
+
diff_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no changes)")"
|
|
1345
|
+
cumulative_note="Note: This diff shows ALL changes since the loop started (iteration 1 through ${ITERATION}), not just the latest commit."
|
|
1346
|
+
else
|
|
1347
|
+
diff_stat="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null || echo "(no changes)")"
|
|
1348
|
+
fi
|
|
1349
|
+
|
|
1350
|
+
# Include verified test status so auditor doesn't have to guess
|
|
1351
|
+
local test_context=""
|
|
1352
|
+
if [[ -n "$TEST_CMD" ]]; then
|
|
1353
|
+
if [[ "${TEST_PASSED:-}" == "true" ]]; then
|
|
1354
|
+
test_context="## Verified Test Status (from harness, not from agent)
|
|
1355
|
+
Tests: ALL PASSING (command: ${TEST_CMD})"
|
|
1356
|
+
else
|
|
1357
|
+
test_context="## Verified Test Status (from harness)
|
|
1358
|
+
Tests: FAILING (command: ${TEST_CMD})
|
|
1359
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1360
|
+
fi
|
|
1361
|
+
fi
|
|
1113
1362
|
|
|
1114
1363
|
local audit_prompt
|
|
1115
1364
|
read -r -d '' audit_prompt <<AUDIT_PROMPT || true
|
|
1116
|
-
You are an independent code auditor reviewing an autonomous coding agent.
|
|
1365
|
+
You are an independent code auditor reviewing an autonomous coding agent's CUMULATIVE work.
|
|
1366
|
+
This is iteration ${ITERATION}. The agent may have done most of the work in earlier iterations.
|
|
1117
1367
|
|
|
1118
1368
|
## Goal the agent was working toward
|
|
1119
1369
|
${GOAL}
|
|
1120
1370
|
|
|
1121
|
-
## Agent Output (last 100 lines)
|
|
1371
|
+
## Agent Output This Iteration (last 100 lines)
|
|
1122
1372
|
${impl_tail}
|
|
1123
1373
|
|
|
1124
|
-
## Changes Made (git diff --stat)
|
|
1374
|
+
## Cumulative Changes Made (git diff --stat)
|
|
1375
|
+
${cumulative_note}
|
|
1125
1376
|
${diff_stat}
|
|
1126
1377
|
|
|
1378
|
+
${test_context}
|
|
1379
|
+
|
|
1127
1380
|
## Your Task
|
|
1128
|
-
Critically review the work:
|
|
1129
|
-
1.
|
|
1130
|
-
2. Are there obvious bugs, logic errors, or security issues?
|
|
1381
|
+
Critically review the CUMULATIVE work (not just the latest iteration):
|
|
1382
|
+
1. Has the agent made meaningful progress toward the goal across all iterations?
|
|
1383
|
+
2. Are there obvious bugs, logic errors, or security issues in the current codebase?
|
|
1131
1384
|
3. Did the agent leave incomplete work (TODOs, placeholder code)?
|
|
1132
1385
|
4. Are there any regressions or broken patterns?
|
|
1133
1386
|
5. Is the code quality acceptable?
|
|
1134
1387
|
|
|
1388
|
+
IMPORTANT: If the current iteration made small or no code changes, that may be acceptable
|
|
1389
|
+
if earlier iterations already completed the substantive work. Judge the whole body of work.
|
|
1390
|
+
|
|
1135
1391
|
If the work is acceptable and moves toward the goal, output exactly: AUDIT_PASS
|
|
1136
1392
|
Otherwise, list the specific issues that need fixing.
|
|
1137
1393
|
AUDIT_PROMPT
|
|
@@ -1217,21 +1473,52 @@ check_definition_of_done() {
|
|
|
1217
1473
|
|
|
1218
1474
|
local dod_content
|
|
1219
1475
|
dod_content="$(cat "$DOD_FILE")"
|
|
1476
|
+
|
|
1477
|
+
# Use cumulative diff from loop start (not just HEAD~1) so the evaluator
|
|
1478
|
+
# can see ALL work done across every iteration, not just the latest commit.
|
|
1220
1479
|
local diff_content
|
|
1221
|
-
|
|
1480
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]]; then
|
|
1481
|
+
diff_content="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || echo "(no diff)")"
|
|
1482
|
+
diff_content="${diff_content}
|
|
1483
|
+
|
|
1484
|
+
## Detailed Changes (cumulative diff, truncated to 200 lines)
|
|
1485
|
+
$(git -C "$PROJECT_ROOT" diff "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | head -200 || echo "(no diff)")"
|
|
1486
|
+
else
|
|
1487
|
+
diff_content="$(git -C "$PROJECT_ROOT" diff HEAD~1 2>/dev/null || echo "(no diff)")"
|
|
1488
|
+
fi
|
|
1489
|
+
|
|
1490
|
+
# Inject verified runtime facts so the evaluator doesn't have to guess
|
|
1491
|
+
local runtime_facts=""
|
|
1492
|
+
if [[ -n "$TEST_CMD" ]]; then
|
|
1493
|
+
if [[ "${TEST_PASSED:-}" == "true" ]]; then
|
|
1494
|
+
runtime_facts="## Verified Runtime Facts (from the loop harness, not from the agent)
|
|
1495
|
+
- Tests: ALL PASSING (verified by running '${TEST_CMD}' after this iteration)
|
|
1496
|
+
- Test output (last 10 lines):
|
|
1497
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1498
|
+
else
|
|
1499
|
+
runtime_facts="## Verified Runtime Facts
|
|
1500
|
+
- Tests: FAILING (verified by running '${TEST_CMD}')
|
|
1501
|
+
- Test output (last 10 lines):
|
|
1502
|
+
$(echo "${TEST_OUTPUT:-}" | tail -10)"
|
|
1503
|
+
fi
|
|
1504
|
+
fi
|
|
1222
1505
|
|
|
1223
1506
|
local dod_prompt
|
|
1224
1507
|
read -r -d '' dod_prompt <<DOD_PROMPT || true
|
|
1225
|
-
You are evaluating whether
|
|
1508
|
+
You are evaluating whether a project satisfies a Definition of Done checklist.
|
|
1509
|
+
You are reviewing the CUMULATIVE work across all iterations, not just the latest commit.
|
|
1226
1510
|
|
|
1227
1511
|
## Definition of Done
|
|
1228
1512
|
${dod_content}
|
|
1229
1513
|
|
|
1230
|
-
|
|
1514
|
+
${runtime_facts}
|
|
1515
|
+
|
|
1516
|
+
## Cumulative Changes Made (git diff from start of loop to now)
|
|
1231
1517
|
${diff_content}
|
|
1232
1518
|
|
|
1233
1519
|
## Your Task
|
|
1234
|
-
For each item in the Definition of Done, determine if the
|
|
1520
|
+
For each item in the Definition of Done, determine if the project satisfies it.
|
|
1521
|
+
The runtime facts above are verified by the harness — trust them as ground truth.
|
|
1235
1522
|
If ALL items are satisfied, output exactly: DOD_PASS
|
|
1236
1523
|
Otherwise, list which items are NOT satisfied and why.
|
|
1237
1524
|
DOD_PROMPT
|
|
@@ -1285,6 +1572,14 @@ guard_completion() {
|
|
|
1285
1572
|
rejection_reasons+=("tests failing")
|
|
1286
1573
|
fi
|
|
1287
1574
|
|
|
1575
|
+
# Holistic final gate: when all other gates pass, run a project-level assessment
|
|
1576
|
+
# that evaluates the entire codebase against the goal (not just the latest diff)
|
|
1577
|
+
if [[ ${#rejection_reasons[@]} -eq 0 ]]; then
|
|
1578
|
+
if ! run_holistic_gate; then
|
|
1579
|
+
rejection_reasons+=("holistic project assessment found gaps")
|
|
1580
|
+
fi
|
|
1581
|
+
fi
|
|
1582
|
+
|
|
1288
1583
|
if [[ ${#rejection_reasons[@]} -gt 0 ]]; then
|
|
1289
1584
|
local reasons_str
|
|
1290
1585
|
reasons_str="$(printf ', %s' "${rejection_reasons[@]}")"
|
|
@@ -1298,6 +1593,143 @@ guard_completion() {
|
|
|
1298
1593
|
return 0
|
|
1299
1594
|
}
|
|
1300
1595
|
|
|
1596
|
+
# Holistic gate: evaluates the full project against the original goal.
# Only runs when all other gates pass (final checkpoint before acceptance).
#
# Globals read: LOOP_START_COMMIT, LOG_DIR, ITERATION, PROJECT_ROOT, GOAL,
#               TEST_OUTPUT, TEST_PASSED, TEST_CMD, SKIP_PERMISSIONS,
#               PURPLE, GREEN, YELLOW, RESET
# Calls:        select_audit_model, claude, git
# Outputs:      progress lines on stdout; the reviewer's reply (stdout and
#               stderr of `claude`) goes to
#               $LOG_DIR/holistic-iter-<ITERATION>.log
# Returns:      0 when the log contains HOLISTIC_PASS, or when
#               LOOP_START_COMMIT is empty (gate skipped); 1 otherwise,
#               including when the `claude` call itself fails.
run_holistic_gate() {
    # Skip if no starting commit (can't compute cumulative diff)
    [[ -n "${LOOP_START_COMMIT:-}" ]] || return 0

    local holistic_log="$LOG_DIR/holistic-iter-${ITERATION}.log"

    # Build a project summary: file count, cumulative diff stats, test tail
    local file_count
    file_count=$(git -C "$PROJECT_ROOT" ls-files | wc -l | tr -d ' ')

    # Run the cumulative --stat once and slice it in-shell. Piping git into
    # `tail`/`head` with a trailing `|| echo fallback` never triggers the
    # fallback without pipefail, and with pipefail an early-exiting `head`
    # can make the pipeline "fail" and append the fallback to real output.
    local full_stat
    full_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null || true)"

    local cumulative_stat="(no changes)"
    local stat_head="(none)"
    if [[ -n "$full_stat" ]]; then
        cumulative_stat="${full_stat##*$'\n'}"          # last line = summary
        stat_head="$(head -n 40 <<<"$full_stat")"       # first 40 lines
    fi

    local test_summary=""
    if [[ -n "${TEST_OUTPUT:-}" ]]; then
        test_summary="$(tail -n 5 <<<"$TEST_OUTPUT")"
    fi

    local holistic_prompt
    # read returns non-zero at end of input with -d ''; that is expected.
    read -r -d '' holistic_prompt <<HOLISTIC_PROMPT || true
You are a final quality gate evaluating whether an autonomous coding agent has FULLY achieved its goal.

## Original Goal
${GOAL}

## Project Stats
- Files in repo: ${file_count}
- Iterations completed: ${ITERATION}
- Cumulative changes: ${cumulative_stat}
- Tests: ${TEST_PASSED:-unknown} (command: ${TEST_CMD:-none})
${test_summary:+- Test output: ${test_summary}}

## Cumulative Git Changes (diff --stat from start)
${stat_head}

## Your Task
Based on the goal and the cumulative work done:
1. Has the goal been FULLY achieved (not partially)?
2. Is there any critical gap that would make this unacceptable for production?

If the goal is fully achieved, output exactly: HOLISTIC_PASS
Otherwise, list the specific gaps remaining.
HOLISTIC_PROMPT

    echo -e "  ${PURPLE}▸${RESET} Running holistic project assessment..."

    local hol_model
    hol_model="$(select_audit_model)"
    local hol_flags=("--model" "$hol_model")
    # Compare as a string instead of executing the variable: an unset or
    # empty SKIP_PERMISSIONS expands to an empty command, which succeeds and
    # would silently enable --dangerously-skip-permissions.
    if [[ "${SKIP_PERMISSIONS:-false}" == "true" ]]; then
        hol_flags+=("--dangerously-skip-permissions")
    fi

    # Best-effort call: a failed invocation leaves no HOLISTIC_PASS marker
    # in the log, so the gate fails closed below.
    claude -p "$holistic_prompt" "${hol_flags[@]}" > "$holistic_log" 2>&1 || true

    if grep -q "HOLISTIC_PASS" "$holistic_log" 2>/dev/null; then
        echo -e "  ${GREEN}✓${RESET} Holistic assessment: passed"
        return 0
    fi

    echo -e "  ${YELLOW}⚠${RESET} Holistic assessment: gaps found"
    return 1
}
|
|
1659
|
+
|
|
1660
|
+
# ─── Context Window Management ───────────────────────────────────────────────
|
|
1661
|
+
# Prevents prompt from exceeding Claude's context limit (~200K tokens).
|
|
1662
|
+
# Trims least-critical sections first when over budget.
|
|
1663
|
+
|
|
1664
|
+
CONTEXT_BUDGET_CHARS="${CONTEXT_BUDGET_CHARS:-180000}" # ~45K tokens at 4 chars/token
|
|
1665
|
+
|
|
1666
|
+
# Trim a composed prompt so it fits the character budget.
#
# Arguments: $1 - the full prompt text
# Globals:   CONTEXT_BUDGET_CHARS (read; falls back to 180000 when unset or
#            not a non-negative integer)
# Calls:     warn, emit_event (only when something was trimmed)
# Outputs:   the (possibly trimmed) prompt on stdout. Callers capture stdout
#            as the prompt, so diagnostics are sent to stderr.
# Returns:   0
#
# Sections are trimmed least-important first; each step only runs while the
# text is still over budget.
manage_context_window() {
    local prompt="$1"
    local budget="${CONTEXT_BUDGET_CHARS:-180000}"
    local current_len=${#prompt}

    # A non-numeric budget would make every numeric test below error out.
    if [[ ! "$budget" =~ ^[0-9]+$ ]]; then
        budget=180000
    fi

    if [[ "$current_len" -le "$budget" ]]; then
        printf '%s\n' "$prompt"
        return 0
    fi

    # Over budget — progressively trim sections (least important first)
    local trimmed="$prompt"

    # 1. Drop the Performance Baselines section (least critical for code generation)
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '
            /^## Performance Baselines/ { skip = 1; next }
            skip && /^## [^#]/          { skip = 0 }
            !skip                       { print }
        ')
    fi

    # 2. Trim file hotspots to top 5.
    #    The section flag is cleared at the next heading so bullets belonging
    #    to later sections are left alone.
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '
            /^#+ /              { in_section = 0 }
            /## File Hotspots/  { in_section = 1; kept = 0 }
            in_section && /^- / { if (++kept > 5) next }
                                { print }
        ')
    fi

    # 3. Trim git log to last 10 entries.
    #    Scoped to its own section: the hash-prefix pattern also matches any
    #    ordinary line starting with a-f or a digit.
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '
            /^#+ /                    { in_section = 0 }
            /## Recent Git Activity/  { in_section = 1; kept = 0 }
            in_section && /^[a-f0-9]/ { if (++kept > 10) next }
                                      { print }
        ')
    fi

    # 4. Truncate memory context to first 20K chars
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk -v max=20000 '
            /## Memory Context/     { mem = 1; skip_rest = 0; chars = 0; print; next }
            mem && /^## [^#]/       { mem = 0; print; next }
            mem                     {
                chars += length($0) + 1
                if (chars > max) {
                    print "... (memory truncated for context budget)"
                    skip_rest = 1; mem = 0; next
                }
            }
            skip_rest && /^## [^#]/ { skip_rest = 0; print; next }
            skip_rest               { next }
                                    { print }
        ')
    fi

    # 5. Truncate test output to last 50 lines.
    #    The END rule flushes the buffer when Test Results is the final
    #    section; without it the whole section would be lost.
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        trimmed=$(printf '%s\n' "$trimmed" | awk '
            function flush_tail(   first, i) {
                first = (n > 50) ? n - 49 : 1
                for (i = first; i <= n; i++) print buf[i]
                n = 0
            }
            /## Test Results/   { if (found) flush_tail(); found = 1; n = 0; print; next }
            found && /^## [^#]/ { flush_tail(); found = 0; print; next }
            found               { buf[++n] = $0; next }
                                { print }
            END                 { if (found) flush_tail() }
        ')
    fi

    # 6. Last resort: hard truncate with notice. Room for the notice is
    #    reserved so the result stays within budget whenever the budget is
    #    at least as long as the notice itself.
    if [[ "${#trimmed}" -gt "$budget" ]]; then
        local notice=$'\n\n'"... [CONTEXT TRUNCATED: prompt exceeded ${budget} char budget. Focus on the goal and most recent errors.]"
        local keep=$(( budget - ${#notice} ))
        if (( keep < 0 )); then
            keep=0
        fi
        trimmed="${trimmed:0:keep}${notice}"
    fi

    # Log the trimming (stderr / discarded so the captured prompt stays clean)
    local final_len=${#trimmed}
    if [[ "$final_len" -lt "$current_len" ]]; then
        warn "Context trimmed from ${current_len} to ${final_len} chars (budget: ${budget})" >&2
        emit_event "loop.context_trimmed" "original=$current_len" "trimmed=$final_len" "budget=$budget" >/dev/null 2>&1 || true
    fi

    printf '%s\n' "$trimmed"
}
|
|
1732
|
+
|
|
1301
1733
|
# ─── Prompt Composition ──────────────────────────────────────────────────────
|
|
1302
1734
|
|
|
1303
1735
|
compose_prompt() {
|
|
@@ -1348,7 +1780,7 @@ Fix these specific errors. Each line above is one distinct error from the test o
|
|
|
1348
1780
|
|
|
1349
1781
|
# Memory context injection (failure patterns + past learnings)
|
|
1350
1782
|
local memory_section=""
|
|
1351
|
-
if type memory_inject_context
|
|
1783
|
+
if type memory_inject_context >/dev/null 2>&1; then
|
|
1352
1784
|
memory_section="$(memory_inject_context "build" 2>/dev/null || true)"
|
|
1353
1785
|
elif [[ -f "$SCRIPT_DIR/sw-memory.sh" ]]; then
|
|
1354
1786
|
memory_section="$("$SCRIPT_DIR/sw-memory.sh" inject build 2>/dev/null || true)"
|
|
@@ -1356,7 +1788,7 @@ Fix these specific errors. Each line above is one distinct error from the test o
|
|
|
1356
1788
|
|
|
1357
1789
|
# DORA baselines for context
|
|
1358
1790
|
local dora_section=""
|
|
1359
|
-
if type memory_get_dora_baseline
|
|
1791
|
+
if type memory_get_dora_baseline >/dev/null 2>&1; then
|
|
1360
1792
|
local dora_json
|
|
1361
1793
|
dora_json="$(memory_get_dora_baseline 7 2>/dev/null || echo "{}")"
|
|
1362
1794
|
local dora_total
|
|
@@ -1385,7 +1817,7 @@ $(cat "$memory_refresh_file")"
|
|
|
1385
1817
|
local intelligence_section=""
|
|
1386
1818
|
if [[ "${NO_GITHUB:-}" != "true" ]]; then
|
|
1387
1819
|
# File hotspots — top 5 most-changed files
|
|
1388
|
-
if type gh_file_change_frequency
|
|
1820
|
+
if type gh_file_change_frequency >/dev/null 2>&1; then
|
|
1389
1821
|
local hotspots
|
|
1390
1822
|
hotspots=$(gh_file_change_frequency 2>/dev/null | head -5 || true)
|
|
1391
1823
|
if [[ -n "$hotspots" ]]; then
|
|
@@ -1396,7 +1828,7 @@ ${hotspots}"
|
|
|
1396
1828
|
fi
|
|
1397
1829
|
|
|
1398
1830
|
# CODEOWNERS context
|
|
1399
|
-
if type gh_codeowners
|
|
1831
|
+
if type gh_codeowners >/dev/null 2>&1; then
|
|
1400
1832
|
local owners
|
|
1401
1833
|
owners=$(gh_codeowners 2>/dev/null | head -10 || true)
|
|
1402
1834
|
if [[ -n "$owners" ]]; then
|
|
@@ -1407,7 +1839,7 @@ ${owners}"
|
|
|
1407
1839
|
fi
|
|
1408
1840
|
|
|
1409
1841
|
# Active security alerts
|
|
1410
|
-
if type gh_security_alerts
|
|
1842
|
+
if type gh_security_alerts >/dev/null 2>&1; then
|
|
1411
1843
|
local alerts
|
|
1412
1844
|
alerts=$(gh_security_alerts 2>/dev/null | head -5 || true)
|
|
1413
1845
|
if [[ -n "$alerts" ]]; then
|
|
@@ -1459,6 +1891,34 @@ ${last_error}"
|
|
|
1459
1891
|
# Stuckness detection — compare last 3 iteration outputs
|
|
1460
1892
|
local stuckness_section=""
|
|
1461
1893
|
stuckness_section="$(detect_stuckness)"
|
|
1894
|
+
local _stuck_ret=$?
|
|
1895
|
+
local stuckness_detected=false
|
|
1896
|
+
[[ "$_stuck_ret" -eq 0 ]] && stuckness_detected=true
|
|
1897
|
+
|
|
1898
|
+
# Strategy exploration when stuck — append alternative strategy to GOAL
|
|
1899
|
+
if [[ "$stuckness_detected" == "true" ]]; then
|
|
1900
|
+
local last_error diagnosis
|
|
1901
|
+
last_error=$(tail -1 "${ARTIFACTS_DIR:-${PROJECT_ROOT:-.}/.claude/pipeline-artifacts}/error-log.jsonl" 2>/dev/null | jq -r '"Type: \(.type), Exit: \(.exit_code), Error: \(.error | split("\n") | first)"' 2>/dev/null || true)
|
|
1902
|
+
[[ -z "$last_error" || "$last_error" == "null" ]] && last_error="unknown"
|
|
1903
|
+
diagnosis="${STUCKNESS_DIAGNOSIS:-}"
|
|
1904
|
+
local alt_strategy
|
|
1905
|
+
alt_strategy=$(explore_alternative_strategy "$last_error" "${ITERATION:-0}" "$diagnosis")
|
|
1906
|
+
GOAL="${GOAL}
|
|
1907
|
+
|
|
1908
|
+
${alt_strategy}"
|
|
1909
|
+
|
|
1910
|
+
# Handle model escalation
|
|
1911
|
+
if [[ "${ESCALATE_MODEL:-}" == "true" ]]; then
|
|
1912
|
+
if [[ -f "$SCRIPT_DIR/sw-model-router.sh" ]]; then
|
|
1913
|
+
source "$SCRIPT_DIR/sw-model-router.sh" 2>/dev/null || true
|
|
1914
|
+
fi
|
|
1915
|
+
if type escalate_model &>/dev/null; then
|
|
1916
|
+
MODEL=$(escalate_model "${MODEL:-sonnet}")
|
|
1917
|
+
info "Escalated to model: $MODEL"
|
|
1918
|
+
fi
|
|
1919
|
+
unset ESCALATE_MODEL
|
|
1920
|
+
fi
|
|
1921
|
+
fi
|
|
1462
1922
|
|
|
1463
1923
|
# Session restart context — inject previous session progress
|
|
1464
1924
|
local restart_section=""
|
|
@@ -1470,12 +1930,52 @@ You are starting a FRESH session after the previous one exhausted its iterations
|
|
|
1470
1930
|
Read the progress above and continue from where it left off. Do NOT repeat work already done."
|
|
1471
1931
|
fi
|
|
1472
1932
|
|
|
1933
|
+
# Resume-from-checkpoint context — reconstruct Claude context for meaningful resume
|
|
1934
|
+
local resume_section=""
|
|
1935
|
+
if [[ -n "${RESUMED_FROM_ITERATION:-}" && "${RESUMED_FROM_ITERATION:-0}" -gt 0 ]]; then
|
|
1936
|
+
local _test_tail=" (none recorded)"
|
|
1937
|
+
[[ -n "${RESUMED_TEST_OUTPUT:-}" ]] && _test_tail="$(echo "$RESUMED_TEST_OUTPUT" | tail -20)"
|
|
1938
|
+
resume_section="## RESUMING FROM ITERATION ${RESUMED_FROM_ITERATION}
|
|
1939
|
+
|
|
1940
|
+
Continue from where you left off. Do NOT repeat work already done.
|
|
1941
|
+
|
|
1942
|
+
Previous work modified these files:
|
|
1943
|
+
${RESUMED_MODIFIED:- (none recorded)}
|
|
1944
|
+
|
|
1945
|
+
Previous findings/errors from earlier iterations:
|
|
1946
|
+
${RESUMED_FINDINGS:- (none recorded)}
|
|
1947
|
+
|
|
1948
|
+
Last test output (fix any failures, tail):
|
|
1949
|
+
${_test_tail}
|
|
1950
|
+
|
|
1951
|
+
---
|
|
1952
|
+
"
|
|
1953
|
+
# Clear after first use so we don't keep injecting on every iteration
|
|
1954
|
+
RESUMED_FROM_ITERATION=""
|
|
1955
|
+
RESUMED_MODIFIED=""
|
|
1956
|
+
RESUMED_FINDINGS=""
|
|
1957
|
+
RESUMED_TEST_OUTPUT=""
|
|
1958
|
+
fi
|
|
1959
|
+
|
|
1960
|
+
# Build cumulative progress summary showing all iterations' work
|
|
1961
|
+
local cumulative_section=""
|
|
1962
|
+
if [[ -n "${LOOP_START_COMMIT:-}" ]] && [[ "$ITERATION" -gt 1 ]]; then
|
|
1963
|
+
local cum_stat
|
|
1964
|
+
cum_stat="$(git -C "$PROJECT_ROOT" diff --stat "${LOOP_START_COMMIT}..HEAD" 2>/dev/null | tail -1 || true)"
|
|
1965
|
+
if [[ -n "$cum_stat" ]]; then
|
|
1966
|
+
cumulative_section="## Cumulative Progress (all iterations combined)
|
|
1967
|
+
${cum_stat}
|
|
1968
|
+
"
|
|
1969
|
+
fi
|
|
1970
|
+
fi
|
|
1971
|
+
|
|
1473
1972
|
cat <<PROMPT
|
|
1474
1973
|
You are an autonomous coding agent on iteration ${ITERATION}/${MAX_ITERATIONS} of a continuous loop.
|
|
1475
|
-
|
|
1974
|
+
${resume_section}
|
|
1476
1975
|
## Your Goal
|
|
1477
1976
|
${GOAL}
|
|
1478
1977
|
|
|
1978
|
+
${cumulative_section}
|
|
1479
1979
|
## Current Progress
|
|
1480
1980
|
${recent_log}
|
|
1481
1981
|
|
|
@@ -1521,56 +2021,227 @@ ${stuckness_section}
|
|
|
1521
2021
|
PROMPT
|
|
1522
2022
|
}
|
|
1523
2023
|
|
|
2024
|
+
# ─── Alternative Strategy Exploration ─────────────────────────────────────────
|
|
2025
|
+
# When stuckness is detected, pick one not-yet-tried alternative strategy
# based on the gate status, the last error text and the stuckness diagnosis.
#
# Arguments: $1 - last error description (default "unknown")
#            $2 - iteration number (accepted for interface compatibility;
#                 not used by the selection logic)
#            $3 - stuckness diagnosis text (default empty)
# Globals:   LOG_DIR (read; /tmp when unset), TEST_PASSED,
#            QUALITY_GATE_PASSED, AUDIT_RESULT (read)
# Side effects: appends the chosen strategy id to
#            ${LOG_DIR:-/tmp}/strategy-attempts.txt so it is not offered again
# Outputs:   the strategy text on stdout (an empty line when every
#            applicable strategy has already been attempted)

explore_alternative_strategy() {
    local last_error="${1:-unknown}"
    local diagnosis="${3:-}"

    # Track attempted strategies to avoid repeating them
    local strategy_file="${LOG_DIR:-/tmp}/strategy-attempts.txt"
    local attempted
    attempted=$(cat "$strategy_file" 2>/dev/null || true)

    local gates_green=false
    if [[ "${TEST_PASSED:-}" == "true" ]] \
        && [[ "${QUALITY_GATE_PASSED:-}" == "true" || "${AUDIT_RESULT:-}" == "pass" ]]; then
        gates_green=true
    fi

    local no_changes=false
    if grep -qi "no code changes" <<<"$last_error" || [[ "$diagnosis" == *"no code"* ]]; then
        no_changes=true
    fi

    # Exactly one strategy is chosen (and recorded) per call. Choosing two in
    # the same call would mark the first as attempted without ever showing it.
    local chosen="" strategy=""

    if [[ "$gates_green" == true ]] && ! grep -qxF -- "evaluator_alignment" <<<"$attempted"; then
        # Quality gates are passing but evaluators disagree
        chosen="evaluator_alignment"
        strategy="## Alternative Strategy: Evaluator Alignment
The code appears functionally complete (tests pass). Focus on satisfying the remaining
quality gate evaluators. Check the DoD log and audit log for specific complaints, then
address those exact points rather than adding new features."
    elif [[ "$no_changes" == true ]] && ! grep -qxF -- "verify_existing" <<<"$attempted"; then
        # No code changes in the last iteration
        chosen="verify_existing"
        strategy="## Alternative Strategy: Verify Existing Work
Recent iterations made no code changes. The work may already be complete.
Run the full test suite, verify all features work, and if everything passes,
commit a verification message and declare LOOP_COMPLETE with evidence."
    elif ! grep -qxF -- "decompose" <<<"$attempted"; then
        # Generic fallback: break the problem down
        chosen="decompose"
        strategy="## Alternative Strategy: Decompose
Break the remaining work into smaller, independent steps. Focus on one specific
file or function at a time. Read error messages literally — the root cause may
differ from your assumption."
    fi

    if [[ -n "$chosen" ]]; then
        # Best effort: failing to record must not suppress the suggestion.
        printf '%s\n' "$chosen" >> "$strategy_file" || true
    fi

    printf '%s\n' "$strategy"
}
|
|
2075
|
+
|
|
1524
2076
|
# ─── Stuckness Detection ─────────────────────────────────────────────────────
|
|
1525
|
-
#
|
|
2077
|
+
# Multi-signal detection: text overlap, git diff hash, error repetition, exit code pattern, iteration budget.
|
|
2078
|
+
# Returns 0 when stuck, 1 when not. Outputs stuckness section and sets STUCKNESS_HINT when stuck.
|
|
2079
|
+
# When stuck: increments STUCKNESS_COUNT, emits event; if STUCKNESS_COUNT >= 3, caller triggers session restart.
|
|
2080
|
+
STUCKNESS_COUNT=0
|
|
2081
|
+
STUCKNESS_TRACKING_FILE=""
|
|
2082
|
+
|
|
2083
|
+
# Append one "<diff_hash>|<error_hash>|<exit_code>" line describing the
# iteration that just finished to the stuckness tracking file.
#
# Arguments: $1 - exit code of the iteration (default 0)
# Globals:   LOG_DIR (read; function is a no-op when empty),
#            STUCKNESS_TRACKING_FILE (read; defaults to
#            $LOG_DIR/stuckness-tracking.txt), PROJECT_ROOT, ARTIFACTS_DIR,
#            STATE_DIR (read)
# Returns:   0 when LOG_DIR is empty; otherwise the status of the append.
#
# NOTE(review): the hash covers `git diff HEAD`, i.e. uncommitted changes
# only. If the loop commits before calling this, every iteration records the
# hash of an empty diff — confirm the call order.
record_iteration_stuckness_data() {
    local exit_code="${1:-0}"
    [[ -n "${LOG_DIR:-}" ]] || return 0

    local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
    local error_log="${ARTIFACTS_DIR:-${STATE_DIR:-${PROJECT_ROOT:-.}/.claude}/pipeline-artifacts}/error-log.jsonl"
    local diff_hash="" error_hash=""

    # md5 (BSD/macOS) first, md5sum (GNU) second. The pipeline's exit status
    # is that of its last stage, so a "|| echo none" fallback cannot be
    # relied on; the result is checked for emptiness instead.
    diff_hash=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null \
        | { md5 -q 2>/dev/null || md5sum 2>/dev/null | cut -d' ' -f1; }) || true
    [[ -n "$diff_hash" ]] || diff_hash="none"

    if [[ -f "$error_log" ]]; then
        # Fingerprint of the distinct lines among the last five log entries
        error_hash=$(tail -5 "$error_log" 2>/dev/null | sort -u \
            | { md5 -q 2>/dev/null || md5sum 2>/dev/null | cut -d' ' -f1; }) || true
    fi
    [[ -n "$error_hash" ]] || error_hash="none"

    printf '%s|%s|%s\n' "$diff_hash" "$error_hash" "$exit_code" >> "$tracking_file"
}
|
|
2097
|
+
|
|
1526
2098
|
detect_stuckness() {
|
|
1527
|
-
|
|
1528
|
-
|
|
2099
|
+
STUCKNESS_HINT=""
|
|
2100
|
+
local iteration="${ITERATION:-0}"
|
|
2101
|
+
local stuckness_signals=0
|
|
2102
|
+
local stuckness_reasons=()
|
|
2103
|
+
local tracking_file="${STUCKNESS_TRACKING_FILE:-$LOG_DIR/stuckness-tracking.txt}"
|
|
2104
|
+
local tracking_lines
|
|
2105
|
+
tracking_lines=$(wc -l < "$tracking_file" 2>/dev/null || echo "0")
|
|
2106
|
+
|
|
2107
|
+
# Signal 1: Text overlap (existing logic) — compare last 2 iteration logs
|
|
2108
|
+
if [[ "$iteration" -ge 3 ]]; then
|
|
2109
|
+
local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
|
|
2110
|
+
local log2="$LOG_DIR/iteration-$(( iteration - 2 )).log"
|
|
2111
|
+
local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
|
|
2112
|
+
|
|
2113
|
+
if [[ -f "$log1" && -f "$log2" ]]; then
|
|
2114
|
+
local lines1 lines2 common total overlap_pct
|
|
2115
|
+
lines1=$(tail -50 "$log1" 2>/dev/null | grep -v '^$' | sort || true)
|
|
2116
|
+
lines2=$(tail -50 "$log2" 2>/dev/null | grep -v '^$' | sort || true)
|
|
2117
|
+
|
|
2118
|
+
if [[ -n "$lines1" && -n "$lines2" ]]; then
|
|
2119
|
+
total=$(echo "$lines1" | wc -l | tr -d ' ')
|
|
2120
|
+
common=$(comm -12 <(echo "$lines1") <(echo "$lines2") 2>/dev/null | wc -l | tr -d ' ' || echo "0")
|
|
2121
|
+
if [[ "$total" -gt 0 ]]; then
|
|
2122
|
+
overlap_pct=$(( common * 100 / total ))
|
|
2123
|
+
else
|
|
2124
|
+
overlap_pct=0
|
|
2125
|
+
fi
|
|
2126
|
+
if [[ "${overlap_pct:-0}" -ge 90 ]]; then
|
|
2127
|
+
stuckness_signals=$((stuckness_signals + 1))
|
|
2128
|
+
stuckness_reasons+=("high text overlap (${overlap_pct}%) between iterations")
|
|
2129
|
+
fi
|
|
2130
|
+
fi
|
|
2131
|
+
fi
|
|
1529
2132
|
fi
|
|
1530
2133
|
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
2134
|
+
# Signal 2: Git diff hash — last 3 iterations produced zero or identical diffs
|
|
2135
|
+
if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
|
|
2136
|
+
local last_three
|
|
2137
|
+
last_three=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f1 || true)
|
|
2138
|
+
local unique_hashes
|
|
2139
|
+
unique_hashes=$(echo "$last_three" | sort -u | grep -v '^$' | wc -l | tr -d ' ')
|
|
2140
|
+
if [[ "$unique_hashes" -le 1 ]] && [[ -n "$last_three" ]]; then
|
|
2141
|
+
stuckness_signals=$((stuckness_signals + 1))
|
|
2142
|
+
stuckness_reasons+=("identical or zero git diffs in last 3 iterations")
|
|
2143
|
+
fi
|
|
2144
|
+
fi
|
|
1534
2145
|
|
|
1535
|
-
#
|
|
1536
|
-
if [[
|
|
1537
|
-
|
|
2146
|
+
# Signal 3: Error repetition — same error hash in last 3 iterations
|
|
2147
|
+
if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
|
|
2148
|
+
local last_three_errors
|
|
2149
|
+
last_three_errors=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f2 || true)
|
|
2150
|
+
local unique_error_hashes
|
|
2151
|
+
unique_error_hashes=$(echo "$last_three_errors" | sort -u | grep -v '^none$' | grep -v '^$' | wc -l | tr -d ' ')
|
|
2152
|
+
if [[ "$unique_error_hashes" -eq 1 ]] && [[ -n "$(echo "$last_three_errors" | grep -v '^none$')" ]]; then
|
|
2153
|
+
stuckness_signals=$((stuckness_signals + 1))
|
|
2154
|
+
stuckness_reasons+=("same error in last 3 iterations")
|
|
2155
|
+
fi
|
|
1538
2156
|
fi
|
|
1539
2157
|
|
|
1540
|
-
#
|
|
1541
|
-
local
|
|
1542
|
-
|
|
1543
|
-
|
|
2158
|
+
# Signal 4: Same error repeating 3+ times (legacy check on error-log content)
|
|
2159
|
+
local error_log
|
|
2160
|
+
error_log="${ARTIFACTS_DIR:-$PROJECT_ROOT/.claude/pipeline-artifacts}/error-log.jsonl"
|
|
2161
|
+
if [[ -f "$error_log" ]]; then
|
|
2162
|
+
local last_errors
|
|
2163
|
+
last_errors=$(tail -5 "$error_log" 2>/dev/null | jq -r '.error // .message // .error_hash // empty' 2>/dev/null | sort | uniq -c | sort -rn | head -1 || true)
|
|
2164
|
+
local repeat_count
|
|
2165
|
+
repeat_count=$(echo "$last_errors" | awk '{print $1}' 2>/dev/null || echo "0")
|
|
2166
|
+
if [[ "${repeat_count:-0}" -ge 3 ]]; then
|
|
2167
|
+
stuckness_signals=$((stuckness_signals + 1))
|
|
2168
|
+
stuckness_reasons+=("same error repeated ${repeat_count} times")
|
|
2169
|
+
fi
|
|
2170
|
+
fi
|
|
1544
2171
|
|
|
1545
|
-
|
|
1546
|
-
|
|
2172
|
+
# Signal 5: Exit code pattern — last 3 iterations had same non-zero exit code
|
|
2173
|
+
if [[ -f "$tracking_file" ]] && [[ "$tracking_lines" -ge 3 ]]; then
|
|
2174
|
+
local last_three_exits
|
|
2175
|
+
last_three_exits=$(tail -3 "$tracking_file" 2>/dev/null | cut -d'|' -f3 || true)
|
|
2176
|
+
local first_exit
|
|
2177
|
+
first_exit=$(echo "$last_three_exits" | head -1)
|
|
2178
|
+
if [[ "$first_exit" =~ ^[0-9]+$ ]] && [[ "$first_exit" -ne 0 ]]; then
|
|
2179
|
+
local all_same=true
|
|
2180
|
+
while IFS= read -r ex; do
|
|
2181
|
+
[[ "$ex" != "$first_exit" ]] && all_same=false
|
|
2182
|
+
done <<< "$last_three_exits"
|
|
2183
|
+
if [[ "$all_same" == true ]]; then
|
|
2184
|
+
stuckness_signals=$((stuckness_signals + 1))
|
|
2185
|
+
stuckness_reasons+=("same non-zero exit code (${first_exit}) in last 3 iterations")
|
|
2186
|
+
fi
|
|
2187
|
+
fi
|
|
1547
2188
|
fi
|
|
1548
2189
|
|
|
1549
|
-
|
|
1550
|
-
|
|
2190
|
+
# Signal 6: Git diff size — no or minimal code changes (existing)
|
|
2191
|
+
local diff_lines
|
|
2192
|
+
diff_lines=$(git -C "${PROJECT_ROOT:-.}" diff HEAD 2>/dev/null | wc -l | tr -d ' ' || echo "0")
|
|
2193
|
+
if [[ "${diff_lines:-0}" -lt 5 ]] && [[ "$iteration" -gt 2 ]]; then
|
|
2194
|
+
stuckness_signals=$((stuckness_signals + 1))
|
|
2195
|
+
stuckness_reasons+=("no code changes in last iteration")
|
|
2196
|
+
fi
|
|
1551
2197
|
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
2198
|
+
# Signal 7: Iteration budget — used >70% without passing tests
|
|
2199
|
+
local max_iter="${MAX_ITERATIONS:-20}"
|
|
2200
|
+
local progress_pct=0
|
|
2201
|
+
if [[ "$max_iter" -gt 0 ]]; then
|
|
2202
|
+
progress_pct=$(( iteration * 100 / max_iter ))
|
|
1556
2203
|
fi
|
|
2204
|
+
if [[ "$progress_pct" -gt 70 ]] && [[ "${TEST_PASSED:-false}" != "true" ]]; then
|
|
2205
|
+
stuckness_signals=$((stuckness_signals + 1))
|
|
2206
|
+
stuckness_reasons+=("used ${progress_pct}% of iteration budget without passing tests")
|
|
2207
|
+
fi
|
|
2208
|
+
|
|
2209
|
+
# Gate-aware dampening: if tests pass and the agent has made progress overall,
|
|
2210
|
+
# reduce stuckness signal count. The "no code changes" and "identical diffs" signals
|
|
2211
|
+
# fire when code is already complete and the agent is fighting evaluator quirks —
|
|
2212
|
+
# that's not genuine stuckness, it's "done but gates disagree."
|
|
2213
|
+
if [[ "${TEST_PASSED:-}" == "true" ]] && [[ "$stuckness_signals" -ge 2 ]]; then
|
|
2214
|
+
# If at least one quality signal is positive, dampen by 1
|
|
2215
|
+
if [[ "${AUDIT_RESULT:-}" == "pass" ]] || $QUALITY_GATE_PASSED 2>/dev/null; then
|
|
2216
|
+
stuckness_signals=$((stuckness_signals - 1))
|
|
2217
|
+
fi
|
|
2218
|
+
fi
|
|
2219
|
+
|
|
2220
|
+
# Decision: 2+ signals = stuck
|
|
2221
|
+
if [[ "$stuckness_signals" -ge 2 ]]; then
|
|
2222
|
+
STUCKNESS_COUNT=$(( STUCKNESS_COUNT + 1 ))
|
|
2223
|
+
STUCKNESS_DIAGNOSIS="${stuckness_reasons[*]}"
|
|
2224
|
+
if type emit_event >/dev/null 2>&1; then
|
|
2225
|
+
emit_event "loop.stuckness_detected" "signals=$stuckness_signals" "count=$STUCKNESS_COUNT" "iteration=$iteration" "reasons=${stuckness_reasons[*]}"
|
|
2226
|
+
fi
|
|
2227
|
+
STUCKNESS_HINT="IMPORTANT: The loop appears stuck. Previous approaches have not worked. You MUST try a fundamentally different strategy. Reasons: ${stuckness_reasons[*]}"
|
|
2228
|
+
warn "Stuckness detected (${stuckness_signals} signals, count ${STUCKNESS_COUNT}): ${stuckness_reasons[*]}"
|
|
1557
2229
|
|
|
1558
|
-
if [[ "$overlap_pct" -ge 90 ]]; then
|
|
1559
2230
|
local diff_summary=""
|
|
1560
|
-
|
|
2231
|
+
local log1="$LOG_DIR/iteration-$(( iteration - 1 )).log"
|
|
2232
|
+
local log3="$LOG_DIR/iteration-$(( iteration - 3 )).log"
|
|
2233
|
+
if [[ -f "$log3" && -f "$log1" ]]; then
|
|
1561
2234
|
diff_summary=$(diff <(tail -30 "$log3" 2>/dev/null) <(tail -30 "$log1" 2>/dev/null) 2>/dev/null | head -10 || true)
|
|
1562
2235
|
fi
|
|
1563
2236
|
|
|
1564
|
-
# Gather memory-based alternative approaches
|
|
1565
2237
|
local alternatives=""
|
|
1566
|
-
if type memory_inject_context
|
|
2238
|
+
if type memory_inject_context >/dev/null 2>&1; then
|
|
1567
2239
|
alternatives=$(memory_inject_context "build" 2>/dev/null | grep -i "fix:" | head -3 || true)
|
|
1568
2240
|
fi
|
|
1569
2241
|
|
|
1570
2242
|
cat <<STUCK_SECTION
|
|
1571
2243
|
## Stuckness Detected
|
|
1572
|
-
|
|
1573
|
-
You appear to be stuck on the same approach.
|
|
2244
|
+
${STUCKNESS_HINT}
|
|
1574
2245
|
|
|
1575
2246
|
${diff_summary:+Changes between recent iterations:
|
|
1576
2247
|
$diff_summary
|
|
@@ -1584,7 +2255,10 @@ Try a fundamentally different approach:
|
|
|
1584
2255
|
- Check if there's a dependency or configuration issue blocking progress
|
|
1585
2256
|
- Read error messages more carefully — the root cause may differ from your assumption
|
|
1586
2257
|
STUCK_SECTION
|
|
2258
|
+
return 0
|
|
1587
2259
|
fi
|
|
2260
|
+
|
|
2261
|
+
return 1
|
|
1588
2262
|
}
|
|
1589
2263
|
|
|
1590
2264
|
compose_audit_section() {
|
|
@@ -1675,7 +2349,7 @@ compose_worker_prompt() {
|
|
|
1675
2349
|
local role_desc=""
|
|
1676
2350
|
# Try to pull description from recruit's roles DB first
|
|
1677
2351
|
local recruit_roles_db="${HOME}/.shipwright/recruitment/roles.json"
|
|
1678
|
-
if [[ -f "$recruit_roles_db" ]] && command -v jq
|
|
2352
|
+
if [[ -f "$recruit_roles_db" ]] && command -v jq >/dev/null 2>&1; then
|
|
1679
2353
|
local recruit_desc
|
|
1680
2354
|
recruit_desc=$(jq -r --arg r "$role" '.[$r].description // ""' "$recruit_roles_db" 2>/dev/null) || true
|
|
1681
2355
|
if [[ -n "$recruit_desc" && "$recruit_desc" != "null" ]]; then
|
|
@@ -1735,6 +2409,12 @@ run_claude_iteration() {
|
|
|
1735
2409
|
local json_file="$LOG_DIR/iteration-${ITERATION}.json"
|
|
1736
2410
|
local prompt
|
|
1737
2411
|
prompt="$(compose_prompt)"
|
|
2412
|
+
local final_prompt
|
|
2413
|
+
final_prompt=$(manage_context_window "$prompt")
|
|
2414
|
+
|
|
2415
|
+
local prompt_chars=${#final_prompt}
|
|
2416
|
+
local approx_tokens=$((prompt_chars / 4))
|
|
2417
|
+
info "Prompt: ~${approx_tokens} tokens (${prompt_chars} chars)"
|
|
1738
2418
|
|
|
1739
2419
|
local flags
|
|
1740
2420
|
flags="$(build_claude_flags)"
|
|
@@ -1750,9 +2430,9 @@ run_claude_iteration() {
|
|
|
1750
2430
|
# shellcheck disable=SC2086
|
|
1751
2431
|
local err_file="${json_file%.json}.stderr"
|
|
1752
2432
|
if [[ -n "$TIMEOUT_CMD" ]]; then
|
|
1753
|
-
$TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$
|
|
2433
|
+
$TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
|
|
1754
2434
|
else
|
|
1755
|
-
claude -p "$
|
|
2435
|
+
claude -p "$final_prompt" $flags > "$json_file" 2>"$err_file" &
|
|
1756
2436
|
fi
|
|
1757
2437
|
CHILD_PID=$!
|
|
1758
2438
|
wait "$CHILD_PID" 2>/dev/null || exit_code=$?
|
|
@@ -1835,12 +2515,13 @@ show_summary() {
|
|
|
1835
2515
|
|
|
1836
2516
|
local status_display
|
|
1837
2517
|
case "$STATUS" in
|
|
1838
|
-
complete)
|
|
1839
|
-
circuit_breaker)
|
|
1840
|
-
max_iterations)
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
2518
|
+
complete) status_display="${GREEN}✓ Complete (LOOP_COMPLETE detected)${RESET}" ;;
|
|
2519
|
+
circuit_breaker) status_display="${RED}✗ Circuit breaker tripped${RESET}" ;;
|
|
2520
|
+
max_iterations) status_display="${YELLOW}⚠ Max iterations reached${RESET}" ;;
|
|
2521
|
+
budget_exhausted) status_display="${RED}✗ Budget exhausted${RESET}" ;;
|
|
2522
|
+
interrupted) status_display="${YELLOW}⚠ Interrupted by user${RESET}" ;;
|
|
2523
|
+
error) status_display="${RED}✗ Error${RESET}" ;;
|
|
2524
|
+
*) status_display="${DIM}$STATUS${RESET}" ;;
|
|
1844
2525
|
esac
|
|
1845
2526
|
|
|
1846
2527
|
local test_display
|
|
@@ -1909,6 +2590,15 @@ cleanup() {
|
|
|
1909
2590
|
--iteration "$ITERATION" \
|
|
1910
2591
|
--git-sha "$(git rev-parse HEAD 2>/dev/null || echo unknown)" 2>/dev/null || true
|
|
1911
2592
|
|
|
2593
|
+
# Save Claude context for meaningful resume (goal, findings, test output)
|
|
2594
|
+
export SW_LOOP_GOAL="$GOAL"
|
|
2595
|
+
export SW_LOOP_ITERATION="$ITERATION"
|
|
2596
|
+
export SW_LOOP_STATUS="$STATUS"
|
|
2597
|
+
export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
|
|
2598
|
+
export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
|
|
2599
|
+
export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
|
|
2600
|
+
"$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
|
|
2601
|
+
|
|
1912
2602
|
# Clear heartbeat
|
|
1913
2603
|
"$SCRIPT_DIR/sw-heartbeat.sh" clear "${PIPELINE_JOB_ID:-loop-$$}" 2>/dev/null || true
|
|
1914
2604
|
|
|
@@ -1934,7 +2624,7 @@ setup_worktrees() {
|
|
|
1934
2624
|
fi
|
|
1935
2625
|
|
|
1936
2626
|
# Create branch if it doesn't exist
|
|
1937
|
-
if ! git -C "$PROJECT_ROOT" rev-parse --verify "$branch_name"
|
|
2627
|
+
if ! git -C "$PROJECT_ROOT" rev-parse --verify "$branch_name" >/dev/null 2>&1; then
|
|
1938
2628
|
git -C "$PROJECT_ROOT" branch "$branch_name" HEAD 2>/dev/null || true
|
|
1939
2629
|
fi
|
|
1940
2630
|
|
|
@@ -1996,6 +2686,17 @@ CONSECUTIVE_FAILURES=0
|
|
|
1996
2686
|
echo -e "${CYAN}${BOLD}▸${RESET} Agent ${AGENT_NUM}/${TOTAL_AGENTS} starting in ${WORK_DIR}"
|
|
1997
2687
|
|
|
1998
2688
|
while [[ "$ITERATION" -lt "$MAX_ITERATIONS" ]]; do
|
|
2689
|
+
# Budget gate: stop if daily budget exhausted
|
|
2690
|
+
if [[ -x "$SCRIPT_DIR/sw-cost.sh" ]]; then
|
|
2691
|
+
budget_remaining=$("$SCRIPT_DIR/sw-cost.sh" remaining-budget 2>/dev/null || echo "")
|
|
2692
|
+
if [[ -n "$budget_remaining" && "$budget_remaining" != "unlimited" ]]; then
|
|
2693
|
+
if awk -v r="$budget_remaining" 'BEGIN { exit !(r <= 0) }' 2>/dev/null; then
|
|
2694
|
+
echo -e " ${RED}✗${RESET} Budget exhausted (\$${budget_remaining}) — stopping agent ${AGENT_NUM}"
|
|
2695
|
+
break
|
|
2696
|
+
fi
|
|
2697
|
+
fi
|
|
2698
|
+
fi
|
|
2699
|
+
|
|
1999
2700
|
ITERATION=$(( ITERATION + 1 ))
|
|
2000
2701
|
echo -e "\n${CYAN}${BOLD}▸${RESET} Agent ${AGENT_NUM} — Iteration ${ITERATION}/${MAX_ITERATIONS}"
|
|
2001
2702
|
|
|
@@ -2064,8 +2765,12 @@ PROMPT
|
|
|
2064
2765
|
# Auto-commit
|
|
2065
2766
|
git add -A 2>/dev/null || true
|
|
2066
2767
|
if git commit -m "agent-${AGENT_NUM}: iteration ${ITERATION}" --no-verify 2>/dev/null; then
|
|
2067
|
-
git push origin "loop/agent-${AGENT_NUM}" 2>/dev/null
|
|
2068
|
-
|
|
2768
|
+
if ! git push origin "loop/agent-${AGENT_NUM}" 2>/dev/null; then
|
|
2769
|
+
echo -e " ${YELLOW}⚠${RESET} git push failed for loop/agent-${AGENT_NUM} — remote may be out of sync"
|
|
2770
|
+
type emit_event >/dev/null 2>&1 && emit_event "loop.push_failed" "branch=loop/agent-${AGENT_NUM}"
|
|
2771
|
+
else
|
|
2772
|
+
echo -e " ${GREEN}✓${RESET} Committed and pushed"
|
|
2773
|
+
fi
|
|
2069
2774
|
fi
|
|
2070
2775
|
|
|
2071
2776
|
# Circuit breaker: check for progress
|
|
@@ -2083,7 +2788,7 @@ PROMPT
|
|
|
2083
2788
|
break
|
|
2084
2789
|
fi
|
|
2085
2790
|
|
|
2086
|
-
sleep
|
|
2791
|
+
sleep __SLEEP_BETWEEN_ITERATIONS__
|
|
2087
2792
|
done
|
|
2088
2793
|
|
|
2089
2794
|
echo -e "\n${DIM}Agent ${AGENT_NUM} finished after ${ITERATION} iterations${RESET}"
|
|
@@ -2094,11 +2799,14 @@ WORKEREOF
|
|
|
2094
2799
|
sed_i "s|__AGENT_NUM__|${agent_num}|g" "$worker_script"
|
|
2095
2800
|
sed_i "s|__TOTAL_AGENTS__|${total_agents}|g" "$worker_script"
|
|
2096
2801
|
sed_i "s|__MAX_ITERATIONS__|${MAX_ITERATIONS}|g" "$worker_script"
|
|
2802
|
+
sed_i "s|__SLEEP_BETWEEN_ITERATIONS__|$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)|g" "$worker_script"
|
|
2097
2803
|
# Paths and commands may contain sed-special chars — use awk
|
|
2098
2804
|
awk -v val="$wt_path" '{gsub(/__WORK_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
|
|
2099
2805
|
&& mv "${worker_script}.tmp" "$worker_script"
|
|
2100
2806
|
awk -v val="$LOG_DIR" '{gsub(/__LOG_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
|
|
2101
2807
|
&& mv "${worker_script}.tmp" "$worker_script"
|
|
2808
|
+
awk -v val="$SCRIPT_DIR" '{gsub(/__SCRIPT_DIR__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
|
|
2809
|
+
&& mv "${worker_script}.tmp" "$worker_script"
|
|
2102
2810
|
awk -v val="$TEST_CMD" '{gsub(/__TEST_CMD__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
|
|
2103
2811
|
&& mv "${worker_script}.tmp" "$worker_script"
|
|
2104
2812
|
awk -v val="$claude_flags" '{gsub(/__CLAUDE_FLAGS__/, val); print}' "$worker_script" > "${worker_script}.tmp" \
|
|
@@ -2137,11 +2845,12 @@ launch_multi_agent() {
|
|
|
2137
2845
|
local worker_script
|
|
2138
2846
|
worker_script="$(generate_worker_script "$i" "$AGENTS")"
|
|
2139
2847
|
|
|
2140
|
-
|
|
2848
|
+
local worker_pane_id
|
|
2849
|
+
worker_pane_id="$(tmux split-window -t "$MULTI_WINDOW_NAME" -c "$PROJECT_ROOT" -P -F '#{pane_id}')"
|
|
2141
2850
|
sleep 0.1
|
|
2142
|
-
tmux send-keys -t "$
|
|
2851
|
+
tmux send-keys -t "$worker_pane_id" "printf '\\033]2;agent-${i}\\033\\\\'" Enter
|
|
2143
2852
|
sleep 0.1
|
|
2144
|
-
tmux send-keys -t "$
|
|
2853
|
+
tmux send-keys -t "$worker_pane_id" "bash '$worker_script'" Enter
|
|
2145
2854
|
done
|
|
2146
2855
|
|
|
2147
2856
|
# Layout: monitor pane on top (35%), worker agents tile below
|
|
@@ -2181,7 +2890,7 @@ wait_for_multi_completion() {
|
|
|
2181
2890
|
latest_log="$(ls -t "$LOG_DIR"/agent-"${i}"-iter-*.log 2>/dev/null | head -1)"
|
|
2182
2891
|
if [[ -n "$latest_log" ]]; then
|
|
2183
2892
|
local age
|
|
2184
|
-
age=$(( $(now_epoch) - $(
|
|
2893
|
+
age=$(( $(now_epoch) - $(file_mtime "$latest_log") ))
|
|
2185
2894
|
if [[ $age -lt 300 ]]; then # Active within 5 minutes
|
|
2186
2895
|
running=$(( running + 1 ))
|
|
2187
2896
|
fi
|
|
@@ -2200,7 +2909,7 @@ wait_for_multi_completion() {
|
|
|
2200
2909
|
fi
|
|
2201
2910
|
fi
|
|
2202
2911
|
|
|
2203
|
-
sleep 5
|
|
2912
|
+
sleep "$(_config_get_int "loop.multi_agent_sleep" 5 2>/dev/null || echo 5)"
|
|
2204
2913
|
done
|
|
2205
2914
|
}
|
|
2206
2915
|
|
|
@@ -2233,12 +2942,21 @@ run_single_agent_loop() {
|
|
|
2233
2942
|
initialize_state
|
|
2234
2943
|
fi
|
|
2235
2944
|
|
|
2945
|
+
# Ensure LOOP_START_COMMIT is set (may not be on resume/restart)
|
|
2946
|
+
if [[ -z "${LOOP_START_COMMIT:-}" ]]; then
|
|
2947
|
+
LOOP_START_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
|
|
2948
|
+
fi
|
|
2949
|
+
|
|
2236
2950
|
# Apply adaptive budget/model before showing banner
|
|
2237
2951
|
apply_adaptive_budget
|
|
2238
2952
|
MODEL="$(select_adaptive_model "build" "$MODEL")"
|
|
2239
2953
|
|
|
2240
2954
|
# Track applied memory fix patterns for outcome recording
|
|
2241
2955
|
_applied_fix_pattern=""
|
|
2956
|
+
STUCKNESS_COUNT=0
|
|
2957
|
+
STUCKNESS_TRACKING_FILE="$LOG_DIR/stuckness-tracking.txt"
|
|
2958
|
+
: > "$STUCKNESS_TRACKING_FILE" 2>/dev/null || true
|
|
2959
|
+
: > "${LOG_DIR:-/tmp}/strategy-attempts.txt" 2>/dev/null || true
|
|
2242
2960
|
|
|
2243
2961
|
show_banner
|
|
2244
2962
|
|
|
@@ -2246,17 +2964,58 @@ run_single_agent_loop() {
|
|
|
2246
2964
|
# Pre-checks (before incrementing — ITERATION tracks completed count)
|
|
2247
2965
|
check_circuit_breaker || break
|
|
2248
2966
|
check_max_iterations || break
|
|
2967
|
+
check_budget_gate || {
|
|
2968
|
+
STATUS="budget_exhausted"
|
|
2969
|
+
write_state
|
|
2970
|
+
write_progress
|
|
2971
|
+
error "Budget exhausted — stopping pipeline"
|
|
2972
|
+
show_summary
|
|
2973
|
+
return 1
|
|
2974
|
+
}
|
|
2249
2975
|
ITERATION=$(( ITERATION + 1 ))
|
|
2250
2976
|
|
|
2251
|
-
#
|
|
2977
|
+
# Emit iteration start event for pipeline visibility
|
|
2978
|
+
if type emit_event >/dev/null 2>&1; then
|
|
2979
|
+
emit_event "loop.iteration_start" \
|
|
2980
|
+
"iteration=$ITERATION" \
|
|
2981
|
+
"max=$MAX_ITERATIONS" \
|
|
2982
|
+
"job_id=${PIPELINE_JOB_ID:-loop-$$}" \
|
|
2983
|
+
"agent=${AGENT_NUM:-1}" \
|
|
2984
|
+
"test_passed=${TEST_PASSED:-unknown}"
|
|
2985
|
+
fi
|
|
2986
|
+
|
|
2987
|
+
# Root-cause diagnosis and memory-based fix on retry after test failure
|
|
2252
2988
|
if [[ "${TEST_PASSED:-}" == "false" ]]; then
|
|
2989
|
+
# Source memory module for diagnosis and fix lookup
|
|
2990
|
+
[[ -f "$SCRIPT_DIR/sw-memory.sh" ]] && source "$SCRIPT_DIR/sw-memory.sh" 2>/dev/null || true
|
|
2991
|
+
|
|
2992
|
+
# Capture failure for memory (enables memory_analyze_failure and future fix lookup)
|
|
2993
|
+
if type memory_capture_failure &>/dev/null && [[ -n "${TEST_OUTPUT:-}" ]]; then
|
|
2994
|
+
memory_capture_failure "test" "$TEST_OUTPUT" 2>/dev/null || true
|
|
2995
|
+
fi
|
|
2996
|
+
|
|
2997
|
+
# Pattern-based diagnosis (no Claude needed) — inject into goal for smarter retry
|
|
2998
|
+
local _changed_files=""
|
|
2999
|
+
_changed_files=$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')
|
|
3000
|
+
local _diagnosis
|
|
3001
|
+
_diagnosis=$(diagnose_failure "${TEST_OUTPUT:-}" "$_changed_files" "$ITERATION" 2>/dev/null || true)
|
|
3002
|
+
|
|
3003
|
+
if [[ -n "$_diagnosis" ]]; then
|
|
3004
|
+
GOAL="${GOAL}
|
|
3005
|
+
|
|
3006
|
+
${_diagnosis}"
|
|
3007
|
+
info "Failure diagnosis injected (classification from error pattern)"
|
|
3008
|
+
fi
|
|
3009
|
+
|
|
3010
|
+
# Memory-based fix suggestion (from past successful fixes)
|
|
2253
3011
|
local _last_error=""
|
|
2254
3012
|
local _prev_log="$LOG_DIR/iteration-$(( ITERATION - 1 )).log"
|
|
2255
3013
|
if [[ -f "$_prev_log" ]]; then
|
|
2256
3014
|
_last_error=$(tail -20 "$_prev_log" 2>/dev/null | grep -iE '(error|fail|exception)' | head -1 || true)
|
|
2257
3015
|
fi
|
|
3016
|
+
[[ -z "$_last_error" ]] && _last_error=$(echo "${TEST_OUTPUT:-}" | head -3 | tr '\n' ' ')
|
|
2258
3017
|
local _fix_suggestion=""
|
|
2259
|
-
if type memory_closed_loop_inject
|
|
3018
|
+
if type memory_closed_loop_inject >/dev/null 2>&1 && [[ -n "${_last_error:-}" ]]; then
|
|
2260
3019
|
_fix_suggestion=$(memory_closed_loop_inject "$_last_error" 2>/dev/null) || true
|
|
2261
3020
|
fi
|
|
2262
3021
|
if [[ -n "${_fix_suggestion:-}" ]]; then
|
|
@@ -2266,6 +3025,14 @@ run_single_agent_loop() {
|
|
|
2266
3025
|
${GOAL}"
|
|
2267
3026
|
info "Memory fix injected: ${_fix_suggestion:0:80}"
|
|
2268
3027
|
fi
|
|
3028
|
+
|
|
3029
|
+
# Analyze failure via Claude (background, non-blocking) for richer root_cause/fix in memory
|
|
3030
|
+
if type memory_analyze_failure &>/dev/null && [[ "${INTELLIGENCE_ENABLED:-auto}" != "false" ]]; then
|
|
3031
|
+
local _test_log="${TEST_LOG_FILE:-$LOG_DIR/tests-iter-$(( ITERATION - 1 )).log}"
|
|
3032
|
+
if [[ -f "$_test_log" ]]; then
|
|
3033
|
+
memory_analyze_failure "$_test_log" "test" 2>/dev/null &
|
|
3034
|
+
fi
|
|
3035
|
+
fi
|
|
2269
3036
|
fi
|
|
2270
3037
|
|
|
2271
3038
|
# Run Claude
|
|
@@ -2274,6 +3041,9 @@ ${GOAL}"
|
|
|
2274
3041
|
|
|
2275
3042
|
local log_file="$LOG_DIR/iteration-${ITERATION}.log"
|
|
2276
3043
|
|
|
3044
|
+
# Record iteration data for stuckness detection (diff hash, error hash, exit code)
|
|
3045
|
+
record_iteration_stuckness_data "$exit_code"
|
|
3046
|
+
|
|
2277
3047
|
# Detect fatal CLI errors (API key, auth, network) — abort immediately
|
|
2278
3048
|
if check_fatal_error "$log_file" "$exit_code"; then
|
|
2279
3049
|
STATUS="error"
|
|
@@ -2285,7 +3055,7 @@ ${GOAL}"
|
|
|
2285
3055
|
fi
|
|
2286
3056
|
|
|
2287
3057
|
# Mid-loop memory refresh — re-query with current error context after iteration 3
|
|
2288
|
-
if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context
|
|
3058
|
+
if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context >/dev/null 2>&1; then
|
|
2289
3059
|
local refresh_ctx
|
|
2290
3060
|
refresh_ctx=$(tail -20 "$log_file" 2>/dev/null || true)
|
|
2291
3061
|
if [[ -n "$refresh_ctx" ]]; then
|
|
@@ -2331,7 +3101,7 @@ ${GOAL}"
|
|
|
2331
3101
|
|
|
2332
3102
|
# Track fix outcome for memory effectiveness
|
|
2333
3103
|
if [[ -n "${_applied_fix_pattern:-}" ]]; then
|
|
2334
|
-
if type memory_record_fix_outcome
|
|
3104
|
+
if type memory_record_fix_outcome >/dev/null 2>&1; then
|
|
2335
3105
|
if [[ "${TEST_PASSED:-}" == "true" ]]; then
|
|
2336
3106
|
memory_record_fix_outcome "$_applied_fix_pattern" "true" "true" 2>/dev/null || true
|
|
2337
3107
|
else
|
|
@@ -2341,6 +3111,15 @@ ${GOAL}"
|
|
|
2341
3111
|
_applied_fix_pattern=""
|
|
2342
3112
|
fi
|
|
2343
3113
|
|
|
3114
|
+
# Save Claude context for checkpoint resume (goal, findings, test output)
|
|
3115
|
+
export SW_LOOP_GOAL="$GOAL"
|
|
3116
|
+
export SW_LOOP_ITERATION="$ITERATION"
|
|
3117
|
+
export SW_LOOP_STATUS="${STATUS:-running}"
|
|
3118
|
+
export SW_LOOP_TEST_OUTPUT="${TEST_OUTPUT:-}"
|
|
3119
|
+
export SW_LOOP_FINDINGS="${LOG_ENTRIES:-}"
|
|
3120
|
+
export SW_LOOP_MODIFIED="$(git diff --name-only HEAD 2>/dev/null | head -50 | tr '\n' ',' | sed 's/,$//')"
|
|
3121
|
+
"$SCRIPT_DIR/sw-checkpoint.sh" save-context --stage build 2>/dev/null || true
|
|
3122
|
+
|
|
2344
3123
|
# Audit agent (reviews implementer's work)
|
|
2345
3124
|
run_audit_agent
|
|
2346
3125
|
|
|
@@ -2374,6 +3153,18 @@ $summary
|
|
|
2374
3153
|
write_state
|
|
2375
3154
|
write_progress
|
|
2376
3155
|
|
|
3156
|
+
# Emit iteration complete event for pipeline visibility
|
|
3157
|
+
if type emit_event >/dev/null 2>&1; then
|
|
3158
|
+
emit_event "loop.iteration_complete" \
|
|
3159
|
+
"iteration=$ITERATION" \
|
|
3160
|
+
"max=$MAX_ITERATIONS" \
|
|
3161
|
+
"job_id=${PIPELINE_JOB_ID:-loop-$$}" \
|
|
3162
|
+
"agent=${AGENT_NUM:-1}" \
|
|
3163
|
+
"test_passed=${TEST_PASSED:-unknown}" \
|
|
3164
|
+
"commits=$TOTAL_COMMITS" \
|
|
3165
|
+
"status=${STATUS:-running}"
|
|
3166
|
+
fi
|
|
3167
|
+
|
|
2377
3168
|
# Update heartbeat
|
|
2378
3169
|
"$SCRIPT_DIR/sw-heartbeat.sh" write "${PIPELINE_JOB_ID:-loop-$$}" \
|
|
2379
3170
|
--pid $$ \
|
|
@@ -2396,7 +3187,16 @@ HUMAN FEEDBACK (received after iteration $ITERATION): $human_msg"
|
|
|
2396
3187
|
fi
|
|
2397
3188
|
fi
|
|
2398
3189
|
|
|
2399
|
-
|
|
3190
|
+
# Stuckness-triggered restart: if detected 3+ times, break to allow session restart
|
|
3191
|
+
if [[ "${STUCKNESS_COUNT:-0}" -ge 3 ]]; then
|
|
3192
|
+
STATUS="stuck_restart"
|
|
3193
|
+
write_state
|
|
3194
|
+
write_progress
|
|
3195
|
+
warn "Stuckness detected 3+ times — triggering session restart"
|
|
3196
|
+
break
|
|
3197
|
+
fi
|
|
3198
|
+
|
|
3199
|
+
sleep "$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)"
|
|
2400
3200
|
done
|
|
2401
3201
|
|
|
2402
3202
|
# Write final state after loop exits
|
|
@@ -2437,7 +3237,7 @@ run_loop_with_restarts() {
|
|
|
2437
3237
|
fi
|
|
2438
3238
|
|
|
2439
3239
|
RESTART_COUNT=$(( RESTART_COUNT + 1 ))
|
|
2440
|
-
if type emit_event
|
|
3240
|
+
if type emit_event >/dev/null 2>&1; then
|
|
2441
3241
|
emit_event "loop.restart" "restart=$RESTART_COUNT" "max=$MAX_RESTARTS" "iteration=$ITERATION"
|
|
2442
3242
|
fi
|
|
2443
3243
|
info "Session restart ${RESTART_COUNT}/${MAX_RESTARTS} — resetting iteration counter"
|
|
@@ -2448,6 +3248,7 @@ run_loop_with_restarts() {
|
|
|
2448
3248
|
ITERATION=0
|
|
2449
3249
|
CONSECUTIVE_FAILURES=0
|
|
2450
3250
|
EXTENSION_COUNT=0
|
|
3251
|
+
STUCKNESS_COUNT=0
|
|
2451
3252
|
STATUS="running"
|
|
2452
3253
|
LOG_ENTRIES=""
|
|
2453
3254
|
TEST_PASSED=""
|
|
@@ -2469,7 +3270,7 @@ run_loop_with_restarts() {
|
|
|
2469
3270
|
|
|
2470
3271
|
write_state
|
|
2471
3272
|
|
|
2472
|
-
sleep 2
|
|
3273
|
+
sleep "$(_config_get_int "loop.sleep_between_iterations" 2 2>/dev/null || echo 2)"
|
|
2473
3274
|
done
|
|
2474
3275
|
}
|
|
2475
3276
|
|