shipwright-cli 1.10.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +221 -55
  2. package/completions/_shipwright +264 -32
  3. package/completions/shipwright.bash +118 -26
  4. package/completions/shipwright.fish +80 -2
  5. package/dashboard/server.ts +208 -0
  6. package/docs/strategy/01-market-research.md +619 -0
  7. package/docs/strategy/02-mission-and-brand.md +587 -0
  8. package/docs/strategy/03-gtm-and-roadmap.md +759 -0
  9. package/docs/strategy/QUICK-START.txt +289 -0
  10. package/docs/strategy/README.md +172 -0
  11. package/docs/tmux-research/TMUX-ARCHITECTURE.md +567 -0
  12. package/docs/tmux-research/TMUX-AUDIT.md +925 -0
  13. package/docs/tmux-research/TMUX-BEST-PRACTICES-2025-2026.md +829 -0
  14. package/docs/tmux-research/TMUX-QUICK-REFERENCE.md +543 -0
  15. package/docs/tmux-research/TMUX-RESEARCH-INDEX.md +438 -0
  16. package/package.json +4 -2
  17. package/scripts/lib/helpers.sh +7 -0
  18. package/scripts/sw +323 -2
  19. package/scripts/sw-activity.sh +500 -0
  20. package/scripts/sw-adaptive.sh +925 -0
  21. package/scripts/sw-adversarial.sh +1 -1
  22. package/scripts/sw-architecture-enforcer.sh +1 -1
  23. package/scripts/sw-auth.sh +613 -0
  24. package/scripts/sw-autonomous.sh +754 -0
  25. package/scripts/sw-changelog.sh +704 -0
  26. package/scripts/sw-checkpoint.sh +1 -1
  27. package/scripts/sw-ci.sh +602 -0
  28. package/scripts/sw-cleanup.sh +1 -1
  29. package/scripts/sw-code-review.sh +698 -0
  30. package/scripts/sw-connect.sh +1 -1
  31. package/scripts/sw-context.sh +605 -0
  32. package/scripts/sw-cost.sh +44 -3
  33. package/scripts/sw-daemon.sh +568 -138
  34. package/scripts/sw-dashboard.sh +1 -1
  35. package/scripts/sw-db.sh +1380 -0
  36. package/scripts/sw-decompose.sh +539 -0
  37. package/scripts/sw-deps.sh +551 -0
  38. package/scripts/sw-developer-simulation.sh +1 -1
  39. package/scripts/sw-discovery.sh +412 -0
  40. package/scripts/sw-docs-agent.sh +539 -0
  41. package/scripts/sw-docs.sh +1 -1
  42. package/scripts/sw-doctor.sh +107 -1
  43. package/scripts/sw-dora.sh +615 -0
  44. package/scripts/sw-durable.sh +710 -0
  45. package/scripts/sw-e2e-orchestrator.sh +535 -0
  46. package/scripts/sw-eventbus.sh +393 -0
  47. package/scripts/sw-feedback.sh +479 -0
  48. package/scripts/sw-fix.sh +1 -1
  49. package/scripts/sw-fleet-discover.sh +567 -0
  50. package/scripts/sw-fleet-viz.sh +404 -0
  51. package/scripts/sw-fleet.sh +8 -1
  52. package/scripts/sw-github-app.sh +596 -0
  53. package/scripts/sw-github-checks.sh +4 -4
  54. package/scripts/sw-github-deploy.sh +1 -1
  55. package/scripts/sw-github-graphql.sh +1 -1
  56. package/scripts/sw-guild.sh +569 -0
  57. package/scripts/sw-heartbeat.sh +1 -1
  58. package/scripts/sw-hygiene.sh +559 -0
  59. package/scripts/sw-incident.sh +656 -0
  60. package/scripts/sw-init.sh +237 -24
  61. package/scripts/sw-instrument.sh +699 -0
  62. package/scripts/sw-intelligence.sh +1 -1
  63. package/scripts/sw-jira.sh +1 -1
  64. package/scripts/sw-launchd.sh +363 -28
  65. package/scripts/sw-linear.sh +1 -1
  66. package/scripts/sw-logs.sh +1 -1
  67. package/scripts/sw-loop.sh +267 -21
  68. package/scripts/sw-memory.sh +18 -1
  69. package/scripts/sw-mission-control.sh +487 -0
  70. package/scripts/sw-model-router.sh +545 -0
  71. package/scripts/sw-otel.sh +596 -0
  72. package/scripts/sw-oversight.sh +764 -0
  73. package/scripts/sw-pipeline-composer.sh +1 -1
  74. package/scripts/sw-pipeline-vitals.sh +1 -1
  75. package/scripts/sw-pipeline.sh +947 -35
  76. package/scripts/sw-pm.sh +758 -0
  77. package/scripts/sw-pr-lifecycle.sh +522 -0
  78. package/scripts/sw-predictive.sh +8 -1
  79. package/scripts/sw-prep.sh +1 -1
  80. package/scripts/sw-ps.sh +1 -1
  81. package/scripts/sw-public-dashboard.sh +798 -0
  82. package/scripts/sw-quality.sh +595 -0
  83. package/scripts/sw-reaper.sh +1 -1
  84. package/scripts/sw-recruit.sh +2248 -0
  85. package/scripts/sw-regression.sh +642 -0
  86. package/scripts/sw-release-manager.sh +736 -0
  87. package/scripts/sw-release.sh +706 -0
  88. package/scripts/sw-remote.sh +1 -1
  89. package/scripts/sw-replay.sh +520 -0
  90. package/scripts/sw-retro.sh +691 -0
  91. package/scripts/sw-scale.sh +444 -0
  92. package/scripts/sw-security-audit.sh +505 -0
  93. package/scripts/sw-self-optimize.sh +1 -1
  94. package/scripts/sw-session.sh +1 -1
  95. package/scripts/sw-setup.sh +263 -127
  96. package/scripts/sw-standup.sh +712 -0
  97. package/scripts/sw-status.sh +44 -2
  98. package/scripts/sw-strategic.sh +806 -0
  99. package/scripts/sw-stream.sh +450 -0
  100. package/scripts/sw-swarm.sh +620 -0
  101. package/scripts/sw-team-stages.sh +511 -0
  102. package/scripts/sw-templates.sh +4 -4
  103. package/scripts/sw-testgen.sh +566 -0
  104. package/scripts/sw-tmux-pipeline.sh +554 -0
  105. package/scripts/sw-tmux-role-color.sh +58 -0
  106. package/scripts/sw-tmux-status.sh +128 -0
  107. package/scripts/sw-tmux.sh +1 -1
  108. package/scripts/sw-trace.sh +485 -0
  109. package/scripts/sw-tracker-github.sh +188 -0
  110. package/scripts/sw-tracker-jira.sh +172 -0
  111. package/scripts/sw-tracker-linear.sh +251 -0
  112. package/scripts/sw-tracker.sh +117 -2
  113. package/scripts/sw-triage.sh +627 -0
  114. package/scripts/sw-upgrade.sh +1 -1
  115. package/scripts/sw-ux.sh +677 -0
  116. package/scripts/sw-webhook.sh +627 -0
  117. package/scripts/sw-widgets.sh +530 -0
  118. package/scripts/sw-worktree.sh +1 -1
  119. package/templates/pipelines/autonomous.json +2 -2
  120. package/tmux/shipwright-overlay.conf +35 -17
  121. package/tmux/tmux.conf +23 -21
@@ -10,6 +10,11 @@
10
10
  set -euo pipefail
11
11
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
12
12
 
13
+ # Allow spawning Claude CLI from within a Claude Code session (daemon, fleet, etc.)
14
+ unset CLAUDECODE 2>/dev/null || true
15
+ # Ignore SIGHUP so tmux attach/detach doesn't kill long-running agent sessions
16
+ trap '' HUP
17
+
13
18
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14
19
 
15
20
  # ─── Colors (matches shipwright theme) ──────────────────────────────────────────────
@@ -52,7 +57,13 @@ MAX_ITERATIONS_EXPLICIT=false
52
57
  MAX_RESTARTS=0
53
58
  SESSION_RESTART=false
54
59
  RESTART_COUNT=0
55
- VERSION="1.10.0"
60
+ REPO_OVERRIDE=""
61
+ VERSION="2.1.0"
62
+
63
+ # ─── Token Tracking ─────────────────────────────────────────────────────────
64
+ LOOP_INPUT_TOKENS=0
65
+ LOOP_OUTPUT_TOKENS=0
66
+ LOOP_COST_MILLICENTS=0
56
67
 
57
68
  # ─── Flexible Iteration Defaults ────────────────────────────────────────────
58
69
  AUTO_EXTEND=true # Auto-extend iterations when work is incomplete
@@ -81,6 +92,8 @@ show_help() {
81
92
  echo -e " ${CYAN}shipwright loop${RESET} \"<goal>\" [options]"
82
93
  echo ""
83
94
  echo -e "${BOLD}OPTIONS${RESET}"
95
+ echo -e " ${CYAN}--repo <path>${RESET} Change to directory before running (must be a git repo)"
96
+ echo -e " ${CYAN}--local${RESET} Disable GitHub operations (local-only mode)"
84
97
  echo -e " ${CYAN}--max-iterations${RESET} N Max loop iterations (default: 20)"
85
98
  echo -e " ${CYAN}--test-cmd${RESET} \"cmd\" Test command to run between iterations"
86
99
  echo -e " ${CYAN}--fast-test-cmd${RESET} \"cmd\" Fast/subset test command (alternates with full)"
@@ -130,6 +143,16 @@ show_help() {
130
143
 
131
144
  while [[ $# -gt 0 ]]; do
132
145
  case "$1" in
146
+ --repo)
147
+ REPO_OVERRIDE="${2:-}"
148
+ [[ -z "$REPO_OVERRIDE" ]] && { error "Missing value for --repo"; exit 1; }
149
+ shift 2
150
+ ;;
151
+ --repo=*) REPO_OVERRIDE="${1#--repo=}"; shift ;;
152
+ --local)
153
+ # Skip GitHub operations in loop
154
+ export NO_GITHUB=true
155
+ shift ;;
133
156
  --max-iterations)
134
157
  MAX_ITERATIONS="${2:-}"
135
158
  MAX_ITERATIONS_EXPLICIT=true
@@ -239,16 +262,28 @@ if [[ "$AGENTS" -gt 1 ]]; then
239
262
  USE_WORKTREE=true
240
263
  fi
241
264
 
265
# Recruit-powered auto-role assignment.
# Only applies to multi-agent runs where the user gave no --roles and the
# sw-recruit.sh helper is present and executable next to this script.
# Every step is best-effort: a failing helper or jq call leaves AGENT_ROLES
# untouched.
if [[ "$AGENTS" -gt 1 && -z "$AGENT_ROLES" && -x "${SCRIPT_DIR:-}/sw-recruit.sh" ]]; then
  _recruit_goal="${GOAL:-}"
  if [[ -n "$_recruit_goal" ]]; then
    # Ask recruit for a team as JSON; stderr is discarded.
    _recruit_team=$(bash "$SCRIPT_DIR/sw-recruit.sh" team --json "$_recruit_goal" 2>/dev/null) || true
    if [[ -n "$_recruit_team" ]]; then
      # Flatten the .team array into the comma-separated form AGENT_ROLES uses.
      _recruit_roles=$(jq -r '.team | join(",")' 2>/dev/null <<<"$_recruit_team") || true
      case "$_recruit_roles" in
        "" | null)
          # Nothing usable came back — keep roles unset.
          ;;
        *)
          AGENT_ROLES="$_recruit_roles"
          info "Recruit assigned roles: ${AGENT_ROLES}"
          ;;
      esac
    fi
  fi
fi
279
+
242
280
  # Warn if --roles without --agents
243
281
  if [[ -n "$AGENT_ROLES" ]] && [[ "$AGENTS" -le 1 ]]; then
244
282
  warn "--roles requires --agents > 1 (roles are ignored in single-agent mode)"
245
283
  fi
246
284
 
247
- # Warn if --max-restarts with --agents > 1 (not yet supported)
248
- if [[ "${MAX_RESTARTS:-0}" -gt 0 ]] && [[ "$AGENTS" -gt 1 ]]; then
249
- warn "--max-restarts is ignored in multi-agent mode (restart support is single-agent only)"
250
- MAX_RESTARTS=0
251
- fi
285
+ # max-restarts is supported in both single-agent and multi-agent mode
286
+ # In multi-agent mode, restarts apply per-agent (agent can be respawned up to MAX_RESTARTS)
252
287
 
253
288
  # Validate numeric flags
254
289
  if ! [[ "$FAST_TEST_INTERVAL" =~ ^[1-9][0-9]*$ ]]; then
@@ -270,6 +305,23 @@ if ! $RESUME && [[ -z "$GOAL" ]]; then
270
305
  exit 1
271
306
  fi
272
307
 
308
# Handle --repo flag: switch into the requested directory before running.
# Three checks, each fatal with its own message: the path must be a
# directory, we must be able to cd into it, and it must be inside a git
# work tree.
if [[ -n "$REPO_OVERRIDE" ]]; then
  [[ -d "$REPO_OVERRIDE" ]] || {
    error "Directory does not exist: $REPO_OVERRIDE"
    exit 1
  }
  cd "$REPO_OVERRIDE" 2>/dev/null || {
    error "Cannot cd to: $REPO_OVERRIDE"
    exit 1
  }
  git rev-parse --show-toplevel >/dev/null 2>&1 || {
    error "Not a git repository: $REPO_OVERRIDE"
    exit 1
  }
  info "Using repository: $(pwd)"
fi
324
+
273
325
  if ! command -v claude &>/dev/null; then
274
326
  error "Claude Code CLI not found. Install it first:"
275
327
  echo -e " ${DIM}npm install -g @anthropic-ai/claude-code${RESET}"
@@ -368,6 +420,110 @@ select_audit_model() {
368
420
  echo "$default_model"
369
421
  }
370
422
 
423
# ─── Token Accumulation ─────────────────────────────────────────────────────

# Normalize a count to a non-negative base-10 integer, printed on stdout.
# Thousands separators and any fractional part are stripped; anything that is
# still not all digits becomes 0. Forcing base 10 keeps a value with leading
# zeros ("056") from being read as octal inside $(( )).
_loop_token_int() {
  local raw="${1:-}"
  raw="${raw//,/}"
  raw="${raw%%.*}"
  if [[ "$raw" =~ ^[0-9]+$ ]]; then
    printf '%s' "$(( 10#$raw ))"
  else
    printf '0'
  fi
}

# Parse token counts from one Claude CLI output file and add them to the
# running totals.
#
# Structured path (jq available, file starts with "[" or "{"): reads
# usage.input_tokens, usage.output_tokens, usage.cache_read_input_tokens,
# usage.cache_creation_input_tokens and total_cost_usd from the result object.
# A top-level array is treated as a message list whose last element is the
# result; a top-level object is treated as the result itself.
# NOTE(review): the object form is accepted defensively — confirm which shape
# the installed CLI emits for --output-format json.
#
# Fallback path: scrapes "input tokens: N" / "output tokens: N" style text.
#
# Globals:   LOOP_INPUT_TOKENS, LOOP_OUTPUT_TOKENS, LOOP_COST_MILLICENTS
#            (read and updated; cache-read and cache-creation tokens are
#            counted as input tokens)
# Arguments: $1 - path to the iteration output file
# Returns:   0 always; a missing file is a no-op
accumulate_loop_tokens() {
  local log_file="$1"
  [[ -f "$log_file" ]] || return 0

  local first_char
  first_char=$(head -c1 "$log_file" 2>/dev/null || true)

  local input_tok="" output_tok="" cache_read="" cache_create="" cost_usd=""

  if command -v jq &>/dev/null && [[ "$first_char" == "[" || "$first_char" == "{" ]]; then
    # One jq pass emits all five fields on a single space-separated line.
    local fields
    fields=$(jq -r '
      (if type == "array" then .[-1] else . end)
      | [ (.usage.input_tokens // 0),
          (.usage.output_tokens // 0),
          (.usage.cache_read_input_tokens // 0),
          (.usage.cache_creation_input_tokens // 0),
          (.total_cost_usd // 0) ]
      | map(tostring) | join(" ")' "$log_file" 2>/dev/null) || fields=""
    if [[ -n "$fields" ]]; then
      read -r input_tok output_tok cache_read cache_create cost_usd <<<"$fields" || true
    fi

    LOOP_INPUT_TOKENS=$(( ${LOOP_INPUT_TOKENS:-0} \
      + $(_loop_token_int "$input_tok") \
      + $(_loop_token_int "$cache_read") \
      + $(_loop_token_int "$cache_create") ))
    LOOP_OUTPUT_TOKENS=$(( ${LOOP_OUTPUT_TOKENS:-0} + $(_loop_token_int "$output_tok") ))

    # Accumulate cost in millicents (USD * 100000) so the total stays an integer.
    if [[ -n "$cost_usd" && "$cost_usd" != "0" && "$cost_usd" != "null" ]]; then
      local cost_millicents
      cost_millicents=$(awk -v usd="$cost_usd" 'BEGIN { printf "%.0f", usd * 100000 }' 2>/dev/null) \
        || cost_millicents=0
      LOOP_COST_MILLICENTS=$(( ${LOOP_COST_MILLICENTS:-0} + $(_loop_token_int "$cost_millicents") ))
    fi
  else
    # Fallback: regex-based parsing for non-JSON output. The last mention in
    # the file wins; "no match" simply leaves the value empty (→ 0).
    input_tok=$(grep -oE 'input[_ ]tokens?[: ]+[0-9,]+' "$log_file" 2>/dev/null \
      | tail -1 | grep -oE '[0-9,]+' | tr -d ',') || true
    output_tok=$(grep -oE 'output[_ ]tokens?[: ]+[0-9,]+' "$log_file" 2>/dev/null \
      | tail -1 | grep -oE '[0-9,]+' | tr -d ',') || true

    LOOP_INPUT_TOKENS=$(( ${LOOP_INPUT_TOKENS:-0} + $(_loop_token_int "$input_tok") ))
    LOOP_OUTPUT_TOKENS=$(( ${LOOP_OUTPUT_TOKENS:-0} + $(_loop_token_int "$output_tok") ))
  fi
  return 0
}
459
+
460
# ─── JSON→Text Extraction ──────────────────────────────────────────────────
# Turn the raw stdout of a Claude CLI run into a plain-text log file.
#
# Arguments: $1 - file holding the CLI's stdout (JSON when --output-format json)
#            $2 - destination text log (always written)
#            $3 - optional file holding the CLI's stderr
# Returns:   0 always
#
# Resolution order:
#   1. stdout missing/empty  → copy stderr if it has content, else "(no output)"
#   2. not JSON-looking      → copy stdout verbatim
#   3. JSON but no jq        → warn, copy stdout verbatim
#   4. JSON with jq          → write .result of the result object (last element
#                              of a top-level array, or the object itself)
#   5. array without .result → try every element's .content (first 500 lines),
#                              else warn and write a placeholder
#   6. object without .result→ warn and keep the raw JSON so any error payload
#                              stays visible in the log
# Relies on the warn helper defined elsewhere in this script.
_extract_text_from_json() {
  local json_file="$1" log_file="$2" err_file="${3:-}"

  # Case 1: nothing on stdout — fall back to stderr or a placeholder
  if [[ ! -s "$json_file" ]]; then
    if [[ -n "$err_file" && -s "$err_file" ]]; then
      cp "$err_file" "$log_file"
    else
      echo "(no output)" > "$log_file"
    fi
    return 0
  fi

  local first_char
  first_char=$(head -c1 "$json_file" 2>/dev/null || true)

  # Case 2: not JSON at all (plain text, error message, etc.) — use as-is
  if [[ "$first_char" != "[" && "$first_char" != "{" ]]; then
    cp "$json_file" "$log_file"
    return 0
  fi

  # Case 3: looks like JSON but we have no parser — use raw
  if ! command -v jq &>/dev/null; then
    warn "JSON output but jq not available — using raw output"
    cp "$json_file" "$log_file"
    return 0
  fi

  # Case 4: pull the text out of the result object
  local extracted
  extracted=$(jq -r '(if type == "array" then .[-1] else . end) | .result // empty' \
    "$json_file" 2>/dev/null) || true
  if [[ -n "$extracted" ]]; then
    # printf, not echo: a result that is literally "-n" or "-e" must survive
    printf '%s\n' "$extracted" > "$log_file"
    return 0
  fi

  # Case 5: array with no usable .result — try .content, then give up
  if [[ "$first_char" == "[" ]]; then
    extracted=$(jq -r '.[].content // empty' "$json_file" 2>/dev/null | head -500) || true
    if [[ -n "$extracted" ]]; then
      printf '%s\n' "$extracted" > "$log_file"
      return 0
    fi
    warn "JSON output has no .result field — check $json_file"
    echo "(no text result in JSON output)" > "$log_file"
    return 0
  fi

  # Case 6: single object with no .result — keep the raw JSON
  warn "JSON output has no .result field — using raw output from $json_file"
  cp "$json_file" "$log_file"
  return 0
}
511
+
512
# Write accumulated token totals to $LOG_DIR/loop-tokens.json for the pipeline
# to read.
#
# Globals:   LOG_DIR (read; must already exist), LOOP_INPUT_TOKENS,
#            LOOP_OUTPUT_TOKENS, LOOP_COST_MILLICENTS, ITERATION (read; each
#            defaults to 0 when unset so the function is safe under set -u)
# Outputs:   one-line JSON object with input_tokens, output_tokens, cost_usd
#            and iterations
# Returns:   0 on success, 1 if the file could not be moved into place
write_loop_tokens() {
  local token_file="$LOG_DIR/loop-tokens.json"
  local millicents="${LOOP_COST_MILLICENTS:-0}"

  # Cost is tracked in millicents (USD * 100000); convert back for the report.
  local cost_usd="0"
  if [[ "$millicents" -gt 0 ]]; then
    cost_usd=$(awk -v mc="$millicents" 'BEGIN { printf "%.6f", mc / 100000 }' 2>/dev/null) \
      || cost_usd="0"
  fi

  # Write to a sibling temp file and rename, so a reader never sees a
  # half-written file. Falls back to the default temp dir if the sibling
  # cannot be created.
  local tmp_file
  tmp_file=$(mktemp "${token_file}.XXXXXX" 2>/dev/null || mktemp)
  printf '{"input_tokens":%s,"output_tokens":%s,"cost_usd":%s,"iterations":%s}\n' \
    "${LOOP_INPUT_TOKENS:-0}" "${LOOP_OUTPUT_TOKENS:-0}" "${cost_usd:-0}" "${ITERATION:-0}" \
    > "$tmp_file"

  if ! mv "$tmp_file" "$token_file"; then
    # Don't leave the temp file behind when the rename fails.
    rm -f -- "$tmp_file"
    return 1
  fi
}
526
+
371
527
  # ─── Adaptive Iteration Budget ──────────────────────────────────────────────
372
528
  # Reads tuning config for smarter iteration/circuit-breaker thresholds.
373
529
  apply_adaptive_budget() {
@@ -668,11 +824,50 @@ git_auto_commit() {
668
824
  return 0
669
825
  }
670
826
 
827
# ─── Fatal Error Detection ────────────────────────────────────────────────────

# Decide whether an iteration failed in a way that makes further iterations
# pointless.
#
# Arguments: $1 - iteration text log
#            $2 - exit code of the CLI process (default 0)
# Returns:   0 if a fatal error was detected (and reported via error),
#            1 otherwise, including when the log file does not exist
#
# Two signals are checked:
#   1. The log matches one of the known auth / rate-limit / billing / network
#      patterns (case-insensitive).
#      NOTE(review): the patterns are matched against the whole log text, so
#      output that merely mentions e.g. "billing" also trips this check.
#   2. The CLI exited non-zero and the log has fewer than 3 non-blank lines.
# Relies on the error helper defined elsewhere in this script.
check_fatal_error() {
  local log_file="$1"
  local cli_exit_code="${2:-0}"
  [[ -f "$log_file" ]] || return 1

  # Known fatal error patterns from Claude CLI / Anthropic API
  local fatal_patterns="Invalid API key|invalid_api_key|authentication_error|API key expired"
  fatal_patterns="${fatal_patterns}|rate_limit_error|overloaded_error|billing"
  fatal_patterns="${fatal_patterns}|Could not resolve host|connection refused|ECONNREFUSED"
  fatal_patterns="${fatal_patterns}|ANTHROPIC_API_KEY.*not set|No API key"

  if grep -qiE -- "$fatal_patterns" "$log_file" 2>/dev/null; then
    # Report only the first matching line, trimmed to 120 columns.
    local match
    match=$(grep -iE -m 1 -- "$fatal_patterns" "$log_file" 2>/dev/null | cut -c1-120) || true
    error "Fatal CLI error: $match"
    return 0 # fatal error detected
  fi

  # Non-zero exit + tiny output = likely CLI crash
  if [[ "$cli_exit_code" -ne 0 ]]; then
    # grep -c prints the count AND exits 1 when the count is 0. Appending a
    # fallback "0" with `|| echo 0` would yield "0<newline>0" and break the
    # numeric test below, so the status is ignored and the value validated.
    local line_count
    line_count=$(grep -cv '^$' "$log_file" 2>/dev/null) || true
    [[ "$line_count" =~ ^[0-9]+$ ]] || line_count=0
    if [[ "$line_count" -lt 3 ]]; then
      local content
      content=$(head -3 "$log_file" 2>/dev/null | cut -c1-120) || true
      error "CLI exited $cli_exit_code with minimal output: $content"
      return 0
    fi
  fi

  return 1 # no fatal error
}
+ }
861
+
671
862
  # ─── Progress & Circuit Breaker ───────────────────────────────────────────────
672
863
 
673
864
  check_progress() {
674
865
  local changes
675
- changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 2>/dev/null | tail -1 || echo "")"
866
+ # Exclude loop bookkeeping files — only count real code changes as progress
867
+ changes="$(git -C "$PROJECT_ROOT" diff --stat HEAD~1 \
868
+ -- . ':!.claude/loop-state.md' ':!.claude/pipeline-state.md' \
869
+ ':!**/progress.md' ':!**/error-summary.json' \
870
+ 2>/dev/null | tail -1 || echo "")"
676
871
  local insertions
677
872
  insertions="$(echo "$changes" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)"
678
873
  if [[ "${insertions:-0}" -lt "$MIN_PROGRESS_LINES" ]]; then
@@ -1449,15 +1644,27 @@ compose_worker_prompt() {
1449
1644
 
1450
1645
  if [[ -n "$role" ]]; then
1451
1646
  local role_desc=""
1452
- case "$role" in
1453
- builder) role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
1454
- reviewer) role_desc="Focus on code review look for bugs, security issues, edge cases in recent commits. Make fixes via commits." ;;
1455
- tester) role_desc="Focus on test coverage — write new tests, fix failing tests, improve assertions and edge case coverage." ;;
1456
- optimizer) role_desc="Focus on performance profile hot paths, reduce complexity, optimize algorithms and data structures." ;;
1457
- docs) role_desc="Focus on documentation update README, add docstrings, write usage guides for new features." ;;
1458
- security) role_desc="Focus on security — audit for vulnerabilities, fix injection risks, validate inputs, check auth boundaries." ;;
1459
- *) role_desc="Focus on: ${role}. Apply your expertise in this area to advance the goal." ;;
1460
- esac
1647
+ # Try to pull description from recruit's roles DB first
1648
+ local recruit_roles_db="${HOME}/.shipwright/recruitment/roles.json"
1649
+ if [[ -f "$recruit_roles_db" ]] && command -v jq &>/dev/null; then
1650
+ local recruit_desc
1651
+ recruit_desc=$(jq -r --arg r "$role" '.[$r].description // ""' "$recruit_roles_db" 2>/dev/null) || true
1652
+ if [[ -n "$recruit_desc" && "$recruit_desc" != "null" ]]; then
1653
+ role_desc="$recruit_desc"
1654
+ fi
1655
+ fi
1656
+ # Fallback to hardcoded descriptions
1657
+ if [[ -z "$role_desc" ]]; then
1658
+ case "$role" in
1659
+ builder) role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
1660
+ reviewer) role_desc="Focus on code review — look for bugs, security issues, edge cases in recent commits. Make fixes via commits." ;;
1661
+ tester) role_desc="Focus on test coverage — write new tests, fix failing tests, improve assertions and edge case coverage." ;;
1662
+ optimizer) role_desc="Focus on performance — profile hot paths, reduce complexity, optimize algorithms and data structures." ;;
1663
+ docs) role_desc="Focus on documentation — update README, add docstrings, write usage guides for new features." ;;
1664
+ security) role_desc="Focus on security — audit for vulnerabilities, fix injection risks, validate inputs, check auth boundaries." ;;
1665
+ *) role_desc="Focus on: ${role}. Apply your expertise in this area to advance the goal." ;;
1666
+ esac
1667
+ fi
1461
1668
  role_section="## Your Role: ${role}
1462
1669
  ${role_desc}
1463
1670
  Prioritize work in your area of expertise. Coordinate with other agents via git log."
@@ -1481,6 +1688,7 @@ PROMPT
1481
1688
  build_claude_flags() {
1482
1689
  local flags=()
1483
1690
  flags+=("--model" "$MODEL")
1691
+ flags+=("--output-format" "json")
1484
1692
 
1485
1693
  if $SKIP_PERMISSIONS; then
1486
1694
  flags+=("--dangerously-skip-permissions")
@@ -1495,6 +1703,7 @@ build_claude_flags() {
1495
1703
 
1496
1704
  run_claude_iteration() {
1497
1705
  local log_file="$LOG_DIR/iteration-${ITERATION}.log"
1706
+ local json_file="$LOG_DIR/iteration-${ITERATION}.json"
1498
1707
  local prompt
1499
1708
  prompt="$(compose_prompt)"
1500
1709
 
@@ -1507,12 +1716,14 @@ run_claude_iteration() {
1507
1716
  echo -e "\n${CYAN}${BOLD}▸${RESET} ${BOLD}Iteration ${ITERATION}/${MAX_ITERATIONS}${RESET} — Starting..."
1508
1717
 
1509
1718
  # Run Claude headless (with timeout + PID capture for signal handling)
1719
+ # Output goes to .json first, then we extract text into .log for compat
1510
1720
  local exit_code=0
1511
1721
  # shellcheck disable=SC2086
1722
+ local err_file="${json_file%.json}.stderr"
1512
1723
  if [[ -n "$TIMEOUT_CMD" ]]; then
1513
- $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$prompt" $flags > "$log_file" 2>&1 &
1724
+ $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
1514
1725
  else
1515
- claude -p "$prompt" $flags > "$log_file" 2>&1 &
1726
+ claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
1516
1727
  fi
1517
1728
  CHILD_PID=$!
1518
1729
  wait "$CHILD_PID" 2>/dev/null || exit_code=$?
@@ -1521,12 +1732,19 @@ run_claude_iteration() {
1521
1732
  warn "Claude CLI timed out after ${CLAUDE_TIMEOUT}s"
1522
1733
  fi
1523
1734
 
1735
+ # Extract text result from JSON into .log for backwards compatibility
1736
+ # With --output-format json, stdout is a JSON array; .[-1].result has the text
1737
+ _extract_text_from_json "$json_file" "$log_file" "$err_file"
1738
+
1524
1739
  local iter_end
1525
1740
  iter_end="$(now_epoch)"
1526
1741
  local iter_duration=$(( iter_end - iter_start ))
1527
1742
 
1528
1743
  echo -e " ${GREEN}✓${RESET} Claude session completed ($(format_duration "$iter_duration"), exit $exit_code)"
1529
1744
 
1745
+ # Accumulate token usage from this iteration's JSON output
1746
+ accumulate_loop_tokens "$json_file"
1747
+
1530
1748
  # Show verbose output if requested
1531
1749
  if $VERBOSE; then
1532
1750
  echo -e " ${DIM}─── Claude Output ───${RESET}"
@@ -1545,7 +1763,14 @@ extract_summary() {
1545
1763
  local summary
1546
1764
  summary="$(grep -v '^$' "$log_file" | tail -5 | head -3 2>/dev/null || echo "(no output)")"
1547
1765
  # Truncate long lines
1548
- echo "$summary" | cut -c1-120
1766
+ summary="$(echo "$summary" | cut -c1-120)"
1767
+
1768
+ # Sanitize: if summary is just a CLI/API error, replace with generic text
1769
+ if echo "$summary" | grep -qiE 'Invalid API key|authentication_error|rate_limit|API key expired|ANTHROPIC_API_KEY'; then
1770
+ summary="(CLI error — no useful output this iteration)"
1771
+ fi
1772
+
1773
+ echo "$summary"
1549
1774
  }
1550
1775
 
1551
1776
  # ─── Display Helpers ─────────────────────────────────────────────────────────
@@ -1615,10 +1840,16 @@ show_summary() {
1615
1840
  echo -e " ${BOLD}Duration:${RESET} $(format_duration "$duration")"
1616
1841
  echo -e " ${BOLD}Commits:${RESET} $TOTAL_COMMITS"
1617
1842
  echo -e " ${BOLD}Tests:${RESET} $test_display"
1843
+ if [[ "$LOOP_INPUT_TOKENS" -gt 0 || "$LOOP_OUTPUT_TOKENS" -gt 0 ]]; then
1844
+ echo -e " ${BOLD}Tokens:${RESET} in=${LOOP_INPUT_TOKENS} out=${LOOP_OUTPUT_TOKENS}"
1845
+ fi
1618
1846
  echo ""
1619
1847
  echo -e " ${DIM}State: $STATE_FILE${RESET}"
1620
1848
  echo -e " ${DIM}Logs: $LOG_DIR/${RESET}"
1621
1849
  echo ""
1850
+
1851
+ # Write token totals for pipeline cost tracking
1852
+ write_loop_tokens
1622
1853
  }
1623
1854
 
1624
1855
  # ─── Signal Handling ──────────────────────────────────────────────────────────
@@ -1781,10 +2012,15 @@ Focus on areas they haven't touched yet.
1781
2012
  PROMPT
1782
2013
  )"
1783
2014
 
1784
- # Run Claude
2015
+ # Run Claude (output is JSON due to --output-format json in CLAUDE_FLAGS)
2016
+ local JSON_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.json"
2017
+ local ERR_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.stderr"
1785
2018
  LOG_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.log"
1786
2019
  # shellcheck disable=SC2086
1787
- claude -p "$PROMPT" $CLAUDE_FLAGS > "$LOG_FILE" 2>&1 || true
2020
+ claude -p "$PROMPT" $CLAUDE_FLAGS > "$JSON_FILE" 2>"$ERR_FILE" || true
2021
+
2022
+ # Extract text result from JSON into .log for backwards compat
2023
+ _extract_text_from_json "$JSON_FILE" "$LOG_FILE" "$ERR_FILE"
1788
2024
 
1789
2025
  echo -e " ${GREEN}✓${RESET} Claude session completed"
1790
2026
 
@@ -2009,6 +2245,16 @@ ${GOAL}"
2009
2245
 
2010
2246
  local log_file="$LOG_DIR/iteration-${ITERATION}.log"
2011
2247
 
2248
+ # Detect fatal CLI errors (API key, auth, network) — abort immediately
2249
+ if check_fatal_error "$log_file" "$exit_code"; then
2250
+ STATUS="error"
2251
+ write_state
2252
+ write_progress
2253
+ error "Fatal CLI error detected — aborting loop (see iteration log)"
2254
+ show_summary
2255
+ return 1
2256
+ fi
2257
+
2012
2258
  # Mid-loop memory refresh — re-query with current error context after iteration 3
2013
2259
  if [[ "$ITERATION" -ge 3 ]] && type memory_inject_context &>/dev/null 2>&1; then
2014
2260
  local refresh_ctx
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="1.10.0"
9
+ VERSION="2.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12
 
@@ -1077,6 +1077,20 @@ memory_get_dora_baseline() {
1077
1077
  echo "$metrics"
1078
1078
  }
1079
1079
 
1080
# memory_get_baseline <metric_name>
# Print the stored baseline for one metric (bundle_size_kb, test_duration_s,
# coverage_pct, etc.) from the repo's metrics.json.
# Used by pipeline for regression checks.
# Returns 1 when no metric name is given; otherwise 0, printing nothing when
# the metrics file or the baseline entry is absent (jq errors are ignored).
memory_get_baseline() {
  local metric_name="${1:-}"
  if [[ -z "$metric_name" ]]; then
    return 1
  fi

  ensure_memory_dir

  local baseline_store
  baseline_store="$(repo_memory_dir)/metrics.json"
  if [[ -f "$baseline_store" ]]; then
    # Look the metric up under .baselines; a missing key prints nothing.
    jq -r --arg m "$metric_name" '.baselines[$m] // empty' "$baseline_store" 2>/dev/null || true
  fi
  return 0
}
1093
+
1080
1094
  # memory_update_metrics <metric_name> <value>
1081
1095
  # Track performance baselines and flag regressions.
1082
1096
  memory_update_metrics() {
@@ -1602,6 +1616,9 @@ case "$SUBCOMMAND" in
1602
1616
  pattern)
1603
1617
  memory_capture_pattern "$@"
1604
1618
  ;;
1619
+ get)
1620
+ memory_get_baseline "$@"
1621
+ ;;
1605
1622
  metric)
1606
1623
  memory_update_metrics "$@"
1607
1624
  ;;