shipwright-cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +160 -72
  2. package/completions/_shipwright +59 -7
  3. package/completions/shipwright.bash +24 -4
  4. package/completions/shipwright.fish +80 -2
  5. package/dashboard/server.ts +208 -0
  6. package/docs/tmux-research/TMUX-ARCHITECTURE.md +567 -0
  7. package/docs/tmux-research/TMUX-AUDIT.md +925 -0
  8. package/docs/tmux-research/TMUX-BEST-PRACTICES-2025-2026.md +829 -0
  9. package/docs/tmux-research/TMUX-QUICK-REFERENCE.md +543 -0
  10. package/docs/tmux-research/TMUX-RESEARCH-INDEX.md +438 -0
  11. package/package.json +2 -2
  12. package/scripts/lib/helpers.sh +7 -0
  13. package/scripts/sw +116 -2
  14. package/scripts/sw-activity.sh +1 -1
  15. package/scripts/sw-adaptive.sh +1 -1
  16. package/scripts/sw-adversarial.sh +1 -1
  17. package/scripts/sw-architecture-enforcer.sh +1 -1
  18. package/scripts/sw-auth.sh +1 -1
  19. package/scripts/sw-autonomous.sh +128 -38
  20. package/scripts/sw-changelog.sh +1 -1
  21. package/scripts/sw-checkpoint.sh +1 -1
  22. package/scripts/sw-ci.sh +1 -1
  23. package/scripts/sw-cleanup.sh +1 -1
  24. package/scripts/sw-code-review.sh +62 -1
  25. package/scripts/sw-connect.sh +1 -1
  26. package/scripts/sw-context.sh +1 -1
  27. package/scripts/sw-cost.sh +44 -3
  28. package/scripts/sw-daemon.sh +155 -27
  29. package/scripts/sw-dashboard.sh +1 -1
  30. package/scripts/sw-db.sh +958 -118
  31. package/scripts/sw-decompose.sh +1 -1
  32. package/scripts/sw-deps.sh +1 -1
  33. package/scripts/sw-developer-simulation.sh +1 -1
  34. package/scripts/sw-discovery.sh +1 -1
  35. package/scripts/sw-docs-agent.sh +1 -1
  36. package/scripts/sw-docs.sh +1 -1
  37. package/scripts/sw-doctor.sh +49 -1
  38. package/scripts/sw-dora.sh +1 -1
  39. package/scripts/sw-durable.sh +1 -1
  40. package/scripts/sw-e2e-orchestrator.sh +1 -1
  41. package/scripts/sw-eventbus.sh +1 -1
  42. package/scripts/sw-feedback.sh +23 -15
  43. package/scripts/sw-fix.sh +1 -1
  44. package/scripts/sw-fleet-discover.sh +1 -1
  45. package/scripts/sw-fleet-viz.sh +1 -1
  46. package/scripts/sw-fleet.sh +1 -1
  47. package/scripts/sw-github-app.sh +1 -1
  48. package/scripts/sw-github-checks.sh +4 -4
  49. package/scripts/sw-github-deploy.sh +1 -1
  50. package/scripts/sw-github-graphql.sh +1 -1
  51. package/scripts/sw-guild.sh +1 -1
  52. package/scripts/sw-heartbeat.sh +1 -1
  53. package/scripts/sw-hygiene.sh +1 -1
  54. package/scripts/sw-incident.sh +45 -6
  55. package/scripts/sw-init.sh +150 -24
  56. package/scripts/sw-instrument.sh +1 -1
  57. package/scripts/sw-intelligence.sh +1 -1
  58. package/scripts/sw-jira.sh +1 -1
  59. package/scripts/sw-launchd.sh +1 -1
  60. package/scripts/sw-linear.sh +1 -1
  61. package/scripts/sw-logs.sh +1 -1
  62. package/scripts/sw-loop.sh +204 -19
  63. package/scripts/sw-memory.sh +18 -1
  64. package/scripts/sw-mission-control.sh +1 -1
  65. package/scripts/sw-model-router.sh +1 -1
  66. package/scripts/sw-otel.sh +1 -1
  67. package/scripts/sw-oversight.sh +76 -1
  68. package/scripts/sw-pipeline-composer.sh +1 -1
  69. package/scripts/sw-pipeline-vitals.sh +1 -1
  70. package/scripts/sw-pipeline.sh +261 -12
  71. package/scripts/sw-pm.sh +70 -5
  72. package/scripts/sw-pr-lifecycle.sh +1 -1
  73. package/scripts/sw-predictive.sh +8 -1
  74. package/scripts/sw-prep.sh +1 -1
  75. package/scripts/sw-ps.sh +1 -1
  76. package/scripts/sw-public-dashboard.sh +1 -1
  77. package/scripts/sw-quality.sh +1 -1
  78. package/scripts/sw-reaper.sh +1 -1
  79. package/scripts/sw-recruit.sh +1853 -178
  80. package/scripts/sw-regression.sh +1 -1
  81. package/scripts/sw-release-manager.sh +1 -1
  82. package/scripts/sw-release.sh +1 -1
  83. package/scripts/sw-remote.sh +1 -1
  84. package/scripts/sw-replay.sh +1 -1
  85. package/scripts/sw-retro.sh +1 -1
  86. package/scripts/sw-scale.sh +1 -1
  87. package/scripts/sw-security-audit.sh +1 -1
  88. package/scripts/sw-self-optimize.sh +1 -1
  89. package/scripts/sw-session.sh +1 -1
  90. package/scripts/sw-setup.sh +263 -127
  91. package/scripts/sw-standup.sh +1 -1
  92. package/scripts/sw-status.sh +44 -2
  93. package/scripts/sw-strategic.sh +189 -41
  94. package/scripts/sw-stream.sh +1 -1
  95. package/scripts/sw-swarm.sh +42 -5
  96. package/scripts/sw-team-stages.sh +1 -1
  97. package/scripts/sw-templates.sh +4 -4
  98. package/scripts/sw-testgen.sh +66 -15
  99. package/scripts/sw-tmux-pipeline.sh +1 -1
  100. package/scripts/sw-tmux-role-color.sh +58 -0
  101. package/scripts/sw-tmux-status.sh +128 -0
  102. package/scripts/sw-tmux.sh +1 -1
  103. package/scripts/sw-trace.sh +1 -1
  104. package/scripts/sw-tracker.sh +1 -1
  105. package/scripts/sw-triage.sh +61 -37
  106. package/scripts/sw-upgrade.sh +1 -1
  107. package/scripts/sw-ux.sh +1 -1
  108. package/scripts/sw-webhook.sh +1 -1
  109. package/scripts/sw-widgets.sh +1 -1
  110. package/scripts/sw-worktree.sh +1 -1
  111. package/templates/pipelines/autonomous.json +2 -2
  112. package/tmux/shipwright-overlay.conf +35 -17
  113. package/tmux/tmux.conf +23 -21
@@ -57,7 +57,13 @@ MAX_ITERATIONS_EXPLICIT=false
57
57
  MAX_RESTARTS=0
58
58
  SESSION_RESTART=false
59
59
  RESTART_COUNT=0
60
- VERSION="2.0.0"
60
+ REPO_OVERRIDE=""
61
+ VERSION="2.1.0"
62
+
63
+ # ─── Token Tracking ─────────────────────────────────────────────────────────
64
+ LOOP_INPUT_TOKENS=0
65
+ LOOP_OUTPUT_TOKENS=0
66
+ LOOP_COST_MILLICENTS=0
61
67
 
62
68
  # ─── Flexible Iteration Defaults ────────────────────────────────────────────
63
69
  AUTO_EXTEND=true # Auto-extend iterations when work is incomplete
@@ -86,6 +92,8 @@ show_help() {
86
92
  echo -e " ${CYAN}shipwright loop${RESET} \"<goal>\" [options]"
87
93
  echo ""
88
94
  echo -e "${BOLD}OPTIONS${RESET}"
95
+ echo -e " ${CYAN}--repo <path>${RESET} Change to directory before running (must be a git repo)"
96
+ echo -e " ${CYAN}--local${RESET} Disable GitHub operations (local-only mode)"
89
97
  echo -e " ${CYAN}--max-iterations${RESET} N Max loop iterations (default: 20)"
90
98
  echo -e " ${CYAN}--test-cmd${RESET} \"cmd\" Test command to run between iterations"
91
99
  echo -e " ${CYAN}--fast-test-cmd${RESET} \"cmd\" Fast/subset test command (alternates with full)"
@@ -135,6 +143,16 @@ show_help() {
135
143
 
136
144
  while [[ $# -gt 0 ]]; do
137
145
  case "$1" in
146
+ --repo)
147
+ REPO_OVERRIDE="${2:-}"
148
+ [[ -z "$REPO_OVERRIDE" ]] && { error "Missing value for --repo"; exit 1; }
149
+ shift 2
150
+ ;;
151
+ --repo=*) REPO_OVERRIDE="${1#--repo=}"; shift ;;
152
+ --local)
153
+ # Skip GitHub operations in loop
154
+ export NO_GITHUB=true
155
+ shift ;;
138
156
  --max-iterations)
139
157
  MAX_ITERATIONS="${2:-}"
140
158
  MAX_ITERATIONS_EXPLICIT=true
@@ -244,16 +262,28 @@ if [[ "$AGENTS" -gt 1 ]]; then
244
262
  USE_WORKTREE=true
245
263
  fi
246
264
 
265
+ # Recruit-powered auto-role assignment when multi-agent but no roles specified
266
+ if [[ "$AGENTS" -gt 1 ]] && [[ -z "$AGENT_ROLES" ]] && [[ -x "${SCRIPT_DIR:-}/sw-recruit.sh" ]]; then
267
+ _recruit_goal="${GOAL:-}"
268
+ if [[ -n "$_recruit_goal" ]]; then
269
+ _recruit_team=$(bash "$SCRIPT_DIR/sw-recruit.sh" team --json "$_recruit_goal" 2>/dev/null) || true
270
+ if [[ -n "$_recruit_team" ]]; then
271
+ _recruit_roles=$(echo "$_recruit_team" | jq -r '.team | join(",")' 2>/dev/null) || true
272
+ if [[ -n "$_recruit_roles" && "$_recruit_roles" != "null" ]]; then
273
+ AGENT_ROLES="$_recruit_roles"
274
+ info "Recruit assigned roles: ${AGENT_ROLES}"
275
+ fi
276
+ fi
277
+ fi
278
+ fi
279
+
247
280
  # Warn if --roles without --agents
248
281
  if [[ -n "$AGENT_ROLES" ]] && [[ "$AGENTS" -le 1 ]]; then
249
282
  warn "--roles requires --agents > 1 (roles are ignored in single-agent mode)"
250
283
  fi
251
284
 
252
- # Warn if --max-restarts with --agents > 1 (not yet supported)
253
- if [[ "${MAX_RESTARTS:-0}" -gt 0 ]] && [[ "$AGENTS" -gt 1 ]]; then
254
- warn "--max-restarts is ignored in multi-agent mode (restart support is single-agent only)"
255
- MAX_RESTARTS=0
256
- fi
285
+ # max-restarts is supported in both single-agent and multi-agent mode
286
+ # In multi-agent mode, restarts apply per-agent (agent can be respawned up to MAX_RESTARTS)
257
287
 
258
288
  # Validate numeric flags
259
289
  if ! [[ "$FAST_TEST_INTERVAL" =~ ^[1-9][0-9]*$ ]]; then
@@ -275,6 +305,23 @@ if ! $RESUME && [[ -z "$GOAL" ]]; then
275
305
  exit 1
276
306
  fi
277
307
 
308
+ # Handle --repo flag: change to directory before running
309
+ if [[ -n "$REPO_OVERRIDE" ]]; then
310
+ if [[ ! -d "$REPO_OVERRIDE" ]]; then
311
+ error "Directory does not exist: $REPO_OVERRIDE"
312
+ exit 1
313
+ fi
314
+ if ! cd "$REPO_OVERRIDE" 2>/dev/null; then
315
+ error "Cannot cd to: $REPO_OVERRIDE"
316
+ exit 1
317
+ fi
318
+ if ! git rev-parse --show-toplevel >/dev/null 2>&1; then
319
+ error "Not a git repository: $REPO_OVERRIDE"
320
+ exit 1
321
+ fi
322
+ info "Using repository: $(pwd)"
323
+ fi
324
+
278
325
  if ! command -v claude &>/dev/null; then
279
326
  error "Claude Code CLI not found. Install it first:"
280
327
  echo -e " ${DIM}npm install -g @anthropic-ai/claude-code${RESET}"
@@ -373,6 +420,110 @@ select_audit_model() {
373
420
  echo "$default_model"
374
421
  }
375
422
 
423
# ─── Token Accumulation ─────────────────────────────────────────────────────
# accumulate_loop_tokens <log_file>
#
# Adds the token usage recorded in one iteration's Claude CLI output to the
# running totals.
#
# Globals (written): LOOP_INPUT_TOKENS, LOOP_OUTPUT_TOKENS, LOOP_COST_MILLICENTS
# Arguments:         $1 - file holding the raw CLI output for one iteration
# Returns:           0 always; a missing file or an unparseable value simply
#                    contributes nothing to the totals.
#
# When jq is available and the file starts with "[", the last array element is
# read for usage.input_tokens, usage.output_tokens, the two cache token fields
# and total_cost_usd (cache tokens are counted as input). Otherwise the file is
# scanned for "input tokens: N" / "output_tokens: N" style text.

# Normalize a token count to a base-10 integer on stdout.
# Strips thousands separators, defuses leading zeros (bash arithmetic would
# read "08" as invalid octal) and maps anything non-numeric to 0, so the
# arithmetic in accumulate_loop_tokens can never abort the loop.
_loop_token_int() {
  local raw="${1:-}"
  raw="${raw//,/}"
  if [[ "$raw" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$(( 10#$raw ))"
  else
    printf '0\n'
  fi
}

accumulate_loop_tokens() {
  local log_file="${1:-}"
  [[ -f "$log_file" ]] || return 0

  local input_tok="" output_tok=""

  # If jq is available and the file looks like a JSON array, parse structured output
  if command -v jq &>/dev/null && head -c1 "$log_file" 2>/dev/null | grep -q '\['; then
    local fields="" cache_read="" cache_create="" cost_usd=""
    # One jq pass over the last array element; num() turns missing fields,
    # lookup errors and non-numbers into 0.
    fields=$(jq -r '
      def num(f): (try f catch 0) | if type == "number" then . else 0 end;
      .[-1]
      | [ num(.usage.input_tokens),
          num(.usage.output_tokens),
          num(.usage.cache_read_input_tokens),
          num(.usage.cache_creation_input_tokens),
          num(.total_cost_usd) ]
      | map(tostring) | join(" ")' "$log_file" 2>/dev/null) || fields=""
    read -r input_tok output_tok cache_read cache_create cost_usd <<<"$fields" || true

    LOOP_INPUT_TOKENS=$(( ${LOOP_INPUT_TOKENS:-0} \
      + $(_loop_token_int "$input_tok") \
      + $(_loop_token_int "$cache_read") \
      + $(_loop_token_int "$cache_create") ))
    LOOP_OUTPUT_TOKENS=$(( ${LOOP_OUTPUT_TOKENS:-0} + $(_loop_token_int "$output_tok") ))

    # Accumulate cost in millicents (USD * 100000) so it stays integer arithmetic.
    # Only a plain non-negative decimal (optionally with exponent) is accepted.
    local cost_re='^[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?$'
    if [[ "$cost_usd" =~ $cost_re ]]; then
      local cost_millicents
      cost_millicents=$(awk -v c="$cost_usd" 'BEGIN { printf "%.0f", c * 100000 }' 2>/dev/null || echo "0")
      LOOP_COST_MILLICENTS=$(( ${LOOP_COST_MILLICENTS:-0} + $(_loop_token_int "$cost_millicents") ))
    fi
  else
    # Fallback: regex-based parsing for non-JSON output (last match wins)
    input_tok=$(grep -oE 'input[_ ]tokens?[: ]+[0-9,]+' "$log_file" 2>/dev/null \
      | tail -1 | grep -oE '[0-9,]+' || true)
    output_tok=$(grep -oE 'output[_ ]tokens?[: ]+[0-9,]+' "$log_file" 2>/dev/null \
      | tail -1 | grep -oE '[0-9,]+' || true)

    LOOP_INPUT_TOKENS=$(( ${LOOP_INPUT_TOKENS:-0} + $(_loop_token_int "$input_tok") ))
    LOOP_OUTPUT_TOKENS=$(( ${LOOP_OUTPUT_TOKENS:-0} + $(_loop_token_int "$output_tok") ))
  fi
}
459
+
460
# ─── JSON→Text Extraction ──────────────────────────────────────────────────
# _extract_text_from_json <json_file> <log_file> [err_file]
#
# Writes a plain-text rendering of Claude's --output-format json response to
# <log_file>, so code that reads the iteration .log keeps working.
#
# Arguments: $1 - file holding the CLI's stdout
#            $2 - destination text log (overwritten)
#            $3 - optional file holding the CLI's stderr
# Returns:   0 always; <log_file> always receives some content.
#
# Handles: JSON arrays (text taken from the last element's .result), a single
# JSON object (.result), output that merely starts with "[" or "{" but does not
# parse as JSON, plain non-JSON output, and empty output.
_extract_text_from_json() {
  local json_file="$1" log_file="$2" err_file="${3:-}"

  # Case 1: no stdout at all — surface stderr if there is any
  if [[ ! -s "$json_file" ]]; then
    if [[ -n "$err_file" && -s "$err_file" ]]; then
      cp "$err_file" "$log_file"
    else
      echo "(no output)" > "$log_file"
    fi
    return 0
  fi

  local first_char
  first_char=$(head -c1 "$json_file" 2>/dev/null || true)

  # Case 2: does not look like JSON (plain text, error message, etc.) — use as-is
  if [[ "$first_char" != "[" && "$first_char" != "{" ]]; then
    cp "$json_file" "$log_file"
    return 0
  fi

  # Case 3: looks like JSON but no jq — can't parse, use raw
  if ! command -v jq &>/dev/null; then
    warn "JSON output but jq not available — using raw output"
    cp "$json_file" "$log_file"
    return 0
  fi

  # Case 4: try to parse. A non-zero jq status means the content only looked
  # like JSON (e.g. "[INFO] ..." text or truncated output); keep the raw bytes
  # instead of discarding them behind a placeholder.
  local extracted
  if ! extracted=$(jq -r '(if type == "array" then .[-1] else . end) | .result? // empty' \
      "$json_file" 2>/dev/null); then
    cp "$json_file" "$log_file"
    return 0
  fi
  if [[ -n "$extracted" ]]; then
    # printf, not echo: the text is arbitrary and may start with "-n"/"-e"
    printf '%s\n' "$extracted" > "$log_file"
    return 0
  fi

  # Parsed, but .result was null/empty — try .content (capped at 500 lines)
  extracted=$(jq -r '(if type == "array" then .[] else . end) | .content? // empty' \
    "$json_file" 2>/dev/null | head -500) || true
  if [[ -n "$extracted" ]]; then
    printf '%s\n' "$extracted" > "$log_file"
    return 0
  fi

  # Valid JSON with no text found
  warn "JSON output has no .result field — check $json_file"
  if [[ "$first_char" == "[" ]]; then
    echo "(no text result in JSON output)" > "$log_file"
  else
    # A lone object is kept verbatim, as before, so nothing is lost from the log
    cp "$json_file" "$log_file"
  fi
  return 0
}
511
+
512
# write_loop_tokens
#
# Write accumulated token totals to $LOG_DIR/loop-tokens.json for the pipeline
# to read. The file holds a single JSON object:
#   {"input_tokens":N,"output_tokens":N,"cost_usd":D,"iterations":N}
#
# Globals (read): LOG_DIR, LOOP_INPUT_TOKENS, LOOP_OUTPUT_TOKENS,
#                 LOOP_COST_MILLICENTS, ITERATION
# Returns:        0 on success, 1 if the file could not be put in place.
#
# Counters that are unset or not plain non-negative integers are written as 0
# so the result is always valid JSON. The file is written to a temp file first
# and renamed over the target.
write_loop_tokens() {
  local token_file="$LOG_DIR/loop-tokens.json"
  local in_tok="${LOOP_INPUT_TOKENS:-0}"
  local out_tok="${LOOP_OUTPUT_TOKENS:-0}"
  local millicents="${LOOP_COST_MILLICENTS:-0}"
  local iters="${ITERATION:-0}"

  [[ "$in_tok" =~ ^[0-9]+$ ]] || in_tok=0
  [[ "$out_tok" =~ ^[0-9]+$ ]] || out_tok=0
  [[ "$millicents" =~ ^[0-9]+$ ]] || millicents=0
  [[ "$iters" =~ ^[0-9]+$ ]] || iters=0

  # Millicents → USD. The value goes in via -v rather than being spliced into
  # the awk program text; LC_ALL=C keeps "." as the decimal separator.
  local cost_usd="0"
  if (( 10#$millicents > 0 )); then
    cost_usd=$(LC_ALL=C awk -v mc="$millicents" 'BEGIN { printf "%.6f", mc / 100000 }' 2>/dev/null || echo "0")
  fi

  mkdir -p "$LOG_DIR" 2>/dev/null || true

  local tmp_file
  tmp_file=$(mktemp "${token_file}.XXXXXX" 2>/dev/null || mktemp)
  printf '{"input_tokens":%s,"output_tokens":%s,"cost_usd":%s,"iterations":%s}\n' \
    "$(( 10#$in_tok ))" "$(( 10#$out_tok ))" "$cost_usd" "$(( 10#$iters ))" > "$tmp_file"

  if ! mv "$tmp_file" "$token_file"; then
    # Do not leave the temp file behind when the rename fails
    rm -f -- "$tmp_file"
    return 1
  fi
}
526
+
376
527
  # ─── Adaptive Iteration Budget ──────────────────────────────────────────────
377
528
  # Reads tuning config for smarter iteration/circuit-breaker thresholds.
378
529
  apply_adaptive_budget() {
@@ -1493,15 +1644,27 @@ compose_worker_prompt() {
1493
1644
 
1494
1645
  if [[ -n "$role" ]]; then
1495
1646
  local role_desc=""
1496
- case "$role" in
1497
- builder) role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
1498
- reviewer) role_desc="Focus on code review look for bugs, security issues, edge cases in recent commits. Make fixes via commits." ;;
1499
- tester) role_desc="Focus on test coverage — write new tests, fix failing tests, improve assertions and edge case coverage." ;;
1500
- optimizer) role_desc="Focus on performance profile hot paths, reduce complexity, optimize algorithms and data structures." ;;
1501
- docs) role_desc="Focus on documentation update README, add docstrings, write usage guides for new features." ;;
1502
- security) role_desc="Focus on security — audit for vulnerabilities, fix injection risks, validate inputs, check auth boundaries." ;;
1503
- *) role_desc="Focus on: ${role}. Apply your expertise in this area to advance the goal." ;;
1504
- esac
1647
+ # Try to pull description from recruit's roles DB first
1648
+ local recruit_roles_db="${HOME}/.shipwright/recruitment/roles.json"
1649
+ if [[ -f "$recruit_roles_db" ]] && command -v jq &>/dev/null; then
1650
+ local recruit_desc
1651
+ recruit_desc=$(jq -r --arg r "$role" '.[$r].description // ""' "$recruit_roles_db" 2>/dev/null) || true
1652
+ if [[ -n "$recruit_desc" && "$recruit_desc" != "null" ]]; then
1653
+ role_desc="$recruit_desc"
1654
+ fi
1655
+ fi
1656
+ # Fallback to hardcoded descriptions
1657
+ if [[ -z "$role_desc" ]]; then
1658
+ case "$role" in
1659
+ builder) role_desc="Focus on implementation — writing code, fixing bugs, building features. You are the primary builder." ;;
1660
+ reviewer) role_desc="Focus on code review — look for bugs, security issues, edge cases in recent commits. Make fixes via commits." ;;
1661
+ tester) role_desc="Focus on test coverage — write new tests, fix failing tests, improve assertions and edge case coverage." ;;
1662
+ optimizer) role_desc="Focus on performance — profile hot paths, reduce complexity, optimize algorithms and data structures." ;;
1663
+ docs) role_desc="Focus on documentation — update README, add docstrings, write usage guides for new features." ;;
1664
+ security) role_desc="Focus on security — audit for vulnerabilities, fix injection risks, validate inputs, check auth boundaries." ;;
1665
+ *) role_desc="Focus on: ${role}. Apply your expertise in this area to advance the goal." ;;
1666
+ esac
1667
+ fi
1505
1668
  role_section="## Your Role: ${role}
1506
1669
  ${role_desc}
1507
1670
  Prioritize work in your area of expertise. Coordinate with other agents via git log."
@@ -1525,6 +1688,7 @@ PROMPT
1525
1688
  build_claude_flags() {
1526
1689
  local flags=()
1527
1690
  flags+=("--model" "$MODEL")
1691
+ flags+=("--output-format" "json")
1528
1692
 
1529
1693
  if $SKIP_PERMISSIONS; then
1530
1694
  flags+=("--dangerously-skip-permissions")
@@ -1539,6 +1703,7 @@ build_claude_flags() {
1539
1703
 
1540
1704
  run_claude_iteration() {
1541
1705
  local log_file="$LOG_DIR/iteration-${ITERATION}.log"
1706
+ local json_file="$LOG_DIR/iteration-${ITERATION}.json"
1542
1707
  local prompt
1543
1708
  prompt="$(compose_prompt)"
1544
1709
 
@@ -1551,12 +1716,14 @@ run_claude_iteration() {
1551
1716
  echo -e "\n${CYAN}${BOLD}▸${RESET} ${BOLD}Iteration ${ITERATION}/${MAX_ITERATIONS}${RESET} — Starting..."
1552
1717
 
1553
1718
  # Run Claude headless (with timeout + PID capture for signal handling)
1719
+ # Output goes to .json first, then we extract text into .log for compat
1554
1720
  local exit_code=0
1555
1721
  # shellcheck disable=SC2086
1722
+ local err_file="${json_file%.json}.stderr"
1556
1723
  if [[ -n "$TIMEOUT_CMD" ]]; then
1557
- $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$prompt" $flags > "$log_file" 2>&1 &
1724
+ $TIMEOUT_CMD "$CLAUDE_TIMEOUT" claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
1558
1725
  else
1559
- claude -p "$prompt" $flags > "$log_file" 2>&1 &
1726
+ claude -p "$prompt" $flags > "$json_file" 2>"$err_file" &
1560
1727
  fi
1561
1728
  CHILD_PID=$!
1562
1729
  wait "$CHILD_PID" 2>/dev/null || exit_code=$?
@@ -1565,12 +1732,19 @@ run_claude_iteration() {
1565
1732
  warn "Claude CLI timed out after ${CLAUDE_TIMEOUT}s"
1566
1733
  fi
1567
1734
 
1735
+ # Extract text result from JSON into .log for backwards compatibility
1736
+ # With --output-format json, stdout is a JSON array; .[-1].result has the text
1737
+ _extract_text_from_json "$json_file" "$log_file" "$err_file"
1738
+
1568
1739
  local iter_end
1569
1740
  iter_end="$(now_epoch)"
1570
1741
  local iter_duration=$(( iter_end - iter_start ))
1571
1742
 
1572
1743
  echo -e " ${GREEN}✓${RESET} Claude session completed ($(format_duration "$iter_duration"), exit $exit_code)"
1573
1744
 
1745
+ # Accumulate token usage from this iteration's JSON output
1746
+ accumulate_loop_tokens "$json_file"
1747
+
1574
1748
  # Show verbose output if requested
1575
1749
  if $VERBOSE; then
1576
1750
  echo -e " ${DIM}─── Claude Output ───${RESET}"
@@ -1666,10 +1840,16 @@ show_summary() {
1666
1840
  echo -e " ${BOLD}Duration:${RESET} $(format_duration "$duration")"
1667
1841
  echo -e " ${BOLD}Commits:${RESET} $TOTAL_COMMITS"
1668
1842
  echo -e " ${BOLD}Tests:${RESET} $test_display"
1843
+ if [[ "$LOOP_INPUT_TOKENS" -gt 0 || "$LOOP_OUTPUT_TOKENS" -gt 0 ]]; then
1844
+ echo -e " ${BOLD}Tokens:${RESET} in=${LOOP_INPUT_TOKENS} out=${LOOP_OUTPUT_TOKENS}"
1845
+ fi
1669
1846
  echo ""
1670
1847
  echo -e " ${DIM}State: $STATE_FILE${RESET}"
1671
1848
  echo -e " ${DIM}Logs: $LOG_DIR/${RESET}"
1672
1849
  echo ""
1850
+
1851
+ # Write token totals for pipeline cost tracking
1852
+ write_loop_tokens
1673
1853
  }
1674
1854
 
1675
1855
  # ─── Signal Handling ──────────────────────────────────────────────────────────
@@ -1832,10 +2012,15 @@ Focus on areas they haven't touched yet.
1832
2012
  PROMPT
1833
2013
  )"
1834
2014
 
1835
- # Run Claude
2015
+ # Run Claude (output is JSON due to --output-format json in CLAUDE_FLAGS)
2016
+ local JSON_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.json"
2017
+ local ERR_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.stderr"
1836
2018
  LOG_FILE="$LOG_DIR/agent-${AGENT_NUM}-iter-${ITERATION}.log"
1837
2019
  # shellcheck disable=SC2086
1838
- claude -p "$PROMPT" $CLAUDE_FLAGS > "$LOG_FILE" 2>&1 || true
2020
+ claude -p "$PROMPT" $CLAUDE_FLAGS > "$JSON_FILE" 2>"$ERR_FILE" || true
2021
+
2022
+ # Extract text result from JSON into .log for backwards compat
2023
+ _extract_text_from_json "$JSON_FILE" "$LOG_FILE" "$ERR_FILE"
1839
2024
 
1840
2025
  echo -e " ${GREEN}✓${RESET} Claude session completed"
1841
2026
 
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="2.0.0"
9
+ VERSION="2.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12
 
@@ -1077,6 +1077,20 @@ memory_get_dora_baseline() {
1077
1077
  echo "$metrics"
1078
1078
  }
1079
1079
 
1080
# memory_get_baseline <metric_name>
# Print the stored baseline for one metric (bundle_size_kb, test_duration_s,
# coverage_pct, etc.) from the repo's metrics.json.
# Used by pipeline for regression checks.
# Returns 1 when no metric name is given; otherwise returns 0 and prints
# nothing if the metrics file or the baseline is absent.
memory_get_baseline() {
  local wanted="${1:-}"
  if [[ -z "$wanted" ]]; then
    return 1
  fi

  ensure_memory_dir

  local base_dir
  base_dir="$(repo_memory_dir)"
  local store="$base_dir/metrics.json"

  if [[ -f "$store" ]]; then
    # Best effort: a jq failure yields no output rather than an error
    jq -r --arg m "$wanted" '.baselines[$m] // empty' "$store" 2>/dev/null || true
  fi
  return 0
}
1093
+
1080
1094
  # memory_update_metrics <metric_name> <value>
1081
1095
  # Track performance baselines and flag regressions.
1082
1096
  memory_update_metrics() {
@@ -1602,6 +1616,9 @@ case "$SUBCOMMAND" in
1602
1616
  pattern)
1603
1617
  memory_capture_pattern "$@"
1604
1618
  ;;
1619
+ get)
1620
+ memory_get_baseline "$@"
1621
+ ;;
1605
1622
  metric)
1606
1623
  memory_update_metrics "$@"
1607
1624
  ;;
@@ -7,7 +7,7 @@
7
7
  set -euo pipefail
8
8
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
9
9
 
10
- VERSION="2.0.0"
10
+ VERSION="2.1.0"
11
11
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
12
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
13
13
 
@@ -7,7 +7,7 @@
7
7
  set -euo pipefail
8
8
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
9
9
 
10
- VERSION="2.0.0"
10
+ VERSION="2.1.0"
11
11
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
12
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
13
13
 
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="2.0.0"
9
+ VERSION="2.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12
 
@@ -7,7 +7,7 @@
7
7
  set -euo pipefail
8
8
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
9
9
 
10
- VERSION="2.0.0"
10
+ VERSION="2.1.0"
11
11
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
12
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
13
13
 
@@ -331,6 +331,80 @@ _update_verdict() {
331
331
  fi
332
332
  }
333
333
 
334
# ─── Pipeline gate: submit review, record vote(s), output verdict ───────────
# Usage: oversight gate --diff <file> [--description <text>] [--reject-if <reason>]
# Outputs: approved | rejected | deadlock | pending (for pipeline to block on non-approved)
#
# Creates a review record under $OVERSIGHT_ROOT, records a single vote from the
# "pipeline" reviewer (reject when --reject-if is non-empty, approve otherwise),
# calls _update_verdict, then prints the resulting verdict.
#
# Exit status: 0 for approved/pending (and --help); 1 for rejected/deadlock,
# for a missing or nonexistent --diff file, an unknown option, or a flag given
# without its value.
cmd_gate() {
  local diff_file=""
  local description=""
  local reject_if=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --diff|--description|--reject-if)
        # Check explicitly: under `set -u` a bare "$2" would abort with an
        # "unbound variable" message instead of a usable error.
        if [[ $# -lt 2 ]]; then
          error "Missing value for $1"
          exit 1
        fi
        case "$1" in
          --diff) diff_file="$2" ;;
          --description) description="$2" ;;
          --reject-if) reject_if="$2" ;;
        esac
        shift 2
        ;;
      -h|--help)
        echo "Usage: oversight gate --diff <file> [--description <text>] [--reject-if <reason>]"
        echo "Outputs verdict: approved | rejected | deadlock | pending"
        exit 0
        ;;
      *) error "Unknown option: $1"; exit 1 ;;
    esac
  done

  if [[ -z "$diff_file" || ! -f "$diff_file" ]]; then
    error "Provide --diff <file> (must exist)"
    exit 1
  fi

  _init_board_config
  _init_members

  # Review id: epoch seconds plus 8 random bytes in hex (PID if urandom is unavailable)
  local review_id
  review_id=$(date +%s)_$(head -c8 /dev/urandom 2>/dev/null | od -A n -t x1 | tr -d ' ' || echo "$$")
  local review_file="${OVERSIGHT_ROOT}/${review_id}.json"

  # Build review record safely via jq (no JSON injection from description/diff_file)
  jq -n \
    --arg id "$review_id" \
    --arg submitted "$(now_iso)" \
    --arg diff "$diff_file" \
    --arg desc "$description" \
    '{id: $id, submitted_at: $submitted, pr_number: null, commit: null, diff_file: $diff, description: $desc, votes: {}, verdict: null, confidence_score: 0.0, appeals: []}' \
    > "$review_file"

  # Single pipeline voter: reject if --reject-if given, else approve
  local decision="approve"
  local reasoning="Pipeline review passed"
  if [[ -n "$reject_if" ]]; then
    decision="reject"
    reasoning="$reject_if"
  fi

  # Every value goes through --arg, which does the JSON escaping itself:
  # the reasoning is stored verbatim (no hand-added backslashes) and the
  # timestamp is never spliced into the jq program text.
  local tmp_file="${review_file}.tmp"
  jq --arg reviewer "pipeline" \
    --arg decision "$decision" \
    --arg reasoning "$reasoning" \
    --arg confidence "0.9" \
    --arg voted_at "$(now_iso)" \
    '.votes[$reviewer] = {
      "decision": $decision,
      "reasoning": $reasoning,
      "confidence": ($confidence | tonumber),
      "voted_at": $voted_at
    }' "$review_file" > "$tmp_file"
  mv "$tmp_file" "$review_file"

  _update_verdict "$review_id"

  local verdict
  verdict=$(jq -r '.verdict // "pending"' "$review_file")
  echo "$verdict"
  if [[ "$verdict" == "rejected" || "$verdict" == "deadlock" ]]; then
    exit 1
  fi
}
407
+
334
408
  # ─── Verdict Display ──────────────────────────────────────────────────────
335
409
 
336
410
  cmd_verdict() {
@@ -667,6 +741,7 @@ main() {
667
741
  case "$cmd" in
668
742
  review) cmd_review "$@" ;;
669
743
  vote) cmd_vote "$@" ;;
744
+ gate) cmd_gate "$@" ;;
670
745
  verdict) cmd_verdict "$@" ;;
671
746
  history) cmd_history "$@" ;;
672
747
  members) cmd_members "$@" ;;
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="2.0.0"
9
+ VERSION="2.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12
 
@@ -6,7 +6,7 @@
6
6
  set -euo pipefail
7
7
  trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
8
8
 
9
- VERSION="2.0.0"
9
+ VERSION="2.1.0"
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
12
12