shipwright-cli 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/.claude/agents/code-reviewer.md +90 -0
  2. package/.claude/agents/devops-engineer.md +142 -0
  3. package/.claude/agents/pipeline-agent.md +80 -0
  4. package/.claude/agents/shell-script-specialist.md +150 -0
  5. package/.claude/agents/test-specialist.md +196 -0
  6. package/.claude/hooks/post-tool-use.sh +38 -0
  7. package/.claude/hooks/pre-tool-use.sh +25 -0
  8. package/.claude/hooks/session-started.sh +37 -0
  9. package/README.md +212 -814
  10. package/claude-code/CLAUDE.md.shipwright +54 -0
  11. package/claude-code/hooks/notify-idle.sh +2 -2
  12. package/claude-code/hooks/session-start.sh +24 -0
  13. package/claude-code/hooks/task-completed.sh +6 -2
  14. package/claude-code/settings.json.template +12 -0
  15. package/dashboard/public/app.js +4422 -0
  16. package/dashboard/public/index.html +816 -0
  17. package/dashboard/public/styles.css +4755 -0
  18. package/dashboard/server.ts +4315 -0
  19. package/docs/KNOWN-ISSUES.md +18 -10
  20. package/docs/TIPS.md +38 -26
  21. package/docs/patterns/README.md +33 -23
  22. package/package.json +9 -5
  23. package/scripts/adapters/iterm2-adapter.sh +1 -1
  24. package/scripts/adapters/tmux-adapter.sh +52 -23
  25. package/scripts/adapters/wezterm-adapter.sh +26 -14
  26. package/scripts/lib/compat.sh +200 -0
  27. package/scripts/lib/helpers.sh +72 -0
  28. package/scripts/postinstall.mjs +72 -13
  29. package/scripts/{cct → sw} +109 -21
  30. package/scripts/sw-adversarial.sh +274 -0
  31. package/scripts/sw-architecture-enforcer.sh +330 -0
  32. package/scripts/sw-checkpoint.sh +390 -0
  33. package/scripts/{cct-cleanup.sh → sw-cleanup.sh} +3 -1
  34. package/scripts/sw-connect.sh +619 -0
  35. package/scripts/{cct-cost.sh → sw-cost.sh} +368 -34
  36. package/scripts/{cct-daemon.sh → sw-daemon.sh} +2217 -204
  37. package/scripts/sw-dashboard.sh +477 -0
  38. package/scripts/sw-developer-simulation.sh +252 -0
  39. package/scripts/sw-docs.sh +635 -0
  40. package/scripts/sw-doctor.sh +907 -0
  41. package/scripts/{cct-fix.sh → sw-fix.sh} +10 -6
  42. package/scripts/{cct-fleet.sh → sw-fleet.sh} +498 -22
  43. package/scripts/sw-github-checks.sh +521 -0
  44. package/scripts/sw-github-deploy.sh +533 -0
  45. package/scripts/sw-github-graphql.sh +972 -0
  46. package/scripts/sw-heartbeat.sh +293 -0
  47. package/scripts/sw-init.sh +522 -0
  48. package/scripts/sw-intelligence.sh +1196 -0
  49. package/scripts/sw-jira.sh +643 -0
  50. package/scripts/sw-launchd.sh +364 -0
  51. package/scripts/sw-linear.sh +648 -0
  52. package/scripts/{cct-logs.sh → sw-logs.sh} +72 -2
  53. package/scripts/{cct-loop.sh → sw-loop.sh} +534 -44
  54. package/scripts/{cct-memory.sh → sw-memory.sh} +321 -38
  55. package/scripts/sw-patrol-meta.sh +417 -0
  56. package/scripts/sw-pipeline-composer.sh +455 -0
  57. package/scripts/{cct-pipeline.sh → sw-pipeline.sh} +2319 -178
  58. package/scripts/sw-predictive.sh +820 -0
  59. package/scripts/{cct-prep.sh → sw-prep.sh} +339 -49
  60. package/scripts/{cct-ps.sh → sw-ps.sh} +6 -4
  61. package/scripts/{cct-reaper.sh → sw-reaper.sh} +6 -4
  62. package/scripts/sw-remote.sh +687 -0
  63. package/scripts/sw-self-optimize.sh +947 -0
  64. package/scripts/sw-session.sh +519 -0
  65. package/scripts/sw-setup.sh +234 -0
  66. package/scripts/sw-status.sh +605 -0
  67. package/scripts/{cct-templates.sh → sw-templates.sh} +9 -4
  68. package/scripts/sw-tmux.sh +591 -0
  69. package/scripts/sw-tracker-jira.sh +277 -0
  70. package/scripts/sw-tracker-linear.sh +292 -0
  71. package/scripts/sw-tracker.sh +409 -0
  72. package/scripts/{cct-upgrade.sh → sw-upgrade.sh} +103 -46
  73. package/scripts/{cct-worktree.sh → sw-worktree.sh} +3 -0
  74. package/templates/pipelines/autonomous.json +27 -5
  75. package/templates/pipelines/full.json +12 -0
  76. package/templates/pipelines/standard.json +12 -0
  77. package/tmux/{claude-teams-overlay.conf → shipwright-overlay.conf} +27 -9
  78. package/tmux/templates/accessibility.json +34 -0
  79. package/tmux/templates/api-design.json +35 -0
  80. package/tmux/templates/architecture.json +1 -0
  81. package/tmux/templates/bug-fix.json +9 -0
  82. package/tmux/templates/code-review.json +1 -0
  83. package/tmux/templates/compliance.json +36 -0
  84. package/tmux/templates/data-pipeline.json +36 -0
  85. package/tmux/templates/debt-paydown.json +34 -0
  86. package/tmux/templates/devops.json +1 -0
  87. package/tmux/templates/documentation.json +1 -0
  88. package/tmux/templates/exploration.json +1 -0
  89. package/tmux/templates/feature-dev.json +1 -0
  90. package/tmux/templates/full-stack.json +8 -0
  91. package/tmux/templates/i18n.json +34 -0
  92. package/tmux/templates/incident-response.json +36 -0
  93. package/tmux/templates/migration.json +1 -0
  94. package/tmux/templates/observability.json +35 -0
  95. package/tmux/templates/onboarding.json +33 -0
  96. package/tmux/templates/performance.json +35 -0
  97. package/tmux/templates/refactor.json +1 -0
  98. package/tmux/templates/release.json +35 -0
  99. package/tmux/templates/security-audit.json +8 -0
  100. package/tmux/templates/spike.json +34 -0
  101. package/tmux/templates/testing.json +1 -0
  102. package/tmux/tmux.conf +98 -9
  103. package/scripts/cct-doctor.sh +0 -328
  104. package/scripts/cct-init.sh +0 -282
  105. package/scripts/cct-session.sh +0 -284
  106. package/scripts/cct-status.sh +0 -169
@@ -4,8 +4,9 @@
4
4
  # ║ Spawns daemons across repos · Fleet dashboard · Aggregate metrics ║
5
5
  # ╚═══════════════════════════════════════════════════════════════════════════╝
6
6
  set -euo pipefail
7
+ trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR
7
8
 
8
- VERSION="1.7.0"
9
+ VERSION="1.9.0"
9
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10
11
  REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
11
12
 
@@ -20,6 +21,9 @@ DIM='\033[2m'
20
21
  BOLD='\033[1m'
21
22
  RESET='\033[0m'
22
23
 
24
+ # ─── Cross-platform compatibility ──────────────────────────────────────────
25
+ # shellcheck source=lib/compat.sh
26
+ [[ -f "$SCRIPT_DIR/lib/compat.sh" ]] && source "$SCRIPT_DIR/lib/compat.sh"
23
27
  # ─── Output Helpers ─────────────────────────────────────────────────────────
24
28
  info() { echo -e "${CYAN}${BOLD}▸${RESET} $*"; }
25
29
  success() { echo -e "${GREEN}${BOLD}✓${RESET} $*"; }
@@ -48,7 +52,7 @@ format_duration() {
48
52
  }
49
53
 
50
54
  # ─── Structured Event Log ──────────────────────────────────────────────────
51
- EVENTS_FILE="${HOME}/.claude-teams/events.jsonl"
55
+ EVENTS_FILE="${HOME}/.shipwright/events.jsonl"
52
56
 
53
57
  emit_event() {
54
58
  local event_type="$1"
@@ -64,12 +68,12 @@ emit_event() {
64
68
  json_fields="${json_fields},\"${key}\":\"${val}\""
65
69
  fi
66
70
  done
67
- mkdir -p "${HOME}/.claude-teams"
71
+ mkdir -p "${HOME}/.shipwright"
68
72
  echo "{\"ts\":\"$(now_iso)\",\"ts_epoch\":$(now_epoch),\"type\":\"${event_type}\"${json_fields}}" >> "$EVENTS_FILE"
69
73
  }
70
74
 
71
75
  # ─── Defaults ───────────────────────────────────────────────────────────────
72
- FLEET_DIR="$HOME/.claude-teams"
76
+ FLEET_DIR="$HOME/.shipwright"
73
77
  FLEET_STATE="$FLEET_DIR/fleet-state.json"
74
78
  CONFIG_PATH=""
75
79
 
@@ -196,6 +200,52 @@ session_name_for_repo() {
196
200
  echo "shipwright-fleet-${basename}"
197
201
  }
198
202
 
203
+ # ─── GitHub-Aware Repo Priority ──────────────────────────────────────────
204
+ # Returns a priority score (default 50) for a repo based on GitHub data.
205
+ # Used when intelligence.fleet_weighting is enabled.
206
+
207
+ _fleet_repo_priority() {
208
+ local repo_path="$1"
209
+ local priority=50 # default neutral priority
210
+
211
+ type _gh_detect_repo &>/dev/null 2>&1 || { echo "$priority"; return 0; }
212
+
213
+ # Detect repo from the repo path (run in subshell to avoid cd side-effects)
214
+ local gh_priority
215
+ gh_priority=$(
216
+ cd "$repo_path" 2>/dev/null || exit 1
217
+ _gh_detect_repo 2>/dev/null || exit 1
218
+ local owner="${GH_OWNER:-}" repo="${GH_REPO:-}"
219
+ [[ -z "$owner" || -z "$repo" ]] && exit 1
220
+
221
+ local p=50
222
+
223
+ # Factor: security alerts (urgent work)
224
+ if type gh_security_alerts &>/dev/null 2>&1; then
225
+ local alerts
226
+ alerts=$(gh_security_alerts "$owner" "$repo" 2>/dev/null | jq 'length' 2>/dev/null || echo "0")
227
+ if [[ "${alerts:-0}" -gt 5 ]]; then
228
+ p=$((p + 20))
229
+ elif [[ "${alerts:-0}" -gt 0 ]]; then
230
+ p=$((p + 10))
231
+ fi
232
+ fi
233
+
234
+ # Factor: contributor count (more contributors = more active = higher priority)
235
+ if type gh_contributors &>/dev/null 2>&1; then
236
+ local contribs
237
+ contribs=$(gh_contributors "$owner" "$repo" 2>/dev/null | jq 'length' 2>/dev/null || echo "0")
238
+ if [[ "${contribs:-0}" -gt 10 ]]; then
239
+ p=$((p + 10))
240
+ fi
241
+ fi
242
+
243
+ echo "$p"
244
+ ) || echo "$priority"
245
+
246
+ echo "${gh_priority:-$priority}"
247
+ }
248
+
199
249
  # ─── Worker Pool Rebalancer ───────────────────────────────────────────────
200
250
  # Runs in background, redistributes MAX_PARALLEL across repos based on demand
201
251
 
@@ -206,7 +256,7 @@ fleet_rebalance() {
206
256
  local total_workers
207
257
  total_workers=$(jq -r '.worker_pool.total_workers // 12' "$config_file")
208
258
 
209
- local shutdown_flag="$HOME/.claude-teams/fleet-rebalancer.shutdown"
259
+ local shutdown_flag="$HOME/.shipwright/fleet-rebalancer.shutdown"
210
260
  rm -f "$shutdown_flag"
211
261
 
212
262
  while true; do
@@ -224,11 +274,22 @@ fleet_rebalance() {
224
274
  fi
225
275
 
226
276
  # Collect demand per repo using indexed arrays (bash 3.2 compatible)
277
+ # When intelligence is available, weight by complexity × urgency
227
278
  local repo_list=()
228
279
  local demand_list=()
280
+ local weight_list=()
229
281
  local total_demand=0
282
+ local total_weight=0
230
283
  local repo_count=0
231
284
 
285
+ # Check if intelligence weighting is enabled
286
+ local intel_weighting=false
287
+ local fleet_intel_enabled
288
+ fleet_intel_enabled=$(jq -r '.intelligence.fleet_weighting // false' "$config_file" 2>/dev/null || echo "false")
289
+ if [[ "$fleet_intel_enabled" == "true" ]]; then
290
+ intel_weighting=true
291
+ fi
292
+
232
293
  while IFS= read -r repo_name; do
233
294
  local repo_path
234
295
  repo_path=$(jq -r --arg r "$repo_name" '.repos[$r].path' "$FLEET_STATE" 2>/dev/null || true)
@@ -236,10 +297,10 @@ fleet_rebalance() {
236
297
 
237
298
  # Read daemon state — try repo-local state first
238
299
  local active=0 queued=0
239
- local daemon_state="$repo_path/.claude-teams/daemon-state.json"
300
+ local daemon_state="$repo_path/.shipwright/daemon-state.json"
240
301
  if [[ ! -f "$daemon_state" ]]; then
241
302
  # Fall back to shared state, filtered by repo
242
- daemon_state="$HOME/.claude-teams/daemon-state.json"
303
+ daemon_state="$HOME/.shipwright/daemon-state.json"
243
304
  fi
244
305
  if [[ -f "$daemon_state" ]]; then
245
306
  active=$(jq -r '.active_jobs | length // 0' "$daemon_state" 2>/dev/null || echo 0)
@@ -250,9 +311,59 @@ fleet_rebalance() {
250
311
  fi
251
312
 
252
313
  local demand=$((active + queued))
314
+
315
+ # Compute intelligence weight: complexity × urgency
316
+ # Falls back to raw demand when no intelligence data exists
317
+ local weight="$demand"
318
+ if [[ "$intel_weighting" == "true" && "$demand" -gt 0 ]]; then
319
+ local intel_cache="$repo_path/.claude/intelligence-cache.json"
320
+ local avg_complexity=50
321
+ local urgency_factor=1
322
+
323
+ # Read average issue complexity from intelligence cache
324
+ if [[ -f "$intel_cache" ]]; then
325
+ local cached_complexity
326
+ cached_complexity=$(jq -r '.analysis.avg_issue_complexity // 50' "$intel_cache" 2>/dev/null || echo "50")
327
+ [[ "$cached_complexity" =~ ^[0-9]+$ ]] && avg_complexity="$cached_complexity"
328
+ fi
329
+
330
+ # Check for deadline urgency in queued issues
331
+ if [[ -f "$daemon_state" ]]; then
332
+ local urgent_count=0
333
+ local urgent_raw
334
+ urgent_raw=$(jq -r '[.queued[]? | select(.labels[]? == "priority" or .labels[]? == "urgent" or .labels[]? == "hotfix")] | length' "$daemon_state" 2>/dev/null || echo "0")
335
+ [[ "$urgent_raw" =~ ^[0-9]+$ ]] && urgent_count="$urgent_raw"
336
+
337
+ if [[ "$queued" -gt 0 && "$urgent_count" -gt 0 ]]; then
338
+ # Urgency boost: 1.0 base + 0.5 per urgent ratio
339
+ urgency_factor=$(awk -v uc="$urgent_count" -v q="$queued" \
340
+ 'BEGIN { r = uc / q; f = 1.0 + (r * 0.5); printf "%.0f", f * 100 }')
341
+ # urgency_factor is now scaled by 100 (e.g. 150 = 1.5x)
342
+ else
343
+ urgency_factor=100
344
+ fi
345
+ else
346
+ urgency_factor=100
347
+ fi
348
+
349
+ # GitHub priority factor (normalized: priority / 50, so 50 = 1.0x)
350
+ local gh_priority_factor=100 # 100 = 1.0x (neutral)
351
+ if [[ -n "$repo_path" && "$repo_path" != "null" ]]; then
352
+ local gh_prio
353
+ gh_prio=$(_fleet_repo_priority "$repo_path" 2>/dev/null || echo "50")
354
+ [[ "$gh_prio" =~ ^[0-9]+$ ]] && gh_priority_factor=$((gh_prio * 2))
355
+ fi
356
+
357
+ # Weight = demand × (complexity / 50) × (urgency / 100) × (gh_priority / 100)
358
+ weight=$(awk -v d="$demand" -v c="$avg_complexity" -v u="$urgency_factor" -v g="$gh_priority_factor" \
359
+ 'BEGIN { w = d * (c / 50.0) * (u / 100.0) * (g / 100.0); if (w < 1) w = 1; printf "%.0f", w }')
360
+ fi
361
+
253
362
  repo_list+=("$repo_name")
254
363
  demand_list+=("$demand")
364
+ weight_list+=("$weight")
255
365
  total_demand=$((total_demand + demand))
366
+ total_weight=$((total_weight + weight))
256
367
  repo_count=$((repo_count + 1))
257
368
  done <<< "$repo_names"
258
369
 
@@ -261,17 +372,28 @@ fleet_rebalance() {
261
372
  fi
262
373
 
263
374
  # Distribute workers proportionally with budget enforcement
375
+ # When intelligence weighting is active, use weighted demand
264
376
  local allocated_total=0
265
377
  local alloc_list=()
378
+ local use_weight="$total_weight"
379
+ local effective_total="$total_demand"
380
+ if [[ "$intel_weighting" == "true" && "$total_weight" -gt 0 ]]; then
381
+ effective_total="$total_weight"
382
+ fi
266
383
 
267
384
  local i
268
385
  for i in $(seq 0 $((repo_count - 1))); do
269
386
  local new_max
270
- if [[ "$total_demand" -eq 0 ]]; then
387
+ if [[ "$effective_total" -eq 0 ]]; then
271
388
  new_max=$(( total_workers / repo_count ))
272
389
  else
273
- local repo_demand="${demand_list[$i]}"
274
- new_max=$(awk -v d="$repo_demand" -v td="$total_demand" -v tw="$total_workers" \
390
+ local repo_score
391
+ if [[ "$intel_weighting" == "true" && "$total_weight" -gt 0 ]]; then
392
+ repo_score="${weight_list[$i]}"
393
+ else
394
+ repo_score="${demand_list[$i]}"
395
+ fi
396
+ new_max=$(awk -v d="$repo_score" -v td="$effective_total" -v tw="$total_workers" \
275
397
  'BEGIN { v = (d / td) * tw; if (v < 1) v = 1; printf "%.0f", v }')
276
398
  fi
277
399
  [[ "$new_max" -lt 1 ]] && new_max=1
@@ -317,16 +439,318 @@ fleet_rebalance() {
317
439
 
318
440
  # Signal daemons to reload
319
441
  if [[ "$reload_needed" == "true" ]]; then
320
- touch "$HOME/.claude-teams/fleet-reload.flag"
442
+ touch "$HOME/.shipwright/fleet-reload.flag"
321
443
  emit_event "fleet.rebalance" \
322
444
  "total_workers=$total_workers" \
323
445
  "total_demand=$total_demand" \
446
+ "total_weight=$total_weight" \
447
+ "intel_weighting=$intel_weighting" \
324
448
  "repo_count=$repo_count" \
325
449
  "allocated=$allocated_total"
326
450
  fi
327
451
  done
328
452
  }
329
453
 
454
+ # ─── Distributed Worker Rebalancer ───────────────────────────────────────
455
+ # Extends fleet rebalancing across registered remote machines
456
+
457
+ fleet_rebalance_distributed() {
458
+ local machines_file="$HOME/.shipwright/machines.json"
459
+ [[ ! -f "$machines_file" ]] && return 0
460
+
461
+ local machine_count
462
+ machine_count=$(jq '.machines | length' "$machines_file" 2>/dev/null || echo 0)
463
+ [[ "$machine_count" -eq 0 ]] && return 0
464
+
465
+ local ssh_opts="-o ConnectTimeout=5 -o BatchMode=yes -o StrictHostKeyChecking=accept-new"
466
+
467
+ # Collect demand and capacity from all machines
468
+ local machine_names=()
469
+ local machine_hosts=()
470
+ local machine_users=()
471
+ local machine_paths=()
472
+ local machine_max_workers=()
473
+ local machine_demands=()
474
+ local machine_actives=()
475
+ local total_demand=0
476
+ local total_capacity=0
477
+ local reachable_count=0
478
+
479
+ local i
480
+ for i in $(seq 0 $((machine_count - 1))); do
481
+ local name host ssh_user sw_path max_w
482
+ name=$(jq -r --argjson i "$i" '.machines[$i].name' "$machines_file")
483
+ host=$(jq -r --argjson i "$i" '.machines[$i].host' "$machines_file")
484
+ ssh_user=$(jq -r --argjson i "$i" '.machines[$i].ssh_user // ""' "$machines_file")
485
+ sw_path=$(jq -r --argjson i "$i" '.machines[$i].shipwright_path' "$machines_file")
486
+ max_w=$(jq -r --argjson i "$i" '.machines[$i].max_workers // 4' "$machines_file")
487
+
488
+ # Query machine for active/queued jobs
489
+ local query_cmd="active=0; queued=0; if [ -f \"\$HOME/.shipwright/daemon-state.json\" ]; then active=\$(python3 -c \"import json; d=json.load(open('\$HOME/.shipwright/daemon-state.json')); print(len(d.get('active_jobs',{})))\" 2>/dev/null || echo 0); queued=\$(python3 -c \"import json; d=json.load(open('\$HOME/.shipwright/daemon-state.json')); print(len(d.get('queued',[])))\" 2>/dev/null || echo 0); fi; echo \"\${active}|\${queued}\""
490
+
491
+ local result=""
492
+ if [[ "$host" == "localhost" || "$host" == "127.0.0.1" || "$host" == "::1" ]]; then
493
+ result=$(bash -c "$query_cmd" 2>/dev/null || echo "0|0")
494
+ else
495
+ local target="$host"
496
+ if [[ -n "$ssh_user" && "$ssh_user" != "null" ]]; then
497
+ target="${ssh_user}@${host}"
498
+ fi
499
+ # shellcheck disable=SC2086
500
+ result=$(ssh $ssh_opts "$target" "$query_cmd" 2>/dev/null || echo "")
501
+ fi
502
+
503
+ if [[ -z "$result" ]]; then
504
+ # Machine unreachable — skip
505
+ continue
506
+ fi
507
+
508
+ local active_val queued_val
509
+ active_val=$(echo "$result" | cut -d'|' -f1)
510
+ queued_val=$(echo "$result" | cut -d'|' -f2)
511
+ [[ ! "$active_val" =~ ^[0-9]+$ ]] && active_val=0
512
+ [[ ! "$queued_val" =~ ^[0-9]+$ ]] && queued_val=0
513
+
514
+ local demand=$((active_val + queued_val))
515
+
516
+ machine_names+=("$name")
517
+ machine_hosts+=("$host")
518
+ machine_users+=("$ssh_user")
519
+ machine_paths+=("$sw_path")
520
+ machine_max_workers+=("$max_w")
521
+ machine_demands+=("$demand")
522
+ machine_actives+=("$active_val")
523
+ total_demand=$((total_demand + demand))
524
+ total_capacity=$((total_capacity + max_w))
525
+ reachable_count=$((reachable_count + 1))
526
+ done
527
+
528
+ [[ "$reachable_count" -eq 0 ]] && return 0
529
+
530
+ # Proportional allocation: distribute total capacity by demand
531
+ local alloc_list=()
532
+ local allocated_total=0
533
+
534
+ for i in $(seq 0 $((reachable_count - 1))); do
535
+ local new_max
536
+ local cap="${machine_max_workers[$i]}"
537
+ if [[ "$total_demand" -eq 0 ]]; then
538
+ # No demand anywhere — give each machine its max
539
+ new_max="$cap"
540
+ else
541
+ local d="${machine_demands[$i]}"
542
+ new_max=$(awk -v d="$d" -v td="$total_demand" -v cap="$cap" \
543
+ 'BEGIN { v = (d / td) * cap; if (v < 1) v = 1; if (v > cap) v = cap; printf "%.0f", v }')
544
+ fi
545
+ [[ "$new_max" -lt 1 ]] && new_max=1
546
+ [[ "$new_max" -gt "$cap" ]] && new_max="$cap"
547
+ alloc_list+=("$new_max")
548
+ allocated_total=$((allocated_total + new_max))
549
+ done
550
+
551
+ # Write allocation to each machine's daemon config
552
+ for i in $(seq 0 $((reachable_count - 1))); do
553
+ local name="${machine_names[$i]}"
554
+ local host="${machine_hosts[$i]}"
555
+ local ssh_user="${machine_users[$i]}"
556
+ local sw_path="${machine_paths[$i]}"
557
+ local new_max="${alloc_list[$i]}"
558
+
559
+ local update_cmd="if [ -f '${sw_path}/.claude/daemon-config.json' ]; then tmp=\"${sw_path}/.claude/daemon-config.json.tmp.\$\$\"; jq --argjson mp ${new_max} '.max_parallel = \$mp' '${sw_path}/.claude/daemon-config.json' > \"\$tmp\" && mv \"\$tmp\" '${sw_path}/.claude/daemon-config.json'; fi"
560
+
561
+ if [[ "$host" == "localhost" || "$host" == "127.0.0.1" || "$host" == "::1" ]]; then
562
+ bash -c "$update_cmd" 2>/dev/null || true
563
+ else
564
+ local target="$host"
565
+ if [[ -n "$ssh_user" && "$ssh_user" != "null" ]]; then
566
+ target="${ssh_user}@${host}"
567
+ fi
568
+ # shellcheck disable=SC2086
569
+ ssh $ssh_opts "$target" "$update_cmd" 2>/dev/null || true
570
+ fi
571
+ done
572
+
573
+ emit_event "fleet.distributed_rebalance" \
574
+ "machines=$reachable_count" \
575
+ "total_workers=$allocated_total" \
576
+ "total_demand=$total_demand"
577
+ }
578
+
579
+ # ─── Machine Health Monitor ─────────────────────────────────────────────
580
+ # Checks machine heartbeats and marks unreachable machines
581
+
582
+ check_machine_health() {
583
+ local machines_file="$HOME/.shipwright/machines.json"
584
+ [[ ! -f "$machines_file" ]] && return 0
585
+
586
+ local machine_count
587
+ machine_count=$(jq '.machines | length' "$machines_file" 2>/dev/null || echo 0)
588
+ [[ "$machine_count" -eq 0 ]] && return 0
589
+
590
+ local ssh_opts="-o ConnectTimeout=5 -o BatchMode=yes -o StrictHostKeyChecking=accept-new"
591
+ local health_file="$HOME/.shipwright/machine-health.json"
592
+ local now
593
+ now=$(date +%s)
594
+
595
+ # Initialize health file if needed
596
+ if [[ ! -f "$health_file" ]]; then
597
+ echo '{}' > "$health_file"
598
+ fi
599
+
600
+ local i
601
+ for i in $(seq 0 $((machine_count - 1))); do
602
+ local name host ssh_user
603
+ name=$(jq -r --argjson i "$i" '.machines[$i].name' "$machines_file")
604
+ host=$(jq -r --argjson i "$i" '.machines[$i].host' "$machines_file")
605
+ ssh_user=$(jq -r --argjson i "$i" '.machines[$i].ssh_user // ""' "$machines_file")
606
+
607
+ local status="online"
608
+ local hb_cmd="if [ -f \"\$HOME/.shipwright/machine-heartbeat.json\" ]; then cat \"\$HOME/.shipwright/machine-heartbeat.json\"; else echo '{\"ts_epoch\":0}'; fi"
609
+
610
+ local hb_result=""
611
+ if [[ "$host" == "localhost" || "$host" == "127.0.0.1" || "$host" == "::1" ]]; then
612
+ hb_result=$(bash -c "$hb_cmd" 2>/dev/null || echo '{"ts_epoch":0}')
613
+ else
614
+ local target="$host"
615
+ if [[ -n "$ssh_user" && "$ssh_user" != "null" ]]; then
616
+ target="${ssh_user}@${host}"
617
+ fi
618
+ # shellcheck disable=SC2086
619
+ hb_result=$(ssh $ssh_opts "$target" "$hb_cmd" 2>/dev/null || echo "")
620
+ fi
621
+
622
+ if [[ -z "$hb_result" ]]; then
623
+ status="offline"
624
+ else
625
+ local hb_epoch
626
+ hb_epoch=$(echo "$hb_result" | jq -r '.ts_epoch // 0' 2>/dev/null || echo 0)
627
+ [[ ! "$hb_epoch" =~ ^[0-9]+$ ]] && hb_epoch=0
628
+
629
+ local age=$((now - hb_epoch))
630
+ if [[ "$hb_epoch" -eq 0 ]]; then
631
+ # No heartbeat file yet — treat as degraded if not localhost
632
+ if [[ "$host" == "localhost" || "$host" == "127.0.0.1" || "$host" == "::1" ]]; then
633
+ status="online"
634
+ else
635
+ status="degraded"
636
+ fi
637
+ elif [[ "$age" -gt 120 ]]; then
638
+ status="offline"
639
+ elif [[ "$age" -gt 60 ]]; then
640
+ status="degraded"
641
+ fi
642
+ fi
643
+
644
+ # Update health file atomically
645
+ local tmp_health="${health_file}.tmp.$$"
646
+ jq --arg name "$name" --arg status "$status" --argjson ts "$now" \
647
+ '.[$name] = {status: $status, checked_at: $ts}' "$health_file" > "$tmp_health" \
648
+ && mv "$tmp_health" "$health_file"
649
+
650
+ if [[ "$status" == "offline" ]]; then
651
+ emit_event "fleet.machine_offline" "machine=$name" "host=$host"
652
+ fi
653
+ done
654
+ }
655
+
656
+ # ─── Cross-Machine Event Aggregation ───────────────────────────────────
657
+
658
+ aggregate_remote_events() {
659
+ local machines_file="$HOME/.shipwright/machines.json"
660
+ [[ ! -f "$machines_file" ]] && return 0
661
+
662
+ local machine_count
663
+ machine_count=$(jq '.machines | length' "$machines_file" 2>/dev/null || echo 0)
664
+ [[ "$machine_count" -eq 0 ]] && return 0
665
+
666
+ local ssh_opts="-o ConnectTimeout=5 -o BatchMode=yes -o StrictHostKeyChecking=accept-new"
667
+ local offsets_file="$HOME/.shipwright/remote-offsets.json"
668
+
669
+ # Initialize offsets file if needed
670
+ if [[ ! -f "$offsets_file" ]]; then
671
+ echo '{}' > "$offsets_file"
672
+ fi
673
+
674
+ local i
675
+ for i in $(seq 0 $((machine_count - 1))); do
676
+ local name host ssh_user
677
+ name=$(jq -r --argjson i "$i" '.machines[$i].name' "$machines_file")
678
+ host=$(jq -r --argjson i "$i" '.machines[$i].host' "$machines_file")
679
+ ssh_user=$(jq -r --argjson i "$i" '.machines[$i].ssh_user // ""' "$machines_file")
680
+
681
+ # Skip localhost — we already have local events
682
+ if [[ "$host" == "localhost" || "$host" == "127.0.0.1" || "$host" == "::1" ]]; then
683
+ continue
684
+ fi
685
+
686
+ # Get last offset for this machine
687
+ local last_offset
688
+ last_offset=$(jq -r --arg n "$name" '.[$n] // 0' "$offsets_file" 2>/dev/null || echo 0)
689
+ [[ ! "$last_offset" =~ ^[0-9]+$ ]] && last_offset=0
690
+
691
+ local target="$host"
692
+ if [[ -n "$ssh_user" && "$ssh_user" != "null" ]]; then
693
+ target="${ssh_user}@${host}"
694
+ fi
695
+
696
+ # Fetch new events from remote (tail from offset)
697
+ local next_line=$((last_offset + 1))
698
+ local fetch_cmd="tail -n +${next_line} \"\$HOME/.shipwright/events.jsonl\" 2>/dev/null || true"
699
+ local new_events
700
+ # shellcheck disable=SC2086
701
+ new_events=$(ssh $ssh_opts "$target" "$fetch_cmd" 2>/dev/null || echo "")
702
+
703
+ if [[ -z "$new_events" ]]; then
704
+ continue
705
+ fi
706
+
707
+ # Count new lines
708
+ local new_lines
709
+ new_lines=$(echo "$new_events" | wc -l | tr -d ' ')
710
+
711
+ # Add machine= field to each event and append to local events
712
+ while IFS= read -r line; do
713
+ [[ -z "$line" ]] && continue
714
+ # Add machine field via jq
715
+ local enriched
716
+ enriched=$(echo "$line" | jq -c --arg m "$name" '. + {machine: $m}' 2>/dev/null || true)
717
+ if [[ -n "$enriched" ]]; then
718
+ echo "$enriched" >> "$EVENTS_FILE"
719
+ fi
720
+ done <<< "$new_events"
721
+
722
+ # Update offset
723
+ local new_offset=$((last_offset + new_lines))
724
+ local tmp_offsets="${offsets_file}.tmp.$$"
725
+ jq --arg n "$name" --argjson o "$new_offset" '.[$n] = $o' "$offsets_file" > "$tmp_offsets" \
726
+ && mv "$tmp_offsets" "$offsets_file"
727
+
728
+ done
729
+ }
730
+
731
+ # ─── Distributed Fleet Loop ────────────────────────────────────────────
732
+ # Background loop that runs distributed rebalancing + health checks + event aggregation
733
+
734
+ fleet_distributed_loop() {
735
+ local interval="${1:-30}"
736
+ local shutdown_flag="$HOME/.shipwright/fleet-distributed.shutdown"
737
+ rm -f "$shutdown_flag"
738
+
739
+ while true; do
740
+ sleep "$interval"
741
+
742
+ # Check for shutdown signal
743
+ if [[ -f "$shutdown_flag" ]]; then
744
+ break
745
+ fi
746
+
747
+ # Run distributed tasks
748
+ check_machine_health 2>/dev/null || true
749
+ fleet_rebalance_distributed 2>/dev/null || true
750
+ aggregate_remote_events 2>/dev/null || true
751
+ done
752
+ }
753
+
330
754
  # ─── Fleet Start ────────────────────────────────────────────────────────────
331
755
 
332
756
  fleet_start() {
@@ -435,7 +859,7 @@ fleet_start() {
435
859
 
436
860
  # Spawn daemon in detached tmux session
437
861
  tmux new-session -d -s "$session_name" \
438
- "cd '$repo_path' && '$SCRIPT_DIR/cct-daemon.sh' start $daemon_config_flag"
862
+ "cd '$repo_path' && '$SCRIPT_DIR/sw-daemon.sh' start $daemon_config_flag"
439
863
 
440
864
  # Record in fleet state
441
865
  local tmp2="${fleet_state_tmp}.2"
@@ -468,13 +892,35 @@ fleet_start() {
468
892
  pool_total=$(jq -r '.worker_pool.total_workers // 12' "$config_file")
469
893
  fleet_rebalance "$config_file" &
470
894
  local rebalancer_pid=$!
895
+ sleep 1
896
+ if ! kill -0 "$rebalancer_pid" 2>/dev/null; then
897
+ fleet_log ERROR "Rebalancer process exited immediately (PID: $rebalancer_pid)"
898
+ else
899
+ # Record rebalancer PID in fleet state
900
+ local tmp_rs="${FLEET_STATE}.tmp.$$"
901
+ jq --argjson pid "$rebalancer_pid" '.rebalancer_pid = $pid' "$FLEET_STATE" > "$tmp_rs" \
902
+ && mv "$tmp_rs" "$FLEET_STATE"
471
903
 
472
- # Record rebalancer PID in fleet state
473
- local tmp_rs="${FLEET_STATE}.tmp.$$"
474
- jq --argjson pid "$rebalancer_pid" '.rebalancer_pid = $pid' "$FLEET_STATE" > "$tmp_rs" \
475
- && mv "$tmp_rs" "$FLEET_STATE"
904
+ success "Worker pool: ${CYAN}${pool_total} total workers${RESET} (rebalancer PID: ${rebalancer_pid})"
905
+ fi
906
+ fi
476
907
 
477
- success "Worker pool: ${CYAN}${pool_total} total workers${RESET} (rebalancer PID: ${rebalancer_pid})"
908
+ # Start distributed worker loop if machines are registered
909
+ local machines_file="$HOME/.shipwright/machines.json"
910
+ if [[ -f "$machines_file" ]]; then
911
+ local dist_machine_count
912
+ dist_machine_count=$(jq '.machines | length' "$machines_file" 2>/dev/null || echo 0)
913
+ if [[ "$dist_machine_count" -gt 0 ]]; then
914
+ fleet_distributed_loop 30 &
915
+ local dist_pid=$!
916
+
917
+ # Record distributed loop PID in fleet state
918
+ local tmp_dist="${FLEET_STATE}.tmp.$$"
919
+ jq --argjson pid "$dist_pid" '.distributed_loop_pid = $pid' "$FLEET_STATE" > "$tmp_dist" \
920
+ && mv "$tmp_dist" "$FLEET_STATE"
921
+
922
+ success "Distributed workers: ${CYAN}${dist_machine_count} machines${RESET} (loop PID: ${dist_pid})"
923
+ fi
478
924
  fi
479
925
 
480
926
  echo ""
@@ -512,7 +958,10 @@ fleet_stop() {
512
958
  fi
513
959
 
514
960
  # Signal rebalancer to stop
515
- touch "$HOME/.claude-teams/fleet-rebalancer.shutdown"
961
+ touch "$HOME/.shipwright/fleet-rebalancer.shutdown"
962
+
963
+ # Signal distributed loop to stop
964
+ touch "$HOME/.shipwright/fleet-distributed.shutdown"
516
965
 
517
966
  # Kill rebalancer if running
518
967
  local rebalancer_pid
@@ -523,9 +972,19 @@ fleet_stop() {
523
972
  success "Stopped worker pool rebalancer (PID: ${rebalancer_pid})"
524
973
  fi
525
974
 
975
+ # Kill distributed loop if running
976
+ local dist_pid
977
+ dist_pid=$(jq -r '.distributed_loop_pid // empty' "$FLEET_STATE" 2>/dev/null || true)
978
+ if [[ -n "$dist_pid" ]]; then
979
+ kill "$dist_pid" 2>/dev/null || true
980
+ wait "$dist_pid" 2>/dev/null || true
981
+ success "Stopped distributed worker loop (PID: ${dist_pid})"
982
+ fi
983
+
526
984
  # Clean up flags
527
- rm -f "$HOME/.claude-teams/fleet-reload.flag"
528
- rm -f "$HOME/.claude-teams/fleet-rebalancer.shutdown"
985
+ rm -f "$HOME/.shipwright/fleet-reload.flag"
986
+ rm -f "$HOME/.shipwright/fleet-rebalancer.shutdown"
987
+ rm -f "$HOME/.shipwright/fleet-distributed.shutdown"
529
988
 
530
989
  local stopped=0
531
990
  while IFS= read -r repo_name; do
@@ -535,7 +994,7 @@ fleet_stop() {
535
994
  repo_path=$(jq -r --arg r "$repo_name" '.repos[$r].path' "$FLEET_STATE")
536
995
 
537
996
  # Try graceful shutdown via the daemon's shutdown flag
538
- local daemon_dir="$HOME/.claude-teams"
997
+ local daemon_dir="$HOME/.shipwright"
539
998
  local shutdown_flag="$daemon_dir/daemon.shutdown"
540
999
 
541
1000
  # Send shutdown signal to the daemon process inside the tmux session
@@ -606,6 +1065,23 @@ fleet_status() {
606
1065
  echo ""
607
1066
  fi
608
1067
 
1068
+ # Show distributed machine summary if available
1069
+ local machines_file="$HOME/.shipwright/machines.json"
1070
+ if [[ -f "$machines_file" ]]; then
1071
+ local dist_count
1072
+ dist_count=$(jq '.machines | length' "$machines_file" 2>/dev/null || echo 0)
1073
+ if [[ "$dist_count" -gt 0 ]]; then
1074
+ local health_file="$HOME/.shipwright/machine-health.json"
1075
+ local m_online=0 m_offline=0
1076
+ if [[ -f "$health_file" ]]; then
1077
+ m_online=$(jq '[to_entries[] | select(.value.status == "online")] | length' "$health_file" 2>/dev/null || echo 0)
1078
+ m_offline=$(jq '[to_entries[] | select(.value.status == "offline")] | length' "$health_file" 2>/dev/null || echo 0)
1079
+ fi
1080
+ echo -e " ${BOLD}Machines:${RESET} ${dist_count} registered ${GREEN}${m_online} online${RESET} ${RED}${m_offline} offline${RESET}"
1081
+ echo ""
1082
+ fi
1083
+ fi
1084
+
609
1085
  # Header
610
1086
  printf " ${BOLD}%-20s %-10s %-10s %-10s %-10s %-20s${RESET}\n" \
611
1087
  "REPO" "STATUS" "ACTIVE" "QUEUED" "DONE" "LAST POLL"
@@ -628,7 +1104,7 @@ fleet_status() {
628
1104
 
629
1105
  # Try to read daemon state from the repo's daemon state file
630
1106
  local active="-" queued="-" done="-" last_poll="-"
631
- local daemon_state="$HOME/.claude-teams/daemon-state.json"
1107
+ local daemon_state="$HOME/.shipwright/daemon-state.json"
632
1108
  if [[ -f "$daemon_state" ]]; then
633
1109
  active=$(jq -r '.active_jobs // 0' "$daemon_state" 2>/dev/null || echo "-")
634
1110
  queued=$(jq -r '.queued // 0' "$daemon_state" 2>/dev/null || echo "-")