trantor 0.17.0 → 0.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,38 @@
1
1
  {
2
2
  "name": "trantor",
3
- "owner": { "name": "Sasha Bogojevic", "email": "hello@hivedigitalllc.com" },
3
+ "owner": {
4
+ "name": "Sasha Bogojevic",
5
+ "email": "hello@hivedigitalllc.com"
6
+ },
4
7
  "metadata": {
5
8
  "description": "Trantor — the hub-world for AI agent crews: live message bus, presence, project Kanban/flow board + context-handoff for independent AI coding agents (Claude, Codex, Gemini, …)",
6
- "version": "0.17.0"
9
+ "version": "0.17.3"
7
10
  },
8
11
  "plugins": [
9
12
  {
10
13
  "name": "trantor",
11
14
  "source": "./",
12
15
  "description": "The hub-world for AI agent crews. Say \"fire up the crew\" and Claude becomes the architect: a plan-aware Advisor routes the work (solo / cheap inline calls / live crew of Codex, Gemini, Kimi & DeepSeek in their own terminal windows), a Kanban/flow command center with a testing gate tracks it, and an economics brain (Scrooge) keeps the receipts. Includes the relay MCP, a SessionStart auto-discovery hook, and a PreCompact context-handoff so a fresh session can take over a full window instead of compacting.",
13
- "version": "0.17.0",
14
- "author": { "name": "Sasha Bogojevic" },
16
+ "version": "0.17.3",
17
+ "author": {
18
+ "name": "Sasha Bogojevic"
19
+ },
15
20
  "category": "development",
16
- "keywords": ["multi-agent", "agent-crew", "orchestration", "coordination", "mcp", "hooks", "kanban", "context-handoff", "message-bus", "claude-code", "codex", "gemini", "llm-routing"]
21
+ "keywords": [
22
+ "multi-agent",
23
+ "agent-crew",
24
+ "orchestration",
25
+ "coordination",
26
+ "mcp",
27
+ "hooks",
28
+ "kanban",
29
+ "context-handoff",
30
+ "message-bus",
31
+ "claude-code",
32
+ "codex",
33
+ "gemini",
34
+ "llm-routing"
35
+ ]
17
36
  }
18
37
  ]
19
38
  }
@@ -1,11 +1,13 @@
1
1
  {
2
2
  "name": "trantor",
3
- "version": "0.17.0",
3
+ "version": "0.17.3",
4
4
  "description": "Trantor — the hub-world for AI agent crews: live message bus, presence, project Kanban/flow board + crew orchestration for independent AI coding agents (Claude, Codex, Gemini, Kimi, DeepSeek)",
5
5
  "mcpServers": {
6
6
  "relay": {
7
7
  "command": "node",
8
- "args": ["${CLAUDE_PLUGIN_ROOT}/mcp.mjs"]
8
+ "args": [
9
+ "${CLAUDE_PLUGIN_ROOT}/mcp.mjs"
10
+ ]
9
11
  }
10
12
  },
11
13
  "skills": "./skills/"
package/bin/advise.mjs CHANGED
@@ -122,7 +122,10 @@ export function advise(input, world = loadWorld()) {
122
122
  const cards = routing.map((r, i) => ({
123
123
  order: i + 1, title: r.title, difficulty: r.difficulty,
124
124
  assignee: r.executor === "scrooge" || r.executor === "orchestrator" ? undefined : `${r.executor}:<project>`,
125
- model: r.model || (["scrooge", "orchestrator"].includes(r.executor) ? undefined : `${r.executor}-default`),
125
+ // "auto" = resolve a LIVE model at spawn (the orchestrator runs `trantor up <agent>:<provider>
126
+ // --task --difficulty`, which picks the best live model). Was `<cli>-default` — a stale default.
127
+ model: r.model || (["scrooge", "orchestrator"].includes(r.executor) ? undefined : "auto"),
128
+ task: ["scrooge", "orchestrator"].includes(r.executor) ? undefined : r.kind,
126
129
  via: r.executor === "scrooge" ? "relay_scrooge" : "relay_task_add",
127
130
  deps_orders: r.executor === "orchestrator" && /integrat/i.test(r.title)
128
131
  ? routing.map((x, j) => j + 1).filter(j => j !== i + 1)
package/bin/cli.mjs CHANGED
@@ -22,6 +22,7 @@ switch (cmd) {
22
22
  case "verify": run("bin/crew-verify.mjs"); break;
23
23
  case "up": process.argv.splice(2, 1); spawn("/bin/bash", [join(ROOT, "bin/crew.sh"), "up", ...args], { stdio: "inherit", cwd: process.cwd() }).on("exit", c => process.exit(c ?? 0)); break;
24
24
  case "down": spawn("/bin/bash", [join(ROOT, "bin/crew.sh"), "down"], { stdio: "inherit", cwd: process.cwd() }).on("exit", c => process.exit(c ?? 0)); break;
25
+ case "swap": spawn("/bin/bash", [join(ROOT, "bin/crew.sh"), "swap", ...args], { stdio: "inherit", cwd: process.cwd() }).on("exit", c => process.exit(c ?? 0)); break;
25
26
  case "hub": run("hub.mjs"); break;
26
27
  case "watch": run("bin/relay-watch.mjs"); break;
27
28
  case "ui": {
@@ -48,8 +48,11 @@ async function api(path, body) {
48
48
  // ---- per-CLI invocation (first turn vs resume turn). {P} = prompt file path ----
49
49
  // CREW_MODEL env pins the model: each CLI gets its own flag via {M} (empty when unset).
50
50
  let MODEL = process.env.CREW_MODEL || "";
51
- // opencode expects provider/model qualify bare ids for the deepseek/opencode agents
52
- if (MODEL && !MODEL.includes("/") && (AGENT === "deepseek" || AGENT === "opencode")) MODEL = `deepseek/${MODEL}`;
51
+ // opencode expects provider/model. A BARE id for the `deepseek` agent qualifies to its
52
+ // own provider; `opencode` ids must already be provider-qualified (e.g.
53
+ // `zai-coding-plan/glm-5.1`) — never assume `deepseek/` for opencode (that mangled
54
+ // ZAI-coding-plan models into deepseek/…). `scrooge route` returns qualified ids.
55
+ if (MODEL && !MODEL.includes("/") && AGENT === "deepseek") MODEL = `deepseek/${MODEL}`;
53
56
  const CLI = {
54
57
  codex: { first: `codex exec{M} --skip-git-repo-check --dangerously-bypass-approvals-and-sandbox "$(cat {P})" < /dev/null`,
55
58
  next: `codex exec resume --last{M} --skip-git-repo-check --dangerously-bypass-approvals-and-sandbox "$(cat {P})" < /dev/null`, mflag: " -m " },
@@ -69,6 +72,46 @@ if (!cli) { console.error(`unknown agent '${AGENT}' (known: ${Object.keys(CLI).j
69
72
 
70
73
  const RULES = `Rules: you are ${SESSION} on the trantor crew. Work your assigned file(s), report on the bus (relay_send, <280 chars), move your Kanban card as you go (doing -> testing -> done; run the tests in 'testing', use 'failed' + a report if they break). When your work for THIS message is finished, END YOUR TURN — do NOT park, do NOT loop relay_wait; the runner waits for you and will wake you with the next message.`;
71
74
 
75
+ // ---- failure visibility ----------------------------------------------------
76
+ // A turn's CLI can fail (credits exhausted, auth, crash) and the runner would just
77
+ // re-park — staying green on the bus, telling the orchestrator NOTHING. These surface
78
+ // every non-zero turn to the bus in real time so the orchestrator (and `trantor swap`)
79
+ // can react, and flip presence to errored/down.
80
+ let consecFails = 0;
81
+ let lastErrText = "";
82
+ const ERRF = join(homedir(), ".agent-bus", `err-${AGENT}-${PROJ}.txt`);
83
+
84
+ function classifyFailure(exit, errText) {
85
+ const t = (errText || "").toLowerCase();
86
+ if (exit === 127) return "missing-cli";
87
+ if (/quota|insufficient|credit|balance|payment required|402|429|too many requests|rate.?limit|exceeded your|out of (credit|quota)/.test(t)) return "exhausted";
88
+ if (/unauthor|401|invalid[ _-]?api[ _-]?key|forbidden|403|token expired|expired/.test(t)) return "auth";
89
+ return "crashed";
90
+ }
91
+
92
+ async function reportFailure(exit, trigger) {
93
+ consecFails++;
94
+ const reason = classifyFailure(exit, lastErrText);
95
+ const down = consecFails >= 2;
96
+ const status = down ? `down: ${reason} · ${consecFails} fails` : `errored: ${reason}`;
97
+ await api("/register", { session: SESSION, project: PROJ, status }).catch(() => {});
98
+ const hint = reason === "exhausted" ? " — needs `trantor swap`"
99
+ : reason === "auth" ? " — check credentials"
100
+ : reason === "missing-cli" ? " — CLI not on PATH" : "";
101
+ const text = down
102
+ ? `🛑 ${SESSION} DOWN — ${consecFails} consecutive failures (${reason}, exit ${exit})${hint}`
103
+ : `⚠️ ${SESSION} turn FAILED (${trigger}, exit ${exit} · ${reason})${hint}`;
104
+ await api("/send", { from: SESSION, to: "all", text, project: PROJ }).catch(() => {});
105
+ log(`\x1b[31mreported failure to bus: ${reason} (exit ${exit})\x1b[0m`);
106
+ }
107
+
108
+ async function reportHealthy() {
109
+ if (consecFails === 0) return; // already healthy — don't spam
110
+ consecFails = 0;
111
+ await api("/register", { session: SESSION, project: PROJ, status: `active in ${PROJ}` }).catch(() => {});
112
+ await api("/send", { from: SESSION, to: "all", text: `✅ ${SESSION} recovered`, project: PROJ }).catch(() => {});
113
+ }
114
+
72
115
  let sid = "";
73
116
  function runTurn(prompt, isFirst, trigger = "kickoff") {
74
117
  TURN++; banner(trigger);
@@ -82,12 +125,16 @@ function runTurn(prompt, isFirst, trigger = "kickoff") {
82
125
  const envs = [join(homedir(), ".agent-bus", ".env"), cli.env].filter(f => f && existsSync(f));
83
126
  for (const f of envs.reverse()) cmd = `set -a; source ${f}; set +a; ${cmd}`; // ~/.agent-bus/.env wins
84
127
  log(`turn starting (${isFirst ? "fresh session" : "resume"})${MODEL ? ` · model=${MODEL}` : ""}`);
85
- // inherit stdio so the window shows the agent working live; also capture for sid-parsing
86
- const r = spawnSync("/bin/bash", ["-c", cli.sid ? `${cmd} | tee /dev/stderr` : cmd], {
128
+ // inherit stdio so the window shows the agent working live; also capture for sid-parsing.
129
+ // Tee stderr to ERRF (still shown live in the window) so a failed turn can be classified.
130
+ try { appendFileSync(ERRF, "", { flag: "w" }); } catch {}
131
+ const inner = cli.sid ? `${cmd} | tee /dev/stderr` : cmd;
132
+ const r = spawnSync("/bin/bash", ["-c", `{ ${inner} ; } 2> >(tee -a ${ERRF} >&2)`], {
87
133
  cwd: DIR, encoding: "utf8", stdio: cli.sid ? ["ignore", "pipe", "inherit"] : "inherit",
88
134
  env: { ...process.env, RELAY_URL: HUB, RELAY_AGENT: AGENT, RELAY_PROJECT: PROJ },
89
135
  maxBuffer: 16 * 1024 * 1024,
90
136
  });
137
+ try { lastErrText = readFileSync(ERRF, "utf8").slice(-4000); } catch { lastErrText = ""; }
91
138
  if (cli.sid && r.stdout) { const m = r.stdout.match(cli.sid); if (m) sid = m[1]; }
92
139
  telemetry({ ts: Date.now(), agent: AGENT, project: PROJ, turn: TURN, trigger, model: MODEL || "default", duration_ms: Date.now() - t0, exit: r.status });
93
140
  log(`turn ended (exit ${r.status}, ${((Date.now() - t0) / 1000).toFixed(0)}s)`);
@@ -114,7 +161,8 @@ async function loadLessons() {
114
161
  await api("/register", { session: SESSION, project: PROJ, status: "crew member booting" }).catch(() => {});
115
162
 
116
163
  let pendingBcast = [];
117
- runTurn(KICKOFF + LESSONS, true, "kickoff");
164
+ const ec0 = runTurn(KICKOFF + LESSONS, true, "kickoff");
165
+ if (ec0) await reportFailure(ec0, "kickoff"); // a failed kickoff = the "fired up, died, nobody knew" case
118
166
  log(`parked — long-polling the bus as ${SESSION} (free; this poll is also the heartbeat)`);
119
167
 
120
168
  while (true) {
@@ -134,7 +182,9 @@ async function loadLessons() {
134
182
  pendingBcast = [];
135
183
  const lines = wake.map(m => `[${m.from}${m.to === "all" ? " -> all (mentions you)" : ""}]: ${m.text}`).join("\n");
136
184
  const prompt = `NEW BUS MESSAGE${wake.length > 1 ? "S" : ""} for you:\n${lines}\n${ctx}\nAct on what's addressed to you, then end your turn.\n\n${RULES}`;
137
- await loadLessons(); runTurn(prompt + LESSONS, false, direct.length ? "direct message" : "@mention");
185
+ await loadLessons();
186
+ const ec = runTurn(prompt + LESSONS, false, direct.length ? "direct message" : "@mention");
187
+ if (ec) await reportFailure(ec, "message"); else await reportHealthy();
138
188
  log("parked — waiting for the next message");
139
189
  }
140
190
  })();
@@ -13,8 +13,16 @@ import { homedir } from "node:os";
13
13
  const args = process.argv.slice(2);
14
14
  const ti = args.indexOf("--timeout");
15
15
  const TIMEOUT = ti >= 0 ? Number(args.splice(ti, 2)[1]) : 30;
16
+ // --since <ms>: the spawn epoch captured by the launcher BEFORE it spawned the windows.
17
+ // An agent counts as up the moment it registers (even "booting") with lastSeen >= this epoch.
18
+ // Without it we'd default to "now", but the launcher only starts us AFTER the spawn+serialize
19
+ // sleep, so a runner's early "booting" registration can land just before our own start and then
20
+ // go silent through a slow first turn (e.g. opencode+GLM cold start ~40s) — a false failure that
21
+ // triggers a duplicate respawn. Anchoring to the pre-spawn epoch removes that race.
22
+ const si = args.indexOf("--since");
23
+ const SINCE = si >= 0 ? Number(args.splice(si, 2)[1]) : NaN;
16
24
  const [PROJ, ...AGENTS] = args;
17
- if (!PROJ || !AGENTS.length) { console.error("usage: crew-verify.mjs <project> <agent...> [--timeout 30]"); process.exit(2); }
25
+ if (!PROJ || !AGENTS.length) { console.error("usage: crew-verify.mjs <project> <agent...> [--timeout 30] [--since <ms>]"); process.exit(2); }
18
26
 
19
27
  function hubUrl() {
20
28
  if (process.env.RELAY_URL) return process.env.RELAY_URL;
@@ -22,15 +30,21 @@ function hubUrl() {
22
30
  return "http://127.0.0.1:4477";
23
31
  }
24
32
  const HUB = hubUrl();
25
- const START = Date.now();
33
+ // Two distinct clocks, deliberately separate:
34
+ // - FRESH_SINCE: the freshness threshold. A registration counts only if lastSeen >= this.
35
+ // Prefer the launcher's pre-spawn epoch (so an early "booting" beat counts); else our start.
36
+ // - DEADLINE: how long WE poll, always measured from our own start so it can't be skewed
37
+ // (e.g. an epoch far in the past wouldn't shrink the window; one in the future wouldn't hang).
38
+ const DEADLINE = Date.now() + TIMEOUT * 1000;
39
+ const FRESH_SINCE = Number.isFinite(SINCE) ? SINCE : Date.now();
26
40
 
27
41
  (async () => {
28
42
  const want = new Set(AGENTS.map(a => `${a}:${PROJ}`));
29
43
  const up = new Set();
30
- while (Date.now() - START < TIMEOUT * 1000 && up.size < want.size) {
44
+ while (Date.now() < DEADLINE && up.size < want.size) {
31
45
  try {
32
46
  const { peers } = await (await fetch(`${HUB}/peers`)).json();
33
- for (const p of peers) if (want.has(p.session) && p.lastSeen >= START) up.add(p.session);
47
+ for (const p of peers) if (want.has(p.session) && p.lastSeen >= FRESH_SINCE) up.add(p.session);
34
48
  } catch {}
35
49
  if (up.size < want.size) await new Promise(s => setTimeout(s, 1500));
36
50
  }
package/bin/crew.sh CHANGED
@@ -23,7 +23,8 @@ mkdir -p "$HOME/.agent-bus"
23
23
 
24
24
  down() {
25
25
  [ -f "$STATE" ] || { echo "no tracked crew windows"; return 0; }
26
- while read -r wid; do
26
+ while IFS=$'\t' read -r a wid; do
27
+ [ -n "${wid:-}" ] || wid="$a" # back-compat: old STATE stored bare window ids
27
28
  TTY=$(osascript -e "tell application \"Terminal\" to get tty of (first window whose id is $wid)" 2>/dev/null)
28
29
  if [ -n "$TTY" ]; then
29
30
  # SIGKILL everything on the tty, login included — TUIs trap SIGTERM, and a live login
@@ -32,7 +33,8 @@ down() {
32
33
  fi
33
34
  done < "$STATE"
34
35
  sleep 1
35
- while read -r wid; do
36
+ while IFS=$'\t' read -r a wid; do
37
+ [ -n "${wid:-}" ] || wid="$a"
36
38
  osascript -e "tell application \"Terminal\" to close (first window whose id is $wid)" 2>/dev/null
37
39
  done < "$STATE"
38
40
  sleep 0.5
@@ -44,8 +46,50 @@ down() {
44
46
  echo "crew torn down"
45
47
  }
46
48
  [ "$CMD" = "down" ] && { down; exit 0; }
47
- [ "$CMD" != "up" ] && { echo "usage: crew.sh up <agent...> | crew.sh down"; exit 1; }
48
- [ $# -eq 0 ] && { echo "usage: crew.sh up codex gemini kimi deepseek (any subset; agent:model pins a model, e.g. deepseek:deepseek-v4-pro)"; exit 1; }
49
+ case "$CMD" in up|swap) ;; *) echo "usage: crew.sh up <agent...> | crew.sh swap <oldAgent> <newAgent[:provider[/model]]> | crew.sh down"; exit 1 ;; esac
50
+
51
+ # --task/--difficulty drive LAZY live-model selection for provider-only specs (agent:provider).
52
+ # An agent spec is one of: `codex` (CLI default) · `opencode:zai-coding-plan` (provider only →
53
+ # pick the best live model now) · `opencode:zai-coding-plan/glm-5.2` (full pin, used as-is).
54
+ TASK="code"; DIFF="medium"; _ARGS=()
55
+ while [ $# -gt 0 ]; do
56
+ case "$1" in
57
+ --task) TASK="${2:-code}"; shift 2 || shift ;;
58
+ --difficulty|--diff) DIFF="${2:-medium}"; shift 2 || shift ;;
59
+ *) _ARGS+=("$1"); shift ;;
60
+ esac
61
+ done
62
+ if [ ${#_ARGS[@]} -gt 0 ]; then set -- "${_ARGS[@]}"; else set --; fi
63
+ [ $# -eq 0 ] && { echo "usage: crew.sh up [--task K --difficulty D] codex gemini kimi deepseek (agent:provider picks a live model; agent:provider/model pins one)"; exit 1; }
64
+
65
+ # scrooge (the model-routing brain) is bundled with this trantor install; fall back to PATH.
66
+ SCROOGE="$BUS_DIR/engine/bin/scrooge"
67
+ [ -f "$SCROOGE" ] || SCROOGE="$(command -v scrooge 2>/dev/null || echo scrooge)"
68
+
69
+ # resolve_model <agent> <provider> <task> <diff> -> echoes a runner-ready model id, or empty
70
+ # (→ CLI default). Enumeration is CLI-aware and never guesses an endpoint: opencode-managed
71
+ # agents list via `opencode models <provider>`; others self-enumerate via the provider's /models.
72
+ resolve_model() {
73
+ local agent="$1" provider="$2" task="$3" diff="$4" cands="" out=""
74
+ case "$agent" in
75
+ opencode|deepseek)
76
+ cands="$(opencode models "$provider" 2>/dev/null | tr '\n' ' ')"
77
+ [ -n "$cands" ] || { echo "[crew] no live models via 'opencode models $provider' — CLI default" >&2; return 0; }
78
+ out="$(python3 "$SCROOGE" route --candidates "$cands" -t "$task" -d "$diff" --json 2>/dev/null)" ;;
79
+ *)
80
+ out="$(python3 "$SCROOGE" route --provider "$provider" -t "$task" -d "$diff" --json 2>/dev/null)" ;;
81
+ esac
82
+ [ -n "$out" ] || { echo "[crew] live model selection failed for $agent:$provider — CLI default" >&2; return 0; }
83
+ printf '%s' "$out" | python3 -c 'import json,sys
84
+ try: print(json.load(sys.stdin).get("qualified") or "")
85
+ except Exception: pass' 2>/dev/null
86
+ }
87
+
88
+ # epoch_ms: milliseconds since the epoch, captured BEFORE a spawn so crew-verify can count an
89
+ # agent the moment it registers (even "booting"), instead of racing its own start time. A slow
90
+ # first turn (opencode+GLM cold start ~40s) means no heartbeat for the whole turn; anchoring the
91
+ # verifier to this pre-spawn epoch lets the early "booting" registration satisfy it.
92
+ epoch_ms() { python3 -c 'import time;print(int(time.time()*1000))'; }
49
93
 
50
94
  if [ "$(uname)" != "Darwin" ]; then
51
95
  echo "Window spawning is macOS-only. Run one per terminal, in $DIR:"
@@ -82,37 +126,87 @@ spawn_grid() { # $@ = agents — (re)computes the grid for THIS batch and spawn
82
126
  [ $N -le 2 ] && COLS=1
83
127
  local ROWS=$(( (N + COLS - 1) / COLS ))
84
128
  local CW=$(( GW / COLS )) CH=$(( GH / ROWS ))
85
- local i=0 SPEC AGENT MODEL
129
+ local i=0 SPEC AGENT FIELD MODEL
86
130
  for SPEC in "$@"; do
87
- AGENT="${SPEC%%:*}" # agent[:model] — model rides in as CREW_MODEL
88
- MODEL=""; [ "$SPEC" != "$AGENT" ] && MODEL="${SPEC#*:}"
131
+ AGENT="${SPEC%%:*}" # agent[:provider[/model]] — model rides in as CREW_MODEL
132
+ FIELD=""; [ "$SPEC" != "$AGENT" ] && FIELD="${SPEC#*:}"
133
+ MODEL=""
134
+ if [ -n "$FIELD" ]; then
135
+ case "$FIELD" in
136
+ */*) MODEL="$FIELD" ;; # full pin: provider/model
137
+ *) MODEL="$(resolve_model "$AGENT" "$FIELD" "$TASK" "$DIFF")" # provider only: pick live now
138
+ if [ -n "$MODEL" ]; then echo " → $AGENT: live model $MODEL ($FIELD · $TASK/$DIFF)"
139
+ else echo " → $AGENT: '$FIELD' live selection unavailable — CLI default"; fi ;;
140
+ esac
141
+ fi
89
142
  local C=$(( i % COLS )) R=$(( i / COLS ))
90
- local X1=$(( GX + C * CW )) Y1=$(( GY + R * CH ))
91
- osascript \
143
+ local X1=$(( GX + C * CW )) Y1=$(( GY + R * CH )) WID=""
144
+ WID="$(osascript \
92
145
  -e 'tell application "Terminal"' \
93
146
  -e " set w to do script \"cd $DIR && clear && CREW_MODEL=$MODEL node $BUS_DIR/bin/crew-runner.mjs $AGENT $DIR\"" \
94
147
  -e " set custom title of w to \"$(echo "$AGENT" | tr '[:lower:]' '[:upper:]') — trantor crew\"" \
95
148
  -e " set theWin to first window whose tabs contains w" \
96
149
  -e " set bounds of theWin to {$X1, $Y1, $(( X1 + CW )), $(( Y1 + CH ))}" \
97
150
  -e " return id of theWin" \
98
- -e 'end tell' >> "$STATE" 2>/dev/null && echo " → $AGENT window spawned" || echo " ✗ $AGENT osascript spawn ERROR"
151
+ -e 'end tell' 2>/dev/null)"
152
+ if [ -n "$WID" ]; then printf '%s\t%s\n' "$AGENT" "$WID" >> "$STATE"; echo " → $AGENT window spawned"; else echo " ✗ $AGENT osascript spawn ERROR"; fi
99
153
  sleep 1.2 # serialize — rapid-fire 'do script' calls race and silently drop windows
100
154
  i=$(( i + 1 ))
101
155
  done
102
156
  }
103
157
 
158
+ # swap <oldAgent> <newSpec>: replace a live agent (e.g. one reported exhausted) with a fresh
159
+ # one whose model is live-selected. Tears down the old agent's window, spawns the new spec.
160
+ swap() {
161
+ local OLD="${1:-}" NEWSPEC="${2:-}"
162
+ [ -n "$OLD" ] && [ -n "$NEWSPEC" ] || { echo "usage: trantor swap <oldAgent> <newAgent[:provider[/model]]> [--task K --difficulty D]"; exit 1; }
163
+ if [ -f "$STATE" ]; then
164
+ local tmp="$STATE.tmp"; : > "$tmp"
165
+ while IFS=$'\t' read -r a wid; do
166
+ [ -n "${wid:-}" ] || { wid="$a"; a=""; }
167
+ if [ "$a" = "$OLD" ]; then
168
+ echo "— tearing down old agent '$OLD' (window $wid) —"
169
+ local TTY; TTY=$(osascript -e "tell application \"Terminal\" to get tty of (first window whose id is $wid)" 2>/dev/null)
170
+ [ -n "$TTY" ] && for pid in $(ps -t "${TTY#/dev/}" -o pid= 2>/dev/null); do kill -9 "$pid" 2>/dev/null; done
171
+ sleep 0.5
172
+ osascript -e "tell application \"Terminal\" to close (first window whose id is $wid)" 2>/dev/null
173
+ else
174
+ printf '%s\t%s\n' "$a" "$wid" >> "$tmp"
175
+ fi
176
+ done < "$STATE"
177
+ mv "$tmp" "$STATE"
178
+ fi
179
+ echo "— spawning replacement: $NEWSPEC ($TASK/$DIFF) —"
180
+ local SWAP_EPOCH; SWAP_EPOCH=$(epoch_ms)
181
+ spawn_grid "$NEWSPEC"
182
+ local NEWAGENT="${NEWSPEC%%:*}"
183
+ echo "— verifying replacement on the bus —"
184
+ node "$BUS_DIR/bin/crew-verify.mjs" "$PROJ" "$NEWAGENT" --since "$SWAP_EPOCH" --timeout 30
185
+ echo "— swapped. RESEND the contract to '$NEWAGENT' (it joined fresh with no context). —"
186
+ }
187
+
188
+ if [ "$CMD" = "swap" ]; then swap "$@"; exit 0; fi
189
+
190
+ # spec_for_agent <agent> <spec...>: echo the FULL original spec (agent:provider[/model]) whose
191
+ # agent part matches — so a retry respawns on the SAME live-selected model, not the CLI default.
192
+ spec_for_agent() { local want="$1"; shift; local s; for s in "$@"; do [ "${s%%:*}" = "$want" ] && { printf '%s' "$s"; return; }; done; printf '%s' "$want"; }
193
+
104
194
  echo "— spawning crew (serialized) —"
195
+ SPAWN_EPOCH=$(epoch_ms)
105
196
  spawn_grid "$@"
106
197
 
107
198
  echo "— verifying on the bus (the spawn is not the truth; the bus is) —"
108
199
  AGENTS_ONLY=$(for a in "$@"; do printf "%s " "${a%%:*}"; done)
109
- VER=$(node "$BUS_DIR/bin/crew-verify.mjs" "$PROJ" $AGENTS_ONLY --timeout 30)
200
+ VER=$(node "$BUS_DIR/bin/crew-verify.mjs" "$PROJ" $AGENTS_ONLY --since "$SPAWN_EPOCH" --timeout 30)
110
201
  echo "$VER"
111
202
  RETRY=$(echo "$VER" | grep "^FAILED:" | cut -d: -f2 | tr ',' ' ')
112
203
  if [ -n "${RETRY// }" ]; then
113
- echo "— retrying failed spawns: $RETRY —"
114
- spawn_grid $RETRY
115
- VER2=$(node "$BUS_DIR/bin/crew-verify.mjs" "$PROJ" $RETRY --timeout 30)
204
+ # map failed agent names back to their FULL specs (preserve provider/model on respawn)
205
+ RETRY_SPECS=""; for a in $RETRY; do RETRY_SPECS="$RETRY_SPECS $(spec_for_agent "$a" "$@")"; done
206
+ echo " retrying failed spawns:$RETRY_SPECS —"
207
+ RETRY_EPOCH=$(epoch_ms)
208
+ spawn_grid $RETRY_SPECS
209
+ VER2=$(node "$BUS_DIR/bin/crew-verify.mjs" "$PROJ" $RETRY --since "$RETRY_EPOCH" --timeout 30)
116
210
  echo "$VER2"
117
211
  STILL=$(echo "$VER2" | grep "^FAILED:" | cut -d: -f2)
118
212
  if [ -n "$STILL" ]; then
@@ -590,6 +590,86 @@ def cmd_models(reg, args):
590
590
  for i in ids:
591
591
  print(i)
592
592
 
593
+ def _model_version_key(mid):
594
+ """Best-effort 'newest' ordering when no capability data exists: the trailing
595
+ numeric version wins (glm-5.2 > glm-5.1 > glm-4.7). Returns a sortable tuple."""
596
+ m = re.search(r"(\d+(?:\.\d+)*)", mid)
597
+ return tuple(int(x) for x in m.group(1).split(".")) if m else (0,)
598
+
599
+ _SMALL_VARIANT = ("air", "mini", "flash", "lite", "turbo", "nano", "small")
600
+ def _is_small_variant(mid):
601
+ low = mid.lower()
602
+ return any(s in low for s in _SMALL_VARIANT)
603
+
604
+ def _heuristic_pick(cands, difficulty):
605
+ """No capability data → pick by newest version, difficulty-aware on size variants
606
+ (easy prefers a small/turbo variant; medium/hard prefers the full model)."""
607
+ top_ver = max(_model_version_key(c) for c in cands)
608
+ top = [c for c in cands if _model_version_key(c) == top_ver]
609
+ if difficulty == "easy":
610
+ smalls = sorted(c for c in top if _is_small_variant(c))
611
+ if smalls:
612
+ return smalls[0]
613
+ bigs = sorted(c for c in top if not _is_small_variant(c))
614
+ return (bigs or sorted(top))[0]
615
+
616
+ def cmd_route(reg, args):
617
+ """Pick ONE deliberate live model for the crew path (task × difficulty).
618
+ Enumeration is the CALLER's job for CLI-managed providers — pass ids via
619
+ --candidates (e.g. `opencode models <provider>`); for raw-API providers in the
620
+ registry, --provider self-enumerates via /models. Scoring uses capabilities when
621
+ available, else a newest-version heuristic. JSON with --json. NEVER guesses an
622
+ endpoint."""
623
+ caps = load_caps()
624
+ provider = args.provider
625
+ raw = []
626
+ if args.candidates:
627
+ raw = [c.strip() for c in re.split(r"[,\s]+", args.candidates) if c.strip()]
628
+ elif provider:
629
+ if provider not in reg["providers"]:
630
+ print(json.dumps({"error": "unknown provider '%s' and no --candidates given" % provider}))
631
+ raise SystemExit(2)
632
+ if not provider_key(reg, provider):
633
+ print(json.dumps({"error": "no API key for provider '%s'" % provider}))
634
+ raise SystemExit(2)
635
+ raw = list_live_models(reg, provider, ttl=0)
636
+ if not raw:
637
+ print(json.dumps({"error": "no candidate models (provider offline / no key / empty --candidates)"}))
638
+ raise SystemExit(2)
639
+ # bare id (strip any provider/ prefix) → first original (qualified) form seen
640
+ by_bare = {}
641
+ for r in raw:
642
+ by_bare.setdefault(r.split("/")[-1], r)
643
+ bare_ids = list(by_bare.keys())
644
+ task = args.task or "code"
645
+ difficulty = args.difficulty or "medium"
646
+ metric = task_metric(task)
647
+ # Only trust the capability-weighted router when the NEWEST-version candidate is itself
648
+ # scored. Otherwise stale caps (which lag new releases) would demote a newer model to 0
649
+ # and pick an old one — the exact failure that motivated this. Then prefer newest.
650
+ top_ver = max(_model_version_key(b) for b in bare_ids)
651
+ top_scored = any(model_quality(caps, b, metric) > 0
652
+ for b in bare_ids if _model_version_key(b) == top_ver)
653
+ if top_scored:
654
+ scored = weigh_candidates(reg, caps, bare_ids, task, difficulty)
655
+ pick_bare = scored[0][0]
656
+ weighed, why = True, "capability×cost ranked (%s · %s floor) over %d live" % (task, difficulty, len(bare_ids))
657
+ cand_out = [{"model": b, "score": round(s, 4)} for b, s in scored[:6]]
658
+ else:
659
+ pick_bare = _heuristic_pick(bare_ids, difficulty)
660
+ weighed, why = False, "capabilities lag the newest model(s) → newest-version heuristic (difficulty-aware); run scrooge-capabilities to enrich"
661
+ cand_out = [{"model": b} for b in sorted(bare_ids, key=_model_version_key, reverse=True)[:6]]
662
+ original = by_bare[pick_bare]
663
+ qualified = original if "/" in original else (("%s/%s" % (provider, pick_bare)) if provider else pick_bare)
664
+ result = {"provider": provider, "model": pick_bare, "qualified": qualified,
665
+ "weighed": weighed, "task": task, "difficulty": difficulty, "why": why, "candidates": cand_out}
666
+ if getattr(args, "json", False):
667
+ print(json.dumps(result))
668
+ else:
669
+ err(DIM("[scrooge route] %s → %s (%s)" % (provider or "?", qualified, "weighed" if weighed else "heuristic")))
670
+ print(qualified)
671
+ return 0
672
+
593
673
  def cmd_list(reg, args):
594
674
  print("PROVIDERS (live = key present):")
595
675
  for p, cfg in reg["providers"].items():
@@ -1207,11 +1287,19 @@ def main():
1207
1287
  reg = load_registry()
1208
1288
 
1209
1289
  # Manual subcommand dispatch (avoids argparse subparser vs positional-prompt clash).
1210
- if argv and argv[0] in ("models", "list", "ledger", "watch", "learn", "lessons", "forget"):
1290
+ if argv and argv[0] in ("models", "route", "list", "ledger", "watch", "learn", "lessons", "forget"):
1211
1291
  cmd, rest = argv[0], argv[1:]
1212
1292
  if cmd == "models":
1213
1293
  ap = argparse.ArgumentParser(prog="scrooge models"); ap.add_argument("provider")
1214
1294
  return cmd_models(reg, ap.parse_args(rest))
1295
+ if cmd == "route":
1296
+ ap = argparse.ArgumentParser(prog="scrooge route")
1297
+ ap.add_argument("--provider", "-p", help="registry API provider to self-enumerate via /models")
1298
+ ap.add_argument("--candidates", "-c", help="comma/space-separated model ids (e.g. `opencode models <p>`); takes precedence over --provider enumeration")
1299
+ ap.add_argument("--task", "-t", default="code")
1300
+ ap.add_argument("--difficulty", "-d", choices=["easy", "medium", "hard"], default="medium")
1301
+ ap.add_argument("--json", action="store_true")
1302
+ return cmd_route(reg, ap.parse_args(rest))
1215
1303
  if cmd == "list":
1216
1304
  return cmd_list(reg, None)
1217
1305
  if cmd == "ledger":
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env node
2
+ // trantor PostToolUse heartbeat — keeps a live session's presence fresh on the bus.
3
+ //
4
+ // Registration (sessionstart.mjs / mcp.mjs) tells the hub a session was BORN; nothing
5
+ // tells it the session is still ALIVE. So presence decays after RELAY_ONLINE_MS (5 min)
6
+ // and the dashboard rots into a graveyard of "idle" boards even while sessions work —
7
+ // worst right after the laptop wakes from sleep, when every lastSeen is stale at once and
8
+ // there is no resume event to re-register. This hook fixes that: every tool call (a true
9
+ // sign of life) refreshes lastSeen, throttled so we hit the hub at most once per window.
10
+ // The first tool call after a wake re-greens the session — that first action IS the resume signal.
11
+ //
12
+ // Cheap + fail-silent by contract: a per-session stamp file gates the network call to once
13
+ // per HEARTBEAT_MS, and a short fetch timeout means we never add real latency to a tool call.
14
+ // We POST /register WITHOUT a status field so the session's meaningful status is preserved
15
+ // (the hub only overwrites status when one is supplied).
16
+ import { readFileSync, writeFileSync, existsSync } from "node:fs";
17
+ import { join, basename } from "node:path";
18
+ import { homedir, hostname } from "node:os";
19
+
20
+ const HEARTBEAT_MS = Number(process.env.RELAY_HEARTBEAT_MS || 60 * 1000);
21
+ const FETCH_TIMEOUT_MS = Number(process.env.RELAY_HEARTBEAT_TIMEOUT_MS || 1500);
22
+
23
+ function relayUrl() {
24
+ if (process.env.RELAY_URL) return process.env.RELAY_URL;
25
+ try {
26
+ const cfg = join(homedir(), ".agent-bus", "config.json");
27
+ if (existsSync(cfg)) { const u = JSON.parse(readFileSync(cfg, "utf8")).url; if (u) return u; }
28
+ } catch {}
29
+ return "http://127.0.0.1:4477";
30
+ }
31
+
32
+ async function main() {
33
+ const projectDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
34
+ // Mirror sessionstart.mjs: home-directory sessions aren't project work — don't register
35
+ // them (would spawn a phantom "<username>" board). Opt in with RELAY_SESSION/RELAY_PROJECT.
36
+ if (!process.env.RELAY_SESSION && !process.env.RELAY_PROJECT && projectDir === homedir()) return;
37
+
38
+ // Mirror mcp.mjs identity resolution EXACTLY so we refresh the same peer the relay
39
+ // registered (not a phantom): RELAY_PROJECT wins for project; RELAY_SESSION wins for
40
+ // identity, else a RELAY_AGENT brand ("codex","kimi",…) per project, else hostname:project.
41
+ const project = process.env.RELAY_PROJECT || basename(projectDir);
42
+ const session = process.env.RELAY_SESSION
43
+ || (process.env.RELAY_AGENT ? `${process.env.RELAY_AGENT}:${project}` : `${hostname()}:${project}`);
44
+
45
+ // Throttle: only ping if HEARTBEAT_MS has elapsed since the last ping for THIS session.
46
+ const stamp = join(homedir(), ".agent-bus", `hb-${session.replace(/[^A-Za-z0-9_.-]/g, "_")}.stamp`);
47
+ try {
48
+ if (existsSync(stamp)) {
49
+ const last = Number(readFileSync(stamp, "utf8")) || 0;
50
+ if (Date.now() - last < HEARTBEAT_MS) return; // within window — nothing to do
51
+ }
52
+ } catch {}
53
+ // Write the stamp BEFORE the network call so rapid concurrent tool calls don't all fire.
54
+ try { writeFileSync(stamp, String(Date.now())); } catch {}
55
+
56
+ // POST /register with no status -> hub refreshes lastSeen + project, preserves status.
57
+ try {
58
+ await fetch(`${relayUrl()}/register`, {
59
+ method: "POST",
60
+ headers: { "content-type": "application/json" },
61
+ body: JSON.stringify({ session, project }),
62
+ signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
63
+ });
64
+ } catch {}
65
+ }
66
+
67
+ // Never block or break the tool flow: swallow everything, always exit clean.
68
+ main().catch(() => {}).finally(() => process.exit(0));
package/hooks/hooks.json CHANGED
@@ -1,9 +1,12 @@
1
1
  {
2
- "description": "trantor — auto-register each session + inject live roster (SessionStart); write a handoff before compaction (PreCompact)",
2
+ "description": "trantor — auto-register each session + inject live roster (SessionStart); heartbeat presence on every tool call so live sessions stay green and recover after sleep (PostToolUse); write a handoff before compaction (PreCompact)",
3
3
  "hooks": {
4
4
  "SessionStart": [
5
5
  { "matcher": "", "hooks": [ { "type": "command", "command": "node ${CLAUDE_PLUGIN_ROOT}/hooks/sessionstart.mjs" } ] }
6
6
  ],
7
+ "PostToolUse": [
8
+ { "matcher": "", "hooks": [ { "type": "command", "command": "node ${CLAUDE_PLUGIN_ROOT}/hooks/heartbeat.mjs" } ] }
9
+ ],
7
10
  "PreCompact": [
8
11
  { "matcher": "", "hooks": [ { "type": "command", "command": "node ${CLAUDE_PLUGIN_ROOT}/hooks/precompact.mjs" } ] }
9
12
  ]
@@ -61,6 +61,14 @@ let additionalContext = "";
61
61
  try {
62
62
  await readStdin();
63
63
  const projectDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
64
+ // Sessions started in the home directory itself aren't project work — registering
65
+ // them spawns a phantom "<username>" project board on the dashboard. Set
66
+ // RELAY_SESSION (or RELAY_PROJECT) to deliberately put a home-dir session on the bus.
67
+ if (!process.env.RELAY_SESSION && !process.env.RELAY_PROJECT && projectDir === homedir()) {
68
+ process.stderr.write("[trantor] session in the home directory — not registering on the bus (set RELAY_SESSION to opt in)\n");
69
+ process.stdout.write("{}");
70
+ process.exit(0);
71
+ }
64
72
  const session = process.env.RELAY_SESSION || `${hostname()}:${basename(projectDir)}`;
65
73
  const url = relayUrl();
66
74
 
package/hub.mjs CHANGED
@@ -11,7 +11,7 @@ import { join } from "node:path";
11
11
 
12
12
  const PORT = Number(process.env.RELAY_PORT || 4477);
13
13
  const HOST = process.env.RELAY_HOST || "127.0.0.1";
14
- const DATA_DIR = join(homedir(), ".agent-bus");
14
+ const DATA_DIR = process.env.RELAY_DATA_DIR || join(homedir(), ".agent-bus");
15
15
  const DATA = join(DATA_DIR, "bus.json");
16
16
  const ONLINE_MS = Number(process.env.RELAY_ONLINE_MS || 5 * 60 * 1000);
17
17
  if (!existsSync(DATA_DIR)) mkdirSync(DATA_DIR, { recursive: true });
@@ -38,6 +38,7 @@ try { UI = readFileSync(new URL("./ui.html", import.meta.url), "utf8"); } catch
38
38
  // open SSE streams: [{ session, res }]
39
39
  const streams = [];
40
40
  const now = () => Date.now();
41
// Render an elapsed duration in ms as a coarse age label: whole minutes up to 90m,
// whole hours up to 48h, whole days beyond that.
const fmtAge = ms => {
  const minutes = Math.floor(ms / 60000);
  if (minutes > 48 * 60) return `${Math.floor(minutes / 1440)}d ago`;
  if (minutes > 90) return `${Math.floor(minutes / 60)}h ago`;
  return `${minutes}m ago`;
};
41
42
  function body(req) { return new Promise(r => { let d = ""; req.on("data", c => (d += c)); req.on("end", () => { try { r(d ? JSON.parse(d) : {}); } catch { r({}); } }); }); }
42
43
  function json(res, code, obj) { res.writeHead(code, { "content-type": "application/json", "access-control-allow-origin": "*" }); res.end(JSON.stringify(obj)); }
43
44
  function touch(session, status, project) {
@@ -50,6 +51,15 @@ function touch(session, status, project) {
50
51
  if (!p.project && session.includes(":")) p.project = session.split(":").pop().slice(0, 80);
51
52
  state.peers[session] = p; dirty = true;
52
53
  }
54
+ // Derive a coarse health from the free-text status the runner sets on a failed turn
55
+ // ("errored: <reason>" / "down: <reason>") — lets the board show a failing-but-alive agent
56
+ // distinctly instead of a healthy green. Default "ok".
57
function healthOf(status) {
  const s = String(status || "").toLowerCase();
  // Match the leading keyword as a whole word: a bare prefix test would flag ordinary
  // free-text statuses such as "downloading models" as a dead agent.
  if (/^down\b/.test(s)) return "down";
  if (/^errored\b/.test(s)) return "errored";
  return "ok";
}
53
63
  function deliverable(m, session) { return (m.to === session || m.to === "all") && m.from !== session; }
54
64
  function pushToStreams(msg) {
55
65
  for (const s of streams) if (deliverable(msg, s.session)) { try { s.res.write(`data: ${JSON.stringify(msg)}\n\n`); } catch {} }
@@ -62,7 +72,7 @@ const server = http.createServer(async (req, res) => {
62
72
  if (req.method === "POST" && P === "/status") { const b = await body(req); touch(b.session, b.status ?? "", b.project); return json(res, 200, { ok: true }); }
63
73
  if (req.method === "GET" && P === "/peers") {
64
74
  const cutoff = now() - ONLINE_MS;
65
- return json(res, 200, { peers: Object.entries(state.peers).map(([s, v]) => ({ session: s, lastSeen: v.lastSeen, online: v.lastSeen > cutoff, status: v.status || "", project: v.project || "" })) });
75
+ return json(res, 200, { peers: Object.entries(state.peers).map(([s, v]) => ({ session: s, lastSeen: v.lastSeen, online: v.lastSeen > cutoff, status: v.status || "", health: healthOf(v.status), project: v.project || "" })) });
66
76
  }
67
77
  // --- Kanban tasks ---
68
78
  if (req.method === "POST" && P === "/task") { // create a card
@@ -107,16 +117,30 @@ const server = http.createServer(async (req, res) => {
107
117
  state.projectMeta[k] = m; dirty = true;
108
118
  return json(res, 200, { ok: true, project: k, brief: m.brief || "" });
109
119
  }
120
+ if (req.method === "POST" && P === "/project/delete") { // forget a project: its cards, peers, brief, and lane
121
+ const b = await body(req); const k = String(b.project || "").slice(0, 80);
122
+ if (!k) return json(res, 400, { error: "project required" });
123
+ const nt = state.tasks.length, np = Object.keys(state.peers).length, nm = state.messages.length;
124
+ state.tasks = state.tasks.filter(t => t.project !== k);
125
+ for (const [s, v] of Object.entries(state.peers)) if (v.project === k) delete state.peers[s];
126
+ delete state.projectMeta[k];
127
+ state.messages = state.messages.filter(m2 => (m2.project || "") !== k);
128
+ dirty = true; // the project reappears cleanly if an agent ever registers it again
129
+ return json(res, 200, { ok: true, project: k, removed: { tasks: nt - state.tasks.length, peers: np - Object.keys(state.peers).length, messages: nm - state.messages.length } });
130
+ }
110
131
  if (req.method === "GET" && P === "/projects") { // project-grouped view
111
132
  const cutoff = now() - ONLINE_MS; const byProj = {};
112
133
  const proj = p => p || "(unassigned)";
113
- const mk = k => (byProj[k] ||= { project: k, brief: (state.projectMeta[k]?.brief) || "", agents: [], tasks: { todo:0,doing:0,testing:0,failed:0,done:0,blocked:0 }, doingTitles: [] });
134
+ const mk = k => (byProj[k] ||= { project: k, brief: (state.projectMeta[k]?.brief) || "", agents: [], tasks: { todo:0,doing:0,testing:0,failed:0,done:0,blocked:0 }, doingTitles: [], lastActivity: 0 });
114
135
  for (const [s, v] of Object.entries(state.peers)) {
115
- const k = proj(v.project); mk(k).agents.push({ session: s, online: v.lastSeen > cutoff, status: v.status || "" });
136
+ const k = proj(v.project); const e = mk(k); e.agents.push({ session: s, online: v.lastSeen > cutoff, status: v.status || "", health: healthOf(v.status) });
137
+ if ((v.lastSeen || 0) > e.lastActivity) e.lastActivity = v.lastSeen;
116
138
  }
117
- for (const t of state.tasks) { const e = mk(proj(t.project)); e.tasks[t.status] = (e.tasks[t.status]||0)+1; if (t.status === "doing") e.doingTitles.push(t.title); }
139
+ for (const t of state.tasks) { const e = mk(proj(t.project)); e.tasks[t.status] = (e.tasks[t.status]||0)+1; if (t.status === "doing") e.doingTitles.push(t.title); if ((t.updated || 0) > e.lastActivity) e.lastActivity = t.updated; }
118
140
  // derive a one-line phase ("where it is in the process") from the board
119
141
  for (const e of Object.values(byProj)) {
142
+ const mu = state.projectMeta[e.project]?.updated || 0; if (mu > e.lastActivity) e.lastActivity = mu;
143
+ e.idle = !e.agents.some(a => a.online);
120
144
  const { todo, doing, testing=0, failed=0, done, blocked } = e.tasks; const total = todo+doing+testing+failed+done+blocked;
121
145
  e.phase = total === 0 ? "no cards yet"
122
146
  : failed > 0 ? `${failed} FAILED — fixing`
@@ -126,6 +150,8 @@ const server = http.createServer(async (req, res) => {
126
150
  : done === total ? "shipped — all cards done"
127
151
  : todo > 0 ? `planned: ${todo} card${todo>1?"s":""} queued`
128
152
  : "in progress";
153
+ // dead board: no live agents -> the phase above is stale, say so honestly
154
+ if (e.idle) e.phase = `idle · last activity ${e.lastActivity ? fmtAge(now() - e.lastActivity) : "unknown"}`;
129
155
  }
130
156
  return json(res, 200, { projects: Object.values(byProj) });
131
157
  }
package/mcp.mjs CHANGED
@@ -103,7 +103,12 @@ server.tool("relay_board", "Show THIS project's Kanban board (all cards + their
103
103
 
104
104
  server.tool("relay_peers", "List other Claude sessions connected to the relay (online in last 5 min).", {}, async () => {
105
105
  const { peers } = await api("GET", "/peers");
106
- const lines = peers.map(p => `${p.online ? "🟢" : "⚪"} ${p.session}${p.session === SESSION ? " (you)" : ""}`);
106
+ const lines = peers.map(p => {
107
+ // health surfaces a failing-but-alive agent (runner-reported) — not a green lie
108
+ const icon = !p.online ? "⚪" : p.health === "down" ? "🛑" : p.health === "errored" ? "🔴" : "🟢";
109
+ const note = (p.health === "errored" || p.health === "down") && p.status ? ` — ${p.status}` : "";
110
+ return `${icon} ${p.session}${p.session === SESSION ? " (you)" : ""}${note}`;
111
+ });
107
112
  return { content: [{ type: "text", text: lines.join("\n") || "no peers yet" }] };
108
113
  });
109
114
 
@@ -152,5 +157,18 @@ server.tool("relay_wait", "Block up to `timeout` seconds waiting for the next me
152
157
  });
153
158
 
154
159
  await api("POST", "/register", { session: SESSION, project: PROJECT, status: `active in ${PROJECT}` }).catch(() => {});
160
+
161
+ // Heartbeat — keep this session's presence fresh for as long as the MCP process lives.
162
+ // Registration alone decays after the hub's online window (5 min); without this, idle agents
163
+ // — and EVERY agent after the laptop sleeps (dead connection, no resume event) — fall off the
164
+ // board while their process is still alive. This is the UNIVERSAL counterpart to the Claude-only
165
+ // PostToolUse heartbeat hook: it runs inside the relay every agent loads (Claude, codex, gemini,
166
+ // kimi, deepseek), so the whole crew stays tracked. We POST /register with NO status, so the
167
+ // hub refreshes lastSeen but preserves the session's meaningful status. setInterval pauses during
168
+ // sleep and fires on wake, so presence self-heals within one interval; .unref() lets the process
169
+ // still exit cleanly when the agent closes the stdio transport (no phantom peers).
170
// Parse the heartbeat interval defensively. Node clamps a NaN, sub-1ms or >2^31-1 timer
// delay to 1 ms, so a typo in RELAY_HEARTBEAT_MS ("abc", "0", "-5", …) would otherwise turn
// the heartbeat into a tight loop hammering the hub. Anything unusable falls back to 60 s.
function heartbeatMs(raw, fallback = 60 * 1000) {
  const ms = Number(raw);
  return Number.isFinite(ms) && ms >= 1 && ms <= 2 ** 31 - 1 ? ms : fallback;
}
const HEARTBEAT_MS = heartbeatMs(process.env.RELAY_HEARTBEAT_MS);
// No status in the payload, so only presence is refreshed; .unref() keeps the timer from
// holding the process open.
setInterval(() => { api("POST", "/register", { session: SESSION, project: PROJECT }).catch(() => {}); }, HEARTBEAT_MS).unref?.();
172
+
155
173
  await server.connect(new StdioServerTransport());
156
- process.stderr.write(`[trantor-mcp] connected as ${SESSION} -> ${URL_BASE}\n`);
174
+ process.stderr.write(`[trantor-mcp] connected as ${SESSION} -> ${URL_BASE} (heartbeat ${HEARTBEAT_MS}ms)\n`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "trantor",
3
- "version": "0.17.0",
3
+ "version": "0.17.3",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "trantor": "bin/cli.mjs"
@@ -10,9 +10,9 @@
10
10
  "zod": "^4.4.3"
11
11
  },
12
12
  "scripts": {
13
- "test": "node test.mjs && node test-scenarios.mjs"
13
+ "test": "node test.mjs && node test-scenarios.mjs && node test-failure.mjs"
14
14
  },
15
- "description": "The hub-world for AI agent crews orchestrate Claude Code, Codex, Gemini, Kimi & DeepSeek as live crews with a plan-aware Advisor, a Kanban/flow command center, a testing gate, and an economics brain (Scrooge).",
15
+ "description": "The hub-world for AI agent crews \u2014 orchestrate Claude Code, Codex, Gemini, Kimi & DeepSeek as live crews with a plan-aware Advisor, a Kanban/flow command center, a testing gate, and an economics brain (Scrooge).",
16
16
  "files": [
17
17
  "hub.mjs",
18
18
  "mcp.mjs",
package/ui.html CHANGED
@@ -22,6 +22,10 @@ main{flex:1;display:grid;grid-template-columns:1fr 330px;min-height:0}
22
22
  .agent .nm{color:var(--tx)}
23
23
  .agent svg{flex:none}
24
24
  .agent.offl{opacity:.42}
25
+ .agent.err{border-color:#ef4444;color:#ef4444}
26
+ .agent.err .nm{color:#ef4444}
27
+ .agent.down{border-color:#ef4444;background:rgba(239,68,68,.14);color:#ef4444}
28
+ .agent.down .nm{color:#ef4444;font-weight:600}
25
29
  .agent .ast{color:var(--mut)}
26
30
  .prog{font-size:11.5px;color:var(--dim);white-space:nowrap}
27
31
  /* project brief + phase row */
@@ -56,6 +60,20 @@ main{flex:1;display:grid;grid-template-columns:1fr 330px;min-height:0}
56
60
  .tcard.done{opacity:.65}.tcard.done span{text-decoration:line-through}
57
61
  .empty{color:var(--dim);text-align:center;padding:30px}
58
62
  .empty.big{padding:60px 20px;font-size:15px}
63
+ /* idle projects — collapsed thin rows below the live boards */
64
+ .idle-head{font-size:10.5px;text-transform:uppercase;letter-spacing:.09em;color:var(--dim);font-weight:700;margin:18px 2px 8px}
65
+ .idle-row{display:flex;align-items:center;gap:7px;background:var(--panel);border:1px solid var(--line);border-radius:10px;padding:7px 14px;margin-bottom:7px;font-size:12.5px;color:var(--mut);cursor:pointer}
66
+ .idle-row:hover{background:var(--card);border-color:#2c3a52}
67
+ .idle-row .nm{color:var(--tx);font-weight:600}
68
+ .mv{display:inline-flex;gap:3px;margin-left:6px}
69
+ .mv b{cursor:pointer;color:var(--dim);font-size:10px;line-height:1;padding:3px 5px;border:1px solid var(--line);border-radius:6px;background:var(--card);font-weight:400}
70
+ .mv b:hover{color:var(--tx);border-color:#2c3a52}
71
+ .pdel{cursor:pointer;color:var(--dim);font-size:10.5px;padding:2px 7px;border:1px solid var(--line);border-radius:6px;background:var(--card);margin-left:5px;white-space:nowrap}
72
+ .pdel:hover{color:var(--red);border-color:#5a2c2c}
73
+ .pdel.arm{color:#fff;background:var(--red);border-color:var(--red)}
74
+ .sortmode{font-size:10.5px;color:var(--dim);margin:0 0 10px;cursor:pointer;user-select:none}
75
+ .sortmode:hover{color:var(--mut)}
76
+ .proj.idle .proj-h{opacity:.55;cursor:pointer}
59
77
  /* flow (DAG) view */
60
78
  .vtog{display:flex;gap:2px;margin-left:10px}
61
79
  .vbtn{font-size:10px;font-weight:700;letter-spacing:.05em;padding:2px 9px;border-radius:10px;border:1px solid var(--line);background:var(--card);color:var(--dim);cursor:pointer}
@@ -120,7 +138,7 @@ aside h2{font-size:10.5px;text-transform:uppercase;letter-spacing:.09em;color:va
120
138
  <body>
121
139
  <header>
122
140
  <span class="dot" id="livedot"></span>
123
- <span class="logo">agent<b>·</b>bus</span>
141
+ <span class="logo">tran<b>t</b>or</span>
124
142
  <span class="pill" id="hub">—</span>
125
143
  <span class="spacer"></span>
126
144
  <span class="pill" id="econ" title="Scrooge ledger, last 24h" style="display:none"></span>
@@ -160,7 +178,7 @@ const ICON={
160
178
  const BRAND=[[/claude|anthropic/,'anthropic'],[/codex|openai|gpt|chatgpt/,'openai'],[/gemini|bard/,'gemini'],[/mistral|mixtral/,'mistral'],[/deepseek/,'deepseek'],[/kimi|moonshot|\bk2\b/,'moonshot'],[/qwen/,'qwen'],[/grok|xai/,'grok'],[/cursor/,'cursor']];
161
179
  function brandOf(s){const x=String(s).toLowerCase();for(const[re,b]of BRAND)if(re.test(x))return b;return null;}
162
180
  function iconFor(s,size){const b=brandOf(s);if(!b||!ICON[b])return `<span style="display:inline-flex;width:${size}px;height:${size}px;align-items:center;justify-content:center;border-radius:50%;background:#2a3346;color:#aeb9c9;font-size:${Math.round(size*.55)}px;font-weight:700">${esc(String(s).slice(0,1).toUpperCase())}</span>`;const i=ICON[b];return `<svg viewBox="0 0 24 24" width="${size}" height="${size}" fill="currentColor" style="color:${i.c}">${i.p.map(d=>`<path d="${d}"/>`).join('')}</svg>`;}
163
- const phaseClass=ph=>/FAILED|blocked/.test(ph)?'blocked':/building|verifying|progress/.test(ph)?'building':/shipped|done/.test(ph)?'shipped':'planned';
181
// Pick the CSS class for a project's phase pill from its phase text. An "idle · …" phase is
// checked first and always styled as 'planned'; after that the first matching keyword group wins.
const phaseClass=ph=>{
  if(/^idle ·/.test(ph))return 'planned';
  if(/FAILED|blocked/.test(ph))return 'blocked';
  if(/building|verifying|progress/.test(ph))return 'building';
  if(/shipped|done/.test(ph))return 'shipped';
  return 'planned';
};
164
182
 
165
183
  let POOLS={};
166
184
  async function econ(){
@@ -178,6 +196,16 @@ econ();setInterval(econ,15000);
178
196
  function poolOf(session){const b=brandOf(session);const k=b==='anthropic'?'claude':b==='openai'?'codex':b==='moonshot'?'kimi':b;return POOLS[k]||'';}
179
197
  const VIEWS = JSON.parse(localStorage.getItem("abViews") || "{}");
180
198
  function setView(proj, v){ VIEWS[proj] = v; localStorage.setItem("abViews", JSON.stringify(VIEWS)); render(); }
199
// Flip one idle project between its collapsed row and its expanded board. The set of expanded
// names is persisted in localStorage under "abIdleOpen"; an empty name is ignored.
function toggleIdle(name){
  if(!name)return;
  const open=new Set(JSON.parse(localStorage.getItem("abIdleOpen")||"[]"));
  if(open.has(name))open.delete(name);
  else open.add(name);
  localStorage.setItem("abIdleOpen",JSON.stringify(Array.from(open)));
  render();
}
200
+ let armedDel=null,armedTs=0; // pending ✕ confirmation (project name + when it was armed)
201
+ // ▲▼: snapshot the currently displayed order, swap, persist — manual order then wins until ↺ reset
202
// Move project `name` one slot up (dir = -1) or down (dir = +1) relative to the order currently
// shown under #boards, save the resulting list of names as "abOrder", and re-render.
// Does nothing when the project is not on screen or the move would run off either end.
function moveProj(name,dir){
  const order=Array.from(document.querySelectorAll('#boards [data-projname]'),node=>node.dataset.projname);
  const from=order.indexOf(name);
  const to=from+dir;
  if(from<0||to<0||to>=order.length)return;
  const moved=order[from];
  order[from]=order[to];
  order[to]=moved;
  localStorage.setItem("abOrder",JSON.stringify(order));
  render();
}
181
209
  function flowLayout(cards, proj){
182
210
  const byId = Object.fromEntries(cards.map(t => [t.id, t]));
183
211
  let anyDeps = cards.some(t => (t.deps || []).length);
@@ -343,16 +371,25 @@ async function render(){
343
371
  const sel=$('#to'),cur=sel.value;
344
372
  sel.innerHTML='<option value="all">all (broadcast)</option>'+allS.map(s=>`<option value="${esc(s)}">${esc(s)}</option>`).join('');
345
373
  sel.value=cur;
346
- // sort: projects with online agents first, then by name; unassigned last
347
- projects.sort((a,b)=>{const oa=a.agents.some(x=>x.online),ob=b.agents.some(x=>x.online);if(oa!==ob)return ob-oa;if(a.project==='(unassigned)')return 1;if(b.project==='(unassigned)')return -1;return a.project.localeCompare(b.project);});
374
+ // sort: manual order (abOrder, set via the ▲▼ controls) wins; otherwise online
375
+ // first, then most-recent activity — working boards float to the top on their own
376
+ const ORDER=JSON.parse(localStorage.getItem("abOrder")||"[]");
377
+ projects.sort((a,b)=>{
378
+ const ia=ORDER.indexOf(a.project),ib=ORDER.indexOf(b.project);
379
+ if(ia>=0||ib>=0)return (ia<0?1e9:ia)-(ib<0?1e9:ib);
380
+ const oa=a.agents.some(x=>x.online),ob=b.agents.some(x=>x.online);if(oa!==ob)return ob-oa;
381
+ if(a.project==='(unassigned)')return 1;if(b.project==='(unassigned)')return -1;
382
+ return (b.lastActivity||0)-(a.lastActivity||0)||a.project.localeCompare(b.project);
383
+ });
348
384
  const el=$('#boards');
349
385
  if(dragging)return; // never rebuild mid-gesture
350
386
  if(!projects.length){el.innerHTML='<div class="empty big">no projects yet — agents register a project on connect</div>';return;}
351
- el.innerHTML=projects.map(p=>{
387
+ const idleOpen=new Set(JSON.parse(localStorage.getItem("abIdleOpen")||"[]"));
388
+ const projBlock=p=>{
352
389
  const pt=tasks.filter(t=>t.project===p.project);
353
390
  const done=pt.filter(t=>t.status==='done').length;
354
391
  const pct=pt.length?Math.round(done/pt.length*100):0;
355
- const agents=p.agents.sort((a,b)=>b.online-a.online).map(a=>`<span class="agent ${a.online?'':'offl'}" title="${esc(a.session)}${a.online?' · online':' · offline'}">${iconFor(a.session,15)}<span class="nm">${esc(a.session)}</span>${a.status?` <span class="ast">· ${esc(a.status)}</span>`:''}${poolOf(a.session)?` <span class="ast" style="opacity:.7">[${esc(poolOf(a.session))}]</span>`:''}</span>`).join('');
392
+ const agents=p.agents.sort((a,b)=>b.online-a.online).map(a=>`<span class="agent ${a.online?'':'offl'}${a.health==='down'?' down':a.health==='errored'?' err':''}" title="${esc(a.session)}${a.online?' · online':' · offline'}${a.health&&a.health!=='ok'?' · '+a.health:''}">${iconFor(a.session,15)}<span class="nm">${esc(a.session)}</span>${a.status?` <span class="ast">· ${esc(a.status)}</span>`:''}${poolOf(a.session)?` <span class="ast" style="opacity:.7">[${esc(poolOf(a.session))}]</span>`:''}</span>`).join('');
356
393
  const cols=COLS.map(([k,label])=>{
357
394
  let cards=pt.filter(t=>k==='testing'?(t.status==='testing'||t.status==='failed'):t.status===k);
358
395
  if(k==='done')cards=[...cards].sort((a,b)=>(b.updated||0)-(a.updated||0)); // newest finished on top
@@ -371,18 +408,46 @@ async function render(){
371
408
  const pmsgs=msgs.filter(m=>projOf(m)===p.project);
372
409
  const view = VIEWS[p.project] || "board";
373
410
  const vtog = `<div class="vtog"><button class="vbtn ${view==="board"?"on":""}" data-proj="${esc(p.project)}" data-view="board">BOARD</button><button class="vbtn ${view==="flow"?"on":""}" data-proj="${esc(p.project)}" data-view="flow">FLOW</button></div>`;
374
- return `<div class="proj">`+
375
- `<div class="proj-h"><span class="pname">📁 <b>${esc(p.project)}</b></span>${vtog}<div class="agents">${agents||'<span class="dim">no agents</span>'}</div><span class="spacer"></span><span class="prog">${done}/${pt.length} done · ${pct}%</span></div>`+
411
+ const ctl=`<span class="mv"><b class="mvup" data-proj="${esc(p.project)}" title="move up">▲</b><b class="mvdn" data-proj="${esc(p.project)}" title="move down">▼</b></span>${p.idle===true?`<span class="pdel" data-proj="${esc(p.project)}" title="forget this project (cards, peers, brief) — it returns if an agent registers it again">✕</span>`:''}`;
412
+ return `<div class="proj${p.idle===true?' idle':''}" data-projname="${esc(p.project)}"${p.idle===true?` data-idleproj="${esc(p.project)}"`:''}>`+
413
+ `<div class="proj-h"><span class="pname">📁 <b>${esc(p.project)}</b></span>${vtog}<div class="agents">${agents||'<span class="dim">no agents</span>'}</div><span class="spacer"></span><span class="prog">${done}/${pt.length} done · ${pct}%</span>${ctl}</div>`+
376
414
  `<div class="proj-brief">${brief}${ph?`<span class="phase ${phaseClass(ph)}">${esc(ph)}</span>`:''}</div>`+
377
415
  `<div class="pbar"><i style="width:${pct}%"></i></div>`+
378
416
  (view === "flow" ? flowHTML(pt, p.project) : `<div class="kanban">${cols}</div>`)+
379
417
  chatLane(pmsgs)+
380
418
  `</div>`;
381
- }).join('');
419
+ };
420
+ // idle projects (zero online agents) collapse to thin rows below the live boards
421
+ const idleRow=p=>{
422
+ const pt=tasks.filter(t=>t.project===p.project);const done=pt.filter(t=>t.status==='done').length;
423
+ return `<div class="idle-row" data-projname="${esc(p.project)}" data-idleproj="${esc(p.project)}" title="click to expand">💤 <span class="nm">${esc(p.project)}</span><span class="dim">· ${esc(p.phase||'')} · ${done}/${pt.length} cards</span><span class="spacer"></span><span class="mv"><b class="mvup" data-proj="${esc(p.project)}" title="move up">▲</b><b class="mvdn" data-proj="${esc(p.project)}" title="move down">▼</b></span><span class="pdel" data-proj="${esc(p.project)}" title="forget this project (cards, peers, brief) — it returns if an agent registers it again">✕</span></div>`;
424
+ };
425
+ const live=projects.filter(p=>p.idle!==true),idlers=projects.filter(p=>p.idle===true);
426
+ el.innerHTML=(ORDER.length?`<div class="sortmode" id="sortreset" title="you've ordered projects manually — click to go back to automatic (recency) ordering">sort: manual · ↺ back to auto</div>`:'')
427
+ +live.map(projBlock).join('')
428
+ +(idlers.length?`<div class="idle-head">idle projects · ${idlers.length}</div>`+idlers.map(p=>idleOpen.has(p.project)?projBlock(p):idleRow(p)).join(''):'');
382
429
  // keep each project's chat scrolled to the latest line
383
430
  el.querySelectorAll('.chatlog').forEach(c=>{if(c.scrollHeight-c.clientHeight<60||c.dataset.stuck!=='0')c.scrollTop=c.scrollHeight;c.onscroll=()=>{c.dataset.stuck=(c.scrollHeight-c.scrollTop-c.clientHeight<40)?'1':'0';};});
384
431
  // click a card -> advance status todo->doing->done
385
432
  el.querySelectorAll('.vbtn').forEach(b=>b.onclick=()=>setView(b.dataset.proj,b.dataset.view));
433
+ // collapsed idle row -> expand; expanded idle project's header -> collapse again
434
+ el.querySelectorAll('.idle-row').forEach(r=>r.onclick=e=>{if(e.target.closest('.mv,.pdel'))return;toggleIdle(r.dataset.idleproj);});
435
+ el.querySelectorAll('.proj.idle .proj-h').forEach(h=>h.onclick=e=>{if(e.target.closest('.vbtn,.agent,.mv,.pdel'))return;toggleIdle(h.closest('.proj').dataset.idleproj);});
436
+ // ▲▼ reorder (persists as a manual order); ✕ forget — armed two-step, no popup
437
+ el.querySelectorAll('.mvup').forEach(b=>b.onclick=e=>{e.stopPropagation();moveProj(b.dataset.proj,-1);});
438
+ el.querySelectorAll('.mvdn').forEach(b=>b.onclick=e=>{e.stopPropagation();moveProj(b.dataset.proj,1);});
439
+ el.querySelectorAll('.pdel').forEach(b=>b.onclick=async e=>{
440
+ e.stopPropagation();
441
+ const p=b.dataset.proj;
442
+ if(!(armedDel===p&&Date.now()-armedTs<4000)){armedDel=p;armedTs=Date.now();b.classList.add('arm');b.textContent='✕ sure?';return;}
443
+ armedDel=null;
444
+ await fetch('/project/delete',{method:'POST',headers:{'content-type':'application/json'},body:JSON.stringify({project:p})});
445
+ render();
446
+ });
447
+ // the 2.5s re-render rebuilds the DOM — re-apply a still-fresh armed state so the
448
+ // "✕ sure?" confirmation survives until it expires instead of silently resetting
449
+ if(armedDel&&Date.now()-armedTs<4000){const ab=el.querySelector(`.pdel[data-proj="${CSS.escape(armedDel)}"]`);if(ab){ab.classList.add('arm');ab.textContent='✕ sure?';}}
450
+ const sr=$('#sortreset');if(sr)sr.onclick=()=>{localStorage.removeItem('abOrder');render();};
386
451
  wireFlow(el);
387
452
  el.querySelectorAll('.tcard, .fnode').forEach(c=>c.onclick=async()=>{
388
453
  const id=+c.dataset.id,t=tasks.find(x=>x.id===id);if(!t)return;
@@ -397,14 +462,25 @@ function addMsg(m,count=true){
397
462
  d.innerHTML=`<span class="t">${new Date(m.ts).toLocaleTimeString()}</span> ${iconFor(m.from,12)} <span class="from">${esc(m.from)}</span> → <span class="to">${esc(m.to)}</span>: ${esc(m.text)}`;
398
463
  const f=$('#feed');const stick=f.scrollHeight-f.scrollTop-f.clientHeight<50;f.appendChild(d);if(stick)f.scrollTop=f.scrollHeight;
399
464
  }
400
- function stream(){const ev=new EventSource('/stream?session=all');
465
+ let _ev=null;
466
+ function stream(){
467
+ try{_ev&&_ev.close();}catch(_){} // drop any prior connection before reopening (no duplicate streams)
468
+ const ev=new EventSource('/stream?session=all');_ev=ev;
401
469
  ev.onmessage=e=>{try{addMsg(JSON.parse(e.data));render();}catch(_){}};
402
- ev.onerror=()=>{$('#livedot').classList.add('off');setTimeout(()=>{ev.close();stream();$('#livedot').classList.remove('off');},2000);};}
470
+ ev.onerror=()=>{$('#livedot').classList.add('off');setTimeout(()=>{ev.close();if(_ev===ev)stream();$('#livedot').classList.remove('off');},2000);};}
403
471
  $('#send').onclick=async()=>{const t=$('#text').value.trim();if(!t)return;
404
472
  await fetch('/send',{method:'POST',headers:{'content-type':'application/json'},body:JSON.stringify({from:'dashboard',to:$('#to').value,text:t})});$('#text').value='';};
405
473
  $('#text').addEventListener('keydown',e=>{if(e.key==='Enter')$('#send').click();});
406
474
  fetch('/recent?limit=40').then(r=>r.json()).then(d=>(d.messages||[]).forEach(m=>addMsg(m,false))).catch(()=>{});
407
475
  $('#hub').textContent=location.host;
408
476
  render();setInterval(render,2500);stream();
477
+ // Self-heal after the laptop sleeps / the tab is backgrounded: browser timers and the SSE
478
+ // stream get suspended and don't reliably resume, so the board freezes on stale data. The
479
+ // moment the tab becomes visible / focused / regains network, force an immediate refresh +
480
+ // stream reconnect — the equivalent of the heartbeat's "first action after wake" on the UI side.
481
+ function wake(){render();stream();}
482
+ document.addEventListener('visibilitychange',()=>{if(!document.hidden)wake();});
483
+ addEventListener('focus',wake);
484
+ addEventListener('online',wake);
409
485
  </script>
410
486
  </body></html>