agent-coord-mcp 0.5.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/server.ts CHANGED
@@ -1,10 +1,15 @@
1
1
  #!/usr/bin/env node
2
+ import { randomUUID } from "node:crypto";
3
+ import { createServer, IncomingMessage, ServerResponse } from "node:http";
2
4
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
5
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
- import { ensureDirs } from "./store.js";
6
+ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
7
+ import { ensureDirs, readTokenMapSync } from "./store.js";
5
8
  import {
6
9
  attachAgentSchema,
7
10
  attachAgentTool,
11
+ clearTransportSchema,
12
+ clearTransportTool,
8
13
  detachAgentSchema,
9
14
  detachAgentTool,
10
15
  heartbeatSchema,
@@ -29,6 +34,8 @@ import {
29
34
  registerTool,
30
35
  renameAgentSchema,
31
36
  renameAgentTool,
37
+ reportTransportSchema,
38
+ reportTransportTool,
32
39
  sendMessageSchema,
33
40
  sendMessageTool,
34
41
  setRoomMotdSchema,
@@ -49,8 +56,44 @@ function jsonResult(data: unknown) {
49
56
  };
50
57
  }
51
58
 
52
- async function main() {
53
- ensureDirs();
59
+ // Build a fully-configured McpServer with every tool registered. Returns a
60
+ // fresh instance each call — in HTTP mode we need one server per session so
61
+ // transports don't share Protocol state.
62
+ //
63
+ // Identity binding (v0.7.0 + TOFU in v0.7.1):
64
+ // - `initialBound` (when set) pre-binds the session — from a bearer token
65
+ // (HTTP/tokens.json) or AGENT_COORD_BOUND_AGENT env (stdio).
66
+ // - Otherwise the session starts unbound. The first tool call that carries
67
+ // an agentId/from field captures that value as the session's binding —
68
+ // trust-on-first-use. Subsequent calls must match; mid-session identity
69
+ // switching (the PR #45 spoof shape) is rejected.
70
+ // - rename_agent updates the binding to the new id on success so the
71
+ // renamed session keeps working.
72
+ function buildServer(initialBound?: string): McpServer {
73
+ let bound = initialBound;
74
+
75
+ // Gate every tool that takes a caller identity. `field: null` (list_agents,
76
+ // list_rooms, prune) bypasses the check entirely.
77
+ function gate(
78
+ field: "agentId" | "from" | null,
79
+ handler: (args: Record<string, unknown>) => Promise<unknown>,
80
+ ) {
81
+ return async (args: Record<string, unknown>) => {
82
+ if (field) {
83
+ const claimed = args[field];
84
+ if (typeof claimed === "string") {
85
+ if (bound === undefined) {
86
+ bound = claimed; // TOFU: first claim wins, then sticky.
87
+ } else if (bound !== claimed) {
88
+ throw new Error(
89
+ `identity bound to '${bound}'; rejected attempt to act as '${claimed}'`,
90
+ );
91
+ }
92
+ }
93
+ }
94
+ return jsonResult(await handler(args));
95
+ };
96
+ }
54
97
 
55
98
  const server = new McpServer({
56
99
  name: "agent-coord",
@@ -61,137 +104,338 @@ async function main() {
61
104
  "join",
62
105
  "Recommended session-start call. Does register + auto-attach (if running inside tmux) + read inbox in one round-trip. Pass attach=false to skip the transport, attach={...overrides} to customize, or omit it to let the server auto-detect $TMUX_PANE. Returns the registration, attach result, any unread inbox messages, and the default channel's topic + MOTD (room rules) so you see them on connect.",
63
106
  joinSchema,
64
- async (args) => jsonResult(await joinTool(args))
107
+ gate("agentId", joinTool as (a: Record<string, unknown>) => Promise<unknown>),
65
108
  );
66
109
 
67
110
  server.tool(
68
111
  "register",
69
112
  "Register this agent in the shared registry. Lower-level than `join` — does not attach a transport or drain the inbox. Prefer `join` unless you need explicit control.",
70
113
  registerSchema,
71
- async (args) => jsonResult(await registerTool(args))
114
+ gate("agentId", registerTool as (a: Record<string, unknown>) => Promise<unknown>),
72
115
  );
73
116
 
74
117
  server.tool(
75
118
  "unregister",
76
119
  "Tear down this agent: detach any attached transport (kills the pusher) and remove the registry entry. Clean shutdown counterpart to `join`.",
77
120
  unregisterSchema,
78
- async (args) => jsonResult(await unregisterTool(args))
121
+ gate("agentId", unregisterTool as (a: Record<string, unknown>) => Promise<unknown>),
79
122
  );
80
123
 
81
124
  server.tool(
82
125
  "status",
83
126
  "Introspect this agent's coord state: registration, attached transport, inbox depth and unread count, and whether this MCP server is running inside tmux. Useful for debugging 'why isn't my DM landing'.",
84
127
  statusSchema,
85
- async (args) => jsonResult(await statusTool(args))
128
+ gate("agentId", statusTool as (a: Record<string, unknown>) => Promise<unknown>),
86
129
  );
87
130
 
88
131
  server.tool(
89
132
  "heartbeat",
90
133
  "Refresh this agent's lastHeartbeat timestamp.",
91
134
  heartbeatSchema,
92
- async (args) => jsonResult(await heartbeatTool(args))
135
+ gate("agentId", heartbeatTool as (a: Record<string, unknown>) => Promise<unknown>),
93
136
  );
94
137
 
95
138
  server.tool(
96
139
  "list_agents",
97
140
  "List all known agents and whether they appear online (heartbeat <5min).",
98
141
  listAgentsSchema,
99
- async () => jsonResult(await listAgentsTool())
142
+ gate(null, listAgentsTool as () => Promise<unknown>),
100
143
  );
101
144
 
102
145
  server.tool(
103
146
  "send_message",
104
- "Send a message. If 'to' is set, goes to that agent's inbox (DM); otherwise to a channel — pass 'room' (e.g. 'seo' or '#seo') to target a specific channel, or omit it for the default 'general' channel.",
147
+ "Send a message. If 'to' is set, goes to that agent's inbox (DM); otherwise to a channel — pass 'room' (e.g. 'seo' or '#seo') to target a specific channel, or omit it for the default 'general' channel. The 'from' field is enforced against the session's bound identity when binding is configured.",
105
148
  sendMessageSchema,
106
- async (args) => jsonResult(await sendMessageTool(args))
149
+ gate("from", sendMessageTool as (a: Record<string, unknown>) => Promise<unknown>),
107
150
  );
108
151
 
109
152
  server.tool(
110
153
  "read_messages",
111
154
  "Read new messages from inbox|room|status. For source='room', pass 'room' to read a specific channel (default 'general'). Advances the per-channel cursor unless peek=true.",
112
155
  readMessagesSchema,
113
- async (args) => jsonResult(await readMessagesTool(args))
156
+ gate("agentId", readMessagesTool as (a: Record<string, unknown>) => Promise<unknown>),
114
157
  );
115
158
 
116
159
  server.tool(
117
160
  "post_status",
118
161
  "Append a status broadcast to the shared status stream.",
119
162
  postStatusSchema,
120
- async (args) => jsonResult(await postStatusTool(args))
163
+ gate("agentId", postStatusTool as (a: Record<string, unknown>) => Promise<unknown>),
121
164
  );
122
165
 
123
166
  server.tool(
124
167
  "prune",
125
168
  "Trim room/status/inbox JSONL to entries newer than `olderThanDays` (default 7). Removes inbox files for agents no longer in the registry unless removeOrphanInboxes=false. Pass dryRun=true to preview.",
126
169
  pruneSchema,
127
- async (args) => jsonResult(await pruneTool(args))
170
+ gate(null, pruneTool as (a: Record<string, unknown>) => Promise<unknown>),
128
171
  );
129
172
 
130
173
  server.tool(
131
174
  "wait_for_message",
132
175
  "Block (max 60s) until a new message appears on the given source, then return it. For source='room', pass 'room' to wait on a specific channel (default 'general').",
133
176
  waitForMessageSchema,
134
- async (args) => jsonResult(await waitForMessageTool(args))
177
+ gate("agentId", waitForMessageTool as (a: Record<string, unknown>) => Promise<unknown>),
135
178
  );
136
179
 
137
180
  server.tool(
138
181
  "list_rooms",
139
182
  "List all channels with their topic, MOTD (room rules), members, message count, and last activity.",
140
183
  listRoomsSchema,
141
- async () => jsonResult(await listRoomsTool())
184
+ gate(null, listRoomsTool as () => Promise<unknown>),
142
185
  );
143
186
 
144
187
  server.tool(
145
188
  "join_room",
146
189
  "Join a channel (creating it if new). Adds this agent to the channel's membership so the notification hooks push its messages, and returns the channel's topic, MOTD, members, and unread count.",
147
190
  joinRoomSchema,
148
- async (args) => jsonResult(await joinRoomTool(args))
191
+ gate("agentId", joinRoomTool as (a: Record<string, unknown>) => Promise<unknown>),
149
192
  );
150
193
 
151
194
  server.tool(
152
195
  "leave_room",
153
196
  "Leave a channel — removes this agent from its membership. Cannot leave the default 'general' channel.",
154
197
  leaveRoomSchema,
155
- async (args) => jsonResult(await leaveRoomTool(args))
198
+ gate("agentId", leaveRoomTool as (a: Record<string, unknown>) => Promise<unknown>),
156
199
  );
157
200
 
158
201
  server.tool(
159
202
  "set_room_topic",
160
203
  "Set a channel's topic (a short one-line description). Posts a system notice to the channel.",
161
204
  setRoomTopicSchema,
162
- async (args) => jsonResult(await setRoomTopicTool(args))
205
+ gate("agentId", setRoomTopicTool as (a: Record<string, unknown>) => Promise<unknown>),
163
206
  );
164
207
 
165
208
  server.tool(
166
209
  "set_room_motd",
167
210
  "Set a channel's MOTD / room rules (shown to agents on join). Posts a system notice to the channel.",
168
211
  setRoomMotdSchema,
169
- async (args) => jsonResult(await setRoomMotdTool(args))
212
+ gate("agentId", setRoomMotdTool as (a: Record<string, unknown>) => Promise<unknown>),
170
213
  );
171
214
 
172
215
  server.tool(
173
216
  "rename_agent",
174
- "Rename an agent (NICK): migrates its registry entry, inbox, cursor, and channel memberships to the new id, then broadcasts a rename notice to its channels. If a live tmux-push transport is attached it is detached first (the pusher is bound to the old id) — re-attach as the new id (join/attach_agent) to restore real-time delivery; the response sets detachedTransport + a warning when this happens.",
217
+ "Rename an agent (NICK): migrates its registry entry, inbox, cursor, and channel memberships to the new id, then broadcasts a rename notice to its channels. When tokens.json identity binding is on, the caller's bearer token is atomically rotated to the new id so the same session keeps authenticating after rename. If a live tmux-push transport is attached it is detached first (the pusher is bound to the old id) — re-attach as the new id (join/attach_agent) to restore real-time delivery; the response sets detachedTransport + a warning when this happens.",
175
218
  renameAgentSchema,
176
- async (args) => jsonResult(await renameAgentTool(args))
219
+ // Special: after a successful rename we update the session's bound id
220
+ // too, so the same session can keep operating under the new name without
221
+ // the next call being rejected as a binding mismatch.
222
+ async (args: Record<string, unknown>) => {
223
+ const claimed = args.agentId;
224
+ if (typeof claimed === "string") {
225
+ if (bound === undefined) bound = claimed;
226
+ else if (bound !== claimed) {
227
+ throw new Error(`identity bound to '${bound}'; rejected attempt to act as '${claimed}'`);
228
+ }
229
+ }
230
+ const result = await renameAgentTool(args as { agentId: string; newAgentId: string });
231
+ if (result && typeof result === "object" && (result as { ok?: unknown }).ok === true) {
232
+ const to = (result as { to?: unknown }).to;
233
+ if (typeof to === "string") bound = to;
234
+ }
235
+ return jsonResult(result);
236
+ },
177
237
  );
178
238
 
179
239
  server.tool(
180
240
  "attach_agent",
181
241
  "Start the tmux-push transport for an agent: spawns hooks/tmux-pusher.mjs as a background process so peer DMs (and optionally room messages) get typed into the agent's tmux pane in real time. tmuxTarget defaults to the MCP server's own $TMUX_PANE if this server is running inside tmux. allowlist restricts which peer agentIds can push. Updates list_agents to show transport=tmux-push.",
182
242
  attachAgentSchema,
183
- async (args) => jsonResult(await attachAgentTool(args))
243
+ gate("agentId", attachAgentTool as (a: Record<string, unknown>) => Promise<unknown>),
184
244
  );
185
245
 
186
246
  server.tool(
187
247
  "detach_agent",
188
248
  "Stop the tmux-push transport for an agent: kills the pusher process and clears the transport marker.",
189
249
  detachAgentSchema,
190
- async (args) => jsonResult(await detachAgentTool(args))
250
+ gate("agentId", detachAgentTool as (a: Record<string, unknown>) => Promise<unknown>),
251
+ );
252
+
253
+ server.tool(
254
+ "report_transport",
255
+ "Publish a transport marker for an agent (used by the remote tmux pusher, scripts/coord-pusher.mjs, to surface itself in list_agents). Set transport='tmux-push-remote' and optionally host/tmuxTarget. Liveness for remote markers is heartbeat-based — keep calling heartbeat or this marker gets GC'd after staleness.",
256
+ reportTransportSchema,
257
+ gate("agentId", reportTransportTool as (a: Record<string, unknown>) => Promise<unknown>),
258
+ );
259
+
260
+ server.tool(
261
+ "clear_transport",
262
+ "Idempotent delete of an agent's transport marker. The wire-callable counterpart to detach_agent for remote pushers: it only removes the marker — there's no local process to kill.",
263
+ clearTransportSchema,
264
+ gate("agentId", clearTransportTool as (a: Record<string, unknown>) => Promise<unknown>),
191
265
  );
192
266
 
193
- const transport = new StdioServerTransport();
194
- await server.connect(transport);
267
+ return server;
268
+ }
269
+
270
+ // Token map for HTTP identity binding: null until main() loads it at startup (and
271
+ // whenever tokens.json is absent). Hot-reloaded on SIGHUP so operators can rotate / add agents without a server restart.
272
+ let tokenMap: Map<string, string> | null = null;
273
+ function loadTokenMap(initial: boolean): void {
274
+ try {
275
+ tokenMap = readTokenMapSync();
276
+ } catch (e) {
277
+ // On initial load a bad file is fatal — refuse to start in a known-bad
278
+ // auth state. On SIGHUP, log and keep the previous (valid) map.
279
+ if (initial) {
280
+ console.error((e as Error).message);
281
+ process.exit(1);
282
+ }
283
+ console.error(`[agent-coord-mcp] SIGHUP: ${(e as Error).message} (keeping previous map)`);
284
+ return;
285
+ }
286
+ if (!initial) {
287
+ console.error(`[agent-coord-mcp] SIGHUP: token map reloaded (${tokenMap?.size ?? 0} agents)`);
288
+ }
289
+ }
290
+
291
+ async function main() {
292
+ ensureDirs();
293
+ loadTokenMap(true);
294
+ process.on("SIGHUP", () => loadTokenMap(false));
295
+
296
+ // Transport selector. AGENT_COORD_HTTP_PORT set → run as a long-lived HTTP
297
+ // daemon (Streamable HTTP transport + bearer-token auth). Otherwise the
298
+ // historical stdio behavior (per-client subprocess spawned by Claude Code).
299
+ const httpPort = process.env.AGENT_COORD_HTTP_PORT;
300
+ if (httpPort) {
301
+ await startHttp(parseInt(httpPort, 10));
302
+ } else {
303
+ const boundAgent = process.env.AGENT_COORD_BOUND_AGENT;
304
+ if (!boundAgent) {
305
+ console.error(
306
+ "[agent-coord-mcp] bus identity unbound (stdio) — falling back to TOFU: the " +
307
+ "first tool call's agentId/from claim becomes this session's bound identity " +
308
+ "and subsequent calls cannot switch. For stricter pre-binding, set " +
309
+ "AGENT_COORD_BOUND_AGENT=<your-id> in the MCP launch env.",
310
+ );
311
+ }
312
+ const server = buildServer(boundAgent);
313
+ const transport = new StdioServerTransport();
314
+ await server.connect(transport);
315
+ }
316
+ }
317
+
318
+ async function startHttp(port: number): Promise<void> {
319
+ const sharedToken = process.env.AGENT_COORD_TOKEN;
320
+ const bound = tokenMap !== null;
321
+ if (!bound && !sharedToken) {
322
+ console.error(
323
+ "[agent-coord-mcp] HTTP mode needs auth: either set AGENT_COORD_TOKEN (legacy " +
324
+ "shared bearer, advisory identity) or create ~/agent-coord/tokens.json (per-agent " +
325
+ "tokens, enforced identity). Refusing to start an unauthenticated network listener.",
326
+ );
327
+ process.exit(1);
328
+ }
329
+ if (bound && sharedToken) {
330
+ console.error(
331
+ "[agent-coord-mcp] note: tokens.json is present — AGENT_COORD_TOKEN is ignored " +
332
+ "(per-agent tokens take precedence).",
333
+ );
334
+ }
335
+ if (!bound) {
336
+ console.error(
337
+ "[agent-coord-mcp] bus identity unbound (HTTP) — shared bearer auths the channel; " +
338
+ "per-session identity falls back to TOFU (the first agentId/from claim becomes " +
339
+ "the session's bound id, can't switch mid-stream). Create ~/agent-coord/tokens.json " +
340
+ "to pre-bind sessions to identities at connect time.",
341
+ );
342
+ }
343
+ const bindAddr = process.env.AGENT_COORD_BIND ?? "127.0.0.1";
344
+ const sharedExpected = sharedToken ? `Bearer ${sharedToken}` : null;
345
+
346
+ // One transport+server pair per client session. The SDK exposes session
347
+ // affinity via the `mcp-session-id` header: a new request without it is
348
+ // an init (create new pair); follow-ups carry the id (look up the pair).
349
+ // We cannot share one stateful transport across clients (it errors with
350
+ // "Server already initialized"), and stateless mode rejects reuse.
351
+ const sessions = new Map<string, StreamableHTTPServerTransport>();
352
+
353
+ async function makeSessionTransport(boundAgent?: string): Promise<StreamableHTTPServerTransport> {
354
+ // `let` + explicit type lets the SDK callbacks close over the binding
355
+ // before it's assigned — they only fire after construction completes.
356
+ let transport: StreamableHTTPServerTransport;
357
+ transport = new StreamableHTTPServerTransport({
358
+ sessionIdGenerator: () => randomUUID(),
359
+ onsessioninitialized: (id: string) => { sessions.set(id, transport); },
360
+ });
361
+ transport.onclose = () => {
362
+ if (transport.sessionId) sessions.delete(transport.sessionId);
363
+ };
364
+ const server = buildServer(boundAgent);
365
+ await server.connect(transport);
366
+ return transport;
367
+ }
368
+
369
+ // Reverse-lookup: extract bearer from header, map → bound agent. Returns
370
+ // { ok: true, agent } when the token map knows the bearer, { ok: true } with no agent
371
+ // in advisory mode (shared bearer matched), and { ok: false } otherwise (caller responds 401).
372
+ function resolveBoundAgent(authHeader: string | undefined): { ok: boolean; agent?: string } {
373
+ if (!authHeader || !authHeader.startsWith("Bearer ")) return { ok: false };
374
+ const bearer = authHeader.slice("Bearer ".length);
375
+ if (tokenMap) {
376
+ const agent = tokenMap.get(bearer);
377
+ return agent ? { ok: true, agent } : { ok: false };
378
+ }
379
+ // Advisory mode: only check the shared bearer matches.
380
+ return sharedExpected && authHeader === sharedExpected ? { ok: true } : { ok: false };
381
+ }
382
+
383
+ const http = createServer(async (req: IncomingMessage, res: ServerResponse) => {
384
+ try {
385
+ // Unauthenticated liveness probe so reverse proxies / orchestrators can
386
+ // health-check without needing a credential.
387
+ const url = req.url ?? "/";
388
+ if (req.method === "GET" && (url === "/healthz" || url === "/health")) {
389
+ res.writeHead(200, { "Content-Type": "text/plain" });
390
+ res.end("ok\n");
391
+ return;
392
+ }
393
+
394
+ // Auth gate. In bound mode the bearer also tells us *which* agent the
395
+ // session is bound to; in advisory mode it just gates entry. Constant-
396
+ // time compare isn't worthwhile here — the attacker model for the
397
+ // LAN/personal case is "someone on the same network" who can already
398
+ // observe traffic; TLS termination is the answer to that.
399
+ const resolved = resolveBoundAgent(req.headers.authorization);
400
+ if (!resolved.ok) {
401
+ res.writeHead(401, { "Content-Type": "text/plain", "WWW-Authenticate": "Bearer" });
402
+ res.end("unauthorized\n");
403
+ return;
404
+ }
405
+
406
+ // Session routing. Existing session id → reuse its transport; new client
407
+ // (no id, POST init) → mint a fresh transport+server pair bound to the
408
+ // bearer's agent; anything else is a protocol error.
409
+ const sid = req.headers["mcp-session-id"];
410
+ let transport = typeof sid === "string" ? sessions.get(sid) : undefined;
411
+ if (!transport) {
412
+ if (req.method !== "POST") {
413
+ res.writeHead(400, { "Content-Type": "text/plain" });
414
+ res.end("missing or unknown mcp-session-id\n");
415
+ return;
416
+ }
417
+ transport = await makeSessionTransport(resolved.agent);
418
+ }
419
+ await transport.handleRequest(req, res);
420
+ } catch (err) {
421
+ console.error("[agent-coord-mcp] http request failed:", err);
422
+ if (!res.headersSent) {
423
+ res.writeHead(500, { "Content-Type": "text/plain" });
424
+ res.end("internal error\n");
425
+ }
426
+ }
427
+ });
428
+
429
+ http.listen(port, bindAddr, () => {
430
+ const mode = bound ? `pre-bound (${tokenMap?.size ?? 0} agents)` : "TOFU";
431
+ console.error(`[agent-coord-mcp] http listening on ${bindAddr}:${port} — identity ${mode}`);
432
+ if (bindAddr !== "127.0.0.1" && bindAddr !== "localhost") {
433
+ console.error(
434
+ `[agent-coord-mcp] WARNING: bound to ${bindAddr} without TLS. Front with a TLS reverse proxy ` +
435
+ `(or restrict to a private network e.g. Tailscale/WireGuard) before exposing publicly.`,
436
+ );
437
+ }
438
+ });
195
439
  }
196
440
 
197
441
  main().catch((err) => {
package/src/store.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { promises as fs, existsSync, mkdirSync } from "node:fs";
1
+ import { promises as fs, existsSync, mkdirSync, readFileSync } from "node:fs";
2
2
  import { homedir } from "node:os";
3
3
  import path from "node:path";
4
4
  import lockfile from "proper-lockfile";
@@ -24,6 +24,14 @@ export const ROOMS_DIR = path.join(ROOT, "rooms");
24
24
  export const ROOMS_FILE = path.join(ROOT, "rooms.json");
25
25
  export const DEFAULT_ROOM = "general";
26
26
 
27
+ // Per-agent token map for identity-bound bus auth (v0.7.0). Shape on disk:
28
+ // { "alice": "tk_<random-secret>", "bob": "tk_<another-secret>" }
29
+ // HTTP transport reverse-looks-up the bearer to bind the session to an
30
+ // agentId, then enforces that bound id against every tool call's
31
+ // from/agentId field. Absent → advisory mode (legacy behaviour, with a
32
+ // startup warning). Should be mode 600; operator-managed.
33
+ export const TOKENS_FILE = path.join(ROOT, "tokens.json");
34
+
27
35
  export function ensureDirs(): void {
28
36
  for (const d of [ROOT, INBOX_DIR, CURSOR_DIR, TRANSPORT_DIR, PID_DIR, LOG_DIR, ROOMS_DIR]) {
29
37
  if (!existsSync(d)) mkdirSync(d, { recursive: true });
@@ -33,6 +41,53 @@ export function ensureDirs(): void {
33
41
  }
34
42
  }
35
43
 
44
+ // Synchronous, deliberate. The result feeds the server's bearer→agent
45
+ // reverse-lookup map; we want startup to fail loudly on a malformed file
46
+ // (bad JSON, non-object root, non-string/empty token, or one token shared by two
47
+ // agents) rather than silently degrade to advisory mode. Returns null if the file is absent.
48
+ export function readTokenMapSync(): Map<string, string> | null {
49
+ if (!existsSync(TOKENS_FILE)) return null;
50
+ const raw = readFileSync(TOKENS_FILE, "utf8");
51
+ let parsed: unknown;
52
+ try {
53
+ parsed = JSON.parse(raw);
54
+ } catch (e) {
55
+ throw new Error(
56
+ `[agent-coord-mcp] ${TOKENS_FILE} is not valid JSON: ${(e as Error).message}. ` +
57
+ `Fix or remove the file (the bus refuses to start with a broken token map).`,
58
+ );
59
+ }
60
+ if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
61
+ throw new Error(
62
+ `[agent-coord-mcp] ${TOKENS_FILE} must be a JSON object mapping agentId → token.`,
63
+ );
64
+ }
65
+ const out = new Map<string, string>();
66
+ for (const [agentId, token] of Object.entries(parsed as Record<string, unknown>)) {
67
+ if (typeof token !== "string" || token.length === 0 || out.has(token)) {
68
+ throw new Error(
69
+ `[agent-coord-mcp] ${TOKENS_FILE}: agent "${agentId}" has a non-string, empty, or duplicate token.`,
70
+ );
71
+ }
72
+ out.set(token, agentId);
73
+ }
74
+ return out;
75
+ }
76
+
77
+ // Atomically rotate the token entry for an agent rename (used by
78
+ // rename_agent so the same bearer continues to authenticate the renamed
79
+ // identity). No-op if the file is absent or the old id isn't in the map.
80
+ export async function rotateAgentToken(oldAgentId: string, newAgentId: string): Promise<void> {
81
+ if (!existsSync(TOKENS_FILE)) return;
82
+ await updateJson<Record<string, string>>(TOKENS_FILE, {}, (current) => {
83
+ if (current[oldAgentId] !== undefined) {
84
+ current[newAgentId] = current[oldAgentId];
85
+ delete current[oldAgentId];
86
+ }
87
+ return current;
88
+ });
89
+ }
90
+
36
91
  export type RoomEntry = {
37
92
  topic?: string;
38
93
  motd?: string;
package/src/tools.ts CHANGED
@@ -32,6 +32,7 @@ import {
32
32
  removeMember,
33
33
  rewriteJsonl,
34
34
  roomFile,
35
+ rotateAgentToken,
35
36
  setRoomMeta,
36
37
  transportFile,
37
38
  TRANSPORT_DIR,
@@ -54,6 +55,9 @@ type TransportMarker = {
54
55
  pid: number;
55
56
  tmuxTarget?: string;
56
57
  since: number;
58
+ // Remote pushers run on a different machine; the local pid is meaningless,
59
+ // so we tag the host and use heartbeat-based liveness instead of pidAlive.
60
+ host?: string;
57
61
  };
58
62
 
59
63
  type AgentRegistry = Record<string, AgentEntry>;
@@ -241,6 +245,10 @@ export async function listAgentsTool() {
241
245
 
242
246
  async function loadLiveTransports(): Promise<Map<string, TransportMarker>> {
243
247
  const out = new Map<string, TransportMarker>();
248
+ // For remote markers we can't pid-check the foreign process — instead we trust
249
+ // the registry's lastHeartbeat, which the remote pusher refreshes every minute.
250
+ const reg = await readJson<AgentRegistry>(AGENTS_FILE, {});
251
+ const now = Date.now();
244
252
  for (const fname of await listTransportFiles()) {
245
253
  const file = path.join(TRANSPORT_DIR, fname);
246
254
  const marker = await readJson<TransportMarker | null>(file, null);
@@ -248,7 +256,15 @@ async function loadLiveTransports(): Promise<Map<string, TransportMarker>> {
248
256
  await deleteFile(file);
249
257
  continue;
250
258
  }
251
- if (!isPidAlive(marker.pid)) {
259
+ const isRemote = marker.transport === "tmux-push-remote";
260
+ if (isRemote) {
261
+ const entry = reg[marker.agentId];
262
+ const fresh = !!entry && now - entry.lastHeartbeat < STALE_MS;
263
+ if (!fresh) {
264
+ await deleteFile(file);
265
+ continue;
266
+ }
267
+ } else if (!isPidAlive(marker.pid)) {
252
268
  await deleteFile(file);
253
269
  continue;
254
270
  }
@@ -1038,6 +1054,11 @@ export async function renameAgentTool(args: { agentId: string; newAgentId: strin
1038
1054
  await moveFile(cursorFile(oldId), cursorFile(newId));
1039
1055
  await moveFile(transportFile(oldId), transportFile(newId));
1040
1056
 
1057
+ // Identity-binding token rotation: if tokens.json exists and had the old
1058
+ // id, move its token to the new id atomically. Lets the same bearer keep
1059
+ // authenticating after rename — no-op if binding isn't configured.
1060
+ await rotateAgentToken(oldId, newId);
1061
+
1041
1062
  // Broadcast a NICK notice to every channel the agent was in.
1042
1063
  for (const chan of joined) {
1043
1064
  await appendJsonl(roomFile(chan), sysMsg(newId, chan, `is now known as ${newId} (was ${oldId})`));
@@ -1055,6 +1076,51 @@ export async function renameAgentTool(args: { agentId: string; newAgentId: strin
1055
1076
  };
1056
1077
  }
1057
1078
 
1079
+ // ---------- transport markers (for remote pushers) ----------
1080
+
1081
+ export const reportTransportSchema = {
1082
+ agentId: z.string().min(1),
1083
+ transport: z.string().min(1),
1084
+ tmuxTarget: z.string().optional(),
1085
+ host: z.string().optional(),
1086
+ since: z.number().optional(),
1087
+ };
1088
+
1089
+ // Called by an external push daemon (typically scripts/coord-pusher.mjs on a
1090
+ // remote machine) to publish a transport marker so list_agents reflects the
1091
+ // attachment. The local tmux-push path writes the marker directly inside
1092
+ // attach_agent; this is the wire-callable equivalent for remote pushers.
1093
+ export async function reportTransportTool(args: {
1094
+ agentId: string;
1095
+ transport: string;
1096
+ tmuxTarget?: string;
1097
+ host?: string;
1098
+ since?: number;
1099
+ }) {
1100
+ const marker: TransportMarker = {
1101
+ agentId: args.agentId,
1102
+ transport: args.transport,
1103
+ pid: 0, // not meaningful for remote; liveness comes from heartbeat
1104
+ tmuxTarget: args.tmuxTarget,
1105
+ host: args.host,
1106
+ since: args.since ?? Date.now(),
1107
+ };
1108
+ await updateJson<TransportMarker>(transportFile(args.agentId), marker, () => marker);
1109
+ return { ok: true, marker };
1110
+ }
1111
+
1112
+ export const clearTransportSchema = {
1113
+ agentId: z.string().min(1),
1114
+ };
1115
+
1116
+ // Idempotent remote-counterpart to detach_agent: just deletes the marker. Used
1117
+ // by the remote pusher on graceful shutdown so list_agents stops showing it
1118
+ // attached. (Does NOT try to kill any process — there's nothing local to kill.)
1119
+ export async function clearTransportTool(args: { agentId: string }) {
1120
+ const removed = await deleteFile(transportFile(args.agentId));
1121
+ return { ok: true, removed };
1122
+ }
1123
+
1058
1124
  // ---------- helpers ----------
1059
1125
 
1060
1126
  async function moveFile(from: string, to: string): Promise<boolean> {