macro-agent 0.1.11 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/dist/agent/agent-manager-v2.d.ts.map +1 -1
  2. package/dist/agent/agent-manager-v2.js +240 -7
  3. package/dist/agent/agent-manager-v2.js.map +1 -1
  4. package/dist/agent/types.d.ts +47 -0
  5. package/dist/agent/types.d.ts.map +1 -1
  6. package/dist/agent/types.js.map +1 -1
  7. package/dist/boot-v2.d.ts +33 -0
  8. package/dist/boot-v2.d.ts.map +1 -1
  9. package/dist/boot-v2.js +142 -11
  10. package/dist/boot-v2.js.map +1 -1
  11. package/dist/cli/inbox-mcp-proxy.d.ts +36 -0
  12. package/dist/cli/inbox-mcp-proxy.d.ts.map +1 -0
  13. package/dist/cli/inbox-mcp-proxy.js +51 -0
  14. package/dist/cli/inbox-mcp-proxy.js.map +1 -0
  15. package/dist/dispatch/loadout-translation.d.ts +100 -0
  16. package/dist/dispatch/loadout-translation.d.ts.map +1 -0
  17. package/dist/dispatch/loadout-translation.js +90 -0
  18. package/dist/dispatch/loadout-translation.js.map +1 -0
  19. package/dist/dispatch/mail-inbound-consumer.d.ts +89 -0
  20. package/dist/dispatch/mail-inbound-consumer.d.ts.map +1 -0
  21. package/dist/dispatch/mail-inbound-consumer.js +261 -0
  22. package/dist/dispatch/mail-inbound-consumer.js.map +1 -0
  23. package/dist/dispatch/mail-inbound-reuse-consumer.d.ts +75 -0
  24. package/dist/dispatch/mail-inbound-reuse-consumer.d.ts.map +1 -0
  25. package/dist/dispatch/mail-inbound-reuse-consumer.js +325 -0
  26. package/dist/dispatch/mail-inbound-reuse-consumer.js.map +1 -0
  27. package/dist/dispatch/permission-evaluator.d.ts +68 -0
  28. package/dist/dispatch/permission-evaluator.d.ts.map +1 -0
  29. package/dist/dispatch/permission-evaluator.js +159 -0
  30. package/dist/dispatch/permission-evaluator.js.map +1 -0
  31. package/dist/dispatch/permission-overlay.d.ts +64 -0
  32. package/dist/dispatch/permission-overlay.d.ts.map +1 -0
  33. package/dist/dispatch/permission-overlay.js +72 -0
  34. package/dist/dispatch/permission-overlay.js.map +1 -0
  35. package/dist/dispatch/permissions-handler.d.ts +71 -0
  36. package/dist/dispatch/permissions-handler.d.ts.map +1 -0
  37. package/dist/dispatch/permissions-handler.js +83 -0
  38. package/dist/dispatch/permissions-handler.js.map +1 -0
  39. package/dist/dispatch/spawn-agent-handler.d.ts +84 -0
  40. package/dist/dispatch/spawn-agent-handler.d.ts.map +1 -0
  41. package/dist/dispatch/spawn-agent-handler.js +85 -0
  42. package/dist/dispatch/spawn-agent-handler.js.map +1 -0
  43. package/dist/lifecycle/handlers-v2.d.ts +7 -0
  44. package/dist/lifecycle/handlers-v2.d.ts.map +1 -1
  45. package/dist/lifecycle/handlers-v2.js +27 -0
  46. package/dist/lifecycle/handlers-v2.js.map +1 -1
  47. package/dist/map/lifecycle-bridge.d.ts +18 -0
  48. package/dist/map/lifecycle-bridge.d.ts.map +1 -1
  49. package/dist/map/lifecycle-bridge.js +23 -1
  50. package/dist/map/lifecycle-bridge.js.map +1 -1
  51. package/dist/map/mail-bridge.d.ts +55 -0
  52. package/dist/map/mail-bridge.d.ts.map +1 -0
  53. package/dist/map/mail-bridge.js +115 -0
  54. package/dist/map/mail-bridge.js.map +1 -0
  55. package/dist/map/sidecar.d.ts.map +1 -1
  56. package/dist/map/sidecar.js +245 -1
  57. package/dist/map/sidecar.js.map +1 -1
  58. package/dist/map/types.d.ts +15 -0
  59. package/dist/map/types.d.ts.map +1 -1
  60. package/dist/mcp/tools/done-v2.d.ts.map +1 -1
  61. package/dist/mcp/tools/done-v2.js +1 -0
  62. package/dist/mcp/tools/done-v2.js.map +1 -1
  63. package/dist/teams/seed-defaults.d.ts.map +1 -1
  64. package/dist/teams/seed-defaults.js +6 -2
  65. package/dist/teams/seed-defaults.js.map +1 -1
  66. package/dist/teams/team-loader.d.ts.map +1 -1
  67. package/dist/teams/team-loader.js +17 -1
  68. package/dist/teams/team-loader.js.map +1 -1
  69. package/dist/teams/team-runtime-v2.d.ts.map +1 -1
  70. package/dist/teams/team-runtime-v2.js +2 -0
  71. package/dist/teams/team-runtime-v2.js.map +1 -1
  72. package/package.json +6 -6
  73. package/src/agent/__tests__/agent-manager-v2.permission-interception.test.ts +296 -0
  74. package/src/agent/__tests__/agent-manager-v2.permissions.test.ts +233 -0
  75. package/src/agent/agent-manager-v2.ts +268 -8
  76. package/src/agent/types.ts +51 -0
  77. package/src/boot-v2.ts +190 -12
  78. package/src/cli/inbox-mcp-proxy.ts +56 -0
  79. package/src/dispatch/CLAUDE.md +129 -0
  80. package/src/dispatch/__tests__/loadout-translation.test.ts +141 -0
  81. package/src/dispatch/__tests__/mail-inbound-consumer.integration.test.ts +519 -0
  82. package/src/dispatch/__tests__/mail-inbound-consumer.test.ts +589 -0
  83. package/src/dispatch/__tests__/mail-inbound-reuse-consumer.test.ts +575 -0
  84. package/src/dispatch/__tests__/permission-evaluator.test.ts +196 -0
  85. package/src/dispatch/__tests__/permission-overlay.test.ts +56 -0
  86. package/src/dispatch/__tests__/permissions-handler.test.ts +168 -0
  87. package/src/dispatch/__tests__/spawn-agent-handler.test.ts +282 -0
  88. package/src/dispatch/loadout-translation.ts +138 -0
  89. package/src/dispatch/mail-inbound-consumer.ts +397 -0
  90. package/src/dispatch/mail-inbound-reuse-consumer.ts +479 -0
  91. package/src/dispatch/permission-evaluator.ts +191 -0
  92. package/src/dispatch/permission-overlay.ts +89 -0
  93. package/src/dispatch/permissions-handler.ts +112 -0
  94. package/src/dispatch/spawn-agent-handler.ts +160 -0
  95. package/src/lifecycle/handlers-v2.ts +34 -0
  96. package/src/map/__tests__/lifecycle-bridge.test.ts +64 -0
  97. package/src/map/__tests__/mail-bridge.test.ts +196 -0
  98. package/src/map/lifecycle-bridge.ts +48 -2
  99. package/src/map/mail-bridge.ts +203 -0
  100. package/src/map/sidecar.ts +346 -1
  101. package/src/map/types.ts +21 -0
  102. package/src/mcp/tools/done-v2.ts +1 -0
  103. package/src/teams/seed-defaults.ts +6 -2
  104. package/src/teams/team-loader.ts +21 -2
  105. package/src/teams/team-runtime-v2.ts +2 -0
  106. package/src/workspace/__tests__/self-driving-yaml.test.ts +10 -2
  107. package/templates/teams/self-driving/team.yaml +142 -0
  108. package/tsconfig.json +2 -1
package/src/boot-v2.ts CHANGED
@@ -263,6 +263,39 @@ export interface BootV2Config {
263
263
  agentType?: string;
264
264
  customPrompt?: string;
265
265
  task?: string;
266
+ /**
267
+ * When true, spawn the bootstrap coordinator with
268
+ * `askForAllTools: true` and `permissionMode: 'interactive'` so the
269
+ * Claude SDK consults `canUseTool` for every tool call and
270
+ * acp-factory emits the resulting requests as `permission_request`
271
+ * session updates. This is the prerequisite for ACP+reuse dispatch
272
+ * to actually enforce per-dispatch loadout deny rules via the
273
+ * runtime overlay registry — the prompt iterator can only deny
274
+ * tools the SDK asks about.
275
+ *
276
+ * Trade-off: every tool call roundtrips through the host (~1-5ms
277
+ * latency penalty per call). Acceptable for autonomous dispatch
278
+ * targets; would be heavy for high-frequency interactive chat.
279
+ *
280
+ * Defaults to false (preserves the existing chat-friendly mode).
281
+ */
282
+ dispatchTarget?: boolean;
283
+ };
284
+ /**
285
+ * Optional extra worker spawned after the bootstrap coordinator
286
+ * comes up. Used by live tests (e.g., `live-mail-reuse-dispatch`)
287
+ * to provide a dispatch target for mail+reuse. The worker is
288
+ * spawned parentless (`parent: null`), so on `done()` it terminates
289
+ * cleanly, the same way the bootstrap coordinator does.
290
+ *
291
+ * Default `role`: `'reuse-target'`. Choose a role that does NOT
292
+ * collide with the sidecar's projected `'worker'` role in the
293
+ * hub-side roster — otherwise prefer-route may tie-break to the
294
+ * sidecar instead of this worker.
295
+ */
296
+ worker?: boolean | {
297
+ role?: string;
298
+ task?: string;
266
299
  };
267
300
  /**
268
301
  * Rehydration policy for agents that existed before this boot. Controls
@@ -377,11 +410,32 @@ export async function bootV2(
377
410
  !config.bootstrap?.coordinator
378
411
  ) {
379
412
  const envCwd = process.env.MACRO_BOOTSTRAP_CWD;
413
+ const dispatchTarget =
414
+ process.env.MACRO_BOOTSTRAP_COORDINATOR_DISPATCH_TARGET === "true";
415
+ const coordObj: Record<string, unknown> = {};
416
+ if (envCwd) coordObj.cwd = envCwd;
417
+ if (dispatchTarget) coordObj.dispatchTarget = true;
380
418
  config = {
381
419
  ...config,
382
420
  bootstrap: {
383
421
  ...(config.bootstrap ?? {}),
384
- coordinator: envCwd ? { cwd: envCwd } : true,
422
+ coordinator:
423
+ Object.keys(coordObj).length > 0
424
+ ? (coordObj as never)
425
+ : true,
426
+ },
427
+ };
428
+ }
429
+ if (
430
+ process.env.MACRO_BOOTSTRAP_WORKER === "true" &&
431
+ !config.bootstrap?.worker
432
+ ) {
433
+ const envWorkerRole = process.env.MACRO_BOOTSTRAP_WORKER_ROLE;
434
+ config = {
435
+ ...config,
436
+ bootstrap: {
437
+ ...(config.bootstrap ?? {}),
438
+ worker: envWorkerRole ? { role: envWorkerRole } : true,
385
439
  },
386
440
  };
387
441
  }
@@ -496,14 +550,73 @@ export async function bootV2(
496
550
 
497
551
  // 7a. Task Dispatch (opt-in autonomous task dispatch mode)
498
552
  let taskDispatcher: import("swarm-dispatch").TaskDispatcher | null = null;
553
+ // Hoisted so the MAP sidecar (step 13) can forward it to the mail bridge.
554
+ let dispatcherAgentId: string | undefined;
555
+ // Mail-inbound consumer — always wired (does not require dispatch.enabled).
556
+ let mailInboundConsumer: import("./dispatch/mail-inbound-consumer.js").MailInboundConsumer | null = null;
557
+ // Mail-inbound REUSE consumer — handles `x-dispatch/work` envelopes
558
+ // addressed to non-sidecar agents (long-lived workers/coordinators) and
559
+ // drives them through the dispatch turn using their existing session.
560
+ // Always wired so reuse routing works even without the outbound
561
+ // orchestrator. Filters non-overlapping with mailInboundConsumer.
562
+ let mailInboundReuseConsumer:
563
+ | import("./dispatch/mail-inbound-reuse-consumer.js").MailInboundReuseConsumer
564
+ | null = null;
565
+
566
+ {
567
+ // Stable dispatcher ID used as the inbox recipient for bridged envelopes.
568
+ // Matches the id the mail-bridge registers and delivers to. The outbound
569
+ // orchestrator (below, opt-in) reuses the same id so both code paths share
570
+ // one inbox recipient — no double-processing because the consumer fires
571
+ // spawn() only on inbox events while the orchestrator fires spawn() only
572
+ // when polling opentasks (different trigger paths).
573
+ const { getStableInstanceId } = await import("./cli/stable-instance-id.js");
574
+ const inboundClaimantId = `${os.hostname()}:${process.pid}:${getStableInstanceId(cwd)}`;
575
+ const inboundDispatcherId = `dispatcher:${inboundClaimantId}`;
576
+ dispatcherAgentId = inboundDispatcherId;
577
+
578
+ // Register the inbox recipient so mail-bridge's registerAgent call is a
579
+ // no-op (it uses an upsert) and the inbox accepts deliveries immediately.
580
+ await inboxAdapter.registerAgent(inboundDispatcherId, {
581
+ role: "dispatcher",
582
+ scope: "default",
583
+ });
584
+
585
+ const rawInbox = inboxAdapter.getInbox();
586
+ const { createMailInboundConsumer } = await import(
587
+ "./dispatch/mail-inbound-consumer.js"
588
+ );
589
+ mailInboundConsumer = createMailInboundConsumer({
590
+ dispatcherAgentId: inboundDispatcherId,
591
+ inboxEvents: rawInbox.events as any,
592
+ agentManager,
593
+ agentStore,
594
+ getSidecar: () => (systemRef as any).mapSidecar ?? null,
595
+ log: (msg) => console.log(msg),
596
+ });
597
+
598
+ // Reuse consumer for envelopes addressed to long-lived workers/
599
+ // coordinators. Non-overlapping filter (event.agentId !== sidecarId).
600
+ const { createMailInboundReuseConsumer } = await import(
601
+ "./dispatch/mail-inbound-reuse-consumer.js"
602
+ );
603
+ mailInboundReuseConsumer = createMailInboundReuseConsumer({
604
+ dispatcherAgentId: inboundDispatcherId,
605
+ inboxEvents: rawInbox.events as any,
606
+ agentManager,
607
+ agentStore,
608
+ getSidecar: () => (systemRef as any).mapSidecar ?? null,
609
+ log: (msg) => console.log(msg),
610
+ });
611
+ }
499
612
 
500
613
  if (config.dispatch?.enabled && tasksAdapter) {
501
614
  const { createOrchestrator, createOpenTasksSource, createAgentInboxPort } =
502
615
  await import("swarm-dispatch");
503
- const { getStableInstanceId } = await import("./cli/stable-instance-id.js");
504
616
 
505
- const claimantId = `${os.hostname()}:${process.pid}:${getStableInstanceId(cwd)}`;
506
- const dispatchAgentId = `dispatcher:${claimantId}`;
617
+ // dispatcherAgentId is already set by the unconditional mail-inbound block above.
618
+ // Use it directly so both paths share the same inbox recipient.
619
+ const dispatchAgentId = dispatcherAgentId!;
507
620
 
508
621
  // Adapt opentasks client → DispatchTaskSource
509
622
  const opentasksClient = (tasksAdapter as any).client;
@@ -569,6 +682,7 @@ export async function bootV2(
569
682
  type?: string;
570
683
  schema?: string;
571
684
  data?: any;
685
+ _conversationId?: string;
572
686
  };
573
687
  if (content?.schema !== "x-dispatch/work") return null;
574
688
  const data = content.data;
@@ -586,18 +700,19 @@ export async function bootV2(
586
700
  metadata: {
587
701
  ...data.metadata,
588
702
  role: data.role,
703
+ // Thread conversation_id through so the reply bridge can post
704
+ // the worker's output back to the hub's mail conversation.
705
+ ...(content._conversationId
706
+ ? { _mailConversationId: content._conversationId }
707
+ : {}),
589
708
  },
590
709
  },
591
710
  };
592
711
  },
593
712
  },
594
713
  );
595
-
596
- // Register the dispatcher as an agent in the inbox so it can receive messages
597
- await inboxAdapter.registerAgent(dispatchAgentId, {
598
- role: "dispatcher",
599
- scope: "default",
600
- });
714
+ // Note: registerAgent for dispatchAgentId was already called in the
715
+ // unconditional mail-inbound block above — no need to repeat here.
601
716
  }
602
717
 
603
718
  // Phase 2: Wire AgentRoster via inbox agent listing for route-first dispatch
@@ -631,7 +746,7 @@ export async function bootV2(
631
746
  (hasRouting ? ("prefer-route" as const) : ("spawn-only" as const));
632
747
 
633
748
  taskDispatcher = createOrchestrator(source, runtime, {
634
- claimantId,
749
+ claimantId: dispatchAgentId,
635
750
  pollIntervalMs: config.dispatch.pollIntervalMs ?? 15_000,
636
751
  defaultRole: config.dispatch.defaultRole ?? "worker",
637
752
  concurrency: { global: config.dispatch.maxConcurrent ?? 3 },
@@ -841,6 +956,7 @@ export async function bootV2(
841
956
  ? (id: string) => mapServerInstance!.getLocalMapId(id)
842
957
  : undefined,
843
958
  gitCascadeAdapter,
959
+ dispatcherAgentId,
844
960
  },
845
961
  {
846
962
  server: config.map.server,
@@ -1009,18 +1125,78 @@ export async function bootV2(
1009
1125
  return;
1010
1126
  }
1011
1127
  // No priors matched the policy → fresh spawn (first boot, or 'none').
1128
+ // dispatchTarget mode: bake askForAllTools + permissionMode='interactive'
1129
+ // into the spawn so the SDK funnels every tool call through canUseTool
1130
+ // and acp-factory emits permission_request session updates the prompt
1131
+ // iterator's overlay-enforcement path can consume.
1132
+ const isDispatchTarget = opts.dispatchTarget === true;
1012
1133
  const spawned = await agentManager.spawn({
1013
1134
  role: "coordinator",
1014
1135
  parent: null,
1015
1136
  cwd: bootstrapCwd,
1016
1137
  task: opts.task ?? "Default coordinator (auto-spawn on boot)",
1017
- permissionMode: opts.permissionMode,
1138
+ permissionMode: isDispatchTarget
1139
+ ? "interactive"
1140
+ : opts.permissionMode,
1018
1141
  agentType: opts.agentType,
1019
1142
  customPrompt: opts.customPrompt,
1143
+ ...(isDispatchTarget ? { askForAllTools: true } : {}),
1020
1144
  });
1021
1145
  console.log(
1022
1146
  `[boot-v2] Bootstrap coordinator spawned: ${(spawned as any).name ?? spawned.id} at ${bootstrapCwd}`,
1023
1147
  );
1148
+
1149
+ // Optional: bootstrap an additional worker for live tests
1150
+ // exercising mail+reuse semantics. Spawned with parent=null
1151
+ // because:
1152
+ // - The role-capability check only fires for parented spawns
1153
+ // (agent-manager-v2 line 559-572); bypassing it lets us use
1154
+ // a custom role (e.g., 'reuse-target') that doesn't collide
1155
+ // with the sidecar's projected 'worker' in the hub-side
1156
+ // dispatch roster.
1157
+ // - The worker's done() lifecycle is the same as the bootstrap
1158
+ // coord's: terminate cleanly. Phase 2C's `_lastSummary`
1159
+ // fallback (handlers-v2 + mail-inbound-reuse-consumer)
1160
+ // ensures the dispatch reply path recovers the summary from
1161
+ // metadata even if the prompt iterator's update stream races
1162
+ // the ACP connection close on terminate.
1163
+ if (config.bootstrap?.worker) {
1164
+ const workerOpts = config.bootstrap.worker === true
1165
+ ? {}
1166
+ : config.bootstrap.worker;
1167
+ const workerRole = workerOpts.role ?? "reuse-target";
1168
+ try {
1169
+ const workerSpawned = await agentManager.spawn({
1170
+ role: workerRole,
1171
+ parent: null,
1172
+ cwd: bootstrapCwd,
1173
+ task: workerOpts.task ?? "Await dispatch",
1174
+ // Funnel every tool call through the host so the prompt-iterator
1175
+ // handler can apply per-dispatch overlay deny rules at runtime
1176
+ // (Phase 3). Two layers must both be set:
1177
+ // - askForAllTools=true → settings.permissions.ask=['*'] so
1178
+ // the Claude SDK actually consults canUseTool for every
1179
+ // tool (without this, default mode auto-approves "safe"
1180
+ // tools like Read).
1181
+ // - permissionMode='interactive' → acp-factory emits the
1182
+ // resulting requestPermission as a `permission_request`
1183
+ // session update instead of auto-approving it (which is
1184
+ // macro-agent's default 'auto-approve' behavior).
1185
+ // Bootstrap dispatch targets are autonomous + latency-tolerant
1186
+ // so the per-call host roundtrip is acceptable.
1187
+ askForAllTools: true,
1188
+ permissionMode: "interactive",
1189
+ });
1190
+ console.log(
1191
+ `[boot-v2] Bootstrap dispatch-target spawned: ${(workerSpawned as any).name ?? workerSpawned.id} ` +
1192
+ `(role=${workerRole}, parent=null)`,
1193
+ );
1194
+ } catch (err) {
1195
+ console.warn(
1196
+ `[boot-v2] Bootstrap worker spawn failed: ${(err as Error).message}`,
1197
+ );
1198
+ }
1199
+ }
1024
1200
  };
1025
1201
 
1026
1202
  rehydrateOrSpawn().catch((err: Error) => {
@@ -1050,6 +1226,8 @@ export async function bootV2(
1050
1226
 
1051
1227
  async shutdown(): Promise<void> {
1052
1228
  clearInterval(healthCheckTimer);
1229
+ if (mailInboundConsumer) mailInboundConsumer.stop();
1230
+ if (mailInboundReuseConsumer) mailInboundReuseConsumer.stop();
1053
1231
  if (taskDispatcher) await taskDispatcher.stop();
1054
1232
  if (mapSidecar) await mapSidecar.stop();
1055
1233
  if (mapServerInstance) await mapServerInstance.stop();
@@ -0,0 +1,56 @@
1
+ /**
2
+ * inbox-mcp-proxy.ts — stdio entry that exposes agent-inbox tools as an
3
+ * MCP server to a spawned worker.
4
+ *
5
+ * Why this exists:
6
+ *
7
+ * The agent-inbox package ships an `InboxMcpProxy` (in dist/mcp/mcp-proxy.js)
8
+ * designed to bridge agent-inbox IPC ↔ MCP-stdio. macro-agent's
9
+ * `agentManager.spawn` configures this as a per-spawn MCP server so the
10
+ * spawned worker has access to `send_message`, `check_inbox`,
11
+ * `read_thread`, `list_agents` — the tools the macro-agent architecture
12
+ * docs claim are available, but which were not actually being mounted on
13
+ * workers prior to this fix.
14
+ *
15
+ * Particularly important for mail-inbound dispatch workers, which run with
16
+ * `isolatedSettings: true` and therefore can't pick up host-level plugin
17
+ * MCPs that would otherwise have provided agent-inbox.
18
+ *
19
+ * Env vars consumed:
20
+ *
21
+ * INBOX_SOCKET_PATH (required) — path to agent-inbox's IPC socket. Set
22
+ * by `buildMcpServerConfig` in
23
+ * agent-manager-v2.ts.
24
+ * MACRO_AGENT_ID (optional) — the spawned worker's agent id; used as
25
+ * the proxy's `defaultAgentId` so tools
26
+ * like check_inbox auto-target the
27
+ * caller's mailbox.
28
+ *
29
+ * Failure mode:
30
+ *
31
+ * If INBOX_SOCKET_PATH is unset, the script exits with a non-zero status
32
+ * so Claude Code's MCP-init reports the misconfiguration loudly rather
33
+ * than silently leaving the worker without inbox tools.
34
+ */
35
+
36
+ import { InboxMcpProxy } from "agent-inbox";
37
+
38
+ async function main(): Promise<void> {
39
+ const socketPath = process.env.INBOX_SOCKET_PATH;
40
+ if (!socketPath) {
41
+ console.error(
42
+ "[inbox-mcp-proxy] INBOX_SOCKET_PATH is unset — cannot start. " +
43
+ "macro-agent's agentManager.spawn should always inject this env var.",
44
+ );
45
+ process.exit(1);
46
+ }
47
+
48
+ const agentId = process.env.MACRO_AGENT_ID || "anonymous";
49
+ const proxy = new InboxMcpProxy(socketPath, agentId);
50
+ await proxy.start();
51
+ }
52
+
53
+ main().catch((err) => {
54
+ console.error(`[inbox-mcp-proxy] Fatal: ${(err as Error).message}`);
55
+ process.exit(1);
56
+ });
@@ -0,0 +1,129 @@
1
+ # macro-agent/src/dispatch — hub-driven work intake
2
+
3
+ This directory contains the consumer-side handlers that receive
4
+ hub-orchestrated `x-dispatch/work` envelopes from a connected OpenHive
5
+ hub (or any hub speaking the same wire shape). Two consumers, one
6
+ mail-bridge:
7
+
8
+ - **`mail-inbound-consumer.ts`** — addressed to the sidecar (`recipient =
9
+ dispatcher:<claimantId>`). Spawns a **fresh, parentless** worker per
10
+ envelope. Used when the hub picks the sidecar as the mail target —
11
+ typically because no specialized worker is registered yet, or because
12
+ the hub's `mail_lifecycle: 'fresh'` hint forced sidecar routing.
13
+ - **`mail-inbound-reuse-consumer.ts`** — addressed to a **non-sidecar**
14
+ agent that already exists on the swarm. Forwards the envelope into
15
+ the long-lived agent's prompt iterator instead of spawning. Used for
16
+ `mail_lifecycle: 'reuse'` dispatches against multi-role swarms.
17
+ - **`../map/mail-bridge.ts`** — receives `mail/turn.received` MAP notifications
18
+ from the hub and forwards each turn into the sidecar's local
19
+ agent-inbox so the consumers above can dispatch on `inbox.message`.
20
+
21
+ ## Wire envelope shape (hub contract)
22
+
23
+ Every envelope follows:
24
+
25
+ ```jsonc
26
+ {
27
+ "type": "x-dispatch/work",
28
+ "body": {
29
+ "taskId": "disp_xxx",
30
+ "prompt": "<rendered prompt>",
31
+ "role": "worker | executor | reviewer | ...",
32
+ "loadout": { "permissions": {...}, "mcpProviders": [...], ... },
33
+ "metadata": { "permissions": ..., "mcpProviders": ... }, // legacy
34
+ "_conversationId": "conv_xxx" // mail-reuse path only
35
+ }
36
+ }
37
+ ```
38
+
39
+ - `body.role` may be **any** string the hub side surfaces (e.g., from a
40
+ team template's `team_role_ref.role`). It is NOT guaranteed to match
41
+ any built-in or locally-configured macro-agent role. The consumer
42
+ must defend against unknown role strings — see "Role validation" below.
43
+ - `body.loadout` is the canonical structured slot. `body.metadata` carries
44
+ the legacy permissions/MCP fields for one deprecation cycle so older
45
+ consumers continue to work.
46
+
47
+ ## Role validation (regression hardening)
48
+
49
+ When the hub surfaces a team-defined role like `'executor'`, the wire
50
+ envelope arrives at `mail-inbound-consumer.ts` with `body.role:
51
+ 'executor'`. macro-agent's role registry has no `'executor'` entry, so
52
+ `roleRegistry.resolveRole('executor')` falls back to `GenericRole` —
53
+ which has:
54
+
55
+ - `lifecycle.type: 'persistent'` (no auto-cleanup, no task-bound timeout)
56
+ - `WILDCARD_CAPABILITY` (so it has the `done` capability technically),
57
+ but no system-prompt instruction telling the agent to call `done()`
58
+
59
+ When such an agent finishes a single prompt cycle (`promptUntilDone(...,
60
+ {maxFollowUps: 0})`), it stops without invoking `done({summary: ...})`.
61
+ The lifecycle handler's `_lastSummary` write is gated on `args.summary`,
62
+ so nothing gets persisted. The consumer reads back `_lastSummary` to
63
+ post the reply turn — empty → "no reply turn posted" → the hub never
64
+ sees an answer and the dispatch silently dies.
65
+
66
+ The fix: validate the requested role against the registry **before**
67
+ passing it to `agentManager.spawn`. Unknown roles fall back to
68
+ `'worker'` (ephemeral lifecycle + `LIFECYCLE_CAPABILITIES.DONE` +
69
+ explicit "you MUST call done()" system prompt):
70
+
71
+ ```typescript
72
+ const requestedRole = data.role;
73
+ const roleRegistry = agentManager.getRoleRegistry?.();
74
+ const knownRole =
75
+ requestedRole && roleRegistry?.getRole(requestedRole) !== undefined;
76
+ const role = knownRole ? requestedRole! : "worker";
77
+ if (requestedRole && !knownRole) {
78
+ log(`[mail-inbound] Unknown role '${requestedRole}' for taskId=${taskId} — falling back to 'worker'`);
79
+ }
80
+ ```
81
+
82
+ `roleRegistry.getRole(name)` is exact-match across custom / project /
83
+ user / built-in maps and returns `undefined` for unknown names —
84
+ distinct from `resolveRole(name)` which always returns *something* via
85
+ `GenericRole`. We use the former here intentionally so the fallback
86
+ path is observable (we log a warning) rather than silent.
87
+
88
+ **Apply the same pattern to any future consumer that spawns workers
89
+ from a wire envelope.** The receiving side owns the role taxonomy;
90
+ hubs cannot be expected to use names that match the receiver's
91
+ registry.
92
+
93
+ ## Reply path (worker → hub)
94
+
95
+ After the consumer spawns a worker:
96
+
97
+ 1. `agentManager.promptUntilDone(spawnedId, prompt, {maxFollowUps: 0})`
98
+ drives the worker through one prompt cycle.
99
+ 2. Worker calls `done({status, summary})` — the lifecycle handler
100
+ writes `_lastSummary` to agent metadata (`handlers-v2.ts:233`,
101
+ gated on `inDispatch || !parentId`).
102
+ 3. AgentManager fires the lifecycle `stopped` event.
103
+ 4. The consumer's `stopped` listener reads `_lastSummary` from
104
+ `agentStore.getAgent(agentId)` and posts a reply turn into the
105
+ dispatch conversation via `mail/turn`.
106
+ 5. The hub's `mail.turn.added` event fires; the orchestrator's
107
+ reply-demuxer routes it back to swarm-dispatch as the dispatch
108
+ completion.
109
+
110
+ If `_lastSummary` is empty when the worker stops, the consumer logs
111
+ "Worker stopped but _lastSummary is empty — no reply turn posted" and
112
+ bails. This is the canary for role-validation failures, lifecycle
113
+ misconfigurations, or agents that exit without calling `done()`.
114
+
115
+ ## Idempotency + dedup
116
+
117
+ Both consumers track `seenTaskIds` with a 1-hour TTL to drop duplicate
118
+ deliveries (the hub's mail-push bridge can re-fire `mail.turn.added`
119
+ on reconnect; the local inbox can re-emit `inbox.message` for the same
120
+ logical turn). Within the TTL window, a re-delivered taskId is
121
+ silently ignored. After the TTL expires, a stale retry could
122
+ legitimately re-spawn — preferable to permanent memory growth.
123
+
124
+ ## Tests
125
+
126
+ - `__tests__/mail-inbound-consumer.test.ts` — unit-level spawn/reply flow with mocked AgentManager
127
+ - `__tests__/mail-inbound-consumer.integration.test.ts` — full lifecycle through real `AgentManagerV2`
128
+ - `__tests__/mail-inbound-reuse-consumer.test.ts` — reuse-path classification + filtering
129
+ - Live e2e (in OpenHive): `src/__tests__/swarm/live-{loadout,mail-reuse}-dispatch*.test.ts` — gated by `LIVE_AGENT_E2E=true`
@@ -0,0 +1,141 @@
1
+ /**
2
+ * Unit tests for `loadoutToSpawnOptions` — the pure-function translator
3
+ * from wire-shape `MaterializedLoadout` to macro-agent `SpawnAgentOptions`.
4
+ *
5
+ * These tests pin the shared contract used by both the mail-inbound
6
+ * consumer and the new `dispatch/spawn-agent` MAP handler. Both paths
7
+ * should produce identical spawn options for the same loadout input.
8
+ */
9
+
10
+ import { describe, it, expect } from "vitest";
11
+ import {
12
+ loadoutToSpawnOptions,
13
+ type WireLoadout,
14
+ } from "../loadout-translation.js";
15
+
16
+ describe("loadoutToSpawnOptions", () => {
17
+ it("returns {} for undefined loadout", () => {
18
+ expect(loadoutToSpawnOptions(undefined)).toEqual({});
19
+ });
20
+
21
+ it("returns {} when ctx is provided but loadout is undefined", () => {
22
+ expect(loadoutToSpawnOptions(undefined, { fullAutonomous: true })).toEqual({});
23
+ });
24
+
25
+ it("returns {} for loadout with no permissions/capabilities (empty fields)", () => {
26
+ const loadout: WireLoadout = {};
27
+ expect(loadoutToSpawnOptions(loadout)).toEqual({});
28
+ });
29
+
30
+ it("returns {} when permissions is present but all rule arrays are empty", () => {
31
+ const loadout: WireLoadout = {
32
+ permissions: { allow: [], deny: [], ask: [] },
33
+ };
34
+ // hasAnyRule returns false → no permissions or fullAutonomous propagated.
35
+ expect(loadoutToSpawnOptions(loadout, { fullAutonomous: true })).toEqual({});
36
+ });
37
+
38
+ it("propagates deny rules + sets fullAutonomous from ctx (true)", () => {
39
+ const loadout: WireLoadout = {
40
+ permissions: { deny: ["Bash(rm -rf:*)"] },
41
+ };
42
+ const result = loadoutToSpawnOptions(loadout, { fullAutonomous: true });
43
+ expect(result.permissions).toEqual({
44
+ allow: [],
45
+ deny: ["Bash(rm -rf:*)"],
46
+ ask: [],
47
+ });
48
+ expect(result.fullAutonomous).toBe(true);
49
+ });
50
+
51
+ it("propagates deny rules + sets fullAutonomous from ctx (false)", () => {
52
+ const loadout: WireLoadout = {
53
+ permissions: { deny: ["Bash(rm -rf:*)"] },
54
+ };
55
+ const result = loadoutToSpawnOptions(loadout, { fullAutonomous: false });
56
+ expect(result.permissions).toEqual({
57
+ allow: [],
58
+ deny: ["Bash(rm -rf:*)"],
59
+ ask: [],
60
+ });
61
+ expect(result.fullAutonomous).toBe(false);
62
+ });
63
+
64
+ it("defaults fullAutonomous to false when ctx omits it but permissions are present", () => {
65
+ const loadout: WireLoadout = {
66
+ permissions: { ask: ["Write(*.env)"] },
67
+ };
68
+ const result = loadoutToSpawnOptions(loadout);
69
+ expect(result.permissions).toEqual({
70
+ allow: [],
71
+ deny: [],
72
+ ask: ["Write(*.env)"],
73
+ });
74
+ expect(result.fullAutonomous).toBe(false);
75
+ });
76
+
77
+ it("does NOT propagate fullAutonomous when no permissions are present", () => {
78
+ // ctx.fullAutonomous is meaningless without permissions to apply it to.
79
+ const loadout: WireLoadout = { capabilities: ["editor"] };
80
+ const result = loadoutToSpawnOptions(loadout, { fullAutonomous: true });
81
+ expect(result.fullAutonomous).toBeUndefined();
82
+ expect(result.permissions).toBeUndefined();
83
+ });
84
+
85
+ it("forwards capabilities (cloned, not aliased)", () => {
86
+ const sourceCaps = ["editor", "git"];
87
+ const loadout: WireLoadout = { capabilities: sourceCaps };
88
+ const result = loadoutToSpawnOptions(loadout);
89
+ expect(result.capabilities).toEqual(["editor", "git"]);
90
+ // Ensure we cloned — mutating the result's array must not affect the source.
91
+ result.capabilities!.push("newcap");
92
+ expect(sourceCaps).toEqual(["editor", "git"]);
93
+ });
94
+
95
+ it("does not include capabilities when the array is empty", () => {
96
+ const loadout: WireLoadout = { capabilities: [] };
97
+ const result = loadoutToSpawnOptions(loadout);
98
+ expect(result.capabilities).toBeUndefined();
99
+ });
100
+
101
+ it("ignores mcpProviders (Phase 2 — reserved/no-op)", () => {
102
+ const loadout: WireLoadout = {
103
+ mcpProviders: [
104
+ { name: "github", command: "github-mcp", args: ["--read-only"] },
105
+ ],
106
+ };
107
+ const result = loadoutToSpawnOptions(loadout);
108
+ // The provider should NOT appear on the returned spawn options.
109
+ expect(result).toEqual({});
110
+ expect((result as Record<string, unknown>).mcpProviders).toBeUndefined();
111
+ });
112
+
113
+ it("ignores mcpScope (Phase 1 — reserved/no-op)", () => {
114
+ const loadout: WireLoadout = {
115
+ mcpScope: [{ server: "github", tools: ["search"] }],
116
+ };
117
+ const result = loadoutToSpawnOptions(loadout);
118
+ expect(result).toEqual({});
119
+ expect((result as Record<string, unknown>).mcpScope).toBeUndefined();
120
+ });
121
+
122
+ it("combines permissions + capabilities + ignores mcp fields together", () => {
123
+ const loadout: WireLoadout = {
124
+ permissions: { allow: ["Read(**)"], deny: ["Bash(rm:*)"] },
125
+ capabilities: ["editor"],
126
+ mcpProviders: [{ name: "github" }],
127
+ mcpScope: [{ server: "github" }],
128
+ };
129
+ const result = loadoutToSpawnOptions(loadout, { fullAutonomous: true });
130
+
131
+ expect(result.permissions).toEqual({
132
+ allow: ["Read(**)"],
133
+ deny: ["Bash(rm:*)"],
134
+ ask: [],
135
+ });
136
+ expect(result.fullAutonomous).toBe(true);
137
+ expect(result.capabilities).toEqual(["editor"]);
138
+ expect((result as Record<string, unknown>).mcpProviders).toBeUndefined();
139
+ expect((result as Record<string, unknown>).mcpScope).toBeUndefined();
140
+ });
141
+ });