npm - macro-agent - Versions diffs - 0.1.11 → 0.2.0 - Mend

macro-agent 0.1.11 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)

package/dist/agent/agent-manager-v2.d.ts.map +1 -1
package/dist/agent/agent-manager-v2.js +240 -7
package/dist/agent/agent-manager-v2.js.map +1 -1
package/dist/agent/types.d.ts +47 -0
package/dist/agent/types.d.ts.map +1 -1
package/dist/agent/types.js.map +1 -1
package/dist/boot-v2.d.ts +33 -0
package/dist/boot-v2.d.ts.map +1 -1
package/dist/boot-v2.js +142 -11
package/dist/boot-v2.js.map +1 -1
package/dist/cli/inbox-mcp-proxy.d.ts +36 -0
package/dist/cli/inbox-mcp-proxy.d.ts.map +1 -0
package/dist/cli/inbox-mcp-proxy.js +51 -0
package/dist/cli/inbox-mcp-proxy.js.map +1 -0
package/dist/dispatch/loadout-translation.d.ts +100 -0
package/dist/dispatch/loadout-translation.d.ts.map +1 -0
package/dist/dispatch/loadout-translation.js +90 -0
package/dist/dispatch/loadout-translation.js.map +1 -0
package/dist/dispatch/mail-inbound-consumer.d.ts +89 -0
package/dist/dispatch/mail-inbound-consumer.d.ts.map +1 -0
package/dist/dispatch/mail-inbound-consumer.js +261 -0
package/dist/dispatch/mail-inbound-consumer.js.map +1 -0
package/dist/dispatch/mail-inbound-reuse-consumer.d.ts +75 -0
package/dist/dispatch/mail-inbound-reuse-consumer.d.ts.map +1 -0
package/dist/dispatch/mail-inbound-reuse-consumer.js +325 -0
package/dist/dispatch/mail-inbound-reuse-consumer.js.map +1 -0
package/dist/dispatch/permission-evaluator.d.ts +68 -0
package/dist/dispatch/permission-evaluator.d.ts.map +1 -0
package/dist/dispatch/permission-evaluator.js +159 -0
package/dist/dispatch/permission-evaluator.js.map +1 -0
package/dist/dispatch/permission-overlay.d.ts +64 -0
package/dist/dispatch/permission-overlay.d.ts.map +1 -0
package/dist/dispatch/permission-overlay.js +72 -0
package/dist/dispatch/permission-overlay.js.map +1 -0
package/dist/dispatch/permissions-handler.d.ts +71 -0
package/dist/dispatch/permissions-handler.d.ts.map +1 -0
package/dist/dispatch/permissions-handler.js +83 -0
package/dist/dispatch/permissions-handler.js.map +1 -0
package/dist/dispatch/spawn-agent-handler.d.ts +84 -0
package/dist/dispatch/spawn-agent-handler.d.ts.map +1 -0
package/dist/dispatch/spawn-agent-handler.js +85 -0
package/dist/dispatch/spawn-agent-handler.js.map +1 -0
package/dist/lifecycle/handlers-v2.d.ts +7 -0
package/dist/lifecycle/handlers-v2.d.ts.map +1 -1
package/dist/lifecycle/handlers-v2.js +27 -0
package/dist/lifecycle/handlers-v2.js.map +1 -1
package/dist/map/lifecycle-bridge.d.ts +18 -0
package/dist/map/lifecycle-bridge.d.ts.map +1 -1
package/dist/map/lifecycle-bridge.js +23 -1
package/dist/map/lifecycle-bridge.js.map +1 -1
package/dist/map/mail-bridge.d.ts +55 -0
package/dist/map/mail-bridge.d.ts.map +1 -0
package/dist/map/mail-bridge.js +115 -0
package/dist/map/mail-bridge.js.map +1 -0
package/dist/map/sidecar.d.ts.map +1 -1
package/dist/map/sidecar.js +245 -1
package/dist/map/sidecar.js.map +1 -1
package/dist/map/types.d.ts +15 -0
package/dist/map/types.d.ts.map +1 -1
package/dist/mcp/tools/done-v2.d.ts.map +1 -1
package/dist/mcp/tools/done-v2.js +1 -0
package/dist/mcp/tools/done-v2.js.map +1 -1
package/dist/teams/seed-defaults.d.ts.map +1 -1
package/dist/teams/seed-defaults.js +6 -2
package/dist/teams/seed-defaults.js.map +1 -1
package/dist/teams/team-loader.d.ts.map +1 -1
package/dist/teams/team-loader.js +17 -1
package/dist/teams/team-loader.js.map +1 -1
package/dist/teams/team-runtime-v2.d.ts.map +1 -1
package/dist/teams/team-runtime-v2.js +2 -0
package/dist/teams/team-runtime-v2.js.map +1 -1
package/package.json +6 -6
package/src/agent/__tests__/agent-manager-v2.permission-interception.test.ts +296 -0
package/src/agent/__tests__/agent-manager-v2.permissions.test.ts +233 -0
package/src/agent/agent-manager-v2.ts +268 -8
package/src/agent/types.ts +51 -0
package/src/boot-v2.ts +190 -12
package/src/cli/inbox-mcp-proxy.ts +56 -0
package/src/dispatch/CLAUDE.md +129 -0
package/src/dispatch/__tests__/loadout-translation.test.ts +141 -0
package/src/dispatch/__tests__/mail-inbound-consumer.integration.test.ts +519 -0
package/src/dispatch/__tests__/mail-inbound-consumer.test.ts +589 -0
package/src/dispatch/__tests__/mail-inbound-reuse-consumer.test.ts +575 -0
package/src/dispatch/__tests__/permission-evaluator.test.ts +196 -0
package/src/dispatch/__tests__/permission-overlay.test.ts +56 -0
package/src/dispatch/__tests__/permissions-handler.test.ts +168 -0
package/src/dispatch/__tests__/spawn-agent-handler.test.ts +282 -0
package/src/dispatch/loadout-translation.ts +138 -0
package/src/dispatch/mail-inbound-consumer.ts +397 -0
package/src/dispatch/mail-inbound-reuse-consumer.ts +479 -0
package/src/dispatch/permission-evaluator.ts +191 -0
package/src/dispatch/permission-overlay.ts +89 -0
package/src/dispatch/permissions-handler.ts +112 -0
package/src/dispatch/spawn-agent-handler.ts +160 -0
package/src/lifecycle/handlers-v2.ts +34 -0
package/src/map/__tests__/lifecycle-bridge.test.ts +64 -0
package/src/map/__tests__/mail-bridge.test.ts +196 -0
package/src/map/lifecycle-bridge.ts +48 -2
package/src/map/mail-bridge.ts +203 -0
package/src/map/sidecar.ts +346 -1
package/src/map/types.ts +21 -0
package/src/mcp/tools/done-v2.ts +1 -0
package/src/teams/seed-defaults.ts +6 -2
package/src/teams/team-loader.ts +21 -2
package/src/teams/team-runtime-v2.ts +2 -0
package/src/workspace/__tests__/self-driving-yaml.test.ts +10 -2
package/templates/teams/self-driving/team.yaml +142 -0
package/tsconfig.json +2 -1

package/src/boot-v2.ts CHANGED Viewed

@@ -263,6 +263,39 @@ export interface BootV2Config {
       agentType?: string;
       customPrompt?: string;
       task?: string;
+      /**
+       * When true, spawn the bootstrap coordinator with
+       * `askForAllTools: true` and `permissionMode: 'interactive'` so the
+       * Claude SDK consults `canUseTool` for every tool call and
+       * acp-factory emits the resulting requests as `permission_request`
+       * session updates. This is the prerequisite for ACP+reuse dispatch
+       * to actually enforce per-dispatch loadout deny rules via the
+       * runtime overlay registry — the prompt iterator can only deny
+       * tools the SDK asks about.
+       *
+       * Trade-off: every tool call roundtrips through the host (~1-5ms
+       * latency penalty per call). Acceptable for autonomous dispatch
+       * targets; would be heavy for high-frequency interactive chat.
+       *
+       * Defaults to false (preserves the existing chat-friendly mode).
+       */
+      dispatchTarget?: boolean;
+    };
+    /**
+     * Optional extra worker spawned after the bootstrap coordinator
+     * comes up. Used by live tests (e.g., `live-mail-reuse-dispatch`)
+     * to provide a dispatch target for mail+reuse `done()` handling.
+     * NOTE: bootV2 currently spawns it with `parent: null`, so it is a
+     * top-level agent, not a child of the bootstrap coordinator.
+     *
+     * Default `role`: `'reuse-target'`. Choose a role that does NOT
+     * collide with the sidecar's projected `'worker'` role in the
+     * hub-side roster — otherwise prefer-route may tie-break to the
+     * sidecar instead of this worker.
+     */
+    worker?: boolean | {
+      role?: string;
+      task?: string;
     };
     /**
      * Rehydration policy for agents that existed before this boot. Controls
@@ -377,11 +410,32 @@ export async function bootV2(
     !config.bootstrap?.coordinator
   ) {
     const envCwd = process.env.MACRO_BOOTSTRAP_CWD;
+    const dispatchTarget =
+      process.env.MACRO_BOOTSTRAP_COORDINATOR_DISPATCH_TARGET === "true";
+    const coordObj: Record<string, unknown> = {};
+    if (envCwd) coordObj.cwd = envCwd;
+    if (dispatchTarget) coordObj.dispatchTarget = true;
     config = {
       ...config,
       bootstrap: {
         ...(config.bootstrap ?? {}),
-        coordinator: envCwd ? { cwd: envCwd } : true,
+        coordinator:
+          Object.keys(coordObj).length > 0
+            ? (coordObj as never)
+            : true,
+      },
+    };
+  }
+  if (
+    process.env.MACRO_BOOTSTRAP_WORKER === "true" &&
+    !config.bootstrap?.worker
+  ) {
+    const envWorkerRole = process.env.MACRO_BOOTSTRAP_WORKER_ROLE;
+    config = {
+      ...config,
+      bootstrap: {
+        ...(config.bootstrap ?? {}),
+        worker: envWorkerRole ? { role: envWorkerRole } : true,
       },
     };
   }
@@ -496,14 +550,73 @@ export async function bootV2(
   // 7a. Task Dispatch (opt-in autonomous task dispatch mode)
   let taskDispatcher: import("swarm-dispatch").TaskDispatcher | null = null;
+  // Hoisted so the MAP sidecar (step 13) can forward it to the mail bridge.
+  let dispatcherAgentId: string | undefined;
+  // Mail-inbound consumer — always wired (does not require dispatch.enabled).
+  let mailInboundConsumer: import("./dispatch/mail-inbound-consumer.js").MailInboundConsumer | null = null;
+  // Mail-inbound REUSE consumer — handles `x-dispatch/work` envelopes
+  // addressed to non-sidecar agents (long-lived workers/coordinators) and
+  // drives them through the dispatch turn using their existing session.
+  // Always wired so reuse routing works even without the outbound
+  // orchestrator. Filters non-overlapping with mailInboundConsumer.
+  let mailInboundReuseConsumer:
+    | import("./dispatch/mail-inbound-reuse-consumer.js").MailInboundReuseConsumer
+    | null = null;
+  {
+    // Stable dispatcher ID used as the inbox recipient for bridged envelopes.
+    // Matches the id the mail-bridge registers and delivers to. The outbound
+    // orchestrator (below, opt-in) reuses the same id so both code paths share
+    // one inbox recipient — no double-processing because the consumer only
+    // fires spawn() on inbox events while the orchestrator fires spawn()
+    // only when polling opentasks (different trigger paths).
+    const { getStableInstanceId } = await import("./cli/stable-instance-id.js");
+    const inboundClaimantId = `${os.hostname()}:${process.pid}:${getStableInstanceId(cwd)}`;
+    const inboundDispatcherId = `dispatcher:${inboundClaimantId}`;
+    dispatcherAgentId = inboundDispatcherId;
+    // Register the inbox recipient so mail-bridge's registerAgent call is a
+    // no-op (it uses an upsert) and the inbox accepts deliveries immediately.
+    await inboxAdapter.registerAgent(inboundDispatcherId, {
+      role: "dispatcher",
+      scope: "default",
+    });
+    const rawInbox = inboxAdapter.getInbox();
+    const { createMailInboundConsumer } = await import(
+      "./dispatch/mail-inbound-consumer.js"
+    );
+    mailInboundConsumer = createMailInboundConsumer({
+      dispatcherAgentId: inboundDispatcherId,
+      inboxEvents: rawInbox.events as any,
+      agentManager,
+      agentStore,
+      getSidecar: () => (systemRef as any).mapSidecar ?? null,
+      log: (msg) => console.log(msg),
+    });
+    // Reuse consumer for envelopes addressed to long-lived workers/
+    // coordinators. Non-overlapping filter (event.agentId !== sidecarId).
+    const { createMailInboundReuseConsumer } = await import(
+      "./dispatch/mail-inbound-reuse-consumer.js"
+    );
+    mailInboundReuseConsumer = createMailInboundReuseConsumer({
+      dispatcherAgentId: inboundDispatcherId,
+      inboxEvents: rawInbox.events as any,
+      agentManager,
+      agentStore,
+      getSidecar: () => (systemRef as any).mapSidecar ?? null,
+      log: (msg) => console.log(msg),
+    });
+  }
   if (config.dispatch?.enabled && tasksAdapter) {
     const { createOrchestrator, createOpenTasksSource, createAgentInboxPort } =
       await import("swarm-dispatch");
-    const { getStableInstanceId } = await import("./cli/stable-instance-id.js");
-    const claimantId = `${os.hostname()}:${process.pid}:${getStableInstanceId(cwd)}`;
-    const dispatchAgentId = `dispatcher:${claimantId}`;
+    // dispatcherAgentId is already set by the unconditional mail-inbound block above.
+    // Use it directly so both paths share the same inbox recipient.
+    const dispatchAgentId = dispatcherAgentId!;
     // Adapt opentasks client → DispatchTaskSource
     const opentasksClient = (tasksAdapter as any).client;
@@ -569,6 +682,7 @@ export async function bootV2(
               type?: string;
               schema?: string;
               data?: any;
+              _conversationId?: string;
             };
             if (content?.schema !== "x-dispatch/work") return null;
             const data = content.data;
@@ -586,18 +700,19 @@ export async function bootV2(
                 metadata: {
                   ...data.metadata,
                   role: data.role,
+                  // Thread conversation_id through so the reply bridge can post
+                  // the worker's output back to the hub's mail conversation.
+                  ...(content._conversationId
+                    ? { _mailConversationId: content._conversationId }
+                    : {}),
                 },
               },
             };
           },
         },
       );
-      // Register the dispatcher as an agent in the inbox so it can receive messages
-      await inboxAdapter.registerAgent(dispatchAgentId, {
-        role: "dispatcher",
-        scope: "default",
-      });
+      // Note: registerAgent for dispatchAgentId was already called in the
+      // unconditional mail-inbound block above — no need to repeat here.
     }
     // Phase 2: Wire AgentRoster via inbox agent listing for route-first dispatch
@@ -631,7 +746,7 @@ export async function bootV2(
       (hasRouting ? ("prefer-route" as const) : ("spawn-only" as const));
     taskDispatcher = createOrchestrator(source, runtime, {
-      claimantId,
+      claimantId: dispatchAgentId,
       pollIntervalMs: config.dispatch.pollIntervalMs ?? 15_000,
       defaultRole: config.dispatch.defaultRole ?? "worker",
       concurrency: { global: config.dispatch.maxConcurrent ?? 3 },
@@ -841,6 +956,7 @@ export async function bootV2(
             ? (id: string) => mapServerInstance!.getLocalMapId(id)
             : undefined,
           gitCascadeAdapter,
+          dispatcherAgentId,
         },
         {
           server: config.map.server,
@@ -1009,18 +1125,78 @@ export async function bootV2(
         return;
       }
       // No priors matched the policy → fresh spawn (first boot, or 'none').
+      // dispatchTarget mode: bake askForAllTools + permissionMode='interactive'
+      // into the spawn so the SDK funnels every tool call through canUseTool
+      // and acp-factory emits permission_request session updates the prompt
+      // iterator's overlay-enforcement path can consume.
+      const isDispatchTarget = opts.dispatchTarget === true;
       const spawned = await agentManager.spawn({
         role: "coordinator",
         parent: null,
         cwd: bootstrapCwd,
         task: opts.task ?? "Default coordinator (auto-spawn on boot)",
-        permissionMode: opts.permissionMode,
+        permissionMode: isDispatchTarget
+          ? "interactive"
+          : opts.permissionMode,
         agentType: opts.agentType,
         customPrompt: opts.customPrompt,
+        ...(isDispatchTarget ? { askForAllTools: true } : {}),
       });
       console.log(
         `[boot-v2] Bootstrap coordinator spawned: ${(spawned as any).name ?? spawned.id} at ${bootstrapCwd}`,
       );
+      // Optional: bootstrap an additional worker for live tests
+      // exercising mail+reuse semantics. Spawned with parent=null
+      // because:
+      //   - The role-capability check only fires for parented spawns
+      //     (agent-manager-v2 line 559-572); bypassing it lets us use
+      //     a custom role (e.g., 'reuse-target') that doesn't collide
+      //     with the sidecar's projected 'worker' in the hub-side
+      //     dispatch roster.
+      //   - The worker's done() lifecycle is the same as the bootstrap
+      //     coord's: terminate cleanly. Phase 2C's `_lastSummary`
+      //     fallback (handlers-v2 + mail-inbound-reuse-consumer)
+      //     ensures the dispatch reply path recovers the summary from
+      //     metadata even if the prompt iterator's update stream races
+      //     the ACP connection close on terminate.
+      if (config.bootstrap?.worker) {
+        const workerOpts = config.bootstrap.worker === true
+          ? {}
+          : config.bootstrap.worker;
+        const workerRole = workerOpts.role ?? "reuse-target";
+        try {
+          const workerSpawned = await agentManager.spawn({
+            role: workerRole,
+            parent: null,
+            cwd: bootstrapCwd,
+            task: workerOpts.task ?? "Await dispatch",
+            // Funnel every tool call through the host so the prompt-iterator
+            // handler can apply per-dispatch overlay deny rules at runtime
+            // (Phase 3). Two layers must both be set:
+            //   - askForAllTools=true → settings.permissions.ask=['*'] so
+            //     the Claude SDK actually consults canUseTool for every
+            //     tool (without this, default mode auto-approves "safe"
+            //     tools like Read).
+            //   - permissionMode='interactive' → acp-factory emits the
+            //     resulting requestPermission as a `permission_request`
+            //     session update instead of auto-approving it (which is
+            //     macro-agent's default 'auto-approve' behavior).
+            // Bootstrap dispatch targets are autonomous + latency-tolerant
+            // so the per-call host roundtrip is acceptable.
+            askForAllTools: true,
+            permissionMode: "interactive",
+          });
+          console.log(
+            `[boot-v2] Bootstrap dispatch-target spawned: ${(workerSpawned as any).name ?? workerSpawned.id} ` +
+              `(role=${workerRole}, parent=null)`,
+          );
+        } catch (err) {
+          console.warn(
+            `[boot-v2] Bootstrap worker spawn failed: ${(err as Error).message}`,
+          );
+        }
+      }
     };
     rehydrateOrSpawn().catch((err: Error) => {
@@ -1050,6 +1226,8 @@ export async function bootV2(
     async shutdown(): Promise<void> {
       clearInterval(healthCheckTimer);
+      if (mailInboundConsumer) mailInboundConsumer.stop();
+      if (mailInboundReuseConsumer) mailInboundReuseConsumer.stop();
       if (taskDispatcher) await taskDispatcher.stop();
       if (mapSidecar) await mapSidecar.stop();
       if (mapServerInstance) await mapServerInstance.stop();

package/src/cli/inbox-mcp-proxy.ts ADDED Viewed

@@ -0,0 +1,56 @@
+/**
+ * inbox-mcp-proxy.ts — stdio entry that exposes agent-inbox tools as an
+ * MCP server to a spawned worker.
+ *
+ * Why this exists:
+ *
+ *   The agent-inbox package ships an `InboxMcpProxy` (in dist/mcp/mcp-proxy.js)
+ *   designed to bridge agent-inbox IPC ↔ MCP-stdio. macro-agent's
+ *   `agentManager.spawn` configures this as a per-spawn MCP server so the
+ *   spawned worker has access to `send_message`, `check_inbox`,
+ *   `read_thread`, `list_agents` — the tools the macro-agent architecture
+ *   docs claim are available, but which were not actually being mounted on
+ *   workers prior to this fix.
+ *
+ *   Particularly important for mail-inbound dispatch workers, which run with
+ *   `isolatedSettings: true` and therefore can't pick up host-level plugin
+ *   MCPs that would otherwise have provided agent-inbox.
+ *
+ * Env vars consumed:
+ *
+ *   INBOX_SOCKET_PATH  (required)  — path to agent-inbox's IPC socket. Set
+ *                                    by `buildMcpServerConfig` in
+ *                                    agent-manager-v2.ts.
+ *   MACRO_AGENT_ID     (optional)  — the spawned worker's agent id; used as
+ *                                    the proxy's `defaultAgentId` so tools
+ *                                    like check_inbox auto-target the
+ *                                    caller's mailbox.
+ *
+ * Failure mode:
+ *
+ *   If INBOX_SOCKET_PATH is unset, the script exits with a non-zero status
+ *   so Claude Code's MCP-init reports the misconfiguration loudly rather
+ *   than silently leaving the worker without inbox tools.
+ */
+import { InboxMcpProxy } from "agent-inbox";
+async function main(): Promise<void> { // stdio entry point: builds the inbox MCP proxy from env vars and starts it
+  const socketPath = process.env.INBOX_SOCKET_PATH;
+  if (!socketPath) { // unset or empty: refuse to start
+    console.error(
+      "[inbox-mcp-proxy] INBOX_SOCKET_PATH is unset — cannot start. " +
+        "macro-agent's agentManager.spawn should always inject this env var.",
+    );
+    process.exit(1); // non-zero exit so the misconfiguration is reported rather than silently ignored
+  }
+  const agentId = process.env.MACRO_AGENT_ID || "anonymous"; // || rather than ?? so an empty string also falls back to the default id
+  const proxy = new InboxMcpProxy(socketPath, agentId);
+  await proxy.start();
+}
+main().catch((err) => { // any rejection from main(): log its message and exit non-zero
+  console.error(`[inbox-mcp-proxy] Fatal: ${(err as Error).message}`);
+  process.exit(1);
+});

package/src/dispatch/CLAUDE.md ADDED Viewed

@@ -0,0 +1,129 @@
+# macro-agent/src/dispatch — hub-driven work intake
+This directory contains the consumer-side handlers that receive
+hub-orchestrated `x-dispatch/work` envelopes from a connected OpenHive
+hub (or any hub speaking the same wire shape). Two consumers, one
+mail-bridge:
+- **`mail-inbound-consumer.ts`** — addressed to the sidecar (`recipient =
+  dispatcher:<claimantId>`). Spawns a **fresh, parentless** worker per
+  envelope. Used when the hub picks the sidecar as the mail target —
+  typically because no specialized worker is registered yet, or because
+  the hub's `mail_lifecycle: 'fresh'` hint forced sidecar routing.
+- **`mail-inbound-reuse-consumer.ts`** — addressed to a **non-sidecar**
+  agent that already exists on the swarm. Forwards the envelope into
+  the long-lived agent's prompt iterator instead of spawning. Used for
+  `mail_lifecycle: 'reuse'` dispatches against multi-role swarms.
+- **`../map/mail-bridge.ts`** — receives `mail/turn.received` MAP
+  notifications from the hub and forwards each turn into the sidecar's
+  local agent-inbox so the consumers above can dispatch on `inbox.message`.
+## Wire envelope shape (hub contract)
+Every envelope follows:
+```json
+{
+  "type": "x-dispatch/work",
+  "body": {
+    "taskId": "disp_xxx",
+    "prompt": "<rendered prompt>",
+    "role": "worker | executor | reviewer | ...",
+    "loadout": { "permissions": {...}, "mcpProviders": [...], ... },
+    "metadata": { "permissions": ..., "mcpProviders": ... },  // legacy
+    "_conversationId": "conv_xxx"  // mail-reuse path only
+  }
+}
+```
+- `body.role` may be **any** string the hub side surfaces (e.g., from a
+  team template's `team_role_ref.role`). It is NOT guaranteed to match
+  any built-in or locally-configured macro-agent role. The consumer
+  must defend against unknown role strings — see "Role validation" below.
+- `body.loadout` is the canonical structured slot. `body.metadata` carries
+  the legacy permissions/MCP fields for one deprecation cycle so older
+  consumers continue to work.
+## Role validation (regression hardening)
+When the hub surfaces a team-defined role like `'executor'`, the wire
+envelope arrives at `mail-inbound-consumer.ts` with `body.role:
+'executor'`. macro-agent's role registry has no `'executor'` entry, so
+`roleRegistry.resolveRole('executor')` falls back to `GenericRole` —
+which has:
+- `lifecycle.type: 'persistent'` (no auto-cleanup, no task-bound timeout)
+- `WILDCARD_CAPABILITY` (so it has the `done` capability technically),
+  but no system-prompt instruction telling the agent to call `done()`
+When such an agent finishes a single prompt cycle (`promptUntilDone(...,
+{maxFollowUps: 0})`), it stops without invoking `done({summary: ...})`.
+The lifecycle handler's `_lastSummary` write is gated on `args.summary`,
+so nothing gets persisted. The consumer reads back `_lastSummary` to
+post the reply turn — empty → "no reply turn posted" → the hub never
+sees an answer and the dispatch silently dies.
+The fix: validate the requested role against the registry **before**
+passing it to `agentManager.spawn`. Unknown roles fall back to
+`'worker'` (ephemeral lifecycle + `LIFECYCLE_CAPABILITIES.DONE` +
+explicit "you MUST call done()" system prompt):
+```typescript
+const requestedRole = data.role;
+const roleRegistry = agentManager.getRoleRegistry?.();
+const knownRole =
+  requestedRole && roleRegistry?.getRole(requestedRole) !== undefined;
+const role = knownRole ? requestedRole! : "worker";
+if (requestedRole && !knownRole) {
+  log(`[mail-inbound] Unknown role '${requestedRole}' for taskId=${taskId} — falling back to 'worker'`);
+}
+```
+`roleRegistry.getRole(name)` is exact-match across custom / project /
+user / built-in maps and returns `undefined` for unknown names —
+distinct from `resolveRole(name)` which always returns *something* via
+`GenericRole`. We use the former here intentionally so the fallback
+path is observable (we log a warning) rather than silent.
+**Apply the same pattern to any future consumer that spawns workers
+from a wire envelope.** The receiving side owns the role taxonomy;
+hubs cannot be expected to use names that match the receiver's
+registry.
+## Reply path (worker → hub)
+After the consumer spawns a worker:
+1. `agentManager.promptUntilDone(spawnedId, prompt, {maxFollowUps: 0})`
+   drives the worker through one prompt cycle.
+2. Worker calls `done({status, summary})` — the lifecycle handler
+   writes `_lastSummary` to agent metadata (`handlers-v2.ts:233`,
+   gated on `inDispatch || !parentId`).
+3. AgentManager fires the lifecycle `stopped` event.
+4. The consumer's `stopped` listener reads `_lastSummary` from
+   `agentStore.getAgent(agentId)` and posts a reply turn into the
+   dispatch conversation via `mail/turn`.
+5. The hub's `mail.turn.added` event fires; the orchestrator's
+   reply-demuxer routes it back to swarm-dispatch as the dispatch
+   completion.
+If `_lastSummary` is empty when the worker stops, the consumer logs
+"Worker stopped but _lastSummary is empty — no reply turn posted" and
+bails. This is the canary for role-validation failures, lifecycle
+misconfigurations, or agents that exit without calling `done()`.
+## Idempotency + dedup
+Both consumers track `seenTaskIds` with a 1-hour TTL to drop duplicate
+deliveries (the hub's mail-push bridge can re-fire `mail.turn.added`
+on reconnect; the local inbox can re-emit `inbox.message` for the same
+logical turn). Within the TTL window, a re-delivered taskId is
+silently ignored. After the TTL expires, a stale retry could
+legitimately re-spawn — preferable to permanent memory growth.
+## Tests
+- `__tests__/mail-inbound-consumer.test.ts` — unit-level spawn/reply flow with mocked AgentManager
+- `__tests__/mail-inbound-consumer.integration.test.ts` — full lifecycle through real `AgentManagerV2`
+- `__tests__/mail-inbound-reuse-consumer.test.ts` — reuse-path classification + filtering
+- Live e2e (in OpenHive): `src/__tests__/swarm/live-{loadout,mail-reuse}-dispatch*.test.ts` — gated by `LIVE_AGENT_E2E=true`

package/src/dispatch/__tests__/loadout-translation.test.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * Unit tests for `loadoutToSpawnOptions` — the pure-function translator
+ * from wire-shape `MaterializedLoadout` to macro-agent `SpawnAgentOptions`.
+ *
+ * These tests pin the shared contract used by both the mail-inbound
+ * consumer and the new `dispatch/spawn-agent` MAP handler. Both paths
+ * should produce identical spawn options for the same loadout input.
+ */
+import { describe, it, expect } from "vitest";
+import {
+  loadoutToSpawnOptions,
+  type WireLoadout,
+} from "../loadout-translation.js";
+describe("loadoutToSpawnOptions", () => { // pins the wire-loadout to spawn-options translation contract
+  it("returns {} for undefined loadout", () => {
+    expect(loadoutToSpawnOptions(undefined)).toEqual({});
+  });
+  it("returns {} when ctx is provided but loadout is undefined", () => {
+    expect(loadoutToSpawnOptions(undefined, { fullAutonomous: true })).toEqual({}); // ctx alone yields no spawn options
+  });
+  it("returns {} for loadout with no permissions/capabilities (empty fields)", () => {
+    const loadout: WireLoadout = {};
+    expect(loadoutToSpawnOptions(loadout)).toEqual({});
+  });
+  it("returns {} when permissions is present but all rule arrays are empty", () => {
+    const loadout: WireLoadout = {
+      permissions: { allow: [], deny: [], ask: [] },
+    };
+    // hasAnyRule returns false → no permissions or fullAutonomous propagated.
+    expect(loadoutToSpawnOptions(loadout, { fullAutonomous: true })).toEqual({});
+  });
+  it("propagates deny rules + sets fullAutonomous from ctx (true)", () => {
+    const loadout: WireLoadout = {
+      permissions: { deny: ["Bash(rm -rf:*)"] },
+    };
+    const result = loadoutToSpawnOptions(loadout, { fullAutonomous: true });
+    expect(result.permissions).toEqual({ // rule lists absent from the input are expected back as empty arrays
+      allow: [],
+      deny: ["Bash(rm -rf:*)"],
+      ask: [],
+    });
+    expect(result.fullAutonomous).toBe(true);
+  });
+  it("propagates deny rules + sets fullAutonomous from ctx (false)", () => {
+    const loadout: WireLoadout = {
+      permissions: { deny: ["Bash(rm -rf:*)"] },
+    };
+    const result = loadoutToSpawnOptions(loadout, { fullAutonomous: false });
+    expect(result.permissions).toEqual({
+      allow: [],
+      deny: ["Bash(rm -rf:*)"],
+      ask: [],
+    });
+    expect(result.fullAutonomous).toBe(false);
+  });
+  it("defaults fullAutonomous to false when ctx omits it but permissions are present", () => {
+    const loadout: WireLoadout = {
+      permissions: { ask: ["Write(*.env)"] },
+    };
+    const result = loadoutToSpawnOptions(loadout);
+    expect(result.permissions).toEqual({
+      allow: [],
+      deny: [],
+      ask: ["Write(*.env)"],
+    });
+    expect(result.fullAutonomous).toBe(false); // no ctx passed: the flag is still emitted, as false
+  });
+  it("does NOT propagate fullAutonomous when no permissions are present", () => {
+    // ctx.fullAutonomous is meaningless without permissions to apply it to.
+    const loadout: WireLoadout = { capabilities: ["editor"] };
+    const result = loadoutToSpawnOptions(loadout, { fullAutonomous: true });
+    expect(result.fullAutonomous).toBeUndefined();
+    expect(result.permissions).toBeUndefined();
+  });
+  it("forwards capabilities (cloned, not aliased)", () => {
+    const sourceCaps = ["editor", "git"];
+    const loadout: WireLoadout = { capabilities: sourceCaps };
+    const result = loadoutToSpawnOptions(loadout);
+    expect(result.capabilities).toEqual(["editor", "git"]);
+    // Ensure we cloned — mutating the result's array must not affect the source.
+    result.capabilities!.push("newcap");
+    expect(sourceCaps).toEqual(["editor", "git"]);
+  });
+  it("does not include capabilities when the array is empty", () => {
+    const loadout: WireLoadout = { capabilities: [] };
+    const result = loadoutToSpawnOptions(loadout);
+    expect(result.capabilities).toBeUndefined(); // an empty list is dropped rather than forwarded as []
+  });
+  it("ignores mcpProviders (Phase 2 — reserved/no-op)", () => {
+    const loadout: WireLoadout = {
+      mcpProviders: [
+        { name: "github", command: "github-mcp", args: ["--read-only"] },
+      ],
+    };
+    const result = loadoutToSpawnOptions(loadout);
+    // The provider should NOT appear on the returned spawn options.
+    expect(result).toEqual({});
+    expect((result as Record<string, unknown>).mcpProviders).toBeUndefined();
+  });
+  it("ignores mcpScope (Phase 1 — reserved/no-op)", () => {
+    const loadout: WireLoadout = {
+      mcpScope: [{ server: "github", tools: ["search"] }],
+    };
+    const result = loadoutToSpawnOptions(loadout);
+    expect(result).toEqual({});
+    expect((result as Record<string, unknown>).mcpScope).toBeUndefined();
+  });
+  it("combines permissions + capabilities + ignores mcp fields together", () => {
+    const loadout: WireLoadout = {
+      permissions: { allow: ["Read(**)"], deny: ["Bash(rm:*)"] },
+      capabilities: ["editor"],
+      mcpProviders: [{ name: "github" }],
+      mcpScope: [{ server: "github" }],
+    };
+    const result = loadoutToSpawnOptions(loadout, { fullAutonomous: true });
+    expect(result.permissions).toEqual({ // ask was omitted in the input and is expected back as []
+      allow: ["Read(**)"],
+      deny: ["Bash(rm:*)"],
+      ask: [],
+    });
+    expect(result.fullAutonomous).toBe(true);
+    expect(result.capabilities).toEqual(["editor"]);
+    expect((result as Record<string, unknown>).mcpProviders).toBeUndefined();
+    expect((result as Record<string, unknown>).mcpScope).toBeUndefined();
+  });
+});