npm - macro-agent - Versions diffs - 0.1.12 → 0.2.0 - Mend

macro-agent 0.1.12 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

package/dist/agent/agent-manager-v2.d.ts.map +1 -1
package/dist/agent/agent-manager-v2.js +240 -7
package/dist/agent/agent-manager-v2.js.map +1 -1
package/dist/agent/types.d.ts +47 -0
package/dist/agent/types.d.ts.map +1 -1
package/dist/agent/types.js.map +1 -1
package/dist/boot-v2.d.ts +33 -0
package/dist/boot-v2.d.ts.map +1 -1
package/dist/boot-v2.js +142 -11
package/dist/boot-v2.js.map +1 -1
package/dist/cli/inbox-mcp-proxy.d.ts +36 -0
package/dist/cli/inbox-mcp-proxy.d.ts.map +1 -0
package/dist/cli/inbox-mcp-proxy.js +51 -0
package/dist/cli/inbox-mcp-proxy.js.map +1 -0
package/dist/dispatch/loadout-translation.d.ts +100 -0
package/dist/dispatch/loadout-translation.d.ts.map +1 -0
package/dist/dispatch/loadout-translation.js +90 -0
package/dist/dispatch/loadout-translation.js.map +1 -0
package/dist/dispatch/mail-inbound-consumer.d.ts +89 -0
package/dist/dispatch/mail-inbound-consumer.d.ts.map +1 -0
package/dist/dispatch/mail-inbound-consumer.js +261 -0
package/dist/dispatch/mail-inbound-consumer.js.map +1 -0
package/dist/dispatch/mail-inbound-reuse-consumer.d.ts +75 -0
package/dist/dispatch/mail-inbound-reuse-consumer.d.ts.map +1 -0
package/dist/dispatch/mail-inbound-reuse-consumer.js +325 -0
package/dist/dispatch/mail-inbound-reuse-consumer.js.map +1 -0
package/dist/dispatch/permission-evaluator.d.ts +68 -0
package/dist/dispatch/permission-evaluator.d.ts.map +1 -0
package/dist/dispatch/permission-evaluator.js +159 -0
package/dist/dispatch/permission-evaluator.js.map +1 -0
package/dist/dispatch/permission-overlay.d.ts +64 -0
package/dist/dispatch/permission-overlay.d.ts.map +1 -0
package/dist/dispatch/permission-overlay.js +72 -0
package/dist/dispatch/permission-overlay.js.map +1 -0
package/dist/dispatch/permissions-handler.d.ts +71 -0
package/dist/dispatch/permissions-handler.d.ts.map +1 -0
package/dist/dispatch/permissions-handler.js +83 -0
package/dist/dispatch/permissions-handler.js.map +1 -0
package/dist/dispatch/spawn-agent-handler.d.ts +84 -0
package/dist/dispatch/spawn-agent-handler.d.ts.map +1 -0
package/dist/dispatch/spawn-agent-handler.js +85 -0
package/dist/dispatch/spawn-agent-handler.js.map +1 -0
package/dist/lifecycle/handlers-v2.d.ts +7 -0
package/dist/lifecycle/handlers-v2.d.ts.map +1 -1
package/dist/lifecycle/handlers-v2.js +27 -0
package/dist/lifecycle/handlers-v2.js.map +1 -1
package/dist/map/lifecycle-bridge.d.ts +18 -0
package/dist/map/lifecycle-bridge.d.ts.map +1 -1
package/dist/map/lifecycle-bridge.js +23 -1
package/dist/map/lifecycle-bridge.js.map +1 -1
package/dist/map/mail-bridge.d.ts +55 -0
package/dist/map/mail-bridge.d.ts.map +1 -0
package/dist/map/mail-bridge.js +115 -0
package/dist/map/mail-bridge.js.map +1 -0
package/dist/map/sidecar.d.ts.map +1 -1
package/dist/map/sidecar.js +245 -1
package/dist/map/sidecar.js.map +1 -1
package/dist/map/types.d.ts +15 -0
package/dist/map/types.d.ts.map +1 -1
package/dist/mcp/tools/done-v2.d.ts.map +1 -1
package/dist/mcp/tools/done-v2.js +1 -0
package/dist/mcp/tools/done-v2.js.map +1 -1
package/dist/teams/team-loader.d.ts.map +1 -1
package/dist/teams/team-loader.js.map +1 -1
package/dist/teams/team-runtime-v2.d.ts.map +1 -1
package/dist/teams/team-runtime-v2.js +2 -0
package/dist/teams/team-runtime-v2.js.map +1 -1
package/package.json +6 -5
package/src/agent/__tests__/agent-manager-v2.permission-interception.test.ts +296 -0
package/src/agent/__tests__/agent-manager-v2.permissions.test.ts +233 -0
package/src/agent/agent-manager-v2.ts +268 -8
package/src/agent/types.ts +51 -0
package/src/boot-v2.ts +190 -12
package/src/cli/inbox-mcp-proxy.ts +56 -0
package/src/dispatch/CLAUDE.md +129 -0
package/src/dispatch/__tests__/loadout-translation.test.ts +141 -0
package/src/dispatch/__tests__/mail-inbound-consumer.integration.test.ts +519 -0
package/src/dispatch/__tests__/mail-inbound-consumer.test.ts +589 -0
package/src/dispatch/__tests__/mail-inbound-reuse-consumer.test.ts +575 -0
package/src/dispatch/__tests__/permission-evaluator.test.ts +196 -0
package/src/dispatch/__tests__/permission-overlay.test.ts +56 -0
package/src/dispatch/__tests__/permissions-handler.test.ts +168 -0
package/src/dispatch/__tests__/spawn-agent-handler.test.ts +282 -0
package/src/dispatch/loadout-translation.ts +138 -0
package/src/dispatch/mail-inbound-consumer.ts +397 -0
package/src/dispatch/mail-inbound-reuse-consumer.ts +479 -0
package/src/dispatch/permission-evaluator.ts +191 -0
package/src/dispatch/permission-overlay.ts +89 -0
package/src/dispatch/permissions-handler.ts +112 -0
package/src/dispatch/spawn-agent-handler.ts +160 -0
package/src/lifecycle/handlers-v2.ts +34 -0
package/src/map/__tests__/lifecycle-bridge.test.ts +64 -0
package/src/map/__tests__/mail-bridge.test.ts +196 -0
package/src/map/lifecycle-bridge.ts +48 -2
package/src/map/mail-bridge.ts +203 -0
package/src/map/sidecar.ts +346 -1
package/src/map/types.ts +21 -0
package/src/mcp/tools/done-v2.ts +1 -0
package/src/teams/team-loader.ts +3 -1
package/src/teams/team-runtime-v2.ts +2 -0
package/dist/workspace/dataplane-adapter.d.ts +0 -260
package/dist/workspace/dataplane-adapter.d.ts.map +0 -1
package/dist/workspace/dataplane-adapter.js +0 -416
package/dist/workspace/dataplane-adapter.js.map +0 -1

package/src/dispatch/mail-inbound-reuse-consumer.ts ADDED Viewed

@@ -0,0 +1,479 @@
+/**
+ * Mail-Inbound Reuse Consumer
+ *
+ * Receives hub-driven `x-dispatch/work` envelopes addressed to **non-sidecar**
+ * agents — long-lived team workers, coordinators, etc. — and drives them
+ * through the dispatch turn using their existing session, then posts the
+ * summary back as a mail turn.
+ *
+ * Mirrors `mail-inbound-consumer.ts` but with three semantic differences:
+ *
+ *   1. Filters envelopes addressed to ANY non-sidecar agent (the existing
+ *      consumer filters for the dispatcher recipient).
+ *   2. Does **not** spawn — it drives the existing agent's session via
+ *      `agentManager.prompt(agentId, prompt)` and watches for `done()` in
+ *      the update stream.
+ *   3. Tracks `inflightDispatches` per agentId. A second envelope arriving
+ *      while the same agent is already processing a dispatch is rejected
+ *      with `recipient_busy` so the orchestrator can retry against another
+ *      agent (or fall back to fresh-spawn). Reject is **dispatch-scoped**
+ *      — non-dispatch work on the agent (peer messages, user chat) does
+ *      NOT trigger the busy reject; that work stacks naturally.
+ *
+ * Reply path: captures `args.summary` from the done() tool call's rawInput
+ * directly off the update stream, so it works for both parented and
+ * parentless target agents (the parented branch in `handlers-v2` does NOT
+ * stash `_lastSummary` — only parentless agents do). When the in-stream
+ * capture yields no summary, the consumer falls back to reading
+ * `_lastSummary` from the agent store's metadata before giving up.
+ *
+ * @module dispatch/mail-inbound-reuse-consumer
+ */
+import type { AgentManager } from "../agent/agent-manager.js";
+import type { AgentStore } from "../agent/agent-store.js";
+import type { ExtendedSessionUpdate } from "acp-factory";
+import type {
+  InboxEvents,
+  InboxMessageEvent,
+  MailInboundSidecar,
+} from "./mail-inbound-consumer.js";
+import {
+  collapsePermissionsForAutonomous,
+  type WireLoadout,
+} from "./loadout-translation.js";
+import {
+  setPermissionOverlay,
+  clearPermissionOverlay,
+} from "./permission-overlay.js";
+/**
+ * Dependencies and configuration accepted by
+ * `createMailInboundReuseConsumer`.
+ */
+export interface MailInboundReuseConsumerOptions {
+  /**
+   * The sidecar agent ID. Envelopes addressed to this id are handled by
+   * the original `mail-inbound-consumer` (fresh-spawn path); the reuse
+   * consumer ignores them so the two consumers' filters don't overlap.
+   */
+  dispatcherAgentId: string;
+  /** Raw inbox event emitter (from inboxAdapter.getInbox().events). */
+  inboxEvents: InboxEvents;
+  /** Agent lifecycle manager — used to drive the existing session. */
+  agentManager: AgentManager;
+  /**
+   * Agent store — used to confirm the target agent is known and not
+   * stopped/failed, and to read/clear the `_lastSummary` metadata
+   * fallback after a dispatch.
+   */
+  agentStore: AgentStore;
+  /**
+   * Lazy accessor for the sidecar. The accessor itself is required, but
+   * it may return `null`/`undefined` (the sidecar is populated after
+   * step 13 in boot-v2 via the shared systemRef). The consumer calls it
+   * at reply time; when no sidecar (or no `postMailTurn`) is available
+   * the reply turn is logged and dropped.
+   */
+  getSidecar: () => MailInboundSidecar | null | undefined;
+  /** Optional logger (default: console.log). */
+  log?: (msg: string) => void;
+}
+/** Counters reported by `MailInboundReuseConsumer.stats()`. */
+export interface MailInboundReuseConsumerStats {
+  /** Count of `x-dispatch/work` envelopes dropped because they lacked a taskId. */
+  droppedMalformed: number;
+  /**
+   * Number of distinct taskIds currently tracked for dedup. Expired
+   * entries are pruned before this value is read.
+   */
+  seenTaskIds: number;
+  /** Number of rejects emitted because the target agent was already busy with a dispatch. */
+  busyRejects: number;
+  /**
+   * Number of dispatches currently in flight. The tracking map is keyed
+   * by agentId, so this is also the number of agents busy with a dispatch.
+   */
+  inflightCount: number;
+}
+/** Handle returned by `createMailInboundReuseConsumer`. */
+export interface MailInboundReuseConsumer {
+  /**
+   * Detach the inbox listener. Repeated calls are no-ops. Only the
+   * listener is removed; dispatches already in flight are not cancelled.
+   */
+  stop(): void;
+  /** Snapshot of the consumer's counters (prunes expired dedup entries first). */
+  stats(): MailInboundReuseConsumerStats;
+}
+/** Book-keeping for the single dispatch an agent is currently processing. */
+interface InflightDispatch {
+  /** The `taskId` of the envelope being processed. */
+  dispatchId: string;
+  /** The envelope's `_conversationId`, or `null` when it carried none. */
+  conversationId: string | null;
+  /** `Date.now()` timestamp taken when the dispatch was accepted. */
+  startedAt: number;
+}
+/** How long a taskId stays in the dedup map, in milliseconds (one hour). */
+const SEEN_TASK_TTL_MS = 60 * 60 * 1000;
+/**
+ * Wire the mail-inbound reuse consumer.
+ *
+ * Returns a handle whose `stop()` detaches the inbox listener and whose
+ * `stats()` reports the consumer's counters.
+ */
+export function createMailInboundReuseConsumer(
+  opts: MailInboundReuseConsumerOptions,
+): MailInboundReuseConsumer {
+  const {
+    dispatcherAgentId,
+    inboxEvents,
+    agentManager,
+    agentStore,
+    getSidecar,
+    log = (msg: string) => console.log(msg),
+  } = opts;
+  // agentId → inflight dispatch state. Used both to gate concurrent
+  // dispatches against the same agent and to look up the conversation
+  // when posting the reply.
+  const inflightDispatches = new Map<string, InflightDispatch>();
+  // taskId → expiresAt: idempotency guard mirroring mail-inbound-consumer.
+  const seenTaskIds = new Map<string, number>();
+  /**
+   * Evict every dedup entry whose expiry timestamp is at or before the
+   * current time.
+   */
+  function pruneSeenTaskIds(): void {
+    const cutoff = Date.now();
+    seenTaskIds.forEach((expiry, trackedId) => {
+      if (expiry <= cutoff) {
+        seenTaskIds.delete(trackedId);
+      }
+    });
+  }
+  // Running totals surfaced through stats(): envelopes dropped for a
+  // missing taskId, and envelopes rejected with `recipient_busy`.
+  let droppedMalformedCount = 0;
+  let busyRejectCount = 0;
+  // Announce readiness once, at wiring time.
+  log(
+    `[mail-inbound-reuse] Consumer ready — listening for x-dispatch/work envelopes ` +
+      `addressed to non-sidecar agents (sidecar=${dispatcherAgentId})`,
+  );
+  /**
+   * Inbox listener. Filters for `x-dispatch/work` envelopes addressed to
+   * non-sidecar agents, de-duplicates by taskId, validates the target
+   * agent, enforces the one-dispatch-per-agent gate and then starts
+   * `driveDispatch` without awaiting it.
+   *
+   * Every reject path posts a reply turn (`agent_unavailable` /
+   * `recipient_busy`) so the sender is not left waiting; an unexpected
+   * failure of the drive itself is reported as `failed`.
+   *
+   * @param event Inbox event; `event.agentId` is the recipient and
+   *   `event.message.content` is expected to carry the dispatch envelope.
+   */
+  const onMessage = (event: InboxMessageEvent): void => {
+    // Only handle envelopes addressed to NON-sidecar agents. Sidecar
+    // envelopes are owned by mail-inbound-consumer (fresh-spawn).
+    if (event.agentId === dispatcherAgentId) return;
+    const content = event.message?.content as
+      | {
+          schema?: string;
+          data?: {
+            taskId?: string;
+            prompt?: string;
+            content?: string;
+            role?: string;
+            tags?: string[];
+            loadout?: WireLoadout;
+            metadata?: Record<string, unknown>;
+          };
+          _conversationId?: string;
+        }
+      | undefined;
+    if (content?.schema !== "x-dispatch/work") return;
+    const data = content.data;
+    if (!data?.taskId) {
+      droppedMalformedCount++;
+      log(
+        `[mail-inbound-reuse] Dropping malformed envelope (no taskId, total=${droppedMalformedCount})`,
+      );
+      return;
+    }
+    const taskId = data.taskId;
+    pruneSeenTaskIds();
+    const seenExpiresAt = seenTaskIds.get(taskId);
+    if (seenExpiresAt !== undefined && seenExpiresAt > Date.now()) {
+      // Re-delivery within dedup window — silently drop.
+      return;
+    }
+    // NOTE(review): the taskId is recorded as seen BEFORE the
+    // unavailable/busy checks below, so an envelope that is rejected
+    // still occupies the dedup window. If the orchestrator retries the
+    // same taskId against another agent on this swarm, that retry would
+    // be dropped silently here — confirm this is intended.
+    seenTaskIds.set(taskId, Date.now() + SEEN_TASK_TTL_MS);
+    const targetAgentId = event.agentId;
+    const conversationId = content._conversationId ?? null;
+    // Prefer `prompt`, fall back to `content`; an envelope with neither
+    // drives the agent with an empty prompt.
+    const prompt = data.prompt ?? data.content ?? "";
+    // Resolve target — must be a known, non-stopped agent.
+    const targetRecord = agentStore.getAgent(targetAgentId);
+    if (!targetRecord) {
+      log(
+        `[mail-inbound-reuse] Unknown target agent ${targetAgentId} for taskId=${taskId} — dropping`,
+      );
+      void postReplyTurn(conversationId, targetAgentId, {
+        status: "agent_unavailable",
+        reason: `Agent ${targetAgentId} not registered on this swarm`,
+      });
+      return;
+    }
+    if (targetRecord.state === "stopped" || targetRecord.state === "failed") {
+      log(
+        `[mail-inbound-reuse] Target agent ${targetAgentId} state=${targetRecord.state} — dropping taskId=${taskId}`,
+      );
+      void postReplyTurn(conversationId, targetAgentId, {
+        status: "agent_unavailable",
+        reason: `Agent ${targetAgentId} state=${targetRecord.state}`,
+      });
+      return;
+    }
+    // In-flight check — only reject when this same agent is already
+    // processing another tracked dispatch. Non-dispatch work (peer chat,
+    // user prompts) does not block; promptUntilDone-style serial stacking
+    // handles that.
+    const existing = inflightDispatches.get(targetAgentId);
+    if (existing) {
+      busyRejectCount++;
+      log(
+        `[mail-inbound-reuse] recipient_busy — agent=${targetAgentId} already processing ` +
+          `dispatch=${existing.dispatchId}; rejecting taskId=${taskId}`,
+      );
+      void postReplyTurn(conversationId, targetAgentId, {
+        status: "recipient_busy",
+        reason: `Agent ${targetAgentId} is processing dispatch ${existing.dispatchId}`,
+      });
+      return;
+    }
+    log(
+      `[mail-inbound-reuse] Driving dispatch taskId=${taskId} on existing agent=${targetAgentId} ` +
+        `conv=${conversationId ?? "(none)"}`,
+    );
+    inflightDispatches.set(targetAgentId, {
+      dispatchId: taskId,
+      conversationId,
+      startedAt: Date.now(),
+    });
+    // Drive the agent's existing session via raw `prompt()` rather than
+    // `promptUntilDone` because the latter auto-terminates the agent on
+    // done() — fatal for long-lived workers we want to reuse. We watch
+    // the update stream ourselves for the done() tool call and capture
+    // the summary inline.
+    void driveDispatch(
+      targetAgentId,
+      taskId,
+      prompt,
+      conversationId,
+      data.loadout,
+    )
+      .catch((err: unknown) => {
+        // driveDispatch only guards the prompt loop itself; anything
+        // thrown outside that guard (e.g. while translating or applying
+        // the loadout overlay) rejects the promise. `.finally` alone
+        // would re-propagate that as an unhandled rejection and the
+        // sender would never get a reply, so handle it here.
+        const reason = err instanceof Error ? err.message : String(err);
+        log(
+          `[mail-inbound-reuse] driveDispatch failed for agent=${targetAgentId} taskId=${taskId}: ` +
+            `${reason}`,
+        );
+        void postReplyTurn(conversationId, targetAgentId, {
+          status: "failed",
+          reason: `Dispatch failed: ${reason}`,
+        });
+      })
+      .finally(() => {
+        inflightDispatches.delete(targetAgentId);
+        // Always clear the permission overlay, even if driveDispatch
+        // didn't set one — keeps the registry tidy and defends against
+        // a future code path that sets one but skips its own cleanup.
+        clearPermissionOverlay(targetAgentId);
+      });
+  };
+  /**
+   * Run one dispatch on an existing agent session and post the outcome
+   * back to the conversation.
+   *
+   * Steps: apply the loadout's permission overlay (if any), iterate
+   * `agentManager.prompt()` while watching for the done() tool call,
+   * fall back to the stored `_lastSummary` when no summary was captured,
+   * then post exactly one reply turn (summary, `failed`, `incomplete`,
+   * or a "no summary" note). Reply posting is fire-and-forget.
+   *
+   * The caller is responsible for clearing the permission overlay and
+   * the in-flight entry once the returned promise settles.
+   *
+   * @param targetAgentId Agent whose existing session is prompted.
+   * @param taskId Dispatch identifier, used for logging and the fallback reply text.
+   * @param prompt Prompt text sent to the agent.
+   * @param conversationId Conversation to reply on, or `null` (reply is then dropped by `postReplyTurn`).
+   * @param loadout Optional wire loadout whose `permissions` become the runtime overlay.
+   */
+  async function driveDispatch(
+    targetAgentId: string,
+    taskId: string,
+    prompt: string,
+    conversationId: string | null,
+    loadout: WireLoadout | undefined,
+  ): Promise<void> {
+    // Apply the dispatch's loadout permissions as a runtime overlay
+    // for the duration of this prompt drive. The PreToolUse hook
+    // installed at spawn-time consults the overlay registry per tool
+    // call and denies calls that match the loadout's deny rules.
+    // `fullAutonomous: true` because mail-inbound workers have no
+    // human in the loop — `ask` rules collapse to `allow`. The caller
+    // clears the overlay unconditionally in its `finally` so a crash
+    // mid-prompt doesn't leave a stale overlay on the agent.
+    const overlay = collapsePermissionsForAutonomous(
+      loadout?.permissions,
+      /* fullAutonomous */ true,
+    );
+    if (overlay) {
+      setPermissionOverlay(targetAgentId, overlay);
+      log(
+        `[mail-inbound-reuse] Applied permission overlay for agent=${targetAgentId} ` +
+          `taskId=${taskId} (deny=${overlay.deny.length} allow=${overlay.allow.length})`,
+      );
+    }
+    let summary: string | undefined;
+    let status: string | undefined;
+    let doneSeen = false;
+    let promptError: Error | undefined;
+    try {
+      for await (const update of agentManager.prompt(targetAgentId, prompt)) {
+        const captured = captureDoneCall(update);
+        if (captured) {
+          doneSeen = true;
+          if (captured.summary) summary = captured.summary;
+          if (captured.status) status = captured.status;
+        }
+      }
+    } catch (err: unknown) {
+      // Normalize whatever was thrown into an Error. A bare cast would
+      // make `.message` itself throw when the rejection value is
+      // `null`/`undefined`, escaping this handler.
+      promptError = err instanceof Error ? err : new Error(String(err));
+      log(
+        `[mail-inbound-reuse] prompt() threw for agent=${targetAgentId} taskId=${taskId}: ` +
+          `${promptError.message}`,
+      );
+    }
+    // Fallback: read `_lastSummary` from agentStore. The done() handler
+    // persists this for in-flight agents (Phase 2C) so the reply path
+    // is reliable even when the prompt iterator's update stream raced
+    // the ACP connection close. Covers:
+    //   - prompt() threw before yielding the done() update (catch above)
+    //   - inline capture saw done() but `args.summary` was empty
+    //   - iterator yielded but our captureDoneCall missed (shape drift)
+    // NOTE(review): a `_lastSummary` left over from earlier work on the
+    // same agent would also be picked up here, since it is only cleared
+    // after a successful reply below — confirm whether it should be
+    // cleared before prompting.
+    if (!summary) {
+      try {
+        const record = agentStore.getAgent(targetAgentId);
+        const fallback = record?.metadata?._lastSummary;
+        if (typeof fallback === "string" && fallback.length > 0) {
+          summary = fallback;
+          doneSeen = true;
+          log(
+            `[mail-inbound-reuse] Recovered summary from _lastSummary fallback for agent=${targetAgentId} taskId=${taskId}`,
+          );
+        }
+      } catch {
+        /* best effort — store may be closing during shutdown */
+      }
+    }
+    // Post reply: prefer real summary, fall back to status notes when
+    // we genuinely have nothing.
+    if (summary) {
+      void postReplyTurn(conversationId, targetAgentId, summary).then(() => {
+        // Clear the persisted summary so it doesn't replay if the same
+        // agentId is dispatched again. Best-effort.
+        try {
+          const existing = agentStore.getAgent(targetAgentId)?.metadata ?? {};
+          const { _lastSummary: _drop, ...rest } = existing as Record<string, unknown>;
+          void _drop;
+          agentStore.updateAgent(targetAgentId, { metadata: rest });
+        } catch {
+          /* best effort */
+        }
+      });
+      return;
+    }
+    if (promptError) {
+      void postReplyTurn(conversationId, targetAgentId, {
+        status: "failed",
+        reason: `Prompt failed: ${promptError.message}`,
+      });
+      return;
+    }
+    if (!doneSeen) {
+      log(
+        `[mail-inbound-reuse] Agent ${targetAgentId} finished prompt without calling done() ` +
+          `for taskId=${taskId} — posting "incomplete" reply`,
+      );
+      void postReplyTurn(conversationId, targetAgentId, {
+        status: "incomplete",
+        reason: "Agent did not call done() within the prompt cycle",
+      });
+      return;
+    }
+    // done() was observed but carried no summary: report the status only.
+    void postReplyTurn(
+      conversationId,
+      targetAgentId,
+      `Dispatch ${taskId} ${status ?? "completed"} (no summary)`,
+    );
+  }
+  /**
+   * Inspect one session update and, when it represents a `done()` tool
+   * call, pull `{ status, summary }` out of its arguments.
+   *
+   * Two shapes are recognised: a `tool_call` / `tool_call_update` whose
+   * title ends with `__done` (arguments read from `rawInput`, or from
+   * `input` when `rawInput` is absent), and an older
+   * `type: "result"` / `subtype: "tool_result"` / `toolName: "done"`
+   * record (arguments read from `result`). Mirrors `promptUntilDone`'s
+   * detection but keeps the summary.
+   *
+   * @returns The extracted fields, or `null` when the update is not a
+   *   done() call or its arguments are not available yet.
+   */
+  function captureDoneCall(
+    update: ExtendedSessionUpdate,
+  ): { status?: string; summary?: string } | null {
+    type DoneArgs = { status?: string; summary?: string };
+    const fields = update as unknown as Record<string, unknown>;
+    const kind = fields.sessionUpdate;
+    const toolTitle = fields.title;
+    const isToolCallKind = kind === "tool_call" || kind === "tool_call_update";
+    const titledDone =
+      typeof toolTitle === "string" && toolTitle.endsWith("__done");
+    if (isToolCallKind && titledDone) {
+      let args: DoneArgs | undefined;
+      try {
+        const raw = fields.rawInput;
+        if (typeof raw === "string") {
+          args = JSON.parse(raw) as DoneArgs;
+        } else if (raw && typeof raw === "object") {
+          args = raw as DoneArgs;
+        } else if (fields.input && typeof fields.input === "object") {
+          args = fields.input as DoneArgs;
+        }
+      } catch {
+        // A string rawInput that does not parse yet (tool call still
+        // streaming across several updates) is treated as "no arguments".
+      }
+      return args ? { status: args.status, summary: args.summary } : null;
+    }
+    // Older fallback shape.
+    const isLegacyResult =
+      fields.type === "result" &&
+      fields.subtype === "tool_result" &&
+      fields.toolName === "done";
+    if (!isLegacyResult) return null;
+    const legacy = fields.result as DoneArgs | undefined;
+    return legacy ? { status: legacy.status, summary: legacy.summary } : null;
+  }
+  /**
+   * Post a reply turn to the dispatch's conversation through the
+   * sidecar's `postMailTurn`.
+   *
+   * Best-effort: the reply is logged and dropped when there is no
+   * conversationId, no sidecar, or no `postMailTurn`; a failing
+   * `postMailTurn` is logged rather than rethrown, because every caller
+   * invokes this function fire-and-forget.
+   *
+   * @param conversationId Conversation to reply on, or `null`.
+   * @param fromAgentId Agent the reply is attributed to.
+   * @param content Plain summary text, or a `{ status, reason }` note
+   *   that is sent JSON-serialized.
+   */
+  async function postReplyTurn(
+    conversationId: string | null,
+    fromAgentId: string,
+    content: string | { status: string; reason: string },
+  ): Promise<void> {
+    if (!conversationId) {
+      log(
+        `[mail-inbound-reuse] No conversationId — reply for ${fromAgentId} dropped: ` +
+          `${typeof content === "string" ? content.slice(0, 80) : content.status}`,
+      );
+      return;
+    }
+    const sidecar = getSidecar();
+    if (!sidecar?.postMailTurn) {
+      log(`[mail-inbound-reuse] No sidecar/postMailTurn — reply turn dropped`);
+      return;
+    }
+    const body = typeof content === "string" ? content : JSON.stringify(content);
+    try {
+      await sidecar.postMailTurn(conversationId, fromAgentId, body);
+    } catch (err: unknown) {
+      // Narrow instead of casting: reading `.message` off a `null` or
+      // `undefined` rejection value would throw inside this handler and
+      // surface as an unhandled rejection in the fire-and-forget callers.
+      const detail = err instanceof Error ? err.message : String(err);
+      log(
+        `[mail-inbound-reuse] postMailTurn failed for ${fromAgentId}: ` +
+          `${detail}`,
+      );
+    }
+  }
+  // Subscribe to inbox traffic; the returned handle undoes this.
+  inboxEvents.on("inbox.message", onMessage);
+  let stopped = false;
+  // Remove the listener through whichever removal method the emitter exposes.
+  const detachListener = (): void => {
+    if (inboxEvents.off) {
+      inboxEvents.off("inbox.message", onMessage);
+      return;
+    }
+    if (inboxEvents.removeListener) {
+      inboxEvents.removeListener("inbox.message", onMessage);
+    }
+  };
+  return {
+    /** Detach the listener once; later calls do nothing. */
+    stop() {
+      if (stopped) return;
+      stopped = true;
+      try {
+        detachListener();
+      } catch {
+        // best effort
+      }
+      log(`[mail-inbound-reuse] Consumer stopped`);
+    },
+    /** Report current counters after dropping expired dedup entries. */
+    stats() {
+      pruneSeenTaskIds();
+      const snapshot: MailInboundReuseConsumerStats = {
+        droppedMalformed: droppedMalformedCount,
+        seenTaskIds: seenTaskIds.size,
+        busyRejects: busyRejectCount,
+        inflightCount: inflightDispatches.size,
+      };
+      return snapshot;
+    },
+  };
+}

package/src/dispatch/permission-evaluator.ts ADDED Viewed

@@ -0,0 +1,191 @@
+/**
+ * Permission Evaluator
+ *
+ * Pure function: given a tool call (name + input) and an overlay's
+ * permission rules, decide whether to deny, allow, or pass-through.
+ * Used by the `PreToolUse` hook installed at spawn time to enforce
+ * dispatch-supplied loadout permissions on a running session.
+ *
+ * Rule format (matches Claude Agent SDK convention):
+ *
+ *     <ToolName>                  — match any call to this tool
+ *     <ToolName>(<glob-pattern>)  — match calls whose primary input
+ *                                    field matches the glob pattern
+ *
+ * The "primary input field" is tool-specific:
+ *
+ *   Bash                          → input.command
+ *   Read / Write / Edit / MultiEdit → input.file_path
+ *   Grep / Glob                   → input.pattern
+ *   NotebookRead / NotebookEdit   → input.notebook_path
+ *   <other>                       → no field-level match; rule must be bare `<ToolName>`
+ *
+ * Glob: `*` matches any sequence of characters (no path-segment
+ * distinction). Other regex specials are escaped.
+ *
+ * Decision precedence:
+ *
+ *   1. If any rule in `deny` matches → 'deny'
+ *   2. Else if any rule in `allow` matches → 'allow'
+ *   3. Else → 'pass-through' (let the session's static rules decide)
+ *
+ * `ask` rules are not evaluated here — the consumer is expected to
+ * collapse `ask` to either `allow` or `deny` before setting the
+ * overlay (via `collapsePermissionsForAutonomous` based on the
+ * spawn's `fullAutonomous` flag). The evaluator sees only collapsed
+ * `allow` and `deny` lists.
+ *
+ * @module dispatch/permission-evaluator
+ */
+import type { OverlayPermissions } from "./permission-overlay.js";
+/** Outcome of evaluating one tool call against an overlay. */
+export interface PermissionDecision {
+  /** `deny` / `allow` when a rule matched; `pass-through` when none did. */
+  decision: "allow" | "deny" | "pass-through";
+  /** The rule string that produced the decision; absent for `pass-through`. */
+  matchedRule?: string;
+  /**
+   * Name of the input field the rule's pattern was tested against;
+   * absent when the matching rule had no pattern and for `pass-through`.
+   */
+  matchedField?: string;
+}
+/**
+ * Tool-name → primary input field name. The named field is the one a
+ * rule's glob pattern is tested against. Add entries as needed for
+ * additional tools. Tools not listed have no field-level matching;
+ * only bare `<ToolName>` rules apply.
+ */
+const PRIMARY_INPUT_FIELD: Record<string, string> = {
+  Bash: "command",
+  Read: "file_path",
+  Write: "file_path",
+  Edit: "file_path",
+  MultiEdit: "file_path",
+  Grep: "pattern",
+  Glob: "pattern",
+  NotebookRead: "notebook_path",
+  NotebookEdit: "notebook_path",
+};
+/**
+ * Decide what an overlay says about a single tool call.
+ *
+ * Rules are consulted in two tiers — every `deny` rule first, then every
+ * `allow` rule — and the first rule that matches settles the decision.
+ * When nothing matches the result is `'pass-through'`, meaning the
+ * caller should defer to the session's static permission rules.
+ *
+ * @param toolName Name of the tool being invoked.
+ * @param toolInput The tool's input payload (inspected only for pattern rules).
+ * @param overlay Collapsed overlay rules; a missing list is treated as empty.
+ * @returns The decision plus the rule (and input field, if any) that produced it.
+ */
+export function evaluatePermission(
+  toolName: string,
+  toolInput: unknown,
+  overlay: OverlayPermissions,
+): PermissionDecision {
+  // Order matters: deny rules win over allow.
+  const tiers: Array<["deny" | "allow", readonly string[]]> = [
+    ["deny", overlay.deny ?? []],
+    ["allow", overlay.allow ?? []],
+  ];
+  for (const [verdict, rules] of tiers) {
+    for (const rule of rules) {
+      const { matched, field } = matchRule(rule, toolName, toolInput);
+      if (!matched) continue;
+      const outcome: PermissionDecision = {
+        decision: verdict,
+        matchedRule: rule,
+      };
+      // Only report a field when the rule actually tested one.
+      if (field) outcome.matchedField = field;
+      return outcome;
+    }
+  }
+  return { decision: "pass-through" };
+}
+/** Result of testing one rule against one tool call. */
+interface RuleMatch {
+  /** Whether the rule applies to the call. */
+  matched: boolean;
+  /** Input field the pattern was tested against; set only when a pattern was evaluated. */
+  field?: string;
+}
+/**
+ * Check whether one rule applies to a tool call, and report which input
+ * field (if any) the rule's pattern was compared against.
+ *
+ * A rule that fails to parse, or names a different tool, never matches.
+ * A rule with no pattern — or an empty one such as `Bash()` — matches
+ * every call to its tool. A pattern rule matches only when the tool has
+ * a primary input field, that field holds a string, and the string
+ * satisfies the glob.
+ *
+ * Exported only for testability; production callers should use
+ * `evaluatePermission`.
+ */
+export function matchRule(
+  rule: string,
+  toolName: string,
+  toolInput: unknown,
+): RuleMatch {
+  const parsed = parseRule(rule);
+  if (parsed === null || parsed.toolName !== toolName) {
+    return { matched: false };
+  }
+  const { pattern } = parsed;
+  // Bare `<ToolName>` and empty-parens rules cover the whole tool.
+  if (pattern === undefined || pattern === "") {
+    return { matched: true };
+  }
+  // Pattern-bearing rules need a known primary field to test against;
+  // for tools without one they never match.
+  const fieldName = PRIMARY_INPUT_FIELD[toolName];
+  if (!fieldName) {
+    return { matched: false };
+  }
+  const candidate = readField(toolInput, fieldName);
+  if (typeof candidate !== "string") {
+    return { matched: false };
+  }
+  return {
+    matched: globToRegex(pattern).test(candidate),
+    field: fieldName,
+  };
+}
+/** A permission rule split into its tool name and optional glob pattern. */
+interface ParsedRule {
+  /** Tool the rule applies to (the text before any parentheses). */
+  toolName: string;
+  /**
+   * Text between the parentheses. undefined → bare `<ToolName>` rule
+   * (no parentheses); an empty string means the rule was written `Tool()`.
+   */
+  pattern?: string;
+}
+/**
+ * Split a rule string into tool name and optional pattern.
+ *
+ * Accepted shapes are `<ToolName>` and `<ToolName>(<pattern>)`. Tool
+ * names start with a letter or underscore and may contain letters,
+ * digits, underscores and hyphens — MCP tools use hyphens in their
+ * server prefix (e.g., `mcp__agent-inbox__list_agents`).
+ *
+ * @returns The parsed rule, or `null` when the string has neither shape.
+ */
+function parseRule(rule: string): ParsedRule | null {
+  const parts = /^([A-Za-z_][A-Za-z0-9_-]*)(?:\((.*)\))?$/.exec(rule);
+  if (parts === null) return null;
+  const toolName = parts[1] as string;
+  const pattern = parts[2];
+  // Leave `pattern` off entirely for bare rules so callers can tell
+  // "no parentheses" apart from "empty parentheses".
+  return pattern === undefined ? { toolName } : { toolName, pattern };
+}
+/**
+ * Read `field` from `input` when `input` is a non-null object;
+ * otherwise return `undefined`.
+ */
+function readField(input: unknown, field: string): unknown {
+  const isObjectLike = typeof input === "object" && input !== null;
+  return isObjectLike
+    ? (input as Record<string, unknown>)[field]
+    : undefined;
+}
+/**
+ * Convert a Claude permission glob into a regex. Only `*` is special;
+ * everything else is treated as a literal. The result is anchored
+ * (`^...$`) for whole-string matching.
+ *
+ * `*` matches ANY sequence of characters, line terminators included.
+ * A plain `.*` stops at a newline, which would let a multi-line value
+ * (for example a `Bash` command containing `\n`) slip past a `deny`
+ * glob that would otherwise match it. Consecutive `*` are collapsed
+ * into one wildcard: the matched language is the same and the regex
+ * avoids redundant backtracking.
+ *
+ * @param pattern Glob text taken from between a rule's parentheses.
+ * @returns An anchored regular expression equivalent to the glob.
+ */
+function globToRegex(pattern: string): RegExp {
+  let out = "^";
+  let previousWasStar = false;
+  for (const ch of pattern) {
+    if (ch === "*") {
+      // `[\s\S]` (unlike `.`) also matches line terminators.
+      if (!previousWasStar) out += "[\\s\\S]*";
+      previousWasStar = true;
+    } else {
+      // Escape regex specials.
+      out += ch.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
+      previousWasStar = false;
+    }
+  }
+  out += "$";
+  return new RegExp(out);
+}