npm - @desplega.ai/agent-swarm - Versions diffs - 1.100.2 → 1.100.4 - Mend

@desplega.ai/agent-swarm 1.100.2 → 1.100.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/openapi.json +1 -1
package/package.json +1 -1
package/src/be/db.ts +131 -4
package/src/be/memory/raters/retrieval.ts +6 -3
package/src/be/migrations/097_memory_retrieval_grouping.sql +10 -0
package/src/github/handlers.ts +84 -7
package/src/github/templates.ts +6 -2
package/src/heartbeat/heartbeat.ts +191 -5
package/src/providers/claude-adapter.ts +41 -4
package/src/slack/assistant.ts +28 -0
package/src/slack/channel-join.ts +38 -3
package/src/slack/handlers.ts +4 -1
package/src/tasks/worker-follow-up.ts +181 -20
package/src/tests/claude-adapter-binary.test.ts +74 -0
package/src/tests/github-handlers-inline-comments.test.ts +308 -0
package/src/tests/heartbeat-reroute-decision.test.ts +570 -0
package/src/tests/heartbeat-supersede-resume.test.ts +137 -0
package/src/tests/heartbeat.test.ts +4 -2
package/src/tests/memory-rater-implicit-citation.test.ts +31 -0
package/src/tests/prompt-template-remaining.test.ts +2 -1
package/src/tests/slack-assistant-comention-production.test.ts +319 -0
package/src/tests/slack-assistant-comention.test.ts +139 -0
package/src/tests/slack-channel-join.test.ts +150 -16
package/src/tests/workflow-swarm-script.test.ts +225 -0
package/src/tests/workflow-template.test.ts +17 -0
package/src/tools/send-task.ts +51 -1
package/src/tools/templates.ts +61 -0
package/src/workflows/engine.ts +22 -1
package/src/workflows/retry-poller.ts +2 -3
package/src/workflows/template.ts +48 -0

package/src/tests/heartbeat-supersede-resume.test.ts CHANGED Viewed

@@ -372,4 +372,141 @@ describe("Heartbeat — supersede + resume (DES-523)", () => {
     expect(updatedParent?.status).toBe("failed");
     expect(updatedParent?.failureReason).toBe("superseded_workflow_task");
   });
+  // --------------------------------------------------------------------------
+  // Phase 1 (DES-523) — same-agent pin
+  //
+  // crash_recovery resumes pin to the original (stable-ID) agent instead of the
+  // role-blind unassigned pool, even when the agent's `lastActivityAt` is stale
+  // (the >30s "fresh" gate is dropped for crash_recovery). The retained
+  // `offline` gate still routes genuinely-gone (gracefully-closed) agents to the
+  // pool.
+  // --------------------------------------------------------------------------
+  test("Phase 1: recoverable-but-stale agent → resume is PINNED (agentId=original, pending), not pooled", async () => {
+    // Arrange: a busy, non-lead worker that has started one task.
+    const agent = createAgent({ name: "stale-recoverable", isLead: false, status: "busy" });
+    const parent = createTaskExtended("Work to resume on the same agent", { agentId: agent.id });
+    startTask(parent.id);
+    // Force the default single-slot capacity so the capacity-ordering invariant
+    // below is unambiguous.
+    getDb().run("UPDATE agents SET maxTasks = 1 WHERE id = ?", [agent.id]);
+    // Stale on BOTH axes: the task hasn't updated in 10 min (past the no-session
+    // threshold) AND the agent's lastActivityAt is 10 min old (far past
+    // WORKER_LIVENESS_WINDOW_SECONDS = 30s). Under the old `fresh` gate this
+    // resume would have been released to the unassigned pool; the pin must now
+    // hold regardless of staleness because the agent ID is stable across restart.
+    const oldTime = new Date(Date.now() - 10 * 60 * 1000).toISOString();
+    getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [oldTime, parent.id]);
+    getDb().run("UPDATE agents SET lastActivityAt = ? WHERE id = ?", [oldTime, agent.id]);
+    // Act: run a single triage sweep.
+    const findings = await codeLevelTriage();
+    // Assert: exactly one resume was created, and it is reported as pinned to
+    // this agent.
+    expect(findings.autoResumedTasks.length).toBe(1);
+    expect(findings.pinnedResumes.length).toBe(1);
+    expect(findings.pinnedResumes[0]!.agentId).toBe(agent.id);
+    const children = getChildTasks(parent.id);
+    expect(children.length).toBe(1);
+    const resume = children[0]!;
+    expect(resume.taskType).toBe("resume");
+    // The pin: assigned to the ORIGINAL agent and therefore `pending` (NOT
+    // `unassigned`). createTaskExtended derives `pending` from a set agentId.
+    expect(resume.agentId).toBe(agent.id);
+    expect(resume.status).toBe("pending");
+    expect(findings.pinnedResumes[0]!.taskId).toBe(resume.id);
+    // Capacity-ordering invariant: maxTasks=1 and the agent held the parent
+    // `in_progress`. The pin succeeds ONLY because remediateCrashedWorkerTask
+    // supersedes the parent (freeing the single in_progress slot) BEFORE
+    // createResumeFollowUp runs its `activeCount < maxTasks` check. A reversed
+    // order would see activeCount=1 >= 1, skip the pin, and fall back to the
+    // pool — the exact bug this fix closes.
+    // NOTE(review): the ordering is evidenced only indirectly, by the pin
+    // succeeding; no assertion in this test inspects the parent's status.
+  });
+  test("Phase 1: Case B (stale session heartbeat) also pins the resume to the original agent", async () => {
+    const agent = createAgent({ name: "crashed-stale", isLead: false, status: "busy" });
+    const parent = createTaskExtended("Crashed worker work (Case B)", { agentId: agent.id });
+    startTask(parent.id);
+    // Unlike the no-session case, an active session row exists for this task.
+    insertActiveSession({ agentId: agent.id, taskId: parent.id, triggerType: "task_assigned" });
+    // Age the task, the session heartbeat, and the agent's activity by 20 min.
+    const oldTime = new Date(Date.now() - 20 * 60 * 1000).toISOString();
+    getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [oldTime, parent.id]);
+    getDb().run("UPDATE active_sessions SET lastHeartbeatAt = ? WHERE taskId = ?", [
+      oldTime,
+      parent.id,
+    ]);
+    getDb().run("UPDATE agents SET lastActivityAt = ? WHERE id = ?", [oldTime, agent.id]);
+    const findings = await codeLevelTriage();
+    // Same pin contract as the no-session case: reported as pinned, assigned to
+    // the original agent, and left `pending`.
+    expect(findings.pinnedResumes.length).toBe(1);
+    expect(findings.pinnedResumes[0]!.agentId).toBe(agent.id);
+    const resume = getChildTasks(parent.id)[0]!;
+    expect(resume.taskType).toBe("resume");
+    expect(resume.agentId).toBe(agent.id);
+    expect(resume.status).toBe("pending");
+  });
+  test("Phase 1: offline (gracefully-closed) agent → resume is NOT pinned, falls back to the pool", async () => {
+    const agent = createAgent({ name: "gone-worker", isLead: false, status: "busy" });
+    const parent = createTaskExtended("Work whose agent is gone", { agentId: agent.id });
+    startTask(parent.id);
+    // Only the task is aged here; the agent's lastActivityAt is left untouched.
+    const oldTime = new Date(Date.now() - 10 * 60 * 1000).toISOString();
+    getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [oldTime, parent.id]);
+    // Genuinely gone: a graceful close set the agent offline. The retained
+    // `offline` gate must keep this routing to the pool. (The Phase 3 reaper
+    // does NOT act here — it acts only on pinned, still-pending resumes.)
+    getDb().run("UPDATE agents SET status = 'offline' WHERE id = ?", [agent.id]);
+    const findings = await codeLevelTriage();
+    // The crash path created the resume but did NOT pin it — the retained
+    // `offline` gate routed it to the unassigned pool instead.
+    expect(findings.autoResumedTasks.length).toBe(1);
+    expect(findings.pinnedResumes.length).toBe(0);
+    const resume = getChildTasks(parent.id)[0]!;
+    expect(resume.taskType).toBe("resume");
+    // NOTE: we deliberately do NOT assert the resume's final agentId/status. The
+    // resume is created `unassigned`, but `autoAssignPoolTasks` runs later in the
+    // same sweep and may legitimately assign the pool task to an idle worker
+    // (existing, intended pool behavior, untouched by Phase 1). The Phase-1
+    // contract here is only that the crash path itself did not pin it.
+  });
+  test("Phase 1: a pinned pending resume is invisible to the stall detector — re-sweep creates no 2nd resume", async () => {
+    const agent = createAgent({ name: "stale-recoverable-2", isLead: false, status: "busy" });
+    const parent = createTaskExtended("Work pinned then left unclaimed", { agentId: agent.id });
+    startTask(parent.id);
+    // Age both the task and the agent's activity by 10 min.
+    const oldTime = new Date(Date.now() - 10 * 60 * 1000).toISOString();
+    getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [oldTime, parent.id]);
+    getDb().run("UPDATE agents SET lastActivityAt = ? WHERE id = ?", [oldTime, agent.id]);
+    // First sweep pins the resume to the agent.
+    const first = await codeLevelTriage();
+    expect(first.pinnedResumes.length).toBe(1);
+    const resumeId = first.autoResumedTasks[0]!.resumeTaskId;
+    expect(getTaskById(resumeId)?.status).toBe("pending");
+    expect(getTaskById(resumeId)?.agentId).toBe(agent.id);
+    // Age the pinned resume well past the stall threshold. It is `pending`, not
+    // `in_progress`, so getStalledInProgressTasks cannot see it — no loop, no
+    // second resume, and the agent's still-stale activity does not matter.
+    getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [oldTime, resumeId]);
+    const second = await codeLevelTriage();
+    // The second sweep reports nothing new ...
+    expect(second.autoResumedTasks.length).toBe(0);
+    expect(second.pinnedResumes.length).toBe(0);
+    // ... and the parent still has exactly one child: the original pinned
+    // resume, still `pending`.
+    const children = getChildTasks(parent.id);
+    expect(children.length).toBe(1);
+    expect(children[0]!.id).toBe(resumeId);
+    expect(getTaskById(resumeId)?.status).toBe("pending");
+  });
 });

package/src/tests/heartbeat.test.ts CHANGED Viewed

@@ -460,8 +460,10 @@ describe("Heartbeat Triage", () => {
       await codeLevelTriage();
-      // Agent should be set to idle since the parent task is terminal (superseded)
-      // and the resume follow-up was routed to the unassigned pool (worker is "dead").
+      // Agent goes idle: the parent task is terminal (superseded) and the
+      // crash_recovery resume is now PINNED back to this agent as `pending`
+      // (DES-523 same-agent pin). `pending` does not count toward in_progress
+      // capacity, so getActiveTaskCount drops to 0 and the agent flips to idle.
       const agents = getDb().query("SELECT status FROM agents WHERE id = ?").get(agent.id) as {
         status: string;
       };

package/src/tests/memory-rater-implicit-citation.test.ts CHANGED Viewed

@@ -188,6 +188,37 @@ describe("retrieval → ImplicitCitationRater → posterior shift", () => {
     expect(rows.map((r) => r.memoryId).sort()).toEqual([m1.id, m2.id].sort());
   });
+  test("recordRetrievals groups rows from one call and stamps result rank", () => {
+    const first = makeMemory("retrieval-group-first");
+    const second = makeMemory("retrieval-group-second");
+    const third = makeMemory("retrieval-group-third");
+    // Two separate calls: the first records two results, the second records one.
+    recordRetrievals(taskId, agentId, [
+      { memoryId: first.id, similarity: 0.9 },
+      { memoryId: second.id, similarity: 0.8 },
+    ]);
+    recordRetrievals(taskId, agentId, [{ memoryId: third.id, similarity: 0.7 }]);
+    const rows = getDb()
+      .prepare<{ memoryId: string; retrievalId: string | null; rank: number | null }, [string]>(
+        "SELECT memoryId, retrievalId, rank FROM memory_retrieval WHERE taskId = ?",
+      )
+      .all(taskId);
+    expect(rows).toHaveLength(3);
+    // The query has no ORDER BY, so rows are looked up by memoryId rather than
+    // by position in the result set.
+    const byMemoryId = new Map(rows.map((row) => [row.memoryId, row]));
+    const firstRow = byMemoryId.get(first.id)!;
+    const secondRow = byMemoryId.get(second.id)!;
+    const thirdRow = byMemoryId.get(third.id)!;
+    // Rows written by the same call share one retrievalId; rank is the 0-based
+    // position of the result within that call's list.
+    expect(firstRow.retrievalId).toBeTruthy();
+    expect(secondRow.retrievalId).toBe(firstRow.retrievalId);
+    expect(firstRow.rank).toBe(0);
+    expect(secondRow.rank).toBe(1);
+    // The second call gets its own retrievalId and its rank restarts at 0.
+    expect(thirdRow.retrievalId).toBeTruthy();
+    expect(thirdRow.retrievalId).not.toBe(firstRow.retrievalId);
+    expect(thirdRow.rank).toBe(0);
+  });
   test("recordRetrievals is a no-op when taskId is undefined", () => {
     const m = makeMemory("no-task");
     recordRetrievals(undefined, agentId, [{ memoryId: m.id, similarity: 0.9 }]);

package/src/tests/prompt-template-remaining.test.ts CHANGED Viewed

@@ -102,11 +102,12 @@ describe("template registration — all sources", () => {
     expect(eventTypes).toContain("heartbeat.checklist");
   });
-  test("Task lifecycle templates are registered (2 task_lifecycle)", () => {
+  test("Task lifecycle templates are registered (3 task_lifecycle)", () => {
     const all = getAllTemplateDefinitions();
     const eventTypes = all.map((d) => d.eventType);
     expect(eventTypes).toContain("task.worker.completed");
     expect(eventTypes).toContain("task.worker.failed");
+    expect(eventTypes).toContain("task.reroute.decision");
   });
   test("Runner trigger templates are registered (7 task_lifecycle)", () => {

package/src/tests/slack-assistant-comention-production.test.ts ADDED Viewed

@@ -0,0 +1,319 @@
+/**
+ * Production-path regression tests for the assistant-surface co-mention guard.
+ *
+ * These tests invoke the REAL production handlers (createAssistant().userMessage and
+ * the registerMessageHandler callback) to verify that task creation is suppressed
+ * when a Slack message @-mentions a different agent (e.g. Devin) but NOT our bot.
+ *
+ * Mutation resistance: removing the guard from src/slack/assistant.ts or
+ * src/slack/handlers.ts causes the co-mention message to reach
+ * createTaskWithSiblingAwareness, which fails the `not.toHaveBeenCalled()` assertions.
+ *
+ * Complements slack-assistant-comention.test.ts (pure helper-function unit tests).
+ * Regression for task 4ae1f3b5 — "<@U0831BS93V1> Are you here?" spawned an unwanted task.
+ */
+import { afterAll, beforeAll, beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
+import * as dbModule from "../be/db";
+import * as slackEnrichModule from "../slack/enrich";
+import * as slackEventDedupModule from "../slack/event-dedup";
+import * as siblingAwarenessModule from "../tasks/sibling-awareness";
+// ---------------------------------------------------------------------------
+// Production-handler spies.
+//
+// Avoid mock.module here: Bun's module overrides are process-global and can be
+// observed by other test files during module loading. Restorable spies keep the
+// regression test on the real production handlers without leaking fake modules.
+// ---------------------------------------------------------------------------
+// Production entry points; assigned in beforeAll via dynamic import.
+let createAssistantFn: typeof import("../slack/assistant").createAssistant;
+let registerMessageHandlerFn: typeof import("../slack/handlers").registerMessageHandler;
+// Spy handles; created in beforeAll with spyOn and re-stubbed before each test.
+// NOTE(review): these are typed `any`, so calls on them are unchecked —
+// consider a concrete mock type from bun:test.
+let createTaskWithSiblingAwarenessSpy: any;
+let getAgentWorkingOnThreadSpy: any;
+let getLeadAgentSpy: any;
+let getMostRecentTaskInThreadSpy: any;
+let getAgentByIdSpy: any;
+let getTasksByAgentIdSpy: any;
+let resolveSlackUserIdSpy: any;
+let enrichSlackUserEmailSpy: any;
+let wasEventSeenSpy: any;
+const originalEnv = {
+  ADDITIVE_SLACK: process.env.ADDITIVE_SLACK,
+  SLACK_ALLOWED_EMAIL_DOMAINS: process.env.SLACK_ALLOWED_EMAIL_DOMAINS,
+  SLACK_ALLOWED_USER_IDS: process.env.SLACK_ALLOWED_USER_IDS,
+};
+/**
+ * Puts `process.env[key]` back to the value captured in `originalEnv`.
+ *
+ * A variable that was unset at capture time is deleted instead of assigned,
+ * because assigning `undefined` to `process.env` stores the string "undefined".
+ */
+function restoreEnvValue(key: keyof typeof originalEnv): void {
+  const saved = originalEnv[key];
+  if (saved !== undefined) {
+    process.env[key] = saved;
+    return;
+  }
+  delete process.env[key];
+}
+/**
+ * (Re)installs the default stub implementation on every spy used by this file.
+ * Each stub builds a fresh return value per call, and the stubs are applied in
+ * the order listed.
+ */
+function installSpyImplementations(): void {
+  const defaults: Array<[any, (...args: unknown[]) => unknown]> = [
+    [createTaskWithSiblingAwarenessSpy, () => ({ id: "mock-task-id-prod-path" })],
+    [getAgentWorkingOnThreadSpy, () => null],
+    [getLeadAgentSpy, () => ({ id: "lead-prod-test-1", name: "TestLead", isLead: true })],
+    [getMostRecentTaskInThreadSpy, () => null],
+    [getAgentByIdSpy, () => null],
+    [getTasksByAgentIdSpy, () => []],
+    [resolveSlackUserIdSpy, async () => undefined],
+    [enrichSlackUserEmailSpy, async () => null],
+    [wasEventSeenSpy, () => false],
+  ];
+  for (const [spy, stub] of defaults) {
+    spy.mockImplementation(stub);
+  }
+}
+beforeAll(async () => {
+  // Set the Slack-related env vars this file controls; the captured originals
+  // are put back in afterAll.
+  process.env.ADDITIVE_SLACK = "false";
+  delete process.env.SLACK_ALLOWED_EMAIL_DOMAINS;
+  delete process.env.SLACK_ALLOWED_USER_IDS;
+  // Spy on the real module namespaces (no mock.module) so the production
+  // handlers run against restorable stubs.
+  createTaskWithSiblingAwarenessSpy = spyOn(
+    siblingAwarenessModule,
+    "createTaskWithSiblingAwareness",
+  );
+  getAgentWorkingOnThreadSpy = spyOn(dbModule, "getAgentWorkingOnThread");
+  getLeadAgentSpy = spyOn(dbModule, "getLeadAgent");
+  getMostRecentTaskInThreadSpy = spyOn(dbModule, "getMostRecentTaskInThread");
+  getAgentByIdSpy = spyOn(dbModule, "getAgentById");
+  getTasksByAgentIdSpy = spyOn(dbModule, "getTasksByAgentId");
+  resolveSlackUserIdSpy = spyOn(slackEnrichModule, "resolveSlackUserId");
+  enrichSlackUserEmailSpy = spyOn(slackEnrichModule, "enrichSlackUserEmail");
+  wasEventSeenSpy = spyOn(slackEventDedupModule, "wasEventSeen");
+  installSpyImplementations();
+  // The handlers are imported only after the spies are installed.
+  // NOTE(review): presumably so the handler modules never observe the unstubbed
+  // functions — confirm whether this ordering is actually required.
+  ({ createAssistant: createAssistantFn } = await import("../slack/assistant"));
+  ({ registerMessageHandler: registerMessageHandlerFn } = await import("../slack/handlers"));
+});
+beforeEach(() => {
+  // Clear every spy, then reinstall the default stubs, so each test starts
+  // from the same baseline.
+  const spies = [
+    createTaskWithSiblingAwarenessSpy,
+    getAgentWorkingOnThreadSpy,
+    getLeadAgentSpy,
+    getMostRecentTaskInThreadSpy,
+    getAgentByIdSpy,
+    getTasksByAgentIdSpy,
+    resolveSlackUserIdSpy,
+    enrichSlackUserEmailSpy,
+    wasEventSeenSpy,
+  ];
+  for (const spy of spies) {
+    spy.mockClear();
+  }
+  installSpyImplementations();
+});
+afterAll(() => {
+  // Put back the env vars that beforeAll changed, then restore the spied
+  // functions.
+  const touchedEnvKeys = [
+    "ADDITIVE_SLACK",
+    "SLACK_ALLOWED_EMAIL_DOMAINS",
+    "SLACK_ALLOWED_USER_IDS",
+  ] as const;
+  for (const envKey of touchedEnvKeys) {
+    restoreEnvValue(envKey);
+  }
+  mock.restore();
+});
+// ---------------------------------------------------------------------------
+// Shared constants
+// ---------------------------------------------------------------------------
+const BOT_USER_ID = "U_BOT_PROD_TEST";
+const DEVIN_USER_ID = "U0831BS93V1"; // the other agent from the original regression
+// File-wide counter; it is never reset, so every delivery minted in this file
+// is distinct.
+let slackDeliverySequence = 0;
+/**
+ * Mints a unique Slack delivery identity for one simulated event.
+ *
+ * @param eventIdPrefix Prefix for the event id; the sequence number is appended
+ *   after an underscore.
+ * @returns `eventId` plus a `ts` whose fractional part is the sequence number
+ *   zero-padded to six digits.
+ */
+function nextSlackDelivery(eventIdPrefix: string): { eventId: string; ts: string } {
+  const sequence = ++slackDeliverySequence;
+  const paddedSequence = sequence.toString().padStart(6, "0");
+  const eventId = `${eventIdPrefix}_${sequence}`;
+  const ts = `2000000001.${paddedSequence}`;
+  return { eventId, ts };
+}
+// Mock Slack WebClient — auth.test() returns our bot's user ID so the
+// module-level cachedBotUserId gets populated on the first handler invocation.
+// NOTE(review): only auth.test and conversations.replies are stubbed; any other
+// client method a handler reaches for is undefined on this object.
+const mockClient = {
+  auth: {
+    test: async () => ({ user_id: BOT_USER_ID, bot_id: "B_BOT_PROD_TEST" }),
+  },
+  conversations: {
+    // Needed only if getThreadContext is reached (thread_ts set); returning
+    // empty messages is safe for the paths exercised here.
+    replies: async () => ({ messages: [], ok: true }),
+  },
+};
+// ---------------------------------------------------------------------------
+// Production-path: assistant.ts — createAssistant().userMessage
+// ---------------------------------------------------------------------------
+describe("assistant.ts — userMessage production-path co-mention guard", () => {
+  // Bolt stores the configured userMessage callbacks as an array; element [0]
+  // is the callback passed to the config, so it can be invoked directly.
+  let userMessageHandler: (args: Record<string, unknown>) => Promise<void>;
+  /** Feeds one assistant-surface DM into the real userMessage callback. */
+  async function deliverUserMessage(ts: string, text: string, eventId: string): Promise<void> {
+    await userMessageHandler({
+      message: {
+        channel: "D_ASSISTANT_PROD_TEST",
+        ts,
+        text,
+        user: "U_HUMAN_ASST_001",
+      },
+      body: { event_id: eventId },
+      say: mock(async () => {}),
+      setStatus: mock(async () => {}),
+      setTitle: mock(async () => {}),
+      getThreadContext: mock(async () => ({})),
+      client: mockClient,
+    });
+  }
+  beforeAll(() => {
+    const assistant = createAssistantFn() as any;
+    userMessageHandler = assistant.userMessage[0] as typeof userMessageHandler;
+  });
+  test("does NOT spawn a task when message @-mentions another agent but not our bot", async () => {
+    await deliverUserMessage(
+      "1000000001.000001",
+      `<@${DEVIN_USER_ID}> Are you here?`,
+      "evt_prod_asst_comention_001",
+    );
+    expect(createTaskWithSiblingAwarenessSpy).not.toHaveBeenCalled();
+  });
+  test("DOES spawn a task for a plain DM with no @-mentions (baseline)", async () => {
+    await deliverUserMessage(
+      "1000000001.000002",
+      "What is the current status of all agents?",
+      "evt_prod_asst_plain_001",
+    );
+    expect(createTaskWithSiblingAwarenessSpy).toHaveBeenCalledTimes(1);
+  });
+  test("does NOT spawn a task when message @-mentions a human user but not our bot", async () => {
+    await deliverUserMessage(
+      "1000000001.000003",
+      "<@U037TJB7VHQ> what do you think?",
+      "evt_prod_asst_comention_002",
+    );
+    expect(createTaskWithSiblingAwarenessSpy).not.toHaveBeenCalled();
+  });
+});
+// ---------------------------------------------------------------------------
+// Production-path: handlers.ts — registerMessageHandler, assistant_thread fallback
+//
+// File-share messages in DM assistant threads bypass the Assistant handler and
+// land in the generic message handler. The isImplicitMention logic in
+// registerMessageHandler must suppress task creation when assistant_thread is set
+// AND the message @-mentions a different user (not our bot).
+// ---------------------------------------------------------------------------
+describe("registerMessageHandler — assistant_thread co-mention guard (production-path)", () => {
+  type MessageEventArg = {
+    channel: string;
+    ts: string;
+    text?: string;
+    user?: string;
+    subtype?: string;
+    bot_id?: string;
+    assistant_thread?: Record<string, unknown>;
+    thread_ts?: string;
+  };
+  type HandlerArgs = {
+    event: MessageEventArg;
+    body: Record<string, unknown>;
+    client: typeof mockClient;
+    say: (args: unknown) => Promise<void>;
+  };
+  let capturedHandler: ((args: HandlerArgs) => Promise<void>) | null = null;
+  /**
+   * Sends one assistant_thread DM through the captured "message" handler,
+   * using a freshly minted event id and ts.
+   */
+  async function deliverAssistantThreadMessage(eventIdPrefix: string, text: string): Promise<void> {
+    expect(capturedHandler).not.toBeNull();
+    const { eventId, ts } = nextSlackDelivery(eventIdPrefix);
+    await capturedHandler!({
+      event: {
+        channel: "D_HANDLER_PROD_TEST",
+        ts,
+        text,
+        user: "U_HUMAN_HDLR_001",
+        assistant_thread: { channel_id: "D_HANDLER_PROD_TEST" },
+      },
+      body: { event_id: eventId },
+      client: mockClient,
+      say: mock(async () => {}),
+    });
+  }
+  beforeAll(() => {
+    // registerMessageHandler calls app.event("message", ...) and then
+    // app.event("app_mention", ...). Only the message handler is kept.
+    const mockApp = {
+      event: (eventType: string, handler: (args: HandlerArgs) => Promise<void>) => {
+        if (eventType !== "message") {
+          return;
+        }
+        capturedHandler = handler;
+      },
+    };
+    registerMessageHandlerFn(mockApp as any);
+  });
+  test("does NOT spawn a task when assistant_thread message @-mentions another agent", async () => {
+    await deliverAssistantThreadMessage(
+      "evt_prod_hdlr_comention",
+      `<@${DEVIN_USER_ID}> Are you here?`,
+    );
+    expect(createTaskWithSiblingAwarenessSpy).not.toHaveBeenCalled();
+  });
+  test("DOES spawn a task for assistant_thread plain message with no @-mentions (baseline)", async () => {
+    await deliverAssistantThreadMessage(
+      "evt_prod_hdlr_plain",
+      "What is the current status of all agents?",
+    );
+    expect(createTaskWithSiblingAwarenessSpy).toHaveBeenCalledTimes(1);
+  });
+  test("does NOT spawn a task when assistant_thread message @-mentions a human (not our bot)", async () => {
+    await deliverAssistantThreadMessage(
+      "evt_prod_hdlr_comention",
+      "<@U037TJB7VHQ> what do you think?",
+    );
+    expect(createTaskWithSiblingAwarenessSpy).not.toHaveBeenCalled();
+  });
+});

package/src/tests/slack-assistant-comention.test.ts ADDED Viewed

@@ -0,0 +1,139 @@
+/**
+ * Tests for the assistant-surface co-mention guard.
+ *
+ * The guard in assistant.ts and handlers.ts prevents the swarm from spawning a
+ * task when a Slack message arrives on the AI-App / assistant surface and
+ * @-mentions a DIFFERENT agent (e.g. Devin) but NOT our bot.
+ *
+ * Regression for task 4ae1f3b5 — root message "<@U0831BS93V1> Are you here?"
+ * (Devin) triggered an unwanted swarm task.
+ */
+import { describe, expect, test } from "bun:test";
+import { hasOtherUserMention } from "../slack/router";
+const BOT_USER_ID = "U0ASK3PCZ4P"; // our bot
+const DEVIN_USER_ID = "U0831BS93V1"; // another agent
+const HUMAN_USER_ID = "U037TJB7VHQ"; // a human
+// ---------------------------------------------------------------------------
+// hasOtherUserMention — the function powering both guards
+// ---------------------------------------------------------------------------
+describe("hasOtherUserMention — assistant surface scenarios", () => {
+  // Each row: [test title, message text, expected hasOtherUserMention result].
+  const scenarios: Array<[string, string, boolean]> = [
+    [
+      "returns true when message mentions only another agent (e.g. Devin)",
+      `<@${DEVIN_USER_ID}> Are you here?`,
+      true,
+    ],
+    [
+      "returns true when message mentions a human (not our bot)",
+      `<@${HUMAN_USER_ID}> what do you think?`,
+      true,
+    ],
+    ["returns false when message mentions only our bot", `<@${BOT_USER_ID}> help me`, false],
+    [
+      "returns false when message has no @-mentions at all (plain DM)",
+      "Hello, what is the agent status?",
+      false,
+    ],
+    // @-mentions both — hasOtherUserMention is true because Devin is mentioned.
+    // The guard also checks !botMentioned, so this path goes through normally.
+    [
+      "returns true when message mentions both our bot AND another user",
+      `<@${BOT_USER_ID}> <@${DEVIN_USER_ID}> what do you both think?`,
+      true,
+    ],
+    [
+      "returns false for swarm#all text command (no @-mention)",
+      "swarm#all deploy staging",
+      false,
+    ],
+    [
+      "returns false for swarm#<uuid> text command (no @-mention)",
+      "swarm#5fd166b4-7d41-40ce-852f-9a3c2ea191a3 run task",
+      false,
+    ],
+  ];
+  for (const [title, text, expected] of scenarios) {
+    test(title, () => {
+      expect(hasOtherUserMention(text, BOT_USER_ID)).toBe(expected);
+    });
+  }
+});
+// ---------------------------------------------------------------------------
+// Guard condition — mirrors the logic in assistant.ts and handlers.ts
+// The guard fires (suppresses task creation) when:
+//   !botMentioned && hasOtherUserMention(text, botUserId)
+// ---------------------------------------------------------------------------
+describe("assistant surface guard condition", () => {
+  /**
+   * Local copy of the guard: skip only when our bot is NOT mentioned and some
+   * other user is.
+   */
+  function shouldSkip(text: string, botUserId: string): boolean {
+    if (text.includes(`<@${botUserId}>`)) {
+      return false;
+    }
+    return hasOtherUserMention(text, botUserId);
+  }
+  // Each row: [test title, message text, expected shouldSkip result].
+  const scenarios: Array<[string, string, boolean]> = [
+    [
+      "skips — message mentions only Devin (the Devin co-mention case)",
+      `<@${DEVIN_USER_ID}> Are you here?`,
+      true,
+    ],
+    ["skips — message mentions a human user, not our bot", `<@${HUMAN_USER_ID}> wdyt?`, true],
+    [
+      "does NOT skip — message mentions our bot (direct mention)",
+      `<@${BOT_USER_ID}> help me`,
+      false,
+    ],
+    [
+      "does NOT skip — plain DM with no @-mentions (normal assistant use)",
+      "Show me the latest agent tasks",
+      false,
+    ],
+    // botMentioned=true → guard does NOT fire → task proceeds normally
+    [
+      "does NOT skip — message mentions our bot AND Devin (co-mention, bot included)",
+      `<@${BOT_USER_ID}> and <@${DEVIN_USER_ID}> can you both look at this?`,
+      false,
+    ],
+    [
+      "does NOT skip — swarm#all command (no @-mention)",
+      "swarm#all run the deployment",
+      false,
+    ],
+    [
+      "does NOT skip — swarm#<uuid> command (no @-mention)",
+      "swarm#5fd166b4-7d41-40ce-852f-9a3c2ea191a3 do the thing",
+      false,
+    ],
+  ];
+  for (const [title, text, expected] of scenarios) {
+    test(title, () => {
+      expect(shouldSkip(text, BOT_USER_ID)).toBe(expected);
+    });
+  }
+});
+// ---------------------------------------------------------------------------
+// isImplicitMention guard — mirrors the logic added to registerMessageHandler in handlers.ts
+// isImplicitMention = isAssistantThread && !botMentioned && !hasOtherUserMention(text, botUserId)
+// ---------------------------------------------------------------------------
+describe("isImplicitMention with co-mention guard (handlers.ts)", () => {
+  /**
+   * Local copy of the implicit-mention rule: true only for an assistant-thread
+   * message that mentions neither our bot nor any other user.
+   */
+  function computeIsImplicitMention(
+    isAssistantThread: boolean,
+    text: string,
+    botUserId: string,
+  ): boolean {
+    if (!isAssistantThread) {
+      return false;
+    }
+    if (text.includes(`<@${botUserId}>`)) {
+      return false;
+    }
+    return !hasOtherUserMention(text, botUserId);
+  }
+  // Each row: [test title, isAssistantThread, message text, expected result].
+  const scenarios: Array<[string, boolean, string, boolean]> = [
+    ["false — not an assistant thread", false, "Hello there", false],
+    [
+      "false — assistant thread but message mentions Devin only",
+      true,
+      `<@${DEVIN_USER_ID}> are you here?`,
+      false,
+    ],
+    [
+      "false — assistant thread, message mentions our bot (explicit mention)",
+      true,
+      `<@${BOT_USER_ID}> help`,
+      false,
+    ],
+    [
+      "true — assistant thread, plain message with no @-mentions (normal DM use)",
+      true,
+      "What are the active tasks?",
+      true,
+    ],
+  ];
+  for (const [title, isAssistantThread, text, expected] of scenarios) {
+    test(title, () => {
+      expect(computeIsImplicitMention(isAssistantThread, text, BOT_USER_ID)).toBe(expected);
+    });
+  }
+});