npm - @desplega.ai/agent-swarm - Versions diffs - 1.80.0 → 1.80.1 - Mend

@desplega.ai/agent-swarm 1.80.0 → 1.80.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/openapi.json +399 -14
package/package.json +3 -1
package/src/artifact-sdk/server.ts +2 -1
package/src/be/db.ts +1 -1
package/src/be/migrations/064_scripts.sql +39 -0
package/src/be/migrations/065_script_embeddings.sql +7 -0
package/src/be/scripts/db.ts +391 -0
package/src/be/scripts/embeddings.ts +231 -0
package/src/be/scripts/maintenance.ts +9 -0
package/src/be/scripts/typecheck.ts +193 -0
package/src/cli.tsx +22 -5
package/src/commands/artifact.ts +3 -2
package/src/commands/claude-managed-setup.ts +2 -1
package/src/commands/codex-login.ts +5 -3
package/src/commands/onboard.tsx +2 -1
package/src/commands/runner.ts +72 -10
package/src/commands/setup.tsx +5 -3
package/src/hooks/hook.ts +4 -3
package/src/http/index.ts +40 -29
package/src/http/memory.ts +28 -0
package/src/http/openapi.ts +1 -0
package/src/http/page-proxy.ts +2 -1
package/src/http/route-def.ts +1 -0
package/src/http/schedules.ts +37 -0
package/src/http/scripts.ts +381 -0
package/src/linear/outbound.ts +9 -2
package/src/otel.ts +5 -0
package/src/providers/claude-adapter.ts +22 -1
package/src/scripts-runtime/ctx.ts +23 -0
package/src/scripts-runtime/eval-harness.ts +39 -0
package/src/scripts-runtime/executors/native.ts +229 -0
package/src/scripts-runtime/executors/registry.ts +16 -0
package/src/scripts-runtime/executors/types.ts +63 -0
package/src/scripts-runtime/extract-signature.ts +81 -0
package/src/scripts-runtime/import-allowlist.ts +109 -0
package/src/scripts-runtime/loader.ts +96 -0
package/src/scripts-runtime/redacted.ts +48 -0
package/src/scripts-runtime/sdk-allowlist.ts +29 -0
package/src/scripts-runtime/stdlib/fetch.ts +46 -0
package/src/scripts-runtime/stdlib/glob.ts +8 -0
package/src/scripts-runtime/stdlib/grep.ts +34 -0
package/src/scripts-runtime/stdlib/index.ts +16 -0
package/src/scripts-runtime/stdlib/table.ts +17 -0
package/src/scripts-runtime/swarm-config.ts +35 -0
package/src/scripts-runtime/swarm-sdk.ts +197 -0
package/src/scripts-runtime/types/stdlib.d.ts +104 -0
package/src/scripts-runtime/types/swarm-sdk.d.ts +86 -0
package/src/server.ts +12 -0
package/src/tests/api-key.test.ts +33 -0
package/src/tests/codex-login.test.ts +1 -1
package/src/tests/linear-outbound-sync.test.ts +109 -0
package/src/tests/mcp-tools.test.ts +69 -0
package/src/tests/redacted.test.ts +29 -0
package/src/tests/runner-tool-spans.test.ts +268 -0
package/src/tests/script-executor-conformance.test.ts +142 -0
package/src/tests/script-executor-registry.test.ts +17 -0
package/src/tests/scripts-db.test.ts +329 -0
package/src/tests/scripts-embeddings.test.ts +291 -0
package/src/tests/scripts-extract-signature.test.ts +47 -0
package/src/tests/scripts-http.test.ts +350 -0
package/src/tests/scripts-import-allowlist.test.ts +55 -0
package/src/tests/scripts-mcp-e2e.test.ts +269 -0
package/src/tests/scripts-runtime-secret-egress.test.ts +44 -0
package/src/tests/scripts-runtime.test.ts +289 -0
package/src/tests/sdk-allowlist.test.ts +59 -0
package/src/tests/secret-scrubber.test.ts +35 -1
package/src/tests/swarm-config.test.ts +38 -0
package/src/tests/tool-annotations.test.ts +2 -2
package/src/tests/tool-call-progress.test.ts +30 -0
package/src/tests/workflow-e2e.test.ts +218 -0
package/src/tests/workflow-executors.test.ts +32 -2
package/src/tests/workflow-input-redaction.test.ts +232 -0
package/src/tests/workflow-swarm-script.test.ts +273 -0
package/src/tools/memory-rate.ts +2 -1
package/src/tools/script-common.ts +88 -0
package/src/tools/script-delete.ts +35 -0
package/src/tools/script-query-types.ts +37 -0
package/src/tools/script-run.ts +43 -0
package/src/tools/script-search.ts +32 -0
package/src/tools/script-upsert.ts +43 -0
package/src/tools/tool-config.ts +7 -0
package/src/types.ts +60 -1
package/src/utils/api-key.ts +28 -0
package/src/utils/page-session.ts +8 -6
package/src/utils/secret-scrubber.ts +22 -1
package/src/workflows/engine.ts +12 -4
package/src/workflows/executors/index.ts +1 -0
package/src/workflows/executors/registry.ts +2 -0
package/src/workflows/executors/script.ts +12 -1
package/src/workflows/executors/swarm-script.ts +170 -0
package/src/workflows/input.ts +65 -0
package/src/workflows/recovery.ts +31 -3
package/src/workflows/resume.ts +43 -5

package/src/tests/api-key.test.ts ADDED Viewed

@@ -0,0 +1,33 @@
+import { describe, expect, test } from "bun:test";
+import { getApiKey, setApiKey } from "../utils/api-key";
+// Precedence covered below: AGENT_SWARM_API_KEY wins, legacy API_KEY is the fallback,
+// and an env containing neither yields the empty string.
+describe("getApiKey", () => {
+  const scenarios = [
+    { title: "returns empty string when neither var is set", env: {}, expected: "" },
+    {
+      title: "returns API_KEY when only legacy var is set",
+      env: { API_KEY: "legacy" },
+      expected: "legacy",
+    },
+    {
+      title: "returns AGENT_SWARM_API_KEY when only preferred var is set",
+      env: { AGENT_SWARM_API_KEY: "preferred" },
+      expected: "preferred",
+    },
+    {
+      title: "prefers AGENT_SWARM_API_KEY over API_KEY when both set",
+      env: { AGENT_SWARM_API_KEY: "preferred", API_KEY: "legacy" },
+      expected: "preferred",
+    },
+    {
+      title: "falls back to API_KEY if AGENT_SWARM_API_KEY is undefined",
+      env: { AGENT_SWARM_API_KEY: undefined, API_KEY: "x" },
+      expected: "x",
+    },
+  ];
+  for (const { title, env, expected } of scenarios) {
+    test(title, () => {
+      expect(getApiKey(env)).toBe(expected);
+    });
+  }
+});
+describe("setApiKey", () => {
+  test("populates both env var names", () => {
+    // A local object is handed in as the env target, so the assertions read it back directly.
+    const scratchEnv: Record<string, string | undefined> = {};
+    setApiKey("k", scratchEnv);
+    for (const varName of ["AGENT_SWARM_API_KEY", "API_KEY"]) {
+      expect(scratchEnv[varName]).toBe("k");
+    }
+  });
+});

package/src/tests/codex-login.test.ts CHANGED Viewed

@@ -70,7 +70,7 @@ describe("resolveCodexLoginConfig", () => {
     expect(promptSecret).toHaveBeenCalledWith(
       "Swarm API key",
       "env-secret",
-      "Press Enter to use API_KEY from the environment",
+      "Press Enter to use AGENT_SWARM_API_KEY/API_KEY from the environment",
     );
   });

package/src/tests/linear-outbound-sync.test.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { unlink } from "node:fs/promises";
 import { closeDb, initDb } from "../be/db";
 import { createTrackerSync, getTrackerSync, updateTrackerSync } from "../be/db-queries/tracker";
 import { initLinearOutboundSync, teardownLinearOutboundSync } from "../linear/outbound";
+import { taskSessionMap } from "../linear/sync";
 import { workflowEventBus } from "../workflows/event-bus";
 const TEST_DB_PATH = "./test-linear-outbound-sync.sqlite";
@@ -17,6 +18,19 @@ mock.module("../linear/client", () => ({
   resetLinearClient: () => {},
 }));
+// Stand-ins for the AgentSession helpers exported by linear/sync. Replacing them lets the
+// tests assert which activity type the outbound handlers post (`action` vs `thought` vs
+// `response`/`error`).
+const mockPostAgentSessionThought = mock(async () => {});
+const mockPostAgentSessionAction = mock(async () => {});
+const mockEndAgentSession = mock(async () => {});
+mock.module("../linear/sync", () => {
+  return {
+    // The session map imported at the top of the file is passed through as-is (not mocked).
+    taskSessionMap,
+    postAgentSessionThought: mockPostAgentSessionThought,
+    postAgentSessionAction: mockPostAgentSessionAction,
+    endAgentSession: mockEndAgentSession,
+  };
+});
 beforeAll(() => {
   initDb(TEST_DB_PATH);
 });
@@ -31,11 +45,16 @@ afterAll(async () => {
 describe("Linear Outbound Sync", () => {
   beforeEach(() => {
     mockCreateComment.mockClear();
+    mockPostAgentSessionThought.mockClear(); // drop call history recorded by earlier tests
+    mockPostAgentSessionAction.mockClear();
+    mockEndAgentSession.mockClear();
+    taskSessionMap.clear(); // every test starts with no task -> session mappings
     initLinearOutboundSync();
   });
   afterEach(() => {
     teardownLinearOutboundSync();
+    taskSessionMap.clear(); // do not leak session mappings into the next test
   });
   test("task.completed posts comment to Linear when mapping exists", async () => {
@@ -177,6 +196,96 @@ describe("Linear Outbound Sync", () => {
     expect(mockCreateComment).toHaveBeenCalledTimes(1);
   });
+  test("task.progress posts an action activity with both action AND parameter when sessionId is mapped", async () => {
+    const sessionId = "linear-session-123";
+    const progressText = "📋 Reviewing task details";
+    const taskId = "outbound-task-progress";
+    taskSessionMap.set(taskId, sessionId);
+    workflowEventBus.emit("task.progress", { taskId, progress: progressText });
+    // Let any pending handler work settle before inspecting the mocks.
+    await new Promise((done) => setTimeout(done, 10));
+    // Progress must be posted as an `action` so it renders as a structured card in Linear's
+    // AgentSession panel. Linear's spec requires BOTH `action` AND `parameter` for action-type
+    // activities; the original bug called postAgentSessionAction with a single string
+    // (parameter undefined), which Linear silently rejected.
+    expect(mockPostAgentSessionAction).toHaveBeenCalledTimes(1);
+    expect(mockPostAgentSessionThought).not.toHaveBeenCalled();
+    const [session, action, parameter] = mockPostAgentSessionAction.mock.calls[0] as unknown[];
+    expect(session).toBe(sessionId);
+    // Action label and parameter: both present, both non-empty strings.
+    for (const field of [action, parameter]) {
+      expect(typeof field).toBe("string");
+      expect((field as string).length).toBeGreaterThan(0);
+    }
+    // The parameter is the progress text itself.
+    expect(parameter as string).toBe(progressText);
+  });
+  test("task.progress slices long progress strings into the parameter (cap at 2000)", async () => {
+    const taskId = "outbound-task-progress-long";
+    taskSessionMap.set(taskId, "linear-session-long");
+    // 5000 characters in; the assertion below expects exactly 2000 to come out.
+    workflowEventBus.emit("task.progress", { taskId, progress: "x".repeat(5000) });
+    await new Promise((done) => setTimeout(done, 10));
+    expect(mockPostAgentSessionAction).toHaveBeenCalledTimes(1);
+    const [, , parameter] = mockPostAgentSessionAction.mock.calls[0] as unknown[];
+    expect((parameter as string).length).toBe(2000);
+  });
+  test("task.progress is a no-op when no sessionId is mapped for the task", async () => {
+    // Deliberately no taskSessionMap entry for this task id.
+    const unmappedEvent = {
+      taskId: "outbound-task-progress-no-session",
+      progress: "should be dropped",
+    };
+    workflowEventBus.emit("task.progress", unmappedEvent);
+    await new Promise((done) => setTimeout(done, 10));
+    for (const poster of [mockPostAgentSessionThought, mockPostAgentSessionAction]) {
+      expect(poster).not.toHaveBeenCalled();
+    }
+  });
+  test("task.progress is a no-op when progress string is missing", async () => {
+    const taskId = "outbound-task-progress-empty";
+    taskSessionMap.set(taskId, "linear-session-empty");
+    // The session is mapped, but the event carries no `progress` field.
+    workflowEventBus.emit("task.progress", { taskId });
+    await new Promise((done) => setTimeout(done, 10));
+    for (const poster of [mockPostAgentSessionThought, mockPostAgentSessionAction]) {
+      expect(poster).not.toHaveBeenCalled();
+    }
+  });
+  test("task.created for Linear-sourced tasks still posts an action activity (with parameter)", async () => {
+    const sessionId = "linear-session-created";
+    const taskId = "outbound-task-created-linear";
+    taskSessionMap.set(taskId, sessionId);
+    workflowEventBus.emit("task.created", { taskId, source: "linear" });
+    await new Promise((done) => setTimeout(done, 10));
+    expect(mockPostAgentSessionAction).toHaveBeenCalledTimes(1);
+    expect(mockPostAgentSessionThought).not.toHaveBeenCalled();
+    const [session, action, parameter] = mockPostAgentSessionAction.mock.calls[0] as unknown[];
+    expect(session).toBe(sessionId);
+    expect(action).toBe("Processing");
+    // `action` activities are only valid with a parameter (3rd positional argument).
+    expect(typeof parameter).toBe("string");
+    expect(parameter as string).toContain(taskId);
+  });
   test("teardown removes event listeners", async () => {
     teardownLinearOutboundSync();

package/src/tests/mcp-tools.test.ts ADDED Viewed

@@ -0,0 +1,69 @@
+import { afterAll, beforeAll, describe, expect, test } from "bun:test";
+import { unlink } from "node:fs/promises";
+import { closeDb } from "../be/db";
+import { createServer } from "../server";
+const TEST_DB_PATH = "./test-mcp-tools.sqlite";
+/** Subset of a registered MCP tool's fields that the assertions in this file read. */
+interface RegisteredTool {
+  title?: string;
+  description?: string;
+  inputSchema?: unknown;
+  outputSchema?: unknown;
+  annotations?: Record<string, unknown>;
+}
+/**
+ * Deletes the file at `path` together with its `-wal` and `-shm` companions.
+ * A file that is already absent (ENOENT) is ignored; any other unlink failure is rethrown.
+ */
+async function removeDbFiles(path: string): Promise<void> {
+  const targets = [path, `${path}-wal`, `${path}-shm`];
+  for (const target of targets) {
+    try {
+      await unlink(target);
+    } catch (error) {
+      const alreadyGone = (error as NodeJS.ErrnoException).code === "ENOENT";
+      if (!alreadyGone) throw error;
+    }
+  }
+}
+describe("script MCP tools", () => {
+  type ServerInternals = { _registeredTools: Record<string, RegisteredTool> };
+  let tools: Record<string, RegisteredTool>;
+  let savedDatabasePath: string | undefined;
+  beforeAll(async () => {
+    // Swap DATABASE_PATH to the test database, remembering the previous value for afterAll.
+    savedDatabasePath = process.env.DATABASE_PATH;
+    process.env.DATABASE_PATH = TEST_DB_PATH;
+    await removeDbFiles(TEST_DB_PATH);
+    // The registry is read through a cast to the server's `_registeredTools` field.
+    const internals = createServer() as unknown as ServerInternals;
+    tools = internals._registeredTools;
+  });
+  afterAll(async () => {
+    closeDb();
+    if (savedDatabasePath !== undefined) {
+      process.env.DATABASE_PATH = savedDatabasePath;
+    } else {
+      delete process.env.DATABASE_PATH;
+    }
+    await removeDbFiles(TEST_DB_PATH);
+  });
+  test("registers all script tools with schemas and documented descriptions", () => {
+    // Tool name -> exact description string expected on the registered tool.
+    const documentedDescriptions = {
+      "script-search":
+        "Semantic search over swarm-shared TypeScript scripts (catalog persisted in the agent-swarm DB; callable from agents and workflows). For ephemeral throwaway TS on your local machine, use code-mode instead.",
+      "script-run":
+        "Run a named swarm-shared script (callable across agents and from workflow `swarm-script` nodes), OR inline source (auto-saved as scratch to the catalog). Use for swarm-visible, durable scripts. For local-only throwaway TS, use code-mode `run`.",
+      "script-upsert":
+        "Persist a TypeScript script to the swarm catalog under your agent scope (or global if you're a lead). Other agents and workflow nodes will be able to find and run it. For local-only scripts, use code-mode `save`.",
+      "script-delete":
+        "Remove a swarm-shared script from the catalog. Versions table preserves history.",
+      "script-query-types":
+        "Fetch the signature + the auto-generated `swarm-sdk.d.ts` (derived from the live MCP tool registry) + the `stdlib.d.ts` blobs — for IDE-style introspection before authoring or running a script. The same types are used by `script-upsert`'s typecheck pass, so they are authoritative.",
+    };
+    for (const [toolName, description] of Object.entries(documentedDescriptions)) {
+      const tool = tools[toolName];
+      expect(tool).toBeDefined();
+      expect(tool.title).toBeTruthy();
+      expect(tool.description).toBe(description);
+      expect(tool.inputSchema).toBeTruthy();
+      expect(tool.outputSchema).toBeTruthy();
+      expect(tool.annotations).toBeTruthy();
+    }
+  });
+});
+});

package/src/tests/redacted.test.ts ADDED Viewed

@@ -0,0 +1,29 @@
+import { describe, expect, test } from "bun:test";
+import { inspect } from "node:util";
+import { Redacted } from "../scripts-runtime/redacted";
+describe("Redacted", () => {
+  test("stringification surfaces are redacted", () => {
+    const plaintext = "hunter2";
+    const placeholder = "<redacted>";
+    const wrapped = Redacted.make(plaintext, { type: "user", isSecret: true });
+    // String coercion, JSON serialisation and util.inspect must all hide the wrapped value.
+    expect(String(wrapped)).toBe(placeholder);
+    expect(JSON.stringify({ secret: wrapped })).toBe('{"secret":"<redacted>"}');
+    expect(inspect(wrapped)).toContain(placeholder);
+    expect(inspect(wrapped)).not.toContain(plaintext);
+  });
+  test("value round-trips the original value", () => {
+    const original = { nested: true };
+    // toBe: the very same object reference must come back out.
+    expect(Redacted.value(Redacted.make(original))).toBe(original);
+  });
+  test("meta returns the stored metadata", () => {
+    const metadata = { type: "system", isSecret: false };
+    const wrapped = Redacted.make("abc", { type: "system", isSecret: false });
+    expect(Redacted.meta(wrapped)).toEqual(metadata);
+    expect(Redacted.isSecret(wrapped)).toBe(false);
+  });
+  test("unregistered objects throw", () => {
+    const readUnregistered = () => Redacted.value({} as never);
+    expect(readUnregistered).toThrow("Redacted value was not in registry");
+  });
+});

package/src/tests/runner-tool-spans.test.ts ADDED Viewed

@@ -0,0 +1,268 @@
+import { describe, expect, test } from "bun:test";
+import { type ActiveToolSpanEntry, implicitCloseActiveToolSpans } from "../commands/runner";
+import type { Attributes, AttributeValue, SwarmSpan } from "../otel";
+/**
+ * Minimal recording SwarmSpan stub for asserting attributes/status/end calls.
+ * Keeps the runner-tool-spans unit test isolated from the real OTel SDK.
+ */
+type RecordingSpan = SwarmSpan & {
+  attrs: Record<string, AttributeValue>; // attributes captured by setAttribute / setAttributes
+  status?: { code: number; message?: string }; // last value handed to setStatus, if any
+  ended: boolean; // flipped to true once end() has been called
+};
+/**
+ * Creates a fresh recording span: attribute writes land in `attrs`, `setStatus` stores its
+ * argument in `status`, and `end()` sets `ended`. Events and exceptions are accepted but
+ * not recorded.
+ */
+function makeSpan(): RecordingSpan {
+  return {
+    ended: false,
+    attrs: {},
+    setAttribute(name: string, attrValue: AttributeValue) {
+      this.attrs[name] = attrValue;
+      return this;
+    },
+    setAttributes(attributes: Attributes) {
+      Object.entries(attributes).forEach(([name, attrValue]) => {
+        // Undefined attribute values are skipped rather than stored.
+        if (attrValue === undefined) return;
+        this.attrs[name] = attrValue;
+      });
+      return this;
+    },
+    addEvent() {
+      return this;
+    },
+    recordException() {},
+    setStatus(nextStatus) {
+      this.status = nextStatus;
+      return this;
+    },
+    end() {
+      this.ended = true;
+    },
+  };
+}
+/** Pairs a span with its start timestamp in the shape stored in the active-tool-spans map. */
+function entry(span: SwarmSpan, opts: { startedAt: number }): ActiveToolSpanEntry {
+  const { startedAt } = opts;
+  return { startedAt, span };
+}
+describe("implicitCloseActiveToolSpans", () => {
+  const IMPLICIT_CLOSE = "agentswarm.tool.implicit_close";
+  const DURATION_MS = "agentswarm.tool.duration_ms";
+  const CALL_ID = "agentswarm.tool.call_id";
+  test("closes worker.tool spans with implicit_close=true and accurate duration_ms", () => {
+    const recorded = makeSpan();
+    const active = new Map<string, ActiveToolSpanEntry>([
+      ["call-1", entry(recorded, { startedAt: 1_000 })],
+    ]);
+    expect(implicitCloseActiveToolSpans(active, 1_750)).toBe(1);
+    expect(recorded.ended).toBe(true);
+    expect(recorded.attrs[IMPLICIT_CLOSE]).toBe(true);
+    // 1_750 (now) - 1_000 (startedAt)
+    expect(recorded.attrs[DURATION_MS]).toBe(750);
+    expect(recorded.attrs[CALL_ID]).toBe("call-1");
+    expect(recorded.status?.code).toBe(1);
+    expect(active.has("call-1")).toBe(false);
+  });
+  test("closes MCP spans at the assistant-message boundary too", () => {
+    const mcpSpan = makeSpan();
+    const harnessSpan = makeSpan();
+    const active = new Map<string, ActiveToolSpanEntry>([
+      ["mcp-1", entry(mcpSpan, { startedAt: 1_000 })],
+      ["call-1", entry(harnessSpan, { startedAt: 1_000 })],
+    ]);
+    expect(implicitCloseActiveToolSpans(active, 2_000)).toBe(2);
+    expect(harnessSpan.ended).toBe(true);
+    expect(harnessSpan.attrs[IMPLICIT_CLOSE]).toBe(true);
+    expect(mcpSpan.ended).toBe(true);
+    expect(mcpSpan.attrs[IMPLICIT_CLOSE]).toBe(true);
+    expect(mcpSpan.attrs[DURATION_MS]).toBe(1_000);
+    expect(mcpSpan.attrs[CALL_ID]).toBe("mcp-1");
+    expect(mcpSpan.status?.code).toBe(1);
+    expect(active.size).toBe(0);
+  });
+  test("no-op on an empty map (and returns 0)", () => {
+    const active = new Map<string, ActiveToolSpanEntry>();
+    expect(implicitCloseActiveToolSpans(active, Date.now())).toBe(0);
+    expect(active.size).toBe(0);
+  });
+  test("closes multiple parallel spans (mix of harness and MCP) from the same turn", () => {
+    const a = makeSpan();
+    const b = makeSpan();
+    const c = makeSpan();
+    const active = new Map<string, ActiveToolSpanEntry>([
+      ["a", entry(a, { startedAt: 100 })],
+      ["b", entry(b, { startedAt: 200 })],
+      ["c", entry(c, { startedAt: 300 })],
+    ]);
+    expect(implicitCloseActiveToolSpans(active, 1_000)).toBe(3);
+    // Each duration is measured from that span's own start time.
+    expect(a.attrs[DURATION_MS]).toBe(900);
+    expect(b.attrs[DURATION_MS]).toBe(800);
+    expect(c.attrs[DURATION_MS]).toBe(700);
+    [a, b, c].forEach((closedSpan) => {
+      expect(closedSpan.ended).toBe(true);
+      expect(closedSpan.attrs[IMPLICIT_CLOSE]).toBe(true);
+    });
+    expect(active.size).toBe(0);
+  });
+  test("called twice after a single turn → second call is a no-op", () => {
+    const recorded = makeSpan();
+    const active = new Map<string, ActiveToolSpanEntry>([
+      ["call-1", entry(recorded, { startedAt: 1_000 })],
+    ]);
+    const firstPass = implicitCloseActiveToolSpans(active, 1_500);
+    expect(firstPass).toBe(1);
+    const secondPass = implicitCloseActiveToolSpans(active, 2_000);
+    expect(secondPass).toBe(0);
+    // The duration from the first pass must survive: no second overwrite at 2_000.
+    expect(recorded.attrs[DURATION_MS]).toBe(500);
+  });
+});
+describe("end-to-end boundary semantics (helper integration)", () => {
+  // Contract being simulated (runner event handling):
+  //   - tool_start registers an entry in the active-tool-spans map
+  //   - an assistant-message boundary invokes `implicitCloseActiveToolSpans`
+  //   - an explicit tool_end closes its entry directly (no implicit_close attr)
+  //   - session shutdown runs a `closeActiveToolSpans` analog as a safety net
+  // Only the exported helper is exercised for real; the runner's event loop is
+  // never driven. The local functions below stand in for the surrounding
+  // handler logic.
+  const IMPLICIT_CLOSE = "agentswarm.tool.implicit_close";
+  const DURATION_MS = "agentswarm.tool.duration_ms";
+  const UNCLOSED = "agentswarm.tool.unclosed";
+  function startToolSpan(
+    map: Map<string, ActiveToolSpanEntry>,
+    toolCallId: string,
+    opts: { startedAt: number },
+  ): RecordingSpan {
+    const opened = makeSpan();
+    const { startedAt } = opts;
+    map.set(toolCallId, { span: opened, startedAt });
+    return opened;
+  }
+  function endToolSpan(
+    map: Map<string, ActiveToolSpanEntry>,
+    toolCallId: string,
+    now: number,
+  ): void {
+    // Stand-in for the explicit `tool_end` branch in runner.ts: records the
+    // duration, sets an OK status and ends the span. It must NOT set
+    // `implicit_close`.
+    const tracked = map.get(toolCallId);
+    if (tracked === undefined) return;
+    const { span, startedAt } = tracked;
+    span.setAttributes({
+      "agentswarm.tool.duration_ms": now - startedAt,
+    });
+    span.setStatus({ code: 1 });
+    span.end();
+    map.delete(toolCallId);
+  }
+  function shutdownSafetyNet(
+    map: Map<string, ActiveToolSpanEntry>,
+    now: number,
+  ): { closed: number } {
+    // Stand-in for `closeActiveToolSpans` (the safety net). With the boundary
+    // close in place this is expected to find nothing in the typical case.
+    let closed = 0;
+    map.forEach((tracked, toolCallId) => {
+      tracked.span.setAttributes({
+        "agentswarm.tool.duration_ms": now - tracked.startedAt,
+        "agentswarm.tool.unclosed": true,
+        "agentswarm.tool.call_id": toolCallId,
+      });
+      tracked.span.end();
+      map.delete(toolCallId);
+      closed += 1;
+    });
+    return { closed };
+  }
+  test("tool_start → assistant boundary → span closes with implicit_close=true", () => {
+    const active = new Map<string, ActiveToolSpanEntry>();
+    const opened = startToolSpan(active, "call-1", { startedAt: 1_000 });
+    implicitCloseActiveToolSpans(active, 1_500);
+    expect(opened.ended).toBe(true);
+    expect(opened.attrs[IMPLICIT_CLOSE]).toBe(true);
+    expect(opened.attrs[DURATION_MS]).toBe(500);
+    expect(opened.attrs[UNCLOSED]).toBeUndefined();
+    expect(active.size).toBe(0);
+  });
+  test("tool_start → tool_end → span closes WITHOUT implicit_close", () => {
+    const active = new Map<string, ActiveToolSpanEntry>();
+    const opened = startToolSpan(active, "call-1", { startedAt: 1_000 });
+    endToolSpan(active, "call-1", 1_200);
+    expect(opened.ended).toBe(true);
+    expect(opened.attrs[DURATION_MS]).toBe(200);
+    expect(opened.attrs[IMPLICIT_CLOSE]).toBeUndefined();
+    expect(active.size).toBe(0);
+  });
+  test("MCP tool spans are also closed by the assistant-message boundary", () => {
+    const active = new Map<string, ActiveToolSpanEntry>();
+    const mcp = startToolSpan(active, "mcp-1", { startedAt: 1_000 });
+    implicitCloseActiveToolSpans(active, 2_000);
+    expect(mcp.ended).toBe(true);
+    expect(mcp.attrs[IMPLICIT_CLOSE]).toBe(true);
+    expect(mcp.attrs[DURATION_MS]).toBe(1_000);
+    expect(active.size).toBe(0);
+  });
+  test("mixed harness + MCP tool_starts → both kinds closed with implicit_close=true at boundary", () => {
+    // A turn in which the model invokes a harness tool (Bash) and an MCP tool
+    // (e.g. mcp__agent-swarm__store-progress); the next assistant message then
+    // arrives without any `tool_end` events from the adapter (Claude SDK
+    // behavior).
+    const active = new Map<string, ActiveToolSpanEntry>();
+    const harness = startToolSpan(active, "bash-1", { startedAt: 1_000 });
+    const mcp = startToolSpan(active, "mcp-1", { startedAt: 1_050 });
+    expect(implicitCloseActiveToolSpans(active, 2_500)).toBe(2);
+    expect(harness.ended).toBe(true);
+    expect(mcp.ended).toBe(true);
+    expect(harness.attrs[IMPLICIT_CLOSE]).toBe(true);
+    expect(mcp.attrs[IMPLICIT_CLOSE]).toBe(true);
+    expect(harness.attrs[DURATION_MS]).toBe(1_500);
+    expect(mcp.attrs[DURATION_MS]).toBe(1_450);
+    expect(harness.attrs[UNCLOSED]).toBeUndefined();
+    expect(mcp.attrs[UNCLOSED]).toBeUndefined();
+    expect(active.size).toBe(0);
+  });
+  test("after boundary closes all spans, shutdown safety net closes 0", () => {
+    const active = new Map<string, ActiveToolSpanEntry>();
+    startToolSpan(active, "call-1", { startedAt: 1_000 });
+    startToolSpan(active, "call-2", { startedAt: 1_100 });
+    implicitCloseActiveToolSpans(active, 1_800);
+    expect(active.size).toBe(0);
+    expect(shutdownSafetyNet(active, 2_000).closed).toBe(0);
+  });
+  test("if session ends before any boundary fires, safety net flags `unclosed`", () => {
+    const active = new Map<string, ActiveToolSpanEntry>();
+    const opened = startToolSpan(active, "call-1", { startedAt: 1_000 });
+    // Straight to shutdown: no boundary call in between.
+    expect(shutdownSafetyNet(active, 5_000).closed).toBe(1);
+    expect(opened.ended).toBe(true);
+    expect(opened.attrs[UNCLOSED]).toBe(true);
+    expect(opened.attrs[IMPLICIT_CLOSE]).toBeUndefined();
+    expect(opened.attrs[DURATION_MS]).toBe(4_000);
+  });
+});

package/src/tests/script-executor-conformance.test.ts ADDED Viewed

@@ -0,0 +1,142 @@
+import { describe, expect, test } from "bun:test";
+import { NativeScriptExecutor } from "../scripts-runtime/executors/native";
+import type {
+  ExecutorInput,
+  ExecutorOutput,
+  ScriptExecutor,
+} from "../scripts-runtime/executors/types";
+import { DEFAULT_SCRIPT_RESOURCES } from "../scripts-runtime/executors/types";
+const payload = {
+  system: {
+    apiKey: { value: "conformance-secret", isSecret: true as const }, // read back by the "config payload is delivered" test
+    agentId: { value: "agent-1", isSecret: false as const },
+    mcpBaseUrl: { value: "http://localhost:3013", isSecret: false as const },
+  },
+  user: {}, // no user-level config entries in this fixture
+};
+/**
+ * Builds an ExecutorInput fixture for the tests in this file.
+ *
+ * Starts from a trivial script (`args.x + 1`), the shared `payload`, no filesystem access
+ * and open network, then applies `overrides` on top.
+ *
+ * `resources` is handled separately: the result is always DEFAULT_SCRIPT_RESOURCES plus the
+ * test defaults (memoryMb 2048, wallClockMs 1000), with `overrides.resources` layered on top
+ * key by key. It is assigned after the `...overrides` spread on purpose: spreading
+ * `overrides` last would replace the merged object with the raw override (or with
+ * `undefined` when a caller passes `resources: undefined`).
+ *
+ * @param overrides Fields to replace in the default fixture.
+ * @returns A complete ExecutorInput.
+ */
+function input(overrides: Partial<ExecutorInput> = {}): ExecutorInput {
+  return {
+    source: "export default async (args) => args.x + 1;",
+    args: { x: 1 },
+    configPayload: payload,
+    fsMode: "none",
+    network: "open",
+    ...overrides,
+    // Must come after the spread so the merged limits win over `overrides.resources`.
+    resources: {
+      ...DEFAULT_SCRIPT_RESOURCES,
+      memoryMb: 2048,
+      wallClockMs: 1_000,
+      ...overrides.resources,
+    },
+  };
+}
+/**
+ * Canned ScriptExecutor used as a second implementation for the shared conformance suite.
+ * It never evaluates `source`: the outcome depends only on `fsMode`, `signal`, the stdout
+ * cap and the config payload of the input.
+ */
+class FakeScriptExecutor implements ScriptExecutor {
+  readonly name = "fake";
+  async run(runInput: ExecutorInput): Promise<ExecutorOutput> {
+    // Shared shape for both failure outcomes: empty stdout, exit code 1, nothing truncated.
+    const failed = (error: "executor_error" | "killed", stderr: string): ExecutorOutput => ({
+      result: undefined,
+      stdout: "",
+      stderr,
+      truncated: { stdout: false, stderr: false },
+      durationMs: 0,
+      exitCode: 1,
+      error,
+    });
+    if (runInput.fsMode === "workspace-rw") {
+      return failed("executor_error", "workspace-rw not supported");
+    }
+    if (runInput.signal?.aborted) {
+      return failed("killed", "");
+    }
+    // Always produce more output than the cap allows, then cut it back to the cap.
+    const { maxStdoutBytes } = runInput.resources;
+    const oversized = "x".repeat(maxStdoutBytes + 10);
+    return {
+      result: runInput.configPayload.system.apiKey.value,
+      stdout: oversized.slice(0, maxStdoutBytes),
+      stderr: "",
+      truncated: { stdout: true, stderr: false },
+      durationMs: 1,
+      exitCode: 0,
+    };
+  }
+}
+/**
+ * Registers the shared ScriptExecutor conformance suite under `${name} ScriptExecutor conformance`.
+ * Each test obtains its own executor from `makeExecutor`.
+ */
+function conformance(name: string, makeExecutor: () => ScriptExecutor) {
+  describe(`${name} ScriptExecutor conformance`, () => {
+    test("happy path run", async () => {
+      const executor = makeExecutor();
+      const runInput = input({
+        source: "export default async (args) => args.x + 1;",
+        args: { x: 2 },
+      });
+      const { exitCode, error } = await executor.run(runInput);
+      expect(exitCode).toBe(0);
+      expect(error).toBeUndefined();
+    });
+    test("stdout cap is honored", async () => {
+      const executor = makeExecutor();
+      // The script prints 512 characters against a 64-byte stdout cap.
+      const cappedResources = {
+        ...DEFAULT_SCRIPT_RESOURCES,
+        memoryMb: 2048,
+        maxStdoutBytes: 64,
+        wallClockMs: 1_000,
+      };
+      const { stdout, truncated } = await executor.run(
+        input({
+          resources: cappedResources,
+          source: "export default async () => { console.log('x'.repeat(512)); return true; };",
+        }),
+      );
+      expect(stdout.length).toBeLessThanOrEqual(64);
+      expect(truncated.stdout).toBe(true);
+    });
+    test("workspace-rw returns executor_error", async () => {
+      const executor = makeExecutor();
+      const { error } = await executor.run(input({ fsMode: "workspace-rw" }));
+      expect(error).toBe("executor_error");
+    });
+    test("config payload is delivered", async () => {
+      const executor = makeExecutor();
+      // The script unwraps the API key from its config and returns it as the result.
+      const readsApiKey =
+        "export default async (_args, ctx) => ctx.stdlib.Redacted.value(ctx.swarm.config.apiKey);";
+      const { result } = await executor.run(input({ source: readsApiKey }));
+      expect(result).toBe("conformance-secret");
+    });
+  });
+}
+conformance("native", () => new NativeScriptExecutor()); // shared suite against the native executor
+conformance("fake", () => new FakeScriptExecutor()); // same suite against the canned fake
+describe("native-only executor behavior", () => {
+  test("timeout maps to timeout", async () => {
+    const executor = new NativeScriptExecutor();
+    // The script returns a promise that never settles; only the 100 ms wall clock can end it.
+    const neverSettles = "export default async () => new Promise(() => {});";
+    const { error } = await executor.run(
+      input({
+        resources: { ...DEFAULT_SCRIPT_RESOURCES, memoryMb: 2048, wallClockMs: 100 },
+        source: neverSettles,
+      }),
+    );
+    expect(error).toBe("timeout");
+  });
+  test("AbortSignal maps to killed", async () => {
+    // The signal is already aborted before the run starts.
+    const abortController = new AbortController();
+    abortController.abort();
+    const executor = new NativeScriptExecutor();
+    const { error } = await executor.run(input({ signal: abortController.signal }));
+    expect(error).toBe("killed");
+  });
+});