npm - @alexkroman1/aai - Versions diffs - 0.12.3 → 1.0.2 - Mend

@alexkroman1/aai 0.12.3 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

package/.turbo/turbo-build.log +20 -0
package/CHANGELOG.md +174 -0
package/dist/constants-VTFoymJ-.js +47 -0
package/dist/host/_run-code.d.ts +1 -1
package/dist/host/_runtime-conformance.d.ts +4 -5
package/dist/host/builtin-tools.d.ts +11 -9
package/dist/host/runtime-barrel.d.ts +15 -0
package/dist/{direct-executor-DRRrZUp0.js → host/runtime-barrel.js} +453 -348
package/dist/host/runtime-config.d.ts +42 -0
package/dist/host/runtime.d.ts +119 -35
package/dist/host/s2s.d.ts +14 -38
package/dist/host/server.d.ts +16 -8
package/dist/host/session-ctx.d.ts +55 -0
package/dist/host/session.d.ts +20 -70
package/dist/host/tool-executor.d.ts +20 -0
package/dist/host/unstorage-kv.d.ts +1 -1
package/dist/host/ws-handler.d.ts +4 -2
package/dist/index.d.ts +9 -20
package/dist/index.js +63 -2
package/dist/{isolate → sdk}/_internal-types.d.ts +5 -9
package/dist/{isolate → sdk}/constants.d.ts +6 -4
package/dist/sdk/define.d.ts +66 -0
package/dist/{isolate → sdk}/kv.d.ts +1 -49
package/dist/sdk/manifest-barrel.d.ts +8 -0
package/dist/sdk/manifest-barrel.js +52 -0
package/dist/sdk/manifest.d.ts +50 -0
package/dist/{isolate → sdk}/protocol.d.ts +59 -36
package/dist/sdk/protocol.js +163 -0
package/dist/{isolate → sdk}/system-prompt.d.ts +2 -2
package/dist/sdk/types.d.ts +201 -0
package/dist/sdk/ws-upgrade.d.ts +5 -0
package/dist/{system-prompt-DYAYFW99.js → system-prompt-nik_iavo.js} +10 -10
package/dist/types-Cfx_4QDK.js +39 -0
package/dist/ws-upgrade-BeOQ7fXL.js +30 -0
package/exports-no-dev-deps.test.ts +62 -0
package/host/_mock-ws.ts +185 -0
package/host/_run-code.ts +217 -0
package/host/_runtime-conformance.ts +143 -0
package/host/_test-utils.ts +276 -0
package/host/builtin-tools.test.ts +774 -0
package/host/builtin-tools.ts +255 -0
package/host/cleanup.test.ts +422 -0
package/host/fixture-replay.test.ts +463 -0
package/host/fixtures/README.md +40 -0
package/host/fixtures/greeting-session-sequence.json +40 -0
package/host/fixtures/reply-audio-samples.json +42 -0
package/host/fixtures/reply-lifecycle.json +21 -0
package/host/fixtures/session-ready.json +48 -0
package/host/fixtures/session-updated.json +45 -0
package/host/fixtures/simple-question-sequence.json +73 -0
package/host/fixtures/tool-call-sequence.json +114 -0
package/host/fixtures/tool-calls.json +11 -0
package/host/fixtures/tool-config-session-sequence.json +51 -0
package/host/fixtures/user-speech-recognition.json +30 -0
package/host/fixtures/web-search-sequence.json +122 -0
package/host/integration.test.ts +222 -0
package/host/runtime-barrel.ts +25 -0
package/host/runtime-config.test.ts +71 -0
package/host/runtime-config.ts +99 -0
package/host/runtime.test.ts +641 -0
package/host/runtime.ts +308 -0
package/host/s2s-fixtures.test.ts +237 -0
package/host/s2s.test.ts +562 -0
package/host/s2s.ts +310 -0
package/host/server-shutdown.test.ts +76 -0
package/host/server.test.ts +116 -0
package/host/server.ts +223 -0
package/host/session-ctx.ts +107 -0
package/host/session-fixture-replay.test.ts +136 -0
package/host/session-prompt.test.ts +77 -0
package/host/session.test.ts +590 -0
package/host/session.ts +370 -0
package/host/tool-executor.test.ts +124 -0
package/host/tool-executor.ts +80 -0
package/host/unstorage-kv.test.ts +99 -0
package/host/unstorage-kv.ts +69 -0
package/host/ws-handler.test.ts +739 -0
package/host/ws-handler.ts +255 -0
package/index.ts +16 -0
package/package.json +24 -72
package/sdk/_internal-types.test.ts +34 -0
package/sdk/_internal-types.ts +115 -0
package/sdk/compat-fixtures/README.md +26 -0
package/sdk/compat-fixtures/v1.json +68 -0
package/sdk/constants.ts +77 -0
package/sdk/define.test.ts +57 -0
package/sdk/define.ts +88 -0
package/sdk/kv.ts +60 -0
package/sdk/manifest-barrel.ts +12 -0
package/sdk/manifest.test.ts +56 -0
package/sdk/manifest.ts +89 -0
package/sdk/protocol-compat.test.ts +187 -0
package/sdk/protocol-snapshot.test.ts +199 -0
package/sdk/protocol.test.ts +170 -0
package/sdk/protocol.ts +223 -0
package/sdk/schema-alignment.test.ts +191 -0
package/sdk/system-prompt.test.ts +111 -0
package/sdk/system-prompt.ts +74 -0
package/sdk/tsconfig.json +12 -0
package/sdk/types-inference.test.ts +122 -0
package/sdk/types.test.ts +14 -0
package/sdk/types.ts +226 -0
package/sdk/utils.test.ts +52 -0
package/sdk/utils.ts +20 -0
package/sdk/ws-upgrade.test.ts +48 -0
package/sdk/ws-upgrade.ts +13 -0
package/tsconfig.build.json +14 -0
package/tsconfig.json +10 -0
package/tsdown.config.ts +26 -0
package/vitest.config.ts +17 -0
package/dist/host/_test-utils.d.ts +0 -73
package/dist/host/direct-executor.d.ts +0 -130
package/dist/host/index.d.ts +0 -19
package/dist/host/index.js +0 -165
package/dist/host/matchers.d.ts +0 -20
package/dist/host/matchers.js +0 -41
package/dist/host/server.js +0 -164
package/dist/host/testing.d.ts +0 -294
package/dist/host/testing.js +0 -2
package/dist/host/vite-plugin.d.ts +0 -15
package/dist/host/vite-plugin.js +0 -83
package/dist/isolate/_kv-utils.d.ts +0 -10
package/dist/isolate/_utils.js +0 -17
package/dist/isolate/hooks.d.ts +0 -44
package/dist/isolate/hooks.js +0 -58
package/dist/isolate/index.d.ts +0 -18
package/dist/isolate/index.js +0 -6
package/dist/isolate/kv.js +0 -1
package/dist/isolate/protocol.js +0 -2
package/dist/isolate/types.d.ts +0 -418
package/dist/isolate/types.js +0 -175
package/dist/protocol-rcOrz7T3.js +0 -183
package/dist/testing-BreLdpq-.js +0 -513
package/dist/types.test-d.d.ts +0 -7
/package/dist/{isolate/_utils.d.ts → sdk/utils.d.ts} +0 -0

package/host/builtin-tools.ts ADDED Viewed

@@ -0,0 +1,255 @@
+// Copyright 2025 the AAI authors. MIT license.
+/**
+ * Built-in tool definitions for the AAI agent SDK.
+ *
+ * In self-hosted mode, these run in-process alongside custom tools.
+ * In platform mode, they run on the host process outside the sandbox.
+ * Network requests go through the host's fetch proxy (with SSRF protection).
+ */
+import { convert } from "html-to-text";
+import { z } from "zod";
+import { EMPTY_PARAMS, type ToolSchema } from "../sdk/_internal-types.ts";
+import { FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_PAGE_CHARS } from "../sdk/constants.ts";
+import type { ToolDef } from "../sdk/types.ts";
+import { createRunCode } from "./_run-code.ts";
+export { executeInIsolate } from "./_run-code.ts";
+/** Creates a new abort signal that times out after FETCH_TIMEOUT_MS; called once per request. */
+function fetchSignal(): AbortSignal {
+  return AbortSignal.timeout(FETCH_TIMEOUT_MS);
+}
+/** Converts an HTML string to plain text with word wrapping turned off. */
+function htmlToText(html: string): string {
+  return convert(html, { wordwrap: false });
+}
+// ─── web_search ────────────────────────────────────────────────────────────
+/** Input schema for the web_search tool; `max_results` may be omitted by the caller. */
+const webSearchParams = z.object({
+  query: z.string().describe("The search query"),
+  max_results: z.number().describe("Maximum number of results to return (default 5)").optional(),
+});
+/** Brave Search endpoint that web_search sends its GET request to. */
+const BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search";
+/**
+ * Shape of the Brave response that web_search reads: each result's title, url and description.
+ * `web` is optional, so a payload without it still parses.
+ */
+const BraveSearchResponseSchema = z.object({
+  web: z
+    .object({
+      results: z.array(
+        z.object({
+          title: z.string(),
+          url: z.string(),
+          description: z.string(),
+        }),
+      ),
+    })
+    .optional(),
+});
+/** Largest `count` the Brave web search endpoint accepts per request. */
+const BRAVE_MAX_COUNT = 20;
+/** Result count used when `max_results` is omitted or is not a finite number. */
+const DEFAULT_MAX_RESULTS = 5;
+/**
+ * Build the `web_search` tool, which queries the Brave Search API.
+ *
+ * `execute` reports the failures it checks for as an `{ error }` object instead of
+ * throwing: a missing `BRAVE_API_KEY`, a non-OK HTTP response, and a body that is
+ * not JSON or does not match `BraveSearchResponseSchema`. Errors raised by
+ * `fetchFn` itself (network failure, timeout) still propagate to the caller.
+ *
+ * @param fetchFn - Fetch implementation to use; defaults to `globalThis.fetch`.
+ * @returns The tool definition together with its `guidance` string.
+ */
+function createWebSearch(
+  fetchFn = globalThis.fetch,
+): ToolDef<typeof webSearchParams> & { guidance: string } {
+  return {
+    guidance:
+      "Use web_search for factual questions, current events, or anything you are unsure about. " +
+      "Search first rather than guessing.",
+    description:
+      "Search the web for current information, facts, news, or answers to questions. Returns a list of results with title, URL, and description. Use this when the user asks about something you don't know, need up-to-date information, or want to verify facts.",
+    parameters: webSearchParams,
+    async execute(args, ctx) {
+      const { query, max_results: requested = DEFAULT_MAX_RESULTS } = args;
+      // The model controls max_results, so normalise it to a whole number in
+      // [1, BRAVE_MAX_COUNT]. Without this, 0 or a negative value makes the final
+      // slice() return the wrong rows and an oversized value is rejected upstream.
+      const maxResults = Number.isFinite(requested)
+        ? Math.min(Math.max(Math.trunc(requested), 1), BRAVE_MAX_COUNT)
+        : DEFAULT_MAX_RESULTS;
+      const apiKey = ctx.env.BRAVE_API_KEY ?? "";
+      if (!apiKey) {
+        return { error: "BRAVE_API_KEY is not set — web search unavailable" };
+      }
+      const url = `${BRAVE_SEARCH_URL}?${new URLSearchParams({
+        q: query,
+        count: String(maxResults),
+        text_decorations: "false",
+      })}`;
+      const resp = await fetchFn(url, {
+        headers: { "X-Subscription-Token": apiKey },
+        signal: fetchSignal(),
+      });
+      if (!resp.ok) {
+        return { error: `Search request failed: ${resp.status} ${resp.statusText}` };
+      }
+      // A 2xx response can still carry a non-JSON body; report it the same way as
+      // a schema mismatch rather than letting the parse error escape.
+      let raw: unknown;
+      try {
+        raw = await resp.json();
+      } catch {
+        return { error: "Unexpected search response format" };
+      }
+      const data = BraveSearchResponseSchema.safeParse(raw);
+      if (!data.success) {
+        return { error: "Unexpected search response format" };
+      }
+      // Re-apply the limit locally in case the API returns more rows than requested.
+      return (data.data.web?.results ?? []).slice(0, maxResults).map((r) => ({
+        title: r.title,
+        url: r.url,
+        description: r.description,
+      }));
+    },
+  };
+}
+// ─── visit_webpage ─────────────────────────────────────────────────────────
+/** Input schema for the visit_webpage tool: a single `url` string. */
+const visitWebpageParams = z.object({
+  url: z.string().describe("The full URL to fetch (e.g., 'https://example.com/page')"),
+});
+/**
+ * Build the `visit_webpage` tool, which downloads a page and returns it as plain text.
+ *
+ * The HTML is cut to MAX_HTML_BYTES characters before conversion, and the converted
+ * text is cut to MAX_PAGE_CHARS. When the text is cut, the result also carries
+ * `truncated: true` and the length of the uncut text in `totalChars`.
+ * A non-OK HTTP response yields `{ error, url }`.
+ *
+ * @param fetchFn - Fetch implementation to use; defaults to `globalThis.fetch`.
+ * @returns The tool definition together with its `guidance` string.
+ */
+function createVisitWebpage(
+  fetchFn = globalThis.fetch,
+): ToolDef<typeof visitWebpageParams> & { guidance: string } {
+  return {
+    guidance:
+      "Use visit_webpage to read the full content of a URL when search snippets are not detailed enough.",
+    description:
+      "Fetch a webpage and return its content as clean text. Use this to read the full content of a URL found via web_search, or any link the user shares. Good for reading articles, documentation, blog posts, or product pages.",
+    parameters: visitWebpageParams,
+    async execute({ url }, _ctx) {
+      const response = await fetchFn(url, {
+        headers: {
+          "User-Agent":
+            "Mozilla/5.0 (compatible; VoiceAgent/1.0; +https://github.com/AssemblyAI/aai)",
+          Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        },
+        signal: fetchSignal(),
+      });
+      if (!response.ok) {
+        return { error: `Failed to fetch: ${response.status} ${response.statusText}`, url };
+      }
+      // Cap the raw markup first so the HTML-to-text conversion works on bounded input.
+      let html = await response.text();
+      if (html.length > MAX_HTML_BYTES) {
+        html = html.slice(0, MAX_HTML_BYTES);
+      }
+      const text = htmlToText(html);
+      if (text.length <= MAX_PAGE_CHARS) {
+        return { url, content: text };
+      }
+      return {
+        url,
+        content: text.slice(0, MAX_PAGE_CHARS),
+        truncated: true,
+        totalChars: text.length,
+      };
+    },
+  };
+}
+// ─── fetch_json ────────────────────────────────────────────────────────────
+/** Input schema for the fetch_json tool: a `url` plus an optional string-to-string `headers` map. */
+const fetchJsonParams = z.object({
+  url: z.string().describe("The URL to fetch JSON from"),
+  headers: z
+    .record(z.string(), z.string())
+    .describe(
+      "Optional HTTP headers to include in the request (only safe headers like Accept, Content-Type are allowed)",
+    )
+    .optional(),
+});
+/**
+ * Headers the LLM must never control — could exfiltrate credentials or manipulate routing.
+ * Entries are lower-case because `sanitizeHeaders` lower-cases each incoming name before the lookup.
+ */
+const BLOCKED_FETCH_HEADERS = new Set([
+  "authorization",
+  "cookie",
+  "set-cookie",
+  "host",
+  "proxy-authorization",
+  "x-forwarded-for",
+  "x-forwarded-host",
+  "x-forwarded-proto",
+  "x-real-ip",
+  "cf-connecting-ip",
+  "fly-client-ip",
+]);
+/**
+ * Drop every header whose lower-cased name is in BLOCKED_FETCH_HEADERS.
+ *
+ * @param raw - Caller-supplied headers, or `undefined` when none were given.
+ * @returns The remaining headers with their original key casing, or `undefined`
+ *   when `raw` is missing or nothing is left after filtering.
+ */
+function sanitizeHeaders(
+  raw: Record<string, string> | undefined,
+): Record<string, string> | undefined {
+  if (!raw) return undefined;
+  const allowed: Record<string, string> = {};
+  Object.entries(raw).forEach(([name, value]) => {
+    if (BLOCKED_FETCH_HEADERS.has(name.toLowerCase())) return;
+    allowed[name] = value;
+  });
+  if (Object.keys(allowed).length === 0) return undefined;
+  return allowed;
+}
+/**
+ * Build the `fetch_json` tool, which issues a request to a URL and returns the parsed JSON body.
+ *
+ * Caller-supplied headers pass through `sanitizeHeaders` first; the `headers` option is
+ * only set on the request when at least one header survives. A non-OK response or a
+ * body that fails to parse yields `{ error, url }` instead of throwing.
+ *
+ * @param fetchFn - Fetch implementation to use; defaults to `globalThis.fetch`.
+ * @returns The tool definition together with its `guidance` string.
+ */
+function createFetchJson(
+  fetchFn = globalThis.fetch,
+): ToolDef<typeof fetchJsonParams> & { guidance: string } {
+  return {
+    guidance: "Use fetch_json to call REST APIs and retrieve structured JSON data.",
+    description:
+      "Call a REST API endpoint via HTTP GET and return the JSON response. Use this to fetch structured data from APIs — for example, weather data, stock prices, exchange rates, or any public JSON API. Supports custom headers for authenticated APIs.",
+    parameters: fetchJsonParams,
+    async execute(args, _ctx) {
+      const target = args.url;
+      const filtered = sanitizeHeaders(args.headers);
+      const init = filtered
+        ? { headers: filtered, signal: fetchSignal() }
+        : { signal: fetchSignal() };
+      const response = await fetchFn(target, init);
+      if (!response.ok) {
+        return { error: `HTTP ${response.status} ${response.statusText}`, url: target };
+      }
+      try {
+        const parsed = await response.json();
+        return parsed;
+      } catch {
+        return { error: "Response was not valid JSON", url: target };
+      }
+    },
+  };
+}
+// ─── Public API ────────────────────────────────────────────────────────────
+/** Options for creating built-in tool definitions. */
+export type BuiltinToolOptions = {
+  /** Override fetch implementation (defaults to globalThis.fetch). For testing. */
+  fetch?: typeof globalThis.fetch;
+};
+/** Map from tool name to its definition, as stored in `ResolvedBuiltins.defs`. */
+type ToolDefRecord = Record<string, ToolDef<z.ZodObject<z.ZodRawShape>>>;
+/**
+ * Turn one builtin name into its `[toolName, ToolDef]` pairs.
+ *
+ * The three network tools receive `opts?.fetch`; `run_code` is created without it.
+ * A name that matches no builtin produces an empty array.
+ */
+function resolveBuiltin(name: string, opts?: BuiltinToolOptions): [string, ToolDef][] {
+  if (name === "web_search") {
+    return [["web_search", createWebSearch(opts?.fetch)]];
+  }
+  if (name === "visit_webpage") {
+    return [["visit_webpage", createVisitWebpage(opts?.fetch)]];
+  }
+  if (name === "fetch_json") {
+    return [["fetch_json", createFetchJson(opts?.fetch)]];
+  }
+  if (name === "run_code") {
+    return [["run_code", createRunCode()]];
+  }
+  return [];
+}
+/** Resolved builtins with defs, schemas, and guidance computed in a single pass. */
+export type ResolvedBuiltins = {
+  /** Tool definitions keyed by tool name. */
+  defs: ToolDefRecord;
+  /** One schema entry per resolved tool: name, description and JSON-schema parameters. */
+  schemas: ToolSchema[];
+  /** Guidance strings collected from the tools that define one. */
+  guidance: string[];
+};
+/**
+ * Resolve a list of builtin names into tool definitions, JSON schemas and guidance text.
+ *
+ * Each name is resolved once and each tool's parameters are converted to JSON schema
+ * once (falling back to EMPTY_PARAMS when a tool declares none). Names that match no
+ * builtin contribute nothing. If two names yield the same tool name, the later
+ * definition replaces the earlier one in `defs`, while `schemas` keeps both entries.
+ *
+ * @param names - Builtin names to resolve, processed in order.
+ * @param opts - Optional overrides forwarded to each tool factory.
+ */
+export function resolveAllBuiltins(
+  names: readonly string[],
+  opts?: BuiltinToolOptions,
+): ResolvedBuiltins {
+  const defs: ToolDefRecord = {};
+  const schemas: ToolSchema[] = [];
+  const guidance: string[] = [];
+  const resolved = names.flatMap((name) => resolveBuiltin(name, opts));
+  for (const [toolName, def] of resolved) {
+    defs[toolName] = def;
+    const parameters = z.toJSONSchema(def.parameters ?? EMPTY_PARAMS) as ToolSchema["parameters"];
+    schemas.push({ name: toolName, description: def.description, parameters });
+    // `guidance` is not part of ToolDef, so read it through a widened view of the object.
+    const { guidance: hint } = def as { guidance?: string };
+    if (hint) {
+      guidance.push(hint);
+    }
+  }
+  return { defs, schemas, guidance };
+}

package/host/cleanup.test.ts ADDED Viewed

@@ -0,0 +1,422 @@
+// Copyright 2025 the AAI authors. MIT license.
+/**
+ * Resource cleanup and leak detection tests for server-side components.
+ *
+ * Verifies that WebSocket connections, S2S handles, timers,
+ * message buffers, and hook promises are properly cleaned up on disconnect,
+ * error, and reset to prevent memory leaks in long-running processes.
+ */
+import { afterEach, describe, expect, test, vi } from "vitest";
+import { MockWebSocket } from "./_mock-ws.ts";
+import {
+  makeClient,
+  makeMockHandle,
+  makeSessionOpts,
+  makeStubSession,
+  silentLogger,
+} from "./_test-utils.ts";
+import type { S2sHandle } from "./s2s.ts";
+import type { Session } from "./session.ts";
+import { _internals, createS2sSession, type S2sSessionOptions } from "./session.ts";
+import { wireSessionSocket } from "./ws-handler.ts";
+// Shared `readyConfig` value passed to every wireSessionSocket call in this file.
+const defaultConfig = { audioFormat: "pcm16" as const, sampleRate: 16_000, ttsSampleRate: 24_000 };
+// ─── wireSessionSocket cleanup tests ─────────────────────────────────────────
+describe("wireSessionSocket resource cleanup", () => {
+  // Wires an already-open mock socket, closes it, and waits until the stub session's stop() has run once.
+  test("session.stop() is called exactly once on normal close", async () => {
+    const session = makeStubSession();
+    const ws = new MockWebSocket("ws://test");
+    ws.readyState = MockWebSocket.OPEN;
+    wireSessionSocket(ws, {
+      sessions: new Map(),
+      createSession: () => session,
+      readyConfig: defaultConfig,
+      logger: silentLogger,
+    });
+    ws.close();
+    await vi.waitFor(() => {
+      expect(session.stop).toHaveBeenCalledOnce();
+    });
+  });
+  // stop() is replaced with a rejecting mock; the sessions map must go from 1 entry to 0 after close anyway.
+  test("session is removed from sessions map even when stop() rejects", async () => {
+    const sessions = new Map<string, Session>();
+    const session = makeStubSession();
+    session.stop = vi.fn(() => Promise.reject(new Error("stop failed")));
+    const ws = new MockWebSocket("ws://test");
+    ws.readyState = MockWebSocket.OPEN;
+    wireSessionSocket(ws, {
+      sessions,
+      createSession: () => session,
+      readyConfig: defaultConfig,
+      logger: silentLogger,
+    });
+    expect(sessions.size).toBe(1);
+    ws.close();
+    await vi.waitFor(() => {
+      expect(sessions.size).toBe(0);
+    });
+  });
+  // start() is replaced with a rejecting mock; the test waits for the sessions map to empty,
+  // then sends a JSON message and a binary message and passes if neither throws.
+  // NOTE(review): nothing here inspects the message buffer itself — confirm the title's claim is covered.
+  test("message buffer is cleared when start() fails", async () => {
+    const session = makeStubSession();
+    session.start = vi.fn(() => Promise.reject(new Error("start failed")));
+    const sessions = new Map<string, Session>();
+    const ws = new MockWebSocket("ws://test");
+    ws.readyState = MockWebSocket.OPEN;
+    wireSessionSocket(ws, {
+      sessions,
+      createSession: () => session,
+      readyConfig: defaultConfig,
+      logger: silentLogger,
+    });
+    // Send messages while start is failing
+    ws.simulateMessage(JSON.stringify({ type: "audio_ready" }));
+    await vi.waitFor(() => {
+      expect(sessions.size).toBe(0);
+    });
+    // Session is null, further messages should be silently ignored (no throw)
+    ws.simulateMessage(JSON.stringify({ type: "audio_ready" }));
+    ws.simulateMessage(new ArrayBuffer(4));
+  });
+  // stop() is replaced with a mock that resolves after 50 ms; the socket is closed and the test
+  // waits until stop() has been called once.
+  // NOTE(review): ws.close() is invoked only once here, so a repeated close event is not
+  // actually simulated — confirm whether a second close should be dispatched.
+  test("multiple rapid closes don't double-invoke stop()", async () => {
+    const session = makeStubSession();
+    session.stop = vi.fn(() => new Promise<void>((r) => setTimeout(r, 50)));
+    const sessions = new Map<string, Session>();
+    const ws = new MockWebSocket("ws://test");
+    ws.readyState = MockWebSocket.OPEN;
+    wireSessionSocket(ws, {
+      sessions,
+      createSession: () => session,
+      readyConfig: defaultConfig,
+      logger: silentLogger,
+    });
+    ws.close();
+    // Even if close event fires again, stop should only be called once
+    // because the session reference is captured on first close
+    await vi.waitFor(() => {
+      expect(session.stop).toHaveBeenCalledOnce();
+    });
+  });
+  // The socket is left in CONNECTING when wired; closing it must not throw and must leave the sessions map empty.
+  test("close before open does not throw or leak", () => {
+    const ws = new MockWebSocket("ws://test");
+    ws.readyState = MockWebSocket.CONNECTING;
+    const sessions = new Map<string, Session>();
+    wireSessionSocket(ws, {
+      sessions,
+      createSession: () => makeStubSession(),
+      readyConfig: defaultConfig,
+      logger: silentLogger,
+    });
+    // Close before open — session is null, should not throw
+    ws.close();
+    expect(sessions.size).toBe(0);
+  });
+  // After close has triggered stop(), an "error" event is dispatched on the socket; the test passes if that does not throw.
+  test("error event after close does not throw", async () => {
+    const session = makeStubSession();
+    const ws = new MockWebSocket("ws://test");
+    ws.readyState = MockWebSocket.OPEN;
+    wireSessionSocket(ws, {
+      sessions: new Map(),
+      createSession: () => session,
+      readyConfig: defaultConfig,
+      logger: silentLogger,
+    });
+    ws.close();
+    await vi.waitFor(() => {
+      expect(session.stop).toHaveBeenCalled();
+    });
+    // Error after close should not throw
+    ws.dispatchEvent(new Event("error"));
+  });
+});
+// ─── createS2sSession cleanup tests ──────────────────────────────────────────
+describe("createS2sSession resource cleanup", () => {
+  // Spy on `_internals.connectS2s` installed by setup(); restored in afterEach.
+  let connectSpy: ReturnType<typeof vi.spyOn>;
+  // Mock S2S handle that the spy resolves with for the current test.
+  let mockHandle: ReturnType<typeof makeMockHandle>;
+  /**
+   * Creates a fresh mock handle, stubs `connectS2s` to resolve with it, and builds a
+   * session from `makeSessionOpts` with a new client plus any `overrides`.
+   * Returns the session, client, options and mock handle.
+   */
+  function setup(overrides?: Partial<S2sSessionOptions>) {
+    mockHandle = makeMockHandle();
+    connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
+    const client = makeClient();
+    const opts = makeSessionOpts({ client, ...overrides });
+    const session = createS2sSession(opts);
+    return { session, client, opts, mockHandle };
+  }
+  afterEach(() => {
+    connectSpy?.mockRestore();
+    // Several tests install their own `connectS2s` spy and restore it only on their last
+    // line, so a failing assertion would leave it in place for later tests.
+    // Restoring every spy here acts as a safety net.
+    vi.restoreAllMocks();
+  });
+  // executeTool returns a promise the test resolves by hand, so stop() is called while the
+  // tool call is still pending; after resolving it, the handle must have been closed.
+  test("stop() closes S2S handle and waits for in-flight turn", async () => {
+    let resolveToolCall!: (value: string) => void;
+    const executeTool = vi.fn(
+      () =>
+        new Promise<string>((r) => {
+          resolveToolCall = r;
+        }),
+    );
+    const { session, mockHandle } = setup({ executeTool });
+    await session.start();
+    // Start a tool call
+    mockHandle._fire("replyStarted", { replyId: "r1" });
+    mockHandle._fire("event", { type: "tool_call", toolCallId: "c1", toolName: "t1", args: {} });
+    await vi.waitFor(() => expect(executeTool).toHaveBeenCalled());
+    // Stop while tool is in-flight
+    const stopPromise = session.stop();
+    resolveToolCall("done");
+    await stopPromise;
+    expect(mockHandle.close).toHaveBeenCalled();
+  });
+  // Runs one tool call and one user transcript through the session, then calls onReset().
+  // NOTE(review): the only assertion is that the old handle was closed; pendingTools and
+  // conversation messages are not inspected — confirm the title's claim is covered.
+  test("onReset clears pendingTools and conversation messages", async () => {
+    const executeTool = vi.fn(async () => "result");
+    const { session, mockHandle } = setup({ executeTool });
+    await session.start();
+    // Accumulate some tool calls
+    mockHandle._fire("replyStarted", { replyId: "r1" });
+    mockHandle._fire("event", { type: "tool_call", toolCallId: "c1", toolName: "t1", args: {} });
+    await session.waitForTurn();
+    // Send a user transcript to add conversation messages
+    mockHandle._fire("event", { type: "user_transcript", text: "Hello" });
+    // Reset — should clear pending tools and conversation
+    session.onReset();
+    // Verify old handle was closed
+    expect(mockHandle.close).toHaveBeenCalled();
+  });
+  // connectS2s is stubbed to hand out a new mock handle per call and record it in `handles`.
+  // A tool call is left pending on the first handle, the session is reset, and the tool is
+  // then resolved; the second handle (if one was created) must not receive a tool result.
+  test("onReset invalidates currentReplyId to discard stale tool results", async () => {
+    let resolveToolCall!: (value: string) => void;
+    const executeTool = vi.fn(
+      () =>
+        new Promise<string>((r) => {
+          resolveToolCall = r;
+        }),
+    );
+    const handles: ReturnType<typeof makeMockHandle>[] = [];
+    const spy = vi.spyOn(_internals, "connectS2s").mockImplementation(async () => {
+      const h = makeMockHandle();
+      handles.push(h);
+      return h;
+    });
+    const client = makeClient();
+    const session = createS2sSession(makeSessionOpts({ client, executeTool }));
+    await session.start();
+    // biome-ignore lint/style/noNonNullAssertion: test assertions after length check
+    const firstHandle = handles[0]!;
+    // Start a tool call on the first handle
+    firstHandle._fire("replyStarted", { replyId: "r1" });
+    firstHandle._fire("event", { type: "tool_call", toolCallId: "c1", toolName: "t1", args: {} });
+    await vi.waitFor(() => expect(executeTool).toHaveBeenCalled());
+    // Reset while tool is in-flight
+    session.onReset();
+    // Tool finishes late — result should be discarded due to generation mismatch
+    resolveToolCall("stale-result");
+    await session.waitForTurn();
+    // New handle should not receive the stale result
+    const newHandle = handles[1];
+    expect(newHandle?.sendToolResult).not.toHaveBeenCalled();
+    spy.mockRestore();
+  });
+  // No spy and no start(): the test passes as long as the awaited stop() does not reject.
+  test("stop() is safe to call without start()", async () => {
+    const client = makeClient();
+    const session = createS2sSession(makeSessionOpts({ client }));
+    // stop() without start() — should not throw
+    await session.stop();
+  });
+  // connectS2s is stubbed with a promise the test resolves by hand, so stop() is issued while
+  // start() is still waiting for the connection; the late-arriving handle must be closed.
+  test("stop() prevents orphaned S2S connection when called during start()", async () => {
+    let resolveConnect!: (value: S2sHandle) => void;
+    const handle = makeMockHandle();
+    const spy = vi.spyOn(_internals, "connectS2s").mockImplementation(
+      () =>
+        new Promise((r) => {
+          resolveConnect = r as (value: S2sHandle) => void;
+        }),
+    );
+    const client = makeClient();
+    const session = createS2sSession(makeSessionOpts({ client }));
+    const startPromise = session.start();
+    const stopPromise = session.stop();
+    // Connection resolves after stop — handle must be closed immediately
+    resolveConnect(handle);
+    await startPromise;
+    await stopPromise;
+    expect(handle.close).toHaveBeenCalled();
+    spy.mockRestore();
+  });
+  // Fires "error" on the mock handle and expects both a close() call and an
+  // `internal` error event carrying the same message in the client's recorded events.
+  test("S2S error event closes handle and emits error to client", async () => {
+    const { session, client, mockHandle } = setup();
+    await session.start();
+    mockHandle._fire("error", new Error("S2S crashed"));
+    expect(mockHandle.close).toHaveBeenCalled();
+    expect(client.events).toContainEqual({
+      type: "error",
+      code: "internal",
+      message: "S2S crashed",
+    });
+  });
+  // Fires "close" on the mock handle, then feeds audio to the session; there is no explicit
+  // assertion — the test passes if onAudio() does not throw.
+  test("S2S close event nullifies the handle reference", async () => {
+    const { session, mockHandle } = setup();
+    await session.start();
+    // Simulate S2S WebSocket close
+    mockHandle._fire("close", 1000, "normal");
+    // Sending audio after close should not throw (no-ops via ?. on null s2s)
+    session.onAudio(new Uint8Array([1, 2, 3]));
+  });
+  // Fires "sessionExpired" on the mock handle and expects close() to have been called on it.
+  test("sessionExpired event closes the S2S handle", async () => {
+    const { session, mockHandle } = setup();
+    await session.start();
+    mockHandle._fire("sessionExpired");
+    // The handler calls handle.close() directly
+    expect(mockHandle.close).toHaveBeenCalled();
+  });
+  // connectS2s is stubbed so every call creates a handle and parks its resolver. start() plus
+  // two onReset() calls must produce three pending connections; once all three resolve, the
+  // first two handles must be closed and the third left open.
+  test("rapid resets close all stale connections", async () => {
+    const handles: ReturnType<typeof makeMockHandle>[] = [];
+    const resolvers: ((h: S2sHandle) => void)[] = [];
+    const spy = vi.spyOn(_internals, "connectS2s").mockImplementation(
+      () =>
+        new Promise<S2sHandle>((resolve) => {
+          const h = makeMockHandle();
+          handles.push(h);
+          resolvers.push(resolve as (value: S2sHandle) => void);
+        }),
+    );
+    const client = makeClient();
+    const session = createS2sSession(makeSessionOpts({ client }));
+    const startPromise = session.start();
+    session.onReset();
+    session.onReset();
+    expect(resolvers.length).toBe(3);
+    // Resolve in order — first two are stale
+    // biome-ignore lint/style/noNonNullAssertion: test assertions after length check
+    resolvers[0]?.(handles[0]!);
+    // biome-ignore lint/style/noNonNullAssertion: test assertions after length check
+    resolvers[1]?.(handles[1]!);
+    // biome-ignore lint/style/noNonNullAssertion: test assertions after length check
+    resolvers[2]?.(handles[2]!);
+    await startPromise;
+    await vi.waitFor(() => {
+      expect(handles[0]?.close).toHaveBeenCalled();
+      expect(handles[1]?.close).toHaveBeenCalled();
+    });
+    expect(handles[2]?.close).not.toHaveBeenCalled();
+    spy.mockRestore();
+  });
+  // Two tool calls are started in the same reply and left pending; stop() is called, both
+  // are then resolved, and the awaited stop() must finish with the handle closed.
+  test("concurrent tool calls all complete before stop() resolves", async () => {
+    const resolvers: ((value: string) => void)[] = [];
+    const executeTool = vi.fn(
+      () =>
+        new Promise<string>((r) => {
+          resolvers.push(r);
+        }),
+    );
+    const { session, mockHandle } = setup({ executeTool });
+    await session.start();
+    mockHandle._fire("replyStarted", { replyId: "r1" });
+    mockHandle._fire("event", { type: "tool_call", toolCallId: "c1", toolName: "t1", args: {} });
+    mockHandle._fire("event", { type: "tool_call", toolCallId: "c2", toolName: "t2", args: {} });
+    await vi.waitFor(() => expect(executeTool).toHaveBeenCalledTimes(2));
+    // Stop while both tools are in-flight
+    const stopPromise = session.stop();
+    // Resolve both tools
+    resolvers[0]?.("result-1");
+    resolvers[1]?.("result-2");
+    await stopPromise;
+    // If we get here, turnPromise was properly awaited
+    expect(mockHandle.close).toHaveBeenCalled();
+  });
+  // connectS2s is stubbed to reject; start() must still resolve, the client must record an
+  // `internal` error event with the rejection message, and a following stop() must not reject.
+  test("connectS2s failure does not leak resources", async () => {
+    const spy = vi.spyOn(_internals, "connectS2s").mockRejectedValue(new Error("network error"));
+    const client = makeClient();
+    const session = createS2sSession(makeSessionOpts({ client }));
+    await session.start();
+    // Client should get error event
+    expect(client.events).toContainEqual(
+      expect.objectContaining({
+        type: "error",
+        code: "internal",
+        message: "network error",
+      }),
+    );
+    // stop() should not throw even after failed start
+    await session.stop();
+    spy.mockRestore();
+  });
+});