npm - @alexkroman1/aai - Versions diffs - 1.7.0 → 1.8.0 - Mend

@alexkroman1/aai 1.7.0 → 1.8.0

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Files changed (133)

package/.turbo/turbo-build.log +11 -9
package/CHANGELOG.md +16 -0
package/dist/{_internal-types-CrnTi9Ew.js → _internal-types-CfOAbK6V.js} +22 -35
package/dist/constants-y68COEGj.js +29 -0
package/dist/host/_base64.d.ts +2 -0
package/dist/host/_mock-ws.d.ts +0 -61
package/dist/host/_pipeline-test-fakes.d.ts +7 -4
package/dist/host/_run-code.d.ts +0 -25
package/dist/host/_runtime-conformance.d.ts +3 -34
package/dist/host/memory-vector.d.ts +0 -11
package/dist/host/providers/resolve-kv.d.ts +0 -7
package/dist/host/providers/resolve-vector.d.ts +0 -8
package/dist/host/providers/stt/assemblyai.d.ts +0 -14
package/dist/host/providers/stt/deepgram.d.ts +2 -14
package/dist/host/providers/stt/soniox.d.ts +0 -22
package/dist/host/providers/tts/rime.d.ts +10 -31
package/dist/host/runtime-barrel.js +628 -642
package/dist/host/runtime-config.d.ts +9 -6
package/dist/host/runtime.d.ts +3 -0
package/dist/host/to-vercel-tools.d.ts +3 -33
package/dist/host/transports/openai-realtime-transport.d.ts +43 -0
package/dist/host/unstorage-kv.d.ts +0 -26
package/dist/index.js +3 -3
package/dist/openai-realtime-cjPAHMMx.js +10 -0
package/dist/sdk/_internal-types.d.ts +6 -55
package/dist/sdk/allowed-hosts.d.ts +4 -3
package/dist/sdk/constants.d.ts +4 -29
package/dist/sdk/define.d.ts +7 -4
package/dist/sdk/kv.d.ts +13 -37
package/dist/sdk/manifest-barrel.js +1 -1
package/dist/sdk/manifest.d.ts +8 -2
package/dist/sdk/protocol.js +1 -1
package/dist/sdk/providers/s2s/openai-realtime.d.ts +17 -0
package/dist/sdk/providers/s2s-barrel.d.ts +9 -0
package/dist/sdk/providers/s2s-barrel.js +2 -0
package/dist/sdk/providers/tts/rime.d.ts +1 -1
package/dist/sdk/providers.d.ts +6 -2
package/dist/sdk/types.d.ts +7 -1
package/dist/{types-KUgezM6u.js → types-DOWVZhb9.js} +1 -7
package/dist/{ws-upgrade-BeOQ7fXL.js → ws-upgrade-CG8-by1n.js} +2 -3
package/host/_base64.ts +9 -0
package/host/_mock-ws.ts +0 -65
package/host/_pipeline-test-fakes.ts +19 -31
package/host/_run-code.ts +10 -53
package/host/_runtime-conformance.ts +3 -44
package/host/_test-utils.ts +20 -42
package/host/builtin-tools.test.ts +127 -222
package/host/builtin-tools.ts +6 -10
package/host/cleanup.test.ts +30 -73
package/host/integration/pipeline-reference.integration.test.ts +12 -17
package/host/integration.test.ts +0 -7
package/host/memory-vector.test.ts +3 -1
package/host/memory-vector.ts +16 -21
package/host/pinecone-vector.test.ts +14 -17
package/host/pinecone-vector.ts +10 -19
package/host/providers/providers.test-d.ts +5 -3
package/host/providers/resolve-kv.ts +23 -41
package/host/providers/resolve-vector.ts +3 -12
package/host/providers/resolve.test.ts +15 -28
package/host/providers/resolve.ts +24 -24
package/host/providers/stt/assemblyai.test.ts +2 -14
package/host/providers/stt/assemblyai.ts +12 -35
package/host/providers/stt/deepgram.test.ts +23 -83
package/host/providers/stt/deepgram.ts +15 -40
package/host/providers/stt/elevenlabs.test.ts +26 -38
package/host/providers/stt/elevenlabs.ts +10 -9
package/host/providers/stt/soniox.test.ts +35 -85
package/host/providers/stt/soniox.ts +8 -53
package/host/providers/tts/cartesia.test.ts +19 -58
package/host/providers/tts/cartesia.ts +36 -66
package/host/providers/tts/rime.test.ts +12 -38
package/host/providers/tts/rime.ts +23 -86
package/host/runtime-config.test.ts +9 -9
package/host/runtime-config.ts +16 -22
package/host/runtime.test.ts +111 -73
package/host/runtime.ts +138 -86
package/host/s2s.test.ts +92 -191
package/host/s2s.ts +56 -53
package/host/server-shutdown.test.ts +9 -30
package/host/server.test.ts +2 -13
package/host/server.ts +85 -100
package/host/session-core.test.ts +15 -30
package/host/session-core.ts +10 -13
package/host/session-prompt.test.ts +1 -5
package/host/to-vercel-tools.test.ts +53 -72
package/host/to-vercel-tools.ts +9 -39
package/host/tool-executor.test.ts +25 -51
package/host/tool-executor.ts +18 -12
package/host/transports/openai-realtime-transport.test.ts +371 -0
package/host/transports/openai-realtime-transport.ts +319 -0
package/host/transports/pipeline-transport.test.ts +125 -298
package/host/transports/pipeline-transport.ts +20 -68
package/host/transports/s2s-transport-fixtures.test.ts +31 -92
package/host/transports/s2s-transport.test.ts +65 -134
package/host/transports/s2s-transport.ts +15 -43
package/host/transports/types.test.ts +4 -8
package/host/unstorage-kv.test.ts +3 -2
package/host/unstorage-kv.ts +5 -35
package/host/ws-handler.test.ts +72 -176
package/host/ws-handler.ts +6 -12
package/package.json +6 -1
package/sdk/__snapshots__/exports.test.ts.snap +7 -0
package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
package/sdk/_internal-types.test.ts +6 -9
package/sdk/_internal-types.ts +16 -57
package/sdk/_test-matchers.ts +25 -15
package/sdk/allowed-hosts.test.ts +50 -114
package/sdk/allowed-hosts.ts +8 -14
package/sdk/constants.ts +5 -52
package/sdk/define.test.ts +7 -6
package/sdk/define.ts +7 -3
package/sdk/exports.test.ts +6 -1
package/sdk/kv.ts +13 -37
package/sdk/manifest.test-d.ts +5 -0
package/sdk/manifest.test.ts +61 -9
package/sdk/manifest.ts +11 -11
package/sdk/protocol-compat.test.ts +66 -98
package/sdk/protocol-snapshot.test.ts +2 -16
package/sdk/protocol.test.ts +13 -22
package/sdk/providers/s2s/openai-realtime.ts +36 -0
package/sdk/providers/s2s-barrel.ts +12 -0
package/sdk/providers/tts/rime.ts +1 -1
package/sdk/providers.ts +24 -5
package/sdk/schema-alignment.test.ts +25 -73
package/sdk/schema-shapes.test.ts +1 -29
package/sdk/system-prompt.test.ts +0 -1
package/sdk/system-prompt.ts +17 -19
package/sdk/types-inference.test.ts +10 -36
package/sdk/types.ts +7 -0
package/sdk/ws-upgrade.test.ts +24 -23
package/sdk/ws-upgrade.ts +2 -3
package/tsdown.config.ts +8 -11
package/dist/constants-C2nirZUI.js +0 -54

package/host/transports/pipeline-transport.ts CHANGED

@@ -1,15 +1,11 @@
 // Copyright 2026 the AAI authors. MIT license.
 // Pipeline transport — STT → LLM → TTS orchestration behind the Transport interface.
+//
 // Pipeline mode executes tools inline via streamText's `tools.execute`.
 // `callbacks.onToolCall` is observability-only; runtime.ts routes it to
 // `client.toolCall` directly (bypassing SessionCore's tool-dispatch path,
 // which is S2S-only). `sendToolResult` is a no-op because results are
 // already handled by streamText.
-//
-// `conversationMessages` below is transport-local and currently uncapped —
-// SessionCore's `maxHistory` does not yet feed through. Long pipeline
-// sessions may accumulate unbounded context; revisit if it matters.
 import type { LanguageModel, ModelMessage } from "ai";
 import { stepCountIs, streamText } from "ai";
@@ -97,14 +93,8 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     });
   const { callbacks, sessionConfig } = opts;
-  // Derive the system prompt — pipeline mode always uses voice=true.
-  // In the refactored transport, we receive the final systemPrompt directly
-  // from sessionConfig (built by the caller). We use it as-is but also keep
-  // the hasTools logic available if the caller passes raw schemas.
   const systemPrompt = sessionConfig.systemPrompt;
-  // ---- State ----------------------------------------------------------------
   const sessionAbort = new AbortController();
   let audioReady = false;
   let terminated = false;
@@ -112,16 +102,13 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
   let ttsSession: TtsSession | null = null;
   let turnController: AbortController | null = null;
   let nextReplyId = 0;
-  // Conversation history — seeded from sessionConfig.history if provided.
-  // Pipeline transport manages its own history since SessionCore doesn't own
-  // the conversation in pipeline mode (history is needed to build the LLM
-  // messages array for each turn).
+  // Pipeline transport manages its own history; SessionCore does not own the
+  // conversation in pipeline mode (we need it to build LLM messages per turn).
   const conversationMessages: Message[] = sessionConfig.history ? [...sessionConfig.history] : [];
   let turnPromise: Promise<void> | null = null;
   const sttSubs: Unsubscribe[] = [];
   const ttsSubs: Unsubscribe[] = [];
-  // ---- History helpers ------------------------------------------------------
   function pushMessages(...msgs: Message[]): void {
     conversationMessages.push(...msgs);
     if (conversationMessages.length > DEFAULT_MAX_HISTORY) {
@@ -133,16 +120,11 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     turnPromise = (turnPromise ?? Promise.resolve()).then(() => p);
   }
-  // ---- Error helpers --------------------------------------------------------
   function emitError(code: SessionErrorCode, message: string): void {
     callbacks.onError(code, message);
   }
-  // ---- Termination ----------------------------------------------------------
-  /**
-   * Tear down after an unrecoverable provider error. Aborts the in-flight
-   * turn, cancels TTS, signals providers to close. Idempotent.
-   */
+  // Idempotent teardown after an unrecoverable provider error.
   function terminate(): void {
     if (terminated) return;
     terminated = true;
@@ -155,7 +137,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     sessionAbort.abort();
   }
-  // ---- STT event handlers ---------------------------------------------------
   function onSttPartial(_text: string): void {
     if (terminated) return;
     if (turnController === null) return;
@@ -170,7 +151,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     if (terminated) return;
     const trimmed = text.trim();
     if (trimmed.length === 0) return;
-    // Replace in-flight turn if one is running (duplicate/late STT final).
     if (turnController !== null) {
       log.info("Pipeline replacing in-flight turn", { sid: opts.sid });
       turnController.abort();
@@ -192,7 +172,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     terminate();
   }
-  // ---- TTS event handlers ---------------------------------------------------
   function onTtsError(err: TtsError): void {
     if (terminated) return;
     log.error("TTS error", { code: err.code, message: err.message, sid: opts.sid });
@@ -200,7 +179,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     terminate();
   }
-  // ---- LLM streaming --------------------------------------------------------
   async function consumeLlmStream(
     ctl: AbortController,
     messages: ModelMessage[],
@@ -274,8 +252,7 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
           pendingSeparator = true;
           return;
         case "tool-call": {
-          // Option A: fire callbacks.onToolCall for observability only.
-          // Actual execution happens inline via toVercelTools.
+          // Observability only — actual execution happens inline via toVercelTools.
           const input = (part.input ?? {}) as Record<string, unknown>;
           callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
           return;
@@ -292,17 +269,11 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     };
   }
-  // ---- TTS flush ------------------------------------------------------------
-  /**
-   * Flush TTS and wait for drain. Resolves on:
-   *   - TTS emits `done`
-   *   - `signal` aborts (barge-in / provider error / session stop)
-   *   - PIPELINE_FLUSH_TIMEOUT_MS elapses
-   * Resolves immediately if no TTS session.
-   */
+  // Resolves on TTS `done`, signal abort, or PIPELINE_FLUSH_TIMEOUT_MS elapsed.
   function flushTtsAndWait(signal: AbortSignal): Promise<void> {
     const tts = ttsSession;
     if (!tts) return Promise.resolve();
+    if (signal.aborted) return Promise.resolve();
     return new Promise<void>((resolve) => {
       let off: Unsubscribe | null = null;
       let timer: ReturnType<typeof setTimeout> | null = null;
@@ -322,24 +293,16 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
         resolve();
       };
       const onAbort = () => finish();
-      if (signal.aborted) {
-        resolve();
-        return;
-      }
       signal.addEventListener("abort", onAbort, { once: true });
       off = tts.on("done", finish);
       timer = setTimeout(() => {
-        log.warn("TTS flush timeout", {
-          sid: opts.sid,
-          timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS,
-        });
+        log.warn("TTS flush timeout", { sid: opts.sid, timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS });
         finish();
       }, PIPELINE_FLUSH_TIMEOUT_MS);
       tts.flush();
     });
   }
-  // ---- Turn orchestration ---------------------------------------------------
   async function runTurn(userText: string): Promise<void> {
     const replyId = `pipeline-${++nextReplyId}`;
     callbacks.onReplyStarted(replyId);
@@ -366,7 +329,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
       return;
     }
-    // Emit the complete transcript once the LLM finishes streaming.
     if (accumulated.length > 0) {
       callbacks.onAgentTranscript(accumulated, false);
       pushMessages({ role: "assistant", content: accumulated });
@@ -404,14 +366,12 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
       return;
     }
-    // Do NOT call callbacks.onAudioDone() here — session-core's flushReply
-    // (triggered by onReplyDone) emits audioDone + replyDone together, matching
-    // the S2S transport contract. Calling it here would double-fire audio_done.
+    // See runTurn: onReplyDone triggers session-core's flushReply which emits
+    // audioDone + replyDone together; firing onAudioDone here would double-fire.
     callbacks.onReplyDone();
     if (turnController === ctl) turnController = null;
   }
-  // ---- Provider lifecycle ---------------------------------------------------
   function reportOpenRejection(which: "stt" | "tts", reason: unknown): void {
     const msg = errorMessage(reason);
     log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
@@ -443,10 +403,8 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
         callbacks.onAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
       }),
     );
-    // Note: `done` is NOT subscribed here. flushTtsAndWait() attaches a
-    // one-shot listener per-turn so it knows when synthesis drains. Calling
-    // callbacks.onAudioDone() is done explicitly at the end of runTurn /
-    // runGreeting — not via a persistent subscription — to avoid double-firing.
+    // `done` is intentionally NOT subscribed persistently — flushTtsAndWait
+    // attaches a one-shot listener per-turn to avoid double-firing audio_done.
     ttsSubs.push(session.on("error", onTtsError));
   }
@@ -479,7 +437,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     if (!aborted && (sttFailed || ttsFailed)) terminate();
   }
-  // ---- Greeting on audio ready ----------------------------------------------
   function onAudioReady(): void {
     if (audioReady || terminated) return;
     audioReady = true;
@@ -492,13 +449,11 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     chainTurn(turn);
   }
-  // ---- Transport interface --------------------------------------------------
   return {
     async start(): Promise<void> {
       await openProviders();
-      // In S2S mode, onSessionReady fires when the provider acknowledges the
-      // session. In pipeline mode, we fire it immediately after providers open
-      // (which is the equivalent "ready" signal), then trigger greeting.
+      // S2S fires onSessionReady when the provider acks; in pipeline mode the
+      // equivalent "ready" signal is providers having opened.
       callbacks.onSessionReady?.(opts.sid);
       onAudioReady();
     },
@@ -522,8 +477,7 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
     sendUserAudio(bytes: Uint8Array): void {
       if (terminated || !audioReady) return;
-      const offset = bytes.byteOffset;
-      const length = bytes.byteLength;
+      const { byteOffset: offset, byteLength: length } = bytes;
       let pcm: Int16Array;
       if (offset % 2 === 0 && length % 2 === 0) {
         pcm = new Int16Array(bytes.buffer, offset, length / 2);
@@ -535,8 +489,8 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
       sttSession?.sendAudio(pcm);
     },
-    // Option A: tool execution stays inside toVercelTools/streamText.
-    // sendToolResult is a no-op for pipeline mode.
+    // Tool execution stays inside toVercelTools/streamText; results aren't
+    // routed through the transport.
     // biome-ignore lint/suspicious/noEmptyBlockStatements: intentional no-op for pipeline mode
     sendToolResult(_callId: string, _result: string): void {},
@@ -545,11 +499,9 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
       turnController?.abort();
       turnController = null;
       ttsSession?.cancel();
-      // Do NOT call callbacks.onCancelled() here. This method is invoked from
-      // session-core.onCancel (client-initiated cancel), which calls
-      // client.cancelled() itself — firing onCancelled here would double-cancel.
-      // Barge-in (STT partial) fires callbacks.onCancelled() directly in
-      // onSttPartial, where the cancel originates inside the transport.
+      // Do NOT call callbacks.onCancelled() here — session-core.onCancel
+      // (client-initiated) calls client.cancelled() itself. Barge-in fires
+      // onCancelled directly in onSttPartial where the cancel originates here.
     },
   };
 }

package/host/transports/s2s-transport-fixtures.test.ts CHANGED

@@ -20,7 +20,11 @@ import { z } from "zod";
 import type { AgentDef } from "../../sdk/types.ts";
 import { createFixtureSession, flush } from "../_test-utils.ts";
-// ─── Test agents with deterministic tools ────────────────────────────────────
+type FixtureSession = ReturnType<typeof createFixtureSession>;
+function firstToolResult(ctx: FixtureSession): [string, string] {
+  return vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [string, string];
+}
 const weatherAgent: AgentDef = {
   name: "weather-agent",
@@ -69,8 +73,6 @@ const statefulAgent: AgentDef<{ callCount: number }> = {
   },
 };
-// ─── Tests ───────────────────────────────────────────────────────────────────
 describe("fixture replay with real executor (transport layer)", () => {
   let cleanup: () => void;
@@ -78,23 +80,20 @@ describe("fixture replay with real executor (transport layer)", () => {
     cleanup?.();
   });
-  // ── Tool call: real Zod validation + real tool execution ───────────────
+  function makeCtx(agent: AgentDef): FixtureSession {
+    const ctx = createFixtureSession(agent);
+    cleanup = ctx.cleanup;
+    return ctx;
+  }
   test("tool call fixture: Zod validates args, real tool executes, result sent to S2S", async () => {
-    const ctx = createFixtureSession(weatherAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(weatherAgent);
     await ctx.start();
     ctx.replay("tool-call-sequence.json");
-    // Wait for the async tool execution pipeline to complete
     await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
-    // Verify the real tool was called and produced correct output
-    const [callId, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
-      string,
-      string,
-    ];
+    const [callId, resultStr] = firstToolResult(ctx);
     expect(callId).toBeTruthy();
     const result = JSON.parse(resultStr);
     expect(result.city).toBe("San Francisco");
@@ -103,8 +102,7 @@ describe("fixture replay with real executor (transport layer)", () => {
   });
   test("tool call fixture: client receives tool_call with validated args", async () => {
-    const ctx = createFixtureSession(weatherAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(weatherAgent);
     await ctx.start();
     ctx.replay("tool-call-sequence.json");
@@ -117,36 +115,30 @@ describe("fixture replay with real executor (transport layer)", () => {
   });
   test("tool call fixture: conversation history accumulates user + assistant messages", async () => {
-    const ctx = createFixtureSession(weatherAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(weatherAgent);
     await ctx.start();
     ctx.replay("tool-call-sequence.json");
     await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
     await flush();
-    // Client received user transcript
     expect(ctx.client.userTranscripts.length).toBeGreaterThan(0);
     const lastUserText = ctx.client.userTranscripts.at(-1) ?? "";
     expect(lastUserText.toLowerCase()).toContain("weather");
   });
-  // ── Simple question: no tools, just session lifecycle ──────────────────
   test("simple question fixture: greeting + agent response reach client", async () => {
-    const ctx = createFixtureSession(simpleAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(simpleAgent);
     await ctx.start();
     ctx.replay("simple-question-sequence.json");
     await flush();
-    expect(ctx.client.agentTranscripts.length).toBeGreaterThanOrEqual(2); // greeting + answer
+    expect(ctx.client.agentTranscripts.length).toBeGreaterThanOrEqual(2);
   });
   test("simple question fixture: user speech events forwarded to client", async () => {
-    const ctx = createFixtureSession(simpleAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(simpleAgent);
     await ctx.start();
     ctx.replay("simple-question-sequence.json");
@@ -157,29 +149,20 @@ describe("fixture replay with real executor (transport layer)", () => {
     expect(ctx.client.userTranscripts.length).toBeGreaterThan(0);
   });
-  // ── Stateful agent: session state persists across tool calls ───────────
   test("stateful agent: tool accesses and mutates session state", async () => {
-    const ctx = createFixtureSession(statefulAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(statefulAgent);
     await ctx.start();
     ctx.replay("tool-call-sequence.json");
     await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
-    const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
-      string,
-      string,
-    ];
+    const [, resultStr] = firstToolResult(ctx);
     const result = JSON.parse(resultStr);
-    expect(result.calls).toBe(1); // state.callCount was incremented
+    expect(result.calls).toBe(1);
   });
-  // ── Greeting only: session lifecycle without user audio ────────────────
   test("greeting fixture: session setup completes with reply_done", async () => {
-    const ctx = createFixtureSession(simpleAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(simpleAgent);
     await ctx.start();
     ctx.replay("greeting-session-sequence.json");
@@ -189,11 +172,8 @@ describe("fixture replay with real executor (transport layer)", () => {
     expect(ctx.client.replyDoneCount).toBeGreaterThan(0);
   });
-  // ── Tool schemas: real agent produces correct S2S tool schemas ─────────
   test("real executor builds correct tool schemas from AgentDef", () => {
-    const ctx = createFixtureSession(weatherAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(weatherAgent);
     const schema = ctx.executor.toolSchemas.find((s) => s.name === "get_weather");
     expect(schema).toBeDefined();
@@ -205,8 +185,6 @@ describe("fixture replay with real executor (transport layer)", () => {
     });
   });
-  // ── Tool errors are surfaced as tool results ───────────────────────────
   test("tool throw is surfaced as error result", async () => {
     const agent: AgentDef = {
       name: "error-agent",
@@ -224,23 +202,16 @@ describe("fixture replay with real executor (transport layer)", () => {
       },
     };
-    const ctx = createFixtureSession(agent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(agent);
     await ctx.start();
     ctx.replay("tool-call-sequence.json");
     await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
-    // Tool result should contain the error
-    const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
-      string,
-      string,
-    ];
+    const [, resultStr] = firstToolResult(ctx);
     expect(resultStr).toContain("API key expired");
   });
-  // ── Zod validation: bad args rejected ──────────────────────────────────
   test("Zod validation rejects malformed tool args", async () => {
     const agent: AgentDef = {
       name: "strict-agent",
@@ -259,26 +230,18 @@ describe("fixture replay with real executor (transport layer)", () => {
       },
     };
-    const ctx = createFixtureSession(agent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(agent);
     await ctx.start();
     ctx.replay("tool-call-sequence.json");
     await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
-    // The result should contain a Zod validation error
-    const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
-      string,
-      string,
-    ];
+    const [, resultStr] = firstToolResult(ctx);
     expect(resultStr).toContain("Invalid arguments");
     expect(resultStr).toContain("country");
   });
-  // ── Interrupted transcript NOT added to conversation history ────────────
   test("interrupted agent transcript is not pushed to conversation history", async () => {
-    // Use a tool that captures messages to inspect conversation history
     let capturedMessages: readonly { role: string; content: string }[] = [];
     const agent: AgentDef = {
       name: "interrupt-history-agent",
@@ -297,46 +260,36 @@ describe("fixture replay with real executor (transport layer)", () => {
       },
     };
-    const ctx = createFixtureSession(agent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(agent);
     await ctx.start();
     const cbs = ctx.mockCallbacks;
-    // Fire an interrupted transcript — should NOT go into conversation history
     cbs.onReplyStarted("r1");
     cbs.onAgentTranscript("This was interrupted", true);
     cbs.onCancelled();
     await flush();
-    // Client sees both agent_transcript and cancelled events
     expect(ctx.client.agentTranscripts).toContain("This was interrupted");
     expect(ctx.client.cancelledCount).toBeGreaterThan(0);
-    // Fire a non-interrupted transcript — SHOULD go into conversation history
     cbs.onReplyStarted("r2");
     cbs.onAgentTranscript("This was completed", false);
     cbs.onReplyDone();
     await flush();
-    // Trigger a tool call to inspect conversation history.
     cbs.onUserTranscript("check");
     await flush();
     cbs.onReplyStarted("r3");
     cbs.onToolCall("c1", "check_history", { q: "test" });
-    // Wait for tool to execute (captures messages)
     await vi.waitFor(() => expect(capturedMessages.length).toBeGreaterThan(0));
-    // Conversation history should contain the completed text but NOT the interrupted text
     const assistantMsgs = capturedMessages.filter((m) => m.role === "assistant");
     expect(assistantMsgs.some((m) => m.content === "This was completed")).toBe(true);
     expect(assistantMsgs.every((m) => m.content !== "This was interrupted")).toBe(true);
   });
-  // ── Conversation history correctness after full tool-call flow ──────────
   test("conversation history has user + assistant messages after tool-call flow", async () => {
-    // Use a tool that captures the messages it receives
     let capturedMessages: readonly { role: string; content: string }[] = [];
     const agent: AgentDef = {
       name: "history-agent",
@@ -355,28 +308,21 @@ describe("fixture replay with real executor (transport layer)", () => {
       },
     };
-    const ctx = createFixtureSession(agent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(agent);
     await ctx.start();
     ctx.replay("tool-call-sequence.json");
     await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
-    // The tool should have seen the user's weather question in messages
     const userMsgs = capturedMessages.filter((m) => m.role === "user");
     expect(userMsgs.some((m) => m.content.toLowerCase().includes("weather"))).toBe(true);
   });
-  // ── Audio chunks forwarded to client.audio ─────────────────────────────
   test("reply.audio events forwarded to client.audio", async () => {
-    const ctx = createFixtureSession(simpleAgent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(simpleAgent);
     await ctx.start();
-    // Fire audio events directly via callbacks (replay skips reply.audio)
-    const audioBytes = new Uint8Array([10, 20, 30, 40]);
-    ctx.mockCallbacks.onAudio(audioBytes);
+    ctx.mockCallbacks.onAudio(new Uint8Array([10, 20, 30, 40]));
     ctx.mockCallbacks.onAudio(new Uint8Array([50, 60]));
     expect(ctx.client.audioChunks.length).toBe(2);
@@ -384,8 +330,6 @@ describe("fixture replay with real executor (transport layer)", () => {
     expect(Array.from(ctx.client.audioChunks[1] ?? [])).toEqual([50, 60]);
   });
-  // ── Multiple tool calls in one reply: results buffered and sent together ─
   test("multiple tool calls in one reply: all results buffered and sent after replyDone", async () => {
     const agent: AgentDef = {
       name: "multi-tool-agent",
@@ -401,8 +345,7 @@ describe("fixture replay with real executor (transport layer)", () => {
       },
     };
-    const ctx = createFixtureSession(agent);
-    cleanup = ctx.cleanup;
+    const ctx = makeCtx(agent);
     await ctx.start();
     const cbs = ctx.mockCallbacks;
@@ -410,21 +353,17 @@ describe("fixture replay with real executor (transport layer)", () => {
     cbs.onToolCall("c1", "get_weather", { city: "NYC" });
     cbs.onToolCall("c2", "get_weather", { city: "LA" });
-    // Wait for both tool calls to be dispatched to the client
     await vi.waitFor(() => {
       expect(ctx.client.toolCallEvents.length).toBe(2);
     });
-    // Results NOT sent yet — reply.done hasn't fired
     expect(ctx.fakeHandle.sendToolResult).not.toHaveBeenCalled();
-    // Fire reply.done — should flush both results
     cbs.onReplyDone();
     await vi.waitFor(() => {
       expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalledTimes(2);
     });
-    // Verify both results are correct
     const calls = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls as [string, string][];
     const results = calls.map(([, r]) => JSON.parse(r));
     expect(results.some((r) => r.city === "NYC")).toBe(true);