@alexkroman1/aai 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.turbo/turbo-build.log +10 -10
  2. package/CHANGELOG.md +19 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/assemblyai-C969QGi4.js +35 -0
  5. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  6. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  7. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  8. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  9. package/dist/host/providers/tts/rime.d.ts +44 -0
  10. package/dist/host/runtime-barrel.d.ts +4 -2
  11. package/dist/host/runtime-barrel.js +1434 -1209
  12. package/dist/host/runtime.d.ts +2 -2
  13. package/dist/host/s2s.d.ts +16 -16
  14. package/dist/host/session-core.d.ts +37 -0
  15. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  16. package/dist/host/transports/s2s-transport.d.ts +19 -0
  17. package/dist/host/transports/types.d.ts +45 -0
  18. package/dist/host/ws-handler.d.ts +14 -10
  19. package/dist/sdk/_internal-types.d.ts +2 -0
  20. package/dist/sdk/manifest-barrel.js +1 -1
  21. package/dist/sdk/protocol.d.ts +6 -5
  22. package/dist/sdk/providers/llm-barrel.js +1 -1
  23. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  24. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  25. package/dist/sdk/providers/stt-barrel.js +2 -2
  26. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  27. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -2
  30. package/host/_pipeline-test-fakes.ts +6 -3
  31. package/host/_test-utils.ts +209 -128
  32. package/host/builtin-tools.ts +1 -0
  33. package/host/cleanup.test.ts +25 -298
  34. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  35. package/host/providers/resolve.ts +10 -2
  36. package/host/providers/stt/deepgram.test.ts +229 -0
  37. package/host/providers/stt/deepgram.ts +172 -0
  38. package/host/providers/tts/cartesia.ts +7 -3
  39. package/host/providers/tts/rime.test.ts +251 -0
  40. package/host/providers/tts/rime.ts +322 -0
  41. package/host/runtime-barrel.ts +4 -2
  42. package/host/runtime.test.ts +16 -47
  43. package/host/runtime.ts +131 -23
  44. package/host/s2s.test.ts +122 -131
  45. package/host/s2s.ts +44 -52
  46. package/host/session-core.test.ts +257 -0
  47. package/host/session-core.ts +262 -0
  48. package/host/to-vercel-tools.test.ts +9 -1
  49. package/host/transports/pipeline-transport.test.ts +653 -0
  50. package/host/transports/pipeline-transport.ts +532 -0
  51. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  52. package/host/transports/s2s-transport.test.ts +56 -0
  53. package/host/transports/s2s-transport.ts +116 -0
  54. package/host/transports/types.test.ts +22 -0
  55. package/host/transports/types.ts +51 -0
  56. package/host/ws-handler.test.ts +324 -242
  57. package/host/ws-handler.ts +56 -59
  58. package/package.json +2 -1
  59. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  60. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  61. package/sdk/_internal-types.ts +3 -0
  62. package/sdk/protocol-compat.test.ts +8 -0
  63. package/sdk/protocol.ts +6 -5
  64. package/sdk/providers/stt/deepgram.ts +43 -0
  65. package/sdk/providers/stt-barrel.ts +2 -0
  66. package/sdk/providers/tts/cartesia.ts +15 -5
  67. package/sdk/providers/tts/rime.ts +52 -0
  68. package/sdk/providers/tts-barrel.ts +2 -0
  69. package/sdk/schema-alignment.test.ts +18 -6
  70. package/dist/assemblyai-Cxg9eobY.js +0 -18
  71. package/dist/cartesia-DwDk2tEu.js +0 -10
  72. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  73. package/dist/host/pipeline-session.d.ts +0 -52
  74. package/dist/host/session-ctx.d.ts +0 -73
  75. package/dist/host/session.d.ts +0 -62
  76. package/host/pipeline-session-ctx.test.ts +0 -31
  77. package/host/pipeline-session-ctx.ts +0 -36
  78. package/host/pipeline-session.test.ts +0 -672
  79. package/host/pipeline-session.ts +0 -533
  80. package/host/s2s-fixtures.test.ts +0 -237
  81. package/host/session-ctx.test.ts +0 -387
  82. package/host/session-ctx.ts +0 -134
  83. package/host/session-fixture-replay.test.ts +0 -128
  84. package/host/session.test.ts +0 -634
  85. package/host/session.ts +0 -412
  86. /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
@@ -0,0 +1,322 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ /**
3
+ * Rime TTS opener (host-only).
4
+ *
5
+ * The user-facing descriptor factory (`rime(...)`) lives in
6
+ * `sdk/providers/tts/rime.ts`. This module is the host-side
7
+ * counterpart: it takes the descriptor options + an API key and
8
+ * returns a {@link TtsOpener} that the pipeline session drives.
9
+ *
10
+ * **Protocol.** Connects to Rime's `ws2` JSON WebSocket endpoint
11
+ * (`wss://users-ws.rime.ai/ws2`). Client-to-server messages are JSON:
12
+ * - `{ "text": "..." }` — append text to the synthesis buffer
13
+ * - `{ "operation": "clear" }` — drop buffered text (barge-in)
14
+ * - `{ "operation": "eos" }` — drain buffer, close connection (NOT used
15
+ * during a session: it would tear down the WS, forcing reconnect per
16
+ * turn). We force end-of-turn synthesis with a trailing `"."` instead.
17
+ * The server responds with JSON frames:
18
+ * - `{ type: "chunk", data: <base64 PCM16 LE>, contextId: string | null }`
19
+ * - `{ type: "timestamps", ... }` (ignored)
20
+ * - `{ type: "error", message: string }` (surfaced as `tts_stream_error`)
21
+ *
22
+ * **Single long-lived connection per session.** Rime buffers text until it
23
+ * sees terminal punctuation (`.`, `?`, `!`), so we use one WebSocket per
24
+ * `open()` call and reuse it across turns. `clear` resets the buffer
25
+ * between cancellations.
26
+ *
27
+ * **Done detection.** After `flush()` sends a trailing `"."` to force the
28
+ * server to synthesize any half-buffered text, we arm a quiescence timer
29
+ * that fires 500 ms after the last received audio chunk. When it fires,
30
+ * `done` is emitted.
31
+ *
32
+ * **Audio format.** The URL requests `audioFormat=pcm` at the negotiated
33
+ * `sampleRate`, which returns raw PCM16 little-endian. We decode the base64
34
+ * payload into an `Int16Array` of PCM16 samples.
35
+ */
36
+
37
+ import { createNanoEvents, type Emitter } from "nanoevents";
38
+ import WebSocket from "ws";
39
+ import { RIME_DEFAULT_VOICE, type RimeOptions } from "../../../sdk/providers/tts/rime.ts";
40
+ import {
41
+ makeTtsError,
42
+ type TtsEvents,
43
+ type TtsOpener,
44
+ type TtsOpenOptions,
45
+ type TtsSession,
46
+ } from "../../../sdk/providers.ts";
47
+
48
+ /** Internal: TtsSession with a test-only handle to the raw WebSocket. */
49
+ export interface RimeSession extends TtsSession {
50
+ /** @internal Test-only: exposes the underlying raw WebSocket. */
51
+ readonly _ws: WebSocket;
52
+ }
53
+
54
/** Sample rates for which the Rime `ws2` endpoint accepts PCM16 output. */
const RIME_PCM16_RATES = [
  8000, 16_000, 22_050, 24_000, 44_100, 48_000,
] as const satisfies readonly number[];

/**
 * Validate a requested sample rate against {@link RIME_PCM16_RATES}.
 *
 * @param rate - The sample rate requested by the caller.
 * @returns `rate` unchanged when it is one of the supported values.
 * @throws A `tts_connect_failed` error built by `makeTtsError`, listing the
 *   supported rates, when `rate` is not in the list.
 */
function assertSupportedSampleRate(rate: number): number {
  // Widen the literal tuple so `includes` accepts an arbitrary number.
  const supported: readonly number[] = RIME_PCM16_RATES;
  if (!supported.includes(rate)) {
    throw makeTtsError(
      "tts_connect_failed",
      `Rime TTS: unsupported sample rate ${rate}. Supported: ${RIME_PCM16_RATES.join(", ")}.`,
    );
  }
  return rate;
}
66
+
67
/**
 * Decode a base64 string from Rime into PCM16 samples.
 *
 * Rime's `ws2` endpoint returns base64-encoded PCM16 LE in each chunk.
 * `Buffer.from(base64, "base64")` may hand back a slice of Node's shared
 * allocation pool for small payloads, so its `byteOffset` is not guaranteed
 * to be 0 and its `.buffer` can contain unrelated bytes. To avoid exposing
 * that pool (and to avoid depending on its alignment or on host byte order),
 * the samples are read explicitly as little-endian into a freshly allocated
 * `Int16Array` that owns its memory.
 *
 * @param data - Base64-encoded PCM16 little-endian audio.
 * @returns The decoded samples; empty when `data` decodes to fewer than two
 *   bytes. A trailing odd byte is ignored.
 */
function base64ToPcm(data: string): Int16Array {
  const bytes = Buffer.from(data, "base64");
  // Defensive: ignore a trailing odd byte rather than throwing on misalignment.
  const sampleCount = Math.floor(bytes.byteLength / 2);
  const samples = new Int16Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = bytes.readInt16LE(i * 2);
  }
  return samples;
}
82
+
83
+ /**
84
+ * Shape of JSON messages received from Rime's `ws2` endpoint.
85
+ *
86
+ * Only `chunk` messages carry audio; `timestamps` messages are informational
87
+ * and can be ignored for the current use case.
88
+ */
89
+ interface RimeMessage {
90
+ type: "chunk" | "timestamps" | "error" | string;
91
+ /** Base64-encoded PCM16 LE audio. Present on `type === "chunk"`. */
92
+ data?: string;
93
+ /** Context discriminator for the in-flight utterance. May be null. */
94
+ contextId?: string | null;
95
+ /** Error description. Present on `type === "error"`. */
96
+ message?: string;
97
+ }
98
+
99
+ /** Quiescence timeout in ms — how long to wait after the last audio chunk before emitting `done`. */
100
+ const QUIESCENCE_MS = 500;
101
+
102
+ /**
103
+ * After `flush()`, how long to wait for the FIRST audio chunk before
104
+ * giving up and emitting `done`. Greeting and short replies hit this
105
+ * path: `flush()` runs immediately after `sendText()`, so audio TTFB
106
+ * can exceed the 500 ms quiescence window. Once the first chunk arrives,
107
+ * we transition to the shorter quiescence timeout.
108
+ */
109
+ const FIRST_AUDIO_TIMEOUT_MS = 5000;
110
+
111
/**
 * Resolve once the WebSocket emits `open`, or reject on the first `error`.
 *
 * Whichever event fires first removes the listener for the other, so no
 * handler registered here outlives the returned promise.
 *
 * @param ws - A WebSocket that has not yet emitted `open`.
 * @returns A promise that resolves with no value on `open`.
 * @throws Rejects with a `tts_connect_failed` error (built by `makeTtsError`)
 *   when `error` fires first.
 */
function waitForOpen(ws: WebSocket): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    function handleOpen(): void {
      ws.removeListener("error", handleError);
      resolve();
    }

    function handleError(err: Error): void {
      ws.removeListener("open", handleOpen);
      const failure = makeTtsError(
        "tts_connect_failed",
        `Rime TTS: connect failed: ${err?.message ?? String(err)}`,
      );
      reject(failure);
    }

    ws.once("open", handleOpen);
    ws.once("error", handleError);
  });
}
131
+
132
/**
 * Convert a raw WebSocket payload into text for JSON parsing.
 *
 * Strings pass through, fragment arrays are concatenated, and `ArrayBuffer`
 * payloads are wrapped in a Buffer before decoding.
 */
function rimeFrameToText(raw: WebSocket.Data): string {
  if (typeof raw === "string") return raw;
  if (Array.isArray(raw)) return Buffer.concat(raw).toString();
  if (raw instanceof ArrayBuffer) return Buffer.from(raw).toString();
  return raw.toString();
}

/**
 * Handle one incoming WebSocket message frame.
 *
 * Extracted into a top-level function to keep `open()` under the cognitive
 * complexity limit while retaining full access to the session state via refs.
 *
 * Frames that are not valid JSON, or whose JSON value is not an object
 * (e.g. `null` or a bare number), are ignored rather than allowed to throw
 * inside the socket's `message` listener.
 *
 * @param raw - The payload delivered by the WebSocket `message` event.
 * @param emitter - Receives `audio` for non-empty chunks and `error` for
 *   server-reported errors.
 * @param armQuiescence - Restarts the short quiescence timer.
 * @param isActiveTimer - Reports whether a flush timer is currently armed.
 */
function handleRimeMessage(
  raw: WebSocket.Data,
  emitter: Emitter<TtsEvents>,
  armQuiescence: () => void,
  isActiveTimer: () => boolean,
): void {
  let parsed: unknown;
  try {
    parsed = JSON.parse(rimeFrameToText(raw));
  } catch {
    // Unparseable frame — ignore.
    return;
  }
  // `JSON.parse("null")` succeeds; reading `.type` off it would throw.
  if (typeof parsed !== "object" || parsed === null) return;
  const msg = parsed as RimeMessage;

  if (msg.type === "chunk" && typeof msg.data === "string") {
    const pcm = base64ToPcm(msg.data);
    if (pcm.length > 0) {
      emitter.emit("audio", pcm);
      // While we're waiting on a flush (long timer for first audio, or
      // short timer between chunks), each chunk resets to the short
      // quiescence window — so `done` fires only after audio stops.
      if (isActiveTimer()) armQuiescence();
    }
    return;
  }
  if (msg.type === "error") {
    emitter.emit(
      "error",
      makeTtsError("tts_stream_error", `Rime TTS: ${msg.message ?? "unknown error"}`),
    );
  }
  // Ignore `type === "timestamps"` and unknown message types.
}
171
+
172
/**
 * Build a {@link TtsOpener} from resolved Rime descriptor options.
 *
 * @param opts - Descriptor options. `model`, `language` and `voice` fall back
 *   to `"mistv2"`, `"eng"` and `RIME_DEFAULT_VOICE` when unset.
 * @returns An opener named `"rime"`. Its `open()` connects one WebSocket,
 *   waits for it to open, and resolves to a session that reuses that socket
 *   for every turn.
 */
export function openRime(opts: RimeOptions): TtsOpener {
  return {
    name: "rime",
    /**
     * Connect to Rime and return a session bound to the new socket.
     *
     * @throws `tts_auth_failed` when no API key is available,
     *   `tts_connect_failed` when the sample rate is unsupported or the
     *   socket cannot be created/opened.
     */
    async open(openOpts: TtsOpenOptions): Promise<TtsSession> {
      // `||` (not `??`) so an empty-string key also falls back to the env var.
      const apiKey = openOpts.apiKey || process.env.RIME_API_KEY;
      if (!apiKey) {
        throw makeTtsError(
          "tts_auth_failed",
          "Rime TTS: missing API key. Set RIME_API_KEY in the agent env.",
        );
      }

      const sampleRate = assertSupportedSampleRate(openOpts.sampleRate);
      const model = opts.model ?? "mistv2";
      const lang = opts.language ?? "eng";
      const voice = opts.voice ?? RIME_DEFAULT_VOICE;

      // Construct the ws2 URL with query parameters.
      const url = `wss://users-ws.rime.ai/ws2?speaker=${encodeURIComponent(voice)}&modelId=${encodeURIComponent(model)}&audioFormat=pcm&samplingRate=${sampleRate}&lang=${encodeURIComponent(lang)}`;

      let ws: WebSocket;
      try {
        ws = new WebSocket(url, { headers: { Authorization: `Bearer ${apiKey}` } });
      } catch (cause) {
        throw makeTtsError(
          "tts_connect_failed",
          `Rime TTS: failed to create WebSocket: ${cause instanceof Error ? cause.message : String(cause)}`,
        );
      }

      await waitForOpen(ws);

      const emitter: Emitter<TtsEvents> = createNanoEvents<TtsEvents>();
      let closed = false;
      let doneEmitted = false;
      /**
       * After `flush()`, we arm a timer that fires `done`. Initial timeout is
       * `FIRST_AUDIO_TIMEOUT_MS` to give Rime headroom on TTFB; the first
       * chunk swaps it for a shorter `QUIESCENCE_MS` window that resets on
       * each subsequent chunk. `cancel()` emits `done` synchronously.
       */
      let quiescenceTimer: ReturnType<typeof setTimeout> | null = null;

      const clearQuiescence = () => {
        if (quiescenceTimer !== null) {
          clearTimeout(quiescenceTimer);
          quiescenceTimer = null;
        }
      };

      // Emit `done` at most once per turn; never after the session is closed.
      const emitDoneOnce = () => {
        clearQuiescence();
        if (doneEmitted || closed) return;
        doneEmitted = true;
        emitter.emit("done");
      };

      const armQuiescence = () => {
        clearQuiescence();
        quiescenceTimer = setTimeout(emitDoneOnce, QUIESCENCE_MS);
      };

      const armFirstAudioTimer = () => {
        clearQuiescence();
        quiescenceTimer = setTimeout(emitDoneOnce, FIRST_AUDIO_TIMEOUT_MS);
      };

      ws.on("message", (raw: WebSocket.Data) => {
        if (closed) return;
        handleRimeMessage(raw, emitter, armQuiescence, () => quiescenceTimer !== null);
      });

      ws.on("error", (err: Error) => {
        if (closed) return;
        emitter.emit(
          "error",
          makeTtsError("tts_stream_error", `Rime TTS stream error: ${err?.message ?? String(err)}`),
        );
      });

      ws.on("close", () => {
        if (closed) return;
        // Unexpected server-side close — surface `done` so the pipeline
        // doesn't hang waiting for an utterance that will never complete.
        emitDoneOnce();
      });

      const close = async (): Promise<void> => {
        if (closed) return;
        closed = true;
        clearQuiescence();
        // Detach from the caller's signal so a long-lived signal does not
        // keep this session's socket and closures reachable after close.
        openOpts.signal.removeEventListener("abort", onAbort);
        try {
          ws.close();
        } catch {
          // Swallow: caller has already decided to tear down.
        }
      };

      // Named so `close()` can remove exactly this listener.
      const onAbort = (): void => {
        void close();
      };

      if (openOpts.signal.aborted) {
        void close();
      } else {
        openOpts.signal.addEventListener("abort", onAbort, { once: true });
      }

      const session: RimeSession = {
        sendText(text: string) {
          if (closed || text.length === 0) return;
          if (ws.readyState !== WebSocket.OPEN) return;
          // Reset done state at the start of a new turn.
          doneEmitted = false;
          ws.send(JSON.stringify({ text }));
        },

        flush() {
          if (closed) return;
          if (ws.readyState !== WebSocket.OPEN) return;
          // Force synthesis of any text buffered behind a missing terminal
          // punctuation: append a trailing `"."`. Sending the JSON `eos`
          // operation would close the connection, requiring a reconnect on
          // every turn — `"."` keeps the WS reusable. Use the longer
          // first-audio timer until the initial chunk arrives; the chunk
          // handler swaps it for short quiescence on each subsequent chunk.
          ws.send(JSON.stringify({ text: "." }));
          armFirstAudioTimer();
        },

        cancel() {
          if (closed) return;
          // Drop Rime's server-side buffer for barge-in.
          if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({ operation: "clear" }));
          }
          // Emit `done` synchronously — the orchestrator's state machine
          // advances on `done`, and barge-in must not be microtask-deferred.
          emitDoneOnce();
        },

        on(event, fn) {
          return emitter.on(event, fn);
        },

        close,

        _ws: ws,
      };

      return session;
    },
  };
}
@@ -17,8 +17,10 @@ export * from "./builtin-tools.ts";
17
17
  export * from "./runtime.ts";
18
18
  export * from "./runtime-config.ts";
19
19
  export * from "./server.ts";
20
- export * from "./session.ts";
21
- export * from "./session-ctx.ts";
20
+ export * from "./session-core.ts";
22
21
  export * from "./tool-executor.ts";
22
+ export * from "./transports/pipeline-transport.ts";
23
+ export * from "./transports/s2s-transport.ts";
24
+ export * from "./transports/types.ts";
23
25
  export * from "./unstorage-kv.ts";
24
26
  export * from "./ws-handler.ts";
@@ -11,10 +11,10 @@ import {
11
11
  createFakeTtsProvider,
12
12
  } from "./_pipeline-test-fakes.ts";
13
13
  import { CONFORMANCE_AGENT, testRuntime } from "./_runtime-conformance.ts";
14
- import { flush, makeAgent, makeClient, makeMockHandle, silentLogger } from "./_test-utils.ts";
14
+ import { flush, makeAgent, makeClientSink, makeMockHandle, silentLogger } from "./_test-utils.ts";
15
15
  import { createRuntime } from "./runtime.ts";
16
- import { _internals } from "./session.ts";
17
16
  import { executeToolCall } from "./tool-executor.ts";
17
+ import { _internals } from "./transports/s2s-transport.ts";
18
18
  import { createUnstorageKv } from "./unstorage-kv.ts";
19
19
 
20
20
  describe("toAgentConfig", () => {
@@ -322,7 +322,9 @@ describe("executeToolCall", () => {
322
322
  describe("createRuntime sandbox mode", () => {
323
323
  test("uses provided executeTool and toolSchemas", async () => {
324
324
  const mockExecuteTool = vi.fn(async () => "mocked-result");
325
- const mockToolSchemas = [{ name: "mock_tool", description: "A mock tool", parameters: {} }];
325
+ const mockToolSchemas = [
326
+ { type: "function" as const, name: "mock_tool", description: "A mock tool", parameters: {} },
327
+ ];
326
328
 
327
329
  const runtime = createRuntime({
328
330
  agent: makeAgent(),
@@ -360,11 +362,7 @@ describe("createRuntime shutdown", () => {
360
362
 
361
363
  test("shutdown stops active sessions gracefully", async () => {
362
364
  const mockHandle = makeMockHandle();
363
- const connectSpy = vi.spyOn(_internals, "connectS2s").mockImplementation(async () => {
364
- // Fire "ready" so session.start() resolves
365
- setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
366
- return mockHandle;
367
- });
365
+ const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
368
366
 
369
367
  const agent = makeAgent();
370
368
  const runtime = createRuntime({ agent, env: {}, logger: silentLogger });
@@ -392,10 +390,7 @@ describe("createRuntime shutdown", () => {
392
390
  mockHandle.close = vi.fn(() => {
393
391
  throw new Error("close failed");
394
392
  });
395
- const connectSpy = vi.spyOn(_internals, "connectS2s").mockImplementation(async () => {
396
- setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
397
- return mockHandle;
398
- });
393
+ const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
399
394
 
400
395
  const agent = makeAgent();
401
396
  const runtime = createRuntime({ agent, env: {}, logger });
@@ -422,10 +417,7 @@ describe("createRuntime shutdown", () => {
422
417
  mockHandle.close = vi.fn(() => {
423
418
  // intentionally do nothing — session stop will hang
424
419
  });
425
- const connectSpy = vi.spyOn(_internals, "connectS2s").mockImplementation(async () => {
426
- setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
427
- return mockHandle;
428
- });
420
+ const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
429
421
 
430
422
  const agent = makeAgent();
431
423
  const runtime = createRuntime({
@@ -483,12 +475,7 @@ describe("createRuntime createSession", () => {
483
475
  test("createSession returns a Session object", () => {
484
476
  const agent = makeAgent();
485
477
  const runtime = createRuntime({ agent, env: {} });
486
- const client = {
487
- open: true,
488
- event: vi.fn(),
489
- playAudioChunk: vi.fn(),
490
- playAudioDone: vi.fn(),
491
- };
478
+ const client = makeClientSink();
492
479
  const session = runtime.createSession({
493
480
  id: "test-session",
494
481
  agent: agent.name,
@@ -506,12 +493,7 @@ describe("createRuntime createSession", () => {
506
493
  test("createSession passes skipGreeting option", () => {
507
494
  const agent = makeAgent();
508
495
  const runtime = createRuntime({ agent, env: {} });
509
- const client = {
510
- open: true,
511
- event: vi.fn(),
512
- playAudioChunk: vi.fn(),
513
- playAudioDone: vi.fn(),
514
- };
496
+ const client = makeClientSink();
515
497
  // Should not throw when skipGreeting is set
516
498
  const session = runtime.createSession({
517
499
  id: "test-session",
@@ -525,12 +507,7 @@ describe("createRuntime createSession", () => {
525
507
  test("createSession passes resumeFrom option", () => {
526
508
  const agent = makeAgent();
527
509
  const runtime = createRuntime({ agent, env: {} });
528
- const client = {
529
- open: true,
530
- event: vi.fn(),
531
- playAudioChunk: vi.fn(),
532
- playAudioDone: vi.fn(),
533
- };
510
+ const client = makeClientSink();
534
511
  const session = runtime.createSession({
535
512
  id: "test-session",
536
513
  agent: agent.name,
@@ -604,13 +581,8 @@ describe("createRuntime with custom options", () => {
604
581
  agent,
605
582
  env: { ASSEMBLYAI_API_KEY: "test-api-key" },
606
583
  });
607
- const client = {
608
- open: true,
609
- event: vi.fn(),
610
- playAudioChunk: vi.fn(),
611
- playAudioDone: vi.fn(),
612
- };
613
- // Should not throw — the API key gets passed to createS2sSession internally
584
+ const client = makeClientSink();
585
+ // Should not throw — the API key gets passed to createS2sTransport internally
614
586
  const session = runtime.createSession({
615
587
  id: "test-session",
616
588
  agent: agent.name,
@@ -651,7 +623,7 @@ describe("Runtime — session routing", () => {
651
623
  tts,
652
624
  });
653
625
 
654
- const client = makeClient();
626
+ const client = makeClientSink();
655
627
  const session = runtime.createSession({
656
628
  id: "sess-pipeline",
657
629
  agent: "test-agent",
@@ -677,10 +649,7 @@ describe("Runtime — session routing", () => {
677
649
 
678
650
  test("manifest without stt/llm/tts routes to S2sSession (createWebSocket IS called)", async () => {
679
651
  const mockHandle = makeMockHandle();
680
- const connectSpy = vi.spyOn(_internals, "connectS2s").mockImplementation(async () => {
681
- setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
682
- return mockHandle;
683
- });
652
+ const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
684
653
 
685
654
  const createWebSocket = vi.fn();
686
655
  const runtime = createRuntime({
@@ -690,7 +659,7 @@ describe("Runtime — session routing", () => {
690
659
  createWebSocket,
691
660
  });
692
661
 
693
- const client = makeClient();
662
+ const client = makeClientSink();
694
663
  const session = runtime.createSession({
695
664
  id: "sess-s2s",
696
665
  agent: "test-agent",