@alexkroman1/aai 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/.turbo/turbo-build.log +10 -10
  2. package/CHANGELOG.md +19 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/assemblyai-C969QGi4.js +35 -0
  5. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  6. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  7. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  8. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  9. package/dist/host/providers/tts/rime.d.ts +44 -0
  10. package/dist/host/runtime-barrel.d.ts +4 -2
  11. package/dist/host/runtime-barrel.js +1434 -1209
  12. package/dist/host/runtime.d.ts +2 -2
  13. package/dist/host/s2s.d.ts +16 -16
  14. package/dist/host/session-core.d.ts +37 -0
  15. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  16. package/dist/host/transports/s2s-transport.d.ts +19 -0
  17. package/dist/host/transports/types.d.ts +45 -0
  18. package/dist/host/ws-handler.d.ts +14 -10
  19. package/dist/sdk/_internal-types.d.ts +2 -0
  20. package/dist/sdk/manifest-barrel.js +1 -1
  21. package/dist/sdk/protocol.d.ts +6 -5
  22. package/dist/sdk/providers/llm-barrel.js +1 -1
  23. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  24. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  25. package/dist/sdk/providers/stt-barrel.js +2 -2
  26. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  27. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -2
  30. package/host/_pipeline-test-fakes.ts +6 -3
  31. package/host/_test-utils.ts +209 -128
  32. package/host/builtin-tools.ts +1 -0
  33. package/host/cleanup.test.ts +25 -298
  34. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  35. package/host/providers/resolve.ts +10 -2
  36. package/host/providers/stt/deepgram.test.ts +229 -0
  37. package/host/providers/stt/deepgram.ts +172 -0
  38. package/host/providers/tts/cartesia.ts +7 -3
  39. package/host/providers/tts/rime.test.ts +251 -0
  40. package/host/providers/tts/rime.ts +322 -0
  41. package/host/runtime-barrel.ts +4 -2
  42. package/host/runtime.test.ts +16 -47
  43. package/host/runtime.ts +131 -23
  44. package/host/s2s.test.ts +122 -131
  45. package/host/s2s.ts +44 -52
  46. package/host/session-core.test.ts +257 -0
  47. package/host/session-core.ts +262 -0
  48. package/host/to-vercel-tools.test.ts +9 -1
  49. package/host/transports/pipeline-transport.test.ts +653 -0
  50. package/host/transports/pipeline-transport.ts +532 -0
  51. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  52. package/host/transports/s2s-transport.test.ts +56 -0
  53. package/host/transports/s2s-transport.ts +116 -0
  54. package/host/transports/types.test.ts +22 -0
  55. package/host/transports/types.ts +51 -0
  56. package/host/ws-handler.test.ts +324 -242
  57. package/host/ws-handler.ts +56 -59
  58. package/package.json +2 -1
  59. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  60. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  61. package/sdk/_internal-types.ts +3 -0
  62. package/sdk/protocol-compat.test.ts +8 -0
  63. package/sdk/protocol.ts +6 -5
  64. package/sdk/providers/stt/deepgram.ts +43 -0
  65. package/sdk/providers/stt-barrel.ts +2 -0
  66. package/sdk/providers/tts/cartesia.ts +15 -5
  67. package/sdk/providers/tts/rime.ts +52 -0
  68. package/sdk/providers/tts-barrel.ts +2 -0
  69. package/sdk/schema-alignment.test.ts +18 -6
  70. package/dist/assemblyai-Cxg9eobY.js +0 -18
  71. package/dist/cartesia-DwDk2tEu.js +0 -10
  72. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  73. package/dist/host/pipeline-session.d.ts +0 -52
  74. package/dist/host/session-ctx.d.ts +0 -73
  75. package/dist/host/session.d.ts +0 -62
  76. package/host/pipeline-session-ctx.test.ts +0 -31
  77. package/host/pipeline-session-ctx.ts +0 -36
  78. package/host/pipeline-session.test.ts +0 -672
  79. package/host/pipeline-session.ts +0 -533
  80. package/host/s2s-fixtures.test.ts +0 -237
  81. package/host/session-ctx.test.ts +0 -387
  82. package/host/session-ctx.ts +0 -134
  83. package/host/session-fixture-replay.test.ts +0 -128
  84. package/host/session.test.ts +0 -634
  85. package/host/session.ts +0 -412
  86. /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
@@ -0,0 +1,653 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+
3
+ import { describe, expect, test, vi } from "vitest";
4
+ import {
5
+ createFailingSttProvider,
6
+ createFailingTtsProvider,
7
+ createFakeLanguageModel,
8
+ createFakeSttProvider,
9
+ createFakeTtsProvider,
10
+ type ScriptedPart,
11
+ } from "../_pipeline-test-fakes.ts";
12
+ import { silentLogger } from "../_test-utils.ts";
13
+ import { createPipelineTransport, type PipelineTransportOptions } from "./pipeline-transport.ts";
14
+ import type { TransportCallbacks } from "./types.ts";
15
+
16
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
17
+
18
/**
 * Creates a complete `TransportCallbacks` object in which every handler is a
 * fresh `vi.fn()` spy. Each invocation returns new spies, so call history is
 * never shared between tests.
 */
function makeCallbacks(): TransportCallbacks {
  const spies: TransportCallbacks = {
    // Reply lifecycle
    onReplyStarted: vi.fn(),
    onReplyDone: vi.fn(),
    onCancelled: vi.fn(),
    // Audio output
    onAudioChunk: vi.fn(),
    onAudioDone: vi.fn(),
    // Transcripts and tool calls
    onUserTranscript: vi.fn(),
    onAgentTranscript: vi.fn(),
    onToolCall: vi.fn(),
    // Errors, speech activity, readiness
    onError: vi.fn(),
    onSpeechStarted: vi.fn(),
    onSpeechStopped: vi.fn(),
    onSessionReady: vi.fn(),
  };
  return spies;
}
34
+
35
/**
 * Assembles `PipelineTransportOptions` for a test.
 *
 * @param overrides - Option fields spread last over the defaults, so they
 *   replace any default with the same key.
 * @param fakes - STT/TTS fakes and callbacks to wire into the defaults; any
 *   that are omitted are created fresh.
 * @returns The assembled options together with the `stt`, `tts`, and
 *   `callbacks` taken from `fakes`. These are NOT read back from `overrides`:
 *   a test that overrides one of them and also wants that same instance
 *   returned must pass it in `fakes` as well.
 */
function makeOpts(
  overrides: Partial<PipelineTransportOptions> = {},
  fakes: {
    stt?: ReturnType<typeof createFakeSttProvider>;
    tts?: ReturnType<typeof createFakeTtsProvider>;
    callbacks?: TransportCallbacks;
  } = {},
): {
  opts: PipelineTransportOptions;
  stt: ReturnType<typeof createFakeSttProvider>;
  tts: ReturnType<typeof createFakeTtsProvider>;
  callbacks: TransportCallbacks;
} {
  const {
    stt = createFakeSttProvider(),
    tts = createFakeTtsProvider(),
    callbacks = makeCallbacks(),
  } = fakes;

  const defaults: PipelineTransportOptions = {
    sid: "test-sid",
    agent: "test-agent",
    stt,
    llm: createFakeLanguageModel({ script: [] }),
    tts,
    callbacks,
    sessionConfig: {
      systemPrompt: "You are a test assistant.",
      greeting: "",
    },
    providerKeys: { stt: "stt-key", tts: "tts-key" },
    logger: silentLogger,
  };

  // Caller-supplied overrides are applied after the defaults.
  const opts: PipelineTransportOptions = { ...defaults, ...overrides };
  return { opts, stt, tts, callbacks };
}
69
+
70
+ // ─── Tests ───────────────────────────────────────────────────────────────────
71
+
72
+ describe("PipelineTransport", () => {
73
// start(): provider sessions are opened with the configured options and the
// session-ready callback is raised.
describe("start()", () => {
  // Builds a new session config with an empty greeting for each test.
  const silentSession = () => ({ systemPrompt: "s", greeting: "" });

  test("opens both STT and TTS sessions", async () => {
    const { opts: options, stt, tts } = makeOpts({ sessionConfig: silentSession() });
    const transport = createPipelineTransport(options);
    await transport.start();

    expect(stt.last()).toBeDefined();
    expect(tts.last()).toBeDefined();

    await transport.stop();
  });

  test("passes correct keys and sample rate to STT opener", async () => {
    // The same fake is passed as an override and as a fake so the transport
    // uses the instance this test inspects.
    const stt = createFakeSttProvider();
    const { opts: options } = makeOpts(
      {
        stt,
        providerKeys: { stt: "MY_STT_KEY", tts: "t" },
        sttSampleRate: 8000,
        sttPrompt: "be brief",
        sessionConfig: silentSession(),
      },
      { stt },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    expect(stt.last()?.opts.sampleRate).toBe(8000);
    expect(stt.last()?.opts.apiKey).toBe("MY_STT_KEY");
    expect(stt.last()?.opts.sttPrompt).toBe("be brief");

    await transport.stop();
  });

  test("fires onSessionReady with the sid", async () => {
    const { opts: options, callbacks } = makeOpts({ sessionConfig: silentSession() });
    const transport = createPipelineTransport(options);
    await transport.start();

    // "test-sid" is the default `sid` supplied by makeOpts.
    expect(callbacks.onSessionReady).toHaveBeenCalledWith("test-sid");

    await transport.stop();
  });
});
111
+
112
// Greeting: a configured greeting is spoken as its own turn unless
// `skipGreeting` is set.
describe("greeting", () => {
  test("sends greeting via ttsSession.sendText and fires onReplyStarted + onAgentTranscript + onReplyDone", async () => {
    const greetingText = "Hi there!";
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts(
      { sessionConfig: { systemPrompt: "s", greeting: greetingText } },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    // The greeting runs as a chained turn, so poll until it has completed.
    await vi.waitFor(() => {
      expect(callbacks.onReplyDone).toHaveBeenCalledOnce();
    });

    expect(tts.last()?.textChunks).toContain(greetingText);
    expect(callbacks.onReplyStarted).toHaveBeenCalledWith(expect.stringContaining("greeting"));
    expect(callbacks.onAgentTranscript).toHaveBeenCalledWith(greetingText, false);
    // The transport must not emit onAudioDone itself: session-core's
    // flushReply (run in response to onReplyDone) owns the audioDone +
    // replyDone pairing.
    expect(callbacks.onAudioDone).not.toHaveBeenCalled();

    await transport.stop();
  });

  test("skipGreeting suppresses the greeting turn", async () => {
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts(
      {
        skipGreeting: true,
        sessionConfig: { systemPrompt: "s", greeting: "Hello!" },
      },
      { tts, callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    // Give a (wrongly) scheduled greeting turn time to run before asserting
    // that nothing happened.
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 20);
    });

    expect(callbacks.onReplyStarted).not.toHaveBeenCalled();
    expect(tts.last()?.textChunks).toHaveLength(0);

    await transport.stop();
  });
});
154
+
155
// STT → LLM turn: a final transcript starts a reply whose text is streamed to
// TTS and whose lifecycle is reported through the callbacks.
describe("STT → LLM turn", () => {
  // Builds a new session config with an empty greeting for each test.
  const silentSession = () => ({ systemPrompt: "s", greeting: "" });

  test("final STT event fires onUserTranscript and onReplyStarted", async () => {
    const stt = createFakeSttProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts({ sessionConfig: silentSession() }, { stt, callbacks });
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("Hello agent");
    await vi.waitFor(() => {
      expect(callbacks.onUserTranscript).toHaveBeenCalledWith("Hello agent");
    });

    // Reply ids for user turns are expected to start with "pipeline-".
    expect(callbacks.onReplyStarted).toHaveBeenCalledWith(expect.stringMatching(/^pipeline-/));

    await transport.stop();
  });

  test("empty / whitespace-only final is ignored", async () => {
    const stt = createFakeSttProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts({ sessionConfig: silentSession() }, { stt, callbacks });
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal(" ");
    // Allow any (wrongly) started turn to surface before the negative checks.
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 10);
    });

    expect(callbacks.onUserTranscript).not.toHaveBeenCalled();
    expect(callbacks.onReplyStarted).not.toHaveBeenCalled();

    await transport.stop();
  });

  test("LLM text chunk is forwarded to ttsSession.sendText", async () => {
    const script: ScriptedPart[] = [
      { type: "text", text: "I am " },
      { type: "text", text: "the answer" },
    ];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts: options } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        sessionConfig: silentSession(),
      },
      { stt, tts },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("what is the answer?");
    await vi.waitFor(() => {
      expect(tts.last()?.textChunks.length).toBeGreaterThan(0);
    });

    expect(tts.last()?.textChunks.join("")).toContain("the answer");

    await transport.stop();
  });

  test("TTS audio event is forwarded to callbacks.onAudioChunk as Uint8Array", async () => {
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts(
      { sessionConfig: silentSession() },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    const pcm = new Int16Array([100, 200, 300]);
    tts.last()?.fireAudio(pcm);

    expect(callbacks.onAudioChunk).toHaveBeenCalledOnce();
    const audioSpy = callbacks.onAudioChunk as ReturnType<typeof vi.fn>;
    // biome-ignore lint/style/noNonNullAssertion: test assertion — calledOnce proven above
    const forwarded = audioSpy.mock.calls[0]![0] as Uint8Array;
    expect(forwarded).toBeInstanceOf(Uint8Array);
    // Same number of bytes as the PCM samples that were fired.
    expect(forwarded.byteLength).toBe(pcm.byteLength);

    await transport.stop();
  });

  test("full turn: onUserTranscript → onReplyStarted → onAgentTranscript → onReplyDone (no transport-level onAudioDone)", async () => {
    const script: ScriptedPart[] = [{ type: "text", text: "Sure!" }];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        sessionConfig: silentSession(),
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("test question");
    await vi.waitFor(() => {
      expect(callbacks.onReplyDone).toHaveBeenCalledOnce();
    });

    expect(callbacks.onUserTranscript).toHaveBeenCalledWith("test question");
    expect(callbacks.onReplyStarted).toHaveBeenCalled();
    expect(callbacks.onAgentTranscript).toHaveBeenCalledWith("Sure!", false);
    // The transport must not emit onAudioDone itself: session-core's
    // flushReply (run in response to onReplyDone) owns the audioDone +
    // replyDone pairing.
    expect(callbacks.onAudioDone).not.toHaveBeenCalled();

    await transport.stop();
  });

  test("TTS flush is called after LLM stream finishes", async () => {
    const script: ScriptedPart[] = [{ type: "text", text: "hi" }];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts: options } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        sessionConfig: silentSession(),
      },
      { stt, tts },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("go");
    await vi.waitFor(() => {
      expect(tts.last()?.flush).toHaveBeenCalledOnce();
    });

    await transport.stop();
  });
});
281
+
282
// streamText config plumbing: transport options (`toolChoice`, `maxSteps`)
// must reach the language model calls recorded by the fake LLM.
describe("streamText config plumbing", () => {
  // A single no-op tool, shared by the tests that need tool schemas.
  const noopToolSchemas = [
    {
      type: "function" as const,
      name: "noop",
      description: "No-op tool for plumbing tests.",
      parameters: { type: "object" as const, properties: {}, additionalProperties: false },
    },
  ];
  const noopExecuteTool = async () => "{}";
  // Builds a new session config with an empty greeting for each test.
  const silentSession = () => ({ systemPrompt: "s", greeting: "" });

  test("forwards toolChoice to doStream (default 'auto' when omitted)", async () => {
    const stt = createFakeSttProvider();
    const llm = createFakeLanguageModel({ script: [{ type: "text", text: "ok" }] });
    const { opts: options } = makeOpts(
      {
        llm,
        toolSchemas: noopToolSchemas,
        executeTool: noopExecuteTool,
        sessionConfig: silentSession(),
      },
      { stt },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("hi");
    await vi.waitFor(() => {
      expect(llm.calls.length).toBeGreaterThan(0);
    });

    expect(llm.calls[0]?.toolChoice).toEqual({ type: "auto" });

    await transport.stop();
  });

  test("forwards explicit toolChoice='required' to doStream", async () => {
    const stt = createFakeSttProvider();
    const llm = createFakeLanguageModel({ script: [{ type: "text", text: "ok" }] });
    const { opts: options } = makeOpts(
      {
        llm,
        toolChoice: "required",
        toolSchemas: noopToolSchemas,
        executeTool: noopExecuteTool,
        sessionConfig: silentSession(),
      },
      { stt },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("hi");
    await vi.waitFor(() => {
      expect(llm.calls.length).toBeGreaterThan(0);
    });

    expect(llm.calls[0]?.toolChoice).toEqual({ type: "required" });

    await transport.stop();
  });

  test("maxSteps caps the doStream loop", async () => {
    // Two scripted steps, each emitting one text part. With maxSteps=1 only
    // the first step should run; if the option were not plumbed through, the
    // default (5) would let both steps fire.
    const stt = createFakeSttProvider();
    const llm = createFakeLanguageModel({
      steps: [[{ type: "text", text: "step1" }], [{ type: "text", text: "step2" }]],
    });
    const { opts: options } = makeOpts(
      {
        llm,
        maxSteps: 1,
        sessionConfig: silentSession(),
      },
      { stt },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("hi");
    await vi.waitFor(() => {
      expect(llm.calls.length).toBeGreaterThanOrEqual(1);
    });

    // Give any extra step a chance to run before checking the final count.
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 20);
    });
    expect(llm.calls.length).toBe(1);

    await transport.stop();
  });
});
366
+
367
// Barge-in: interrupting an in-flight reply, either through a partial STT
// event or through an explicit cancelReply() call.
describe("barge-in", () => {
  // Builds a new session config with an empty greeting for each test.
  const silentSession = () => ({ systemPrompt: "s", greeting: "" });

  test("partial STT event during an in-flight turn triggers cancel and onCancelled", async () => {
    const script: ScriptedPart[] = [
      { type: "text", text: "Hello " },
      { type: "text", text: "how can " },
      { type: "text", text: "I help?" },
    ];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts(
      {
        // delayMs is set so the reply is still streaming when the partial arrives.
        llm: createFakeLanguageModel({ script, delayMs: 20 }),
        sessionConfig: silentSession(),
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    // Start a turn and wait until TTS has received text, i.e. the reply is
    // in flight.
    stt.last()?.fireFinal("hi there");
    await vi.waitFor(() => {
      expect(tts.last()?.textChunks.length).toBeGreaterThan(0);
    });

    // The user speaks over the reply.
    stt.last()?.firePartial("wait");

    expect(callbacks.onCancelled).toHaveBeenCalled();
    expect(tts.last()?.cancel).toHaveBeenCalled();

    await transport.stop();
  });

  test("cancelReply() aborts the turn and calls ttsSession.cancel()", async () => {
    const script: ScriptedPart[] = [
      { type: "text", text: "some " },
      { type: "text", text: "reply" },
    ];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts(
      {
        llm: createFakeLanguageModel({ script, delayMs: 20 }),
        sessionConfig: silentSession(),
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("question");
    await vi.waitFor(() => {
      expect(tts.last()?.textChunks.length).toBeGreaterThan(0);
    });

    transport.cancelReply();

    expect(tts.last()?.cancel).toHaveBeenCalled();
    // cancelReply() must NOT raise callbacks.onCancelled: when the cancel
    // comes from the client, session-core calls client.cancelled() itself.
    // The transport only fires onCancelled for barge-in (STT partial).
    expect(callbacks.onCancelled).not.toHaveBeenCalled();

    await transport.stop();
  });
});
432
+
433
// stop(): both provider sessions are closed, and repeated calls are safe.
describe("stop()", () => {
  // Builds a new session config with an empty greeting for each test.
  const silentSession = () => ({ systemPrompt: "s", greeting: "" });

  test("closes both STT and TTS sessions", async () => {
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts: options } = makeOpts({ sessionConfig: silentSession() }, { stt, tts });
    const transport = createPipelineTransport(options);

    await transport.start();
    await transport.stop();

    expect(stt.last()?.closed.value).toBe(true);
    expect(tts.last()?.closed.value).toBe(true);
  });

  test("stop() is idempotent", async () => {
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts: options } = makeOpts({ sessionConfig: silentSession() }, { stt, tts });
    const transport = createPipelineTransport(options);

    await transport.start();
    await transport.stop();
    // A second stop() must resolve without throwing.
    await transport.stop();

    expect(stt.last()?.closed.value).toBe(true);
  });
});
462
+
463
// sendUserAudio(): raw bytes from the client are handed to the STT session as
// 16-bit PCM samples.
describe("sendUserAudio()", () => {
  test("converts aligned Uint8Array to Int16Array and calls sttSession.sendAudio", async () => {
    const stt = createFakeSttProvider();
    const { opts } = makeOpts({ sessionConfig: { systemPrompt: "s", greeting: "" } }, { stt });
    const t = createPipelineTransport(opts);
    await t.start();

    // Four bytes in a dedicated buffer → two 16-bit samples.
    const buf = new ArrayBuffer(4);
    const bytes = new Uint8Array(buf);
    bytes.set([0x01, 0x02, 0x03, 0x04]);
    t.sendUserAudio(bytes);

    const sttSession = stt.last();
    expect(sttSession?.sendAudio).toHaveBeenCalledOnce();
    // biome-ignore lint/style/noNonNullAssertion: test assertion — calledOnce proven above
    const pcm = (sttSession?.sendAudio as ReturnType<typeof vi.fn>).mock
      .calls[0]![0] as Int16Array;
    expect(pcm).toBeInstanceOf(Int16Array);
    expect(pcm.length).toBe(2);

    await t.stop();
  });

  test("handles odd-length Uint8Array by copying and truncating", async () => {
    const stt = createFakeSttProvider();
    const { opts } = makeOpts({ sessionConfig: { systemPrompt: "s", greeting: "" } }, { stt });
    const t = createPipelineTransport(opts);
    await t.start();

    const bytes = new Uint8Array([1, 2, 3]); // 3 bytes → 1 sample
    t.sendUserAudio(bytes);

    const sttSession = stt.last();
    // Assert the call happened before reading its arguments, so a regression
    // fails with a clear assertion message instead of a TypeError when
    // indexing into an empty `mock.calls`.
    expect(sttSession?.sendAudio).toHaveBeenCalledOnce();
    // biome-ignore lint/style/noNonNullAssertion: test assertion — calledOnce proven above
    const pcm = (sttSession?.sendAudio as ReturnType<typeof vi.fn>).mock
      .calls[0]![0] as Int16Array;
    expect(pcm).toBeInstanceOf(Int16Array);
    // The trailing odd byte is dropped.
    expect(pcm.length).toBe(1);

    await t.stop();
  });
});
497
+
498
// sendToolResult(): per the test title, this is a no-op under "Option A"
// (inline tool execution); the only requirement checked is that it never throws.
describe("sendToolResult()", () => {
  test("is a no-op (Option A: inline tool execution)", async () => {
    const { opts: options } = makeOpts({ sessionConfig: { systemPrompt: "s", greeting: "" } });
    const transport = createPipelineTransport(options);
    await transport.start();

    const deliverResult = () => transport.sendToolResult("call-1", "result");
    expect(deliverResult).not.toThrow();

    await transport.stop();
  });
});
507
+
508
// Tool observability: tool calls that appear in the LLM stream are reported
// through callbacks.onToolCall.
describe("tool observability", () => {
  test("callbacks.onToolCall fires for each tool-call stream part", async () => {
    const executeTool = vi.fn(async () => "sunny");
    // Scripted stream: one tool call, its result, then the spoken answer.
    const script: ScriptedPart[] = [
      {
        type: "tool-call",
        toolCallId: "tc-1",
        toolName: "get_weather",
        input: JSON.stringify({ city: "SF" }),
      },
      { type: "tool-result", toolCallId: "tc-1", toolName: "get_weather", result: "sunny" },
      { type: "text", text: "It's sunny." },
    ];
    const weatherToolSchema = {
      type: "function" as const,
      name: "get_weather",
      description: "Look up the weather.",
      parameters: {
        type: "object" as const,
        properties: { city: { type: "string" } },
        required: ["city"],
      },
    };
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        executeTool,
        toolSchemas: [weatherToolSchema],
        sessionConfig: { systemPrompt: "s", greeting: "" },
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireFinal("how's the weather?");
    await vi.waitFor(() => {
      expect(callbacks.onReplyDone).toHaveBeenCalled();
    });

    // onToolCall is raised for observability (Option A).
    expect(callbacks.onToolCall).toHaveBeenCalledWith("tc-1", "get_weather", expect.any(Object));

    await transport.stop();
  });
});
555
+
556
// Provider errors: failures raised by a running STT/TTS session, or while
// opening one, are surfaced through callbacks.onError with the provider kind.
describe("provider errors", () => {
  // Builds a new session config with an empty greeting for each test.
  const silentSession = () => ({ systemPrompt: "s", greeting: "" });

  test("STT error fires onError('stt', ...) and terminates transport", async () => {
    const stt = createFakeSttProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts({ sessionConfig: silentSession() }, { stt, callbacks });
    const transport = createPipelineTransport(options);
    await transport.start();

    stt.last()?.fireError("stt_stream_error", "stt failed");

    expect(callbacks.onError).toHaveBeenCalledWith("stt", "stt failed");

    await transport.stop();
  });

  test("TTS error fires onError('tts', ...) and terminates transport", async () => {
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts: options } = makeOpts({ sessionConfig: silentSession() }, { tts, callbacks });
    const transport = createPipelineTransport(options);
    await transport.start();

    tts.last()?.fireError("tts_stream_error", "tts failed");

    expect(callbacks.onError).toHaveBeenCalledWith("tts", "tts failed");

    await transport.stop();
  });

  test("STT open failure fires onError('stt', ...) via reportOpenRejection", async () => {
    const callbacks = makeCallbacks();
    const failingStt = createFailingSttProvider("stt_connect_failed", "connect failed");
    const { opts: options } = makeOpts(
      {
        stt: failingStt,
        sessionConfig: silentSession(),
      },
      { callbacks },
    );
    const transport = createPipelineTransport(options);

    // start() is awaited without a catch: the open failure is expected to be
    // reported through onError rather than by rejecting.
    await transport.start();

    expect(callbacks.onError).toHaveBeenCalledWith("stt", "connect failed");

    await transport.stop();
  });

  test("TTS open failure fires onError('tts', ...) via reportOpenRejection", async () => {
    const callbacks = makeCallbacks();
    const failingTts = createFailingTtsProvider("tts_connect_failed", "tts connect failed");
    const { opts: options } = makeOpts(
      {
        tts: failingTts,
        sessionConfig: silentSession(),
      },
      { callbacks },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    expect(callbacks.onError).toHaveBeenCalledWith("tts", "tts connect failed");

    await transport.stop();
  });

  test("when STT fails, TTS session is still opened but then immediately closed", async () => {
    const tts = createFakeTtsProvider();
    const failingStt = createFailingSttProvider("stt_connect_failed", "bad key");
    const { opts: options } = makeOpts(
      {
        stt: failingStt,
        tts,
        sessionConfig: silentSession(),
      },
      { tts },
    );
    const transport = createPipelineTransport(options);
    await transport.start();

    // Both providers are opened concurrently (Promise.allSettled), so the TTS
    // session does get opened, but it must be closed again once STT fails.
    expect(tts.last()?.closed.value).toBe(true);

    await transport.stop();
  });
});
632
+
633
// History seeding: a session config that carries prior conversation history.
describe("history seeding", () => {
  test("sessionConfig.history is used as initial conversation messages", async () => {
    // Seeding happens inside the transport. This test only confirms that
    // start() resolves when history is supplied; it does not inspect the
    // messages the LLM receives.
    const priorTurns = [
      { role: "user", content: "hi" },
      { role: "assistant", content: "hello" },
    ];
    const { opts: options } = makeOpts({
      sessionConfig: {
        systemPrompt: "s",
        greeting: "",
        history: priorTurns,
      },
    });
    const transport = createPipelineTransport(options);

    await expect(transport.start()).resolves.toBeUndefined();

    await transport.stop();
  });
});
653
+ });