@alexkroman1/aai 1.4.5 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/CHANGELOG.md +19 -0
- package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
- package/dist/assemblyai-C969QGi4.js +35 -0
- package/dist/cartesia-BfQPOQ7Y.js +37 -0
- package/dist/host/_pipeline-test-fakes.d.ts +3 -1
- package/dist/host/providers/stt/deepgram.d.ts +28 -0
- package/dist/host/providers/tts/cartesia.d.ts +1 -1
- package/dist/host/providers/tts/rime.d.ts +44 -0
- package/dist/host/runtime-barrel.d.ts +4 -2
- package/dist/host/runtime-barrel.js +1434 -1209
- package/dist/host/runtime.d.ts +2 -2
- package/dist/host/s2s.d.ts +16 -16
- package/dist/host/session-core.d.ts +37 -0
- package/dist/host/transports/pipeline-transport.d.ts +48 -0
- package/dist/host/transports/s2s-transport.d.ts +19 -0
- package/dist/host/transports/types.d.ts +45 -0
- package/dist/host/ws-handler.d.ts +14 -10
- package/dist/sdk/_internal-types.d.ts +2 -0
- package/dist/sdk/manifest-barrel.js +1 -1
- package/dist/sdk/protocol.d.ts +6 -5
- package/dist/sdk/providers/llm-barrel.js +1 -1
- package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
- package/dist/sdk/providers/stt-barrel.d.ts +1 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
- package/dist/sdk/providers/tts/rime.d.ts +42 -0
- package/dist/sdk/providers/tts-barrel.d.ts +1 -0
- package/dist/sdk/providers/tts-barrel.js +2 -2
- package/host/_pipeline-test-fakes.ts +6 -3
- package/host/_test-utils.ts +209 -128
- package/host/builtin-tools.ts +1 -0
- package/host/cleanup.test.ts +25 -298
- package/host/integration/pipeline-reference.integration.test.ts +30 -35
- package/host/providers/resolve.ts +10 -2
- package/host/providers/stt/deepgram.test.ts +229 -0
- package/host/providers/stt/deepgram.ts +172 -0
- package/host/providers/tts/cartesia.ts +7 -3
- package/host/providers/tts/rime.test.ts +251 -0
- package/host/providers/tts/rime.ts +322 -0
- package/host/runtime-barrel.ts +4 -2
- package/host/runtime.test.ts +16 -47
- package/host/runtime.ts +131 -23
- package/host/s2s.test.ts +122 -131
- package/host/s2s.ts +44 -52
- package/host/session-core.test.ts +257 -0
- package/host/session-core.ts +262 -0
- package/host/to-vercel-tools.test.ts +9 -1
- package/host/transports/pipeline-transport.test.ts +653 -0
- package/host/transports/pipeline-transport.ts +532 -0
- package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
- package/host/transports/s2s-transport.test.ts +56 -0
- package/host/transports/s2s-transport.ts +116 -0
- package/host/transports/types.test.ts +22 -0
- package/host/transports/types.ts +51 -0
- package/host/ws-handler.test.ts +324 -242
- package/host/ws-handler.ts +56 -59
- package/package.json +2 -1
- package/sdk/__snapshots__/exports.test.ts.snap +3 -3
- package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
- package/sdk/_internal-types.ts +3 -0
- package/sdk/protocol-compat.test.ts +8 -0
- package/sdk/protocol.ts +6 -5
- package/sdk/providers/stt/deepgram.ts +43 -0
- package/sdk/providers/stt-barrel.ts +2 -0
- package/sdk/providers/tts/cartesia.ts +15 -5
- package/sdk/providers/tts/rime.ts +52 -0
- package/sdk/providers/tts-barrel.ts +2 -0
- package/sdk/schema-alignment.test.ts +18 -6
- package/dist/assemblyai-Cxg9eobY.js +0 -18
- package/dist/cartesia-DwDk2tEu.js +0 -10
- package/dist/host/pipeline-session-ctx.d.ts +0 -24
- package/dist/host/pipeline-session.d.ts +0 -52
- package/dist/host/session-ctx.d.ts +0 -73
- package/dist/host/session.d.ts +0 -62
- package/host/pipeline-session-ctx.test.ts +0 -31
- package/host/pipeline-session-ctx.ts +0 -36
- package/host/pipeline-session.test.ts +0 -672
- package/host/pipeline-session.ts +0 -533
- package/host/s2s-fixtures.test.ts +0 -237
- package/host/session-ctx.test.ts +0 -387
- package/host/session-ctx.ts +0 -134
- package/host/session-fixture-replay.test.ts +0 -128
- package/host/session.test.ts +0 -634
- package/host/session.ts +0 -412
- /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
|
@@ -0,0 +1,653 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
|
|
3
|
+
import { describe, expect, test, vi } from "vitest";
|
|
4
|
+
import {
|
|
5
|
+
createFailingSttProvider,
|
|
6
|
+
createFailingTtsProvider,
|
|
7
|
+
createFakeLanguageModel,
|
|
8
|
+
createFakeSttProvider,
|
|
9
|
+
createFakeTtsProvider,
|
|
10
|
+
type ScriptedPart,
|
|
11
|
+
} from "../_pipeline-test-fakes.ts";
|
|
12
|
+
import { silentLogger } from "../_test-utils.ts";
|
|
13
|
+
import { createPipelineTransport, type PipelineTransportOptions } from "./pipeline-transport.ts";
|
|
14
|
+
import type { TransportCallbacks } from "./types.ts";
|
|
15
|
+
|
|
16
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/**
 * Builds a complete `TransportCallbacks` object for a test.
 *
 * Every handler is its own fresh `vi.fn()` spy, so a test can assert on any
 * callback without spies being shared between transports.
 */
function makeCallbacks(): TransportCallbacks {
  const spies: TransportCallbacks = {
    // Session lifecycle
    onSessionReady: vi.fn(),
    onError: vi.fn(),
    // Reply lifecycle
    onReplyStarted: vi.fn(),
    onReplyDone: vi.fn(),
    onCancelled: vi.fn(),
    // Audio
    onAudioChunk: vi.fn(),
    onAudioDone: vi.fn(),
    // Transcripts and tools
    onUserTranscript: vi.fn(),
    onAgentTranscript: vi.fn(),
    onToolCall: vi.fn(),
    // User speech activity
    onSpeechStarted: vi.fn(),
    onSpeechStopped: vi.fn(),
  };
  return spies;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Assembles `PipelineTransportOptions` for a test together with the fakes
 * that were wired into it.
 *
 * @param overrides Fields spread last over the defaults, so any option
 *   (including `stt`, `tts` and `callbacks`) can be replaced.
 * @param fakes Optional pre-built STT/TTS fakes and callbacks; anything left
 *   out is created fresh.
 * @returns The options plus the `stt`, `tts` and `callbacks` taken from
 *   `fakes` (or their defaults). Note these are NOT taken from `overrides`:
 *   if a test replaces `stt`/`tts` through `overrides` only, the returned
 *   handles refer to the unused default fakes.
 */
function makeOpts(
  overrides: Partial<PipelineTransportOptions> = {},
  fakes: {
    stt?: ReturnType<typeof createFakeSttProvider>;
    tts?: ReturnType<typeof createFakeTtsProvider>;
    callbacks?: TransportCallbacks;
  } = {},
): {
  opts: PipelineTransportOptions;
  stt: ReturnType<typeof createFakeSttProvider>;
  tts: ReturnType<typeof createFakeTtsProvider>;
  callbacks: TransportCallbacks;
} {
  // Defaults are only constructed for the pieces the caller did not supply.
  const {
    stt = createFakeSttProvider(),
    tts = createFakeTtsProvider(),
    callbacks = makeCallbacks(),
  } = fakes;

  const opts: PipelineTransportOptions = {
    sid: "test-sid",
    agent: "test-agent",
    stt,
    llm: createFakeLanguageModel({ script: [] }),
    tts,
    callbacks,
    sessionConfig: {
      systemPrompt: "You are a test assistant.",
      greeting: "",
    },
    providerKeys: { stt: "stt-key", tts: "tts-key" },
    logger: silentLogger,
    // Must stay last so caller-supplied fields win over the defaults above.
    ...overrides,
  };

  return { opts, stt, tts, callbacks };
}
|
|
69
|
+
|
|
70
|
+
// ─── Tests ───────────────────────────────────────────────────────────────────
|
|
71
|
+
|
|
72
|
+
describe("PipelineTransport", () => {
|
|
73
|
+
describe("start()", () => {
  // Session config with an empty greeting string, rebuilt for every test.
  const emptyGreeting = () => ({ systemPrompt: "s", greeting: "" });

  test("opens both STT and TTS sessions", async () => {
    const { opts, stt, tts } = makeOpts({ sessionConfig: emptyGreeting() });
    const transport = createPipelineTransport(opts);

    await transport.start();

    expect(stt.last()).toBeDefined();
    expect(tts.last()).toBeDefined();
    await transport.stop();
  });

  test("passes correct keys and sample rate to STT opener", async () => {
    const stt = createFakeSttProvider();
    const { opts } = makeOpts(
      {
        stt,
        providerKeys: { stt: "MY_STT_KEY", tts: "t" },
        sttSampleRate: 8000,
        sttPrompt: "be brief",
        sessionConfig: emptyGreeting(),
      },
      { stt },
    );
    const transport = createPipelineTransport(opts);

    await transport.start();

    // The fake records the options it was opened with.
    const opened = stt.last();
    expect(opened?.opts.sampleRate).toBe(8000);
    expect(opened?.opts.apiKey).toBe("MY_STT_KEY");
    expect(opened?.opts.sttPrompt).toBe("be brief");
    await transport.stop();
  });

  test("fires onSessionReady with the sid", async () => {
    const { opts, callbacks } = makeOpts({ sessionConfig: emptyGreeting() });
    const transport = createPipelineTransport(opts);

    await transport.start();

    // "test-sid" is the default sid supplied by makeOpts.
    expect(callbacks.onSessionReady).toHaveBeenCalledWith("test-sid");
    await transport.stop();
  });
});
|
|
111
|
+
|
|
112
|
+
describe("greeting", () => {
  // Resolves after `ms` milliseconds; used to give an unwanted turn time to appear.
  const pause = (ms: number) =>
    new Promise((resolve) => {
      setTimeout(resolve, ms);
    });

  test("sends greeting via ttsSession.sendText and fires onReplyStarted + onAgentTranscript + onReplyDone", async () => {
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      { sessionConfig: { systemPrompt: "s", greeting: "Hi there!" } },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    // The greeting is delivered asynchronously after start(), so poll until
    // the reply has been reported as done.
    await vi.waitFor(() => {
      expect(callbacks.onReplyDone).toHaveBeenCalledOnce();
    });

    expect(tts.last()?.textChunks).toContain("Hi there!");
    expect(callbacks.onReplyStarted).toHaveBeenCalledWith(expect.stringContaining("greeting"));
    expect(callbacks.onAgentTranscript).toHaveBeenCalledWith("Hi there!", false);
    // The transport itself must not emit onAudioDone: session-core's
    // flushReply (triggered by onReplyDone) owns the audioDone + replyDone
    // pairing.
    expect(callbacks.onAudioDone).not.toHaveBeenCalled();
    await transport.stop();
  });

  test("skipGreeting suppresses the greeting turn", async () => {
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      {
        skipGreeting: true,
        sessionConfig: { systemPrompt: "s", greeting: "Hello!" },
      },
      { tts, callbacks },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    // Negative assertion: wait briefly so a greeting turn would have had a
    // chance to start if it were going to.
    await pause(20);

    expect(callbacks.onReplyStarted).not.toHaveBeenCalled();
    expect(tts.last()?.textChunks).toHaveLength(0);
    await transport.stop();
  });
});
|
|
154
|
+
|
|
155
|
+
describe("STT → LLM turn", () => {
  // Session config with an empty greeting string, rebuilt for every test.
  const emptyGreeting = () => ({ systemPrompt: "s", greeting: "" });

  test("final STT event fires onUserTranscript and onReplyStarted", async () => {
    const stt = createFakeSttProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts({ sessionConfig: emptyGreeting() }, { stt, callbacks });
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("Hello agent");

    await vi.waitFor(() => {
      expect(callbacks.onUserTranscript).toHaveBeenCalledWith("Hello agent");
    });
    // Reply ids for user turns are expected to start with "pipeline-".
    expect(callbacks.onReplyStarted).toHaveBeenCalledWith(expect.stringMatching(/^pipeline-/));
    await transport.stop();
  });

  test("empty / whitespace-only final is ignored", async () => {
    const stt = createFakeSttProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts({ sessionConfig: emptyGreeting() }, { stt, callbacks });
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("   ");
    // Negative assertion: give a turn 10 ms to (incorrectly) start.
    await new Promise((resolve) => {
      setTimeout(resolve, 10);
    });

    expect(callbacks.onUserTranscript).not.toHaveBeenCalled();
    expect(callbacks.onReplyStarted).not.toHaveBeenCalled();
    await transport.stop();
  });

  test("LLM text chunk is forwarded to ttsSession.sendText", async () => {
    const script: ScriptedPart[] = [
      { type: "text", text: "I am " },
      { type: "text", text: "the answer" },
    ];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        sessionConfig: emptyGreeting(),
      },
      { stt, tts },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("what is the answer?");

    // Wait until the TTS fake has recorded at least one text chunk.
    await vi.waitFor(() => {
      expect(tts.last()?.textChunks.length).toBeGreaterThan(0);
    });
    const spoken = tts.last()?.textChunks.join("");
    expect(spoken).toContain("the answer");
    await transport.stop();
  });

  test("TTS audio event is forwarded to callbacks.onAudioChunk as Uint8Array", async () => {
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts({ sessionConfig: emptyGreeting() }, { stt, tts, callbacks });
    const transport = createPipelineTransport(opts);
    await transport.start();

    const pcm = new Int16Array([100, 200, 300]);
    tts.last()?.fireAudio(pcm);

    expect(callbacks.onAudioChunk).toHaveBeenCalledOnce();
    const audioSpy = callbacks.onAudioChunk as ReturnType<typeof vi.fn>;
    // biome-ignore lint/style/noNonNullAssertion: test assertion — calledOnce proven above
    const forwarded = audioSpy.mock.calls[0]![0] as Uint8Array;
    expect(forwarded).toBeInstanceOf(Uint8Array);
    // Same number of bytes as the Int16 samples that were fired.
    expect(forwarded.byteLength).toBe(pcm.byteLength);
    await transport.stop();
  });

  test("full turn: onUserTranscript → onReplyStarted → onAgentTranscript → onReplyDone (no transport-level onAudioDone)", async () => {
    const script: ScriptedPart[] = [{ type: "text", text: "Sure!" }];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        sessionConfig: emptyGreeting(),
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("test question");

    await vi.waitFor(() => {
      expect(callbacks.onReplyDone).toHaveBeenCalledOnce();
    });
    expect(callbacks.onUserTranscript).toHaveBeenCalledWith("test question");
    expect(callbacks.onReplyStarted).toHaveBeenCalled();
    expect(callbacks.onAgentTranscript).toHaveBeenCalledWith("Sure!", false);
    // The transport itself must not emit onAudioDone: session-core's
    // flushReply (triggered by onReplyDone) owns the audioDone + replyDone
    // pairing.
    expect(callbacks.onAudioDone).not.toHaveBeenCalled();
    await transport.stop();
  });

  test("TTS flush is called after LLM stream finishes", async () => {
    const script: ScriptedPart[] = [{ type: "text", text: "hi" }];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        sessionConfig: emptyGreeting(),
      },
      { stt, tts },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("go");

    await vi.waitFor(() => {
      expect(tts.last()?.flush).toHaveBeenCalledOnce();
    });
    await transport.stop();
  });
});
|
|
281
|
+
|
|
282
|
+
describe("streamText config plumbing", () => {
  // A single parameterless tool; present only so tool-related options have
  // something to apply to.
  const dummyToolSchemas = [
    {
      type: "function" as const,
      name: "noop",
      description: "No-op tool for plumbing tests.",
      parameters: { type: "object" as const, properties: {}, additionalProperties: false },
    },
  ];
  // Tool executor that always resolves to an empty JSON object string.
  const dummyExecuteTool = async () => "{}";

  test("forwards toolChoice to doStream (default 'auto' when omitted)", async () => {
    const stt = createFakeSttProvider();
    const llm = createFakeLanguageModel({ script: [{ type: "text", text: "ok" }] });
    const { opts } = makeOpts(
      {
        llm,
        toolSchemas: dummyToolSchemas,
        executeTool: dummyExecuteTool,
        sessionConfig: { systemPrompt: "s", greeting: "" },
      },
      { stt },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("hi");

    // The fake model records each call it receives in `llm.calls`.
    await vi.waitFor(() => {
      expect(llm.calls.length).toBeGreaterThan(0);
    });
    const firstCall = llm.calls[0];
    expect(firstCall?.toolChoice).toEqual({ type: "auto" });
    await transport.stop();
  });

  test("forwards explicit toolChoice='required' to doStream", async () => {
    const stt = createFakeSttProvider();
    const llm = createFakeLanguageModel({ script: [{ type: "text", text: "ok" }] });
    const { opts } = makeOpts(
      {
        llm,
        toolChoice: "required",
        toolSchemas: dummyToolSchemas,
        executeTool: dummyExecuteTool,
        sessionConfig: { systemPrompt: "s", greeting: "" },
      },
      { stt },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("hi");

    await vi.waitFor(() => {
      expect(llm.calls.length).toBeGreaterThan(0);
    });
    const firstCall = llm.calls[0];
    expect(firstCall?.toolChoice).toEqual({ type: "required" });
    await transport.stop();
  });

  test("maxSteps caps the doStream loop", async () => {
    // Two steps are scripted, each emitting one text part. With maxSteps=1
    // only the first may run; if the option were not plumbed through, the
    // default of 5 would let both fire.
    const stt = createFakeSttProvider();
    const llm = createFakeLanguageModel({
      steps: [[{ type: "text", text: "step1" }], [{ type: "text", text: "step2" }]],
    });
    const { opts } = makeOpts(
      {
        llm,
        maxSteps: 1,
        sessionConfig: { systemPrompt: "s", greeting: "" },
      },
      { stt },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("hi");

    await vi.waitFor(() => {
      expect(llm.calls.length).toBeGreaterThanOrEqual(1);
    });
    // Give a (wrongly) scheduled second step time to run before counting.
    await new Promise((resolve) => {
      setTimeout(resolve, 20);
    });
    expect(llm.calls.length).toBe(1);
    await transport.stop();
  });
});
|
|
366
|
+
|
|
367
|
+
describe("barge-in", () => {
  test("partial STT event during an in-flight turn triggers cancel and onCancelled", async () => {
    const script: ScriptedPart[] = [
      { type: "text", text: "Hello " },
      { type: "text", text: "how can " },
      { type: "text", text: "I help?" },
    ];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      {
        // delayMs is passed so the reply is still in flight when the partial arrives.
        llm: createFakeLanguageModel({ script, delayMs: 20 }),
        sessionConfig: { systemPrompt: "s", greeting: "" },
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    // Kick off a turn and wait until TTS has started receiving text, i.e.
    // the agent is mid-reply.
    stt.last()?.fireFinal("hi there");
    await vi.waitFor(() => {
      expect(tts.last()?.textChunks.length).toBeGreaterThan(0);
    });

    // The user starts talking over the agent.
    stt.last()?.firePartial("wait");

    // Both effects are asserted synchronously, right after the partial fires.
    expect(callbacks.onCancelled).toHaveBeenCalled();
    expect(tts.last()?.cancel).toHaveBeenCalled();
    await transport.stop();
  });

  test("cancelReply() aborts the turn and calls ttsSession.cancel()", async () => {
    const script: ScriptedPart[] = [
      { type: "text", text: "some " },
      { type: "text", text: "reply" },
    ];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      {
        llm: createFakeLanguageModel({ script, delayMs: 20 }),
        sessionConfig: { systemPrompt: "s", greeting: "" },
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("question");
    await vi.waitFor(() => {
      expect(tts.last()?.textChunks.length).toBeGreaterThan(0);
    });

    transport.cancelReply();

    expect(tts.last()?.cancel).toHaveBeenCalled();
    // A client-initiated cancelReply() must NOT raise callbacks.onCancelled:
    // session-core calls client.cancelled() itself in that case. The
    // transport only fires onCancelled for barge-in (an STT partial).
    expect(callbacks.onCancelled).not.toHaveBeenCalled();
    await transport.stop();
  });
});
|
|
432
|
+
|
|
433
|
+
describe("stop()", () => {
  test("closes both STT and TTS sessions", async () => {
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts } = makeOpts(
      { sessionConfig: { systemPrompt: "s", greeting: "" } },
      { stt, tts },
    );
    const t = createPipelineTransport(opts);
    await t.start();
    await t.stop();
    // Each fake session exposes its closed state through `closed.value`.
    expect(stt.last()?.closed.value).toBe(true);
    expect(tts.last()?.closed.value).toBe(true);
  });

  test("stop() is idempotent", async () => {
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const { opts } = makeOpts(
      { sessionConfig: { systemPrompt: "s", greeting: "" } },
      { stt, tts },
    );
    const t = createPipelineTransport(opts);
    await t.start();
    await t.stop();
    await t.stop(); // a second stop() must resolve without throwing
    // Both sessions must still report closed after the repeated stop(),
    // mirroring the assertions of the single-stop test above.
    expect(stt.last()?.closed.value).toBe(true);
    expect(tts.last()?.closed.value).toBe(true);
  });
});
|
|
462
|
+
|
|
463
|
+
describe("sendUserAudio()", () => {
  test("converts aligned Uint8Array to Int16Array and calls sttSession.sendAudio", async () => {
    const stt = createFakeSttProvider();
    const { opts } = makeOpts({ sessionConfig: { systemPrompt: "s", greeting: "" } }, { stt });
    const transport = createPipelineTransport(opts);
    await transport.start();

    // Four bytes at offset 0 of their own buffer → two 16-bit samples.
    const bytes = Uint8Array.of(0x01, 0x02, 0x03, 0x04);
    transport.sendUserAudio(bytes);

    const session = stt.last();
    expect(session?.sendAudio).toHaveBeenCalledOnce();
    const sendAudioSpy = session?.sendAudio as ReturnType<typeof vi.fn>;
    // biome-ignore lint/style/noNonNullAssertion: test assertion — calledOnce proven above
    const samples = sendAudioSpy.mock.calls[0]![0] as Int16Array;
    expect(samples).toBeInstanceOf(Int16Array);
    expect(samples.length).toBe(2);
    await transport.stop();
  });

  test("handles odd-length Uint8Array by copying and truncating", async () => {
    const stt = createFakeSttProvider();
    const { opts } = makeOpts({ sessionConfig: { systemPrompt: "s", greeting: "" } }, { stt });
    const transport = createPipelineTransport(opts);
    await transport.start();

    // Three bytes cannot form two whole samples; one sample is expected.
    const bytes = new Uint8Array([1, 2, 3]);
    transport.sendUserAudio(bytes);

    const sendAudioSpy = stt.last()?.sendAudio as ReturnType<typeof vi.fn>;
    // biome-ignore lint/style/noNonNullAssertion: test assertion — audio was sent synchronously above
    const samples = sendAudioSpy.mock.calls[0]![0] as Int16Array;
    expect(samples.length).toBe(1);
    await transport.stop();
  });
});
|
|
497
|
+
|
|
498
|
+
describe("sendToolResult()", () => {
  test("is a no-op (Option A: inline tool execution)", async () => {
    const { opts } = makeOpts({ sessionConfig: { systemPrompt: "s", greeting: "" } });
    const transport = createPipelineTransport(opts);
    await transport.start();

    // Only checks that the call is tolerated; no effect is asserted.
    const submit = () => transport.sendToolResult("call-1", "result");
    expect(submit).not.toThrow();

    await transport.stop();
  });
});
|
|
507
|
+
|
|
508
|
+
describe("tool observability", () => {
  test("callbacks.onToolCall fires for each tool-call stream part", async () => {
    const executeTool = vi.fn(async () => "sunny");
    // Scripted stream: one tool call, its result, then the spoken answer.
    const script: ScriptedPart[] = [
      {
        type: "tool-call",
        toolCallId: "tc-1",
        toolName: "get_weather",
        input: JSON.stringify({ city: "SF" }),
      },
      { type: "tool-result", toolCallId: "tc-1", toolName: "get_weather", result: "sunny" },
      { type: "text", text: "It's sunny." },
    ];
    const stt = createFakeSttProvider();
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      {
        llm: createFakeLanguageModel({ script }),
        executeTool,
        toolSchemas: [
          {
            type: "function" as const,
            name: "get_weather",
            description: "Look up the weather.",
            parameters: {
              type: "object" as const,
              properties: { city: { type: "string" } },
              required: ["city"],
            },
          },
        ],
        sessionConfig: { systemPrompt: "s", greeting: "" },
      },
      { stt, tts, callbacks },
    );
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireFinal("how's the weather?");

    // Wait for the whole turn to finish before inspecting the tool callback.
    await vi.waitFor(() => {
      expect(callbacks.onReplyDone).toHaveBeenCalled();
    });
    // onToolCall is raised for observability (Option A); the third argument
    // is only checked to be some object.
    expect(callbacks.onToolCall).toHaveBeenCalledWith("tc-1", "get_weather", expect.any(Object));
    await transport.stop();
  });
});
|
|
555
|
+
|
|
556
|
+
describe("provider errors", () => {
  // Session config with an empty greeting string, rebuilt for every test.
  const emptyGreeting = () => ({ systemPrompt: "s", greeting: "" });

  // NOTE(review): the title says "terminates transport", but only the
  // onError callback is asserted here — termination itself is not checked.
  test("STT error fires onError('stt', ...) and terminates transport", async () => {
    const stt = createFakeSttProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts({ sessionConfig: emptyGreeting() }, { stt, callbacks });
    const transport = createPipelineTransport(opts);
    await transport.start();

    stt.last()?.fireError("stt_stream_error", "stt failed");

    expect(callbacks.onError).toHaveBeenCalledWith("stt", "stt failed");
    await transport.stop();
  });

  // NOTE(review): as above, termination is named in the title but not asserted.
  test("TTS error fires onError('tts', ...) and terminates transport", async () => {
    const tts = createFakeTtsProvider();
    const callbacks = makeCallbacks();
    const { opts } = makeOpts({ sessionConfig: emptyGreeting() }, { tts, callbacks });
    const transport = createPipelineTransport(opts);
    await transport.start();

    tts.last()?.fireError("tts_stream_error", "tts failed");

    expect(callbacks.onError).toHaveBeenCalledWith("tts", "tts failed");
    await transport.stop();
  });

  test("STT open failure fires onError('stt', ...) via reportOpenRejection", async () => {
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      {
        // The failing provider goes in through overrides, replacing the default fake.
        stt: createFailingSttProvider("stt_connect_failed", "connect failed"),
        sessionConfig: emptyGreeting(),
      },
      { callbacks },
    );
    const transport = createPipelineTransport(opts);

    await transport.start();

    expect(callbacks.onError).toHaveBeenCalledWith("stt", "connect failed");
    await transport.stop();
  });

  test("TTS open failure fires onError('tts', ...) via reportOpenRejection", async () => {
    const callbacks = makeCallbacks();
    const { opts } = makeOpts(
      {
        tts: createFailingTtsProvider("tts_connect_failed", "tts connect failed"),
        sessionConfig: emptyGreeting(),
      },
      { callbacks },
    );
    const transport = createPipelineTransport(opts);

    await transport.start();

    expect(callbacks.onError).toHaveBeenCalledWith("tts", "tts connect failed");
    await transport.stop();
  });

  test("when STT fails, TTS session is still opened but then immediately closed", async () => {
    const tts = createFakeTtsProvider();
    const { opts } = makeOpts(
      {
        stt: createFailingSttProvider("stt_connect_failed", "bad key"),
        tts,
        sessionConfig: emptyGreeting(),
      },
      { tts },
    );
    const transport = createPipelineTransport(opts);

    await transport.start();

    // Both providers are opened concurrently (Promise.allSettled), so a TTS
    // session exists even though STT failed; it must have been closed again.
    expect(tts.last()?.closed.value).toBe(true);
    await transport.stop();
  });
});
|
|
632
|
+
|
|
633
|
+
describe("history seeding", () => {
  test("sessionConfig.history is used as initial conversation messages", async () => {
    // Smoke test only: it confirms that start() resolves when
    // sessionConfig.history is populated. It does NOT inspect the message
    // array handed to the LLM, so the seeding itself is not verified here.
    // TODO(review): drive a turn and assert on the recorded LLM call so the
    // behaviour named in the test title is actually covered.
    const { opts } = makeOpts({
      sessionConfig: {
        systemPrompt: "s",
        greeting: "",
        history: [
          { role: "user", content: "hi" },
          { role: "assistant", content: "hello" },
        ],
      },
    });
    const t = createPipelineTransport(opts);
    await expect(t.start()).resolves.toBeUndefined();
    await t.stop();
  });
});
|
|
653
|
+
});
|