@alexkroman1/aai 1.5.1 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +17 -17
- package/CHANGELOG.md +18 -0
- package/dist/host/providers/resolve.d.ts +2 -2
- package/dist/host/providers/stt/elevenlabs.d.ts +16 -0
- package/dist/host/providers/stt/soniox.d.ts +25 -0
- package/dist/host/runtime-barrel.js +491 -81
- package/dist/sdk/providers/llm/google.d.ts +22 -0
- package/dist/sdk/providers/llm/groq.d.ts +21 -0
- package/dist/sdk/providers/llm/mistral.d.ts +21 -0
- package/dist/sdk/providers/llm/openai.d.ts +21 -0
- package/dist/sdk/providers/llm/xai.d.ts +21 -0
- package/dist/sdk/providers/llm-barrel.d.ts +5 -0
- package/dist/sdk/providers/llm-barrel.js +2 -2
- package/dist/sdk/providers/stt/elevenlabs.d.ts +36 -0
- package/dist/sdk/providers/stt/soniox.d.ts +37 -0
- package/dist/sdk/providers/stt-barrel.d.ts +2 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/soniox-DCQ3GqJq.js +69 -0
- package/dist/xai-jfQsxxPZ.js +55 -0
- package/host/providers/resolve.test.ts +110 -0
- package/host/providers/resolve.ts +60 -10
- package/host/providers/stt/elevenlabs.test.ts +200 -0
- package/host/providers/stt/elevenlabs.ts +145 -0
- package/host/providers/stt/soniox.test.ts +338 -0
- package/host/providers/stt/soniox.ts +239 -0
- package/host/transports/pipeline-transport.test.ts +91 -0
- package/host/transports/pipeline-transport.ts +53 -30
- package/host/transports/s2s-transport.test.ts +222 -2
- package/host/transports/s2s-transport.ts +176 -40
- package/package.json +7 -2
- package/sdk/providers/llm/google.ts +30 -0
- package/sdk/providers/llm/groq.ts +29 -0
- package/sdk/providers/llm/mistral.ts +29 -0
- package/sdk/providers/llm/openai.ts +29 -0
- package/sdk/providers/llm/xai.ts +29 -0
- package/sdk/providers/llm-barrel.ts +10 -0
- package/sdk/providers/stt/elevenlabs.ts +44 -0
- package/sdk/providers/stt/soniox.ts +45 -0
- package/sdk/providers/stt-barrel.ts +4 -0
- package/dist/anthropic-CcLZygAr.js +0 -10
- package/dist/assemblyai-C969QGi4.js +0 -35
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Soniox real-time STT opener (host-only).
|
|
4
|
+
*
|
|
5
|
+
* The user-facing descriptor factory (`soniox(...)`) lives in
|
|
6
|
+
* `sdk/providers/stt/soniox.ts`. This module is the host-side
|
|
7
|
+
* counterpart: it takes the descriptor options + an API key and
|
|
8
|
+
* returns an {@link SttOpener} that the pipeline session drives.
|
|
9
|
+
*
|
|
10
|
+
* Soniox's published JS client (`@soniox/speech-to-text-web`) is
|
|
11
|
+
* browser-only — it depends on `MediaRecorder` and `getUserMedia`. For
|
|
12
|
+
* server-side use we talk to the WebSocket directly:
|
|
13
|
+
* `wss://stt-rt.soniox.com/transcribe-websocket`
|
|
14
|
+
*
|
|
15
|
+
* Wire format:
|
|
16
|
+
* - First text frame: JSON config with api_key, model, audio_format,
|
|
17
|
+
* sample_rate, num_channels (and optional language hints).
|
|
18
|
+
* - Subsequent binary frames: 16-bit signed little-endian PCM audio.
|
|
19
|
+
* - Server replies: JSON `{ tokens: [{ text, is_final }] }` messages.
|
|
20
|
+
* Final tokens accumulate; non-final tokens are a rolling preview.
|
|
21
|
+
* - On error: `{ error_code, error_message }`.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { createNanoEvents, type Emitter } from "nanoevents";
|
|
25
|
+
import WebSocket from "ws";
|
|
26
|
+
import type { SonioxOptions } from "../../../sdk/providers/stt/soniox.ts";
|
|
27
|
+
import {
|
|
28
|
+
makeSttError,
|
|
29
|
+
type SttEvents,
|
|
30
|
+
type SttOpener,
|
|
31
|
+
type SttOpenOptions,
|
|
32
|
+
type SttSession,
|
|
33
|
+
} from "../../../sdk/providers.ts";
|
|
34
|
+
|
|
35
|
+
const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
36
|
+
|
|
37
|
+
/** Soniox token shape from the wire protocol. */
|
|
38
|
+
interface SonioxToken {
|
|
39
|
+
text?: string;
|
|
40
|
+
is_final?: boolean;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
interface SonioxResponse {
|
|
44
|
+
tokens?: SonioxToken[];
|
|
45
|
+
finished?: boolean;
|
|
46
|
+
error_code?: number;
|
|
47
|
+
error_message?: string;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
 * Split one batch of Soniox tokens by finality.
 *
 * Tokens with missing or empty text are ignored. The text of every
 * remaining final token is handed to `appendFinal` in arrival order; the
 * text of the remaining non-final tokens is joined, in order, and returned
 * as the rolling preview string (empty when the batch has no non-finals).
 */
function consumeTokens(tokens: SonioxToken[], appendFinal: (text: string) => void): string {
  const previewParts: string[] = [];
  tokens.forEach((token) => {
    const piece = token.text ?? "";
    if (piece === "") return;
    if (token.is_final) {
      appendFinal(piece);
      return;
    }
    previewParts.push(piece);
  });
  return previewParts.join("");
}
|
|
67
|
+
|
|
68
|
+
/**
 * Wait for a freshly constructed WebSocket to finish connecting.
 *
 * Resolves on the socket's `open` event and rejects with the emitted error
 * on its first `error` event. Whichever event fires first also detaches the
 * listener for the other, so nothing registered here stays on the socket.
 */
function waitForOpen(ws: WebSocket): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    function handleOpen(): void {
      ws.off("error", handleFailure);
      resolve();
    }

    function handleFailure(failure: Error): void {
      ws.off("open", handleOpen);
      reject(failure);
    }

    ws.once("open", handleOpen);
    ws.once("error", handleFailure);
  });
}
|
|
83
|
+
|
|
84
|
+
/**
 * Assemble the JSON-serialisable config object sent as the first text frame
 * of a Soniox session.
 *
 * The audio format is fixed to mono `pcm_s16le` at `sampleRate`; the model
 * falls back to `"stt-rt-v3"` when `opts.model` is unset. `language_hints`
 * is included (as a copy of `opts.languageHints`) only when at least one
 * hint was supplied.
 */
function buildConfigFrame(
  apiKey: string,
  opts: SonioxOptions,
  sampleRate: number,
): Record<string, unknown> {
  const hints = opts.languageHints;
  const hintFields = hints && hints.length > 0 ? { language_hints: [...hints] } : {};
  return {
    api_key: apiKey,
    model: opts.model ?? "stt-rt-v3",
    audio_format: "pcm_s16le",
    sample_rate: sampleRate,
    num_channels: 1,
    ...hintFields,
  };
}
|
|
102
|
+
|
|
103
|
+
/**
 * Decode one incoming WebSocket frame as a {@link SonioxResponse}.
 *
 * The frame is stringified and run through `JSON.parse`; the result is cast,
 * not validated. Returns `null` when stringifying or parsing throws.
 */
function parseFrame(raw: WebSocket.RawData): SonioxResponse | null {
  let parsed: SonioxResponse | null;
  try {
    const text = raw.toString();
    parsed = JSON.parse(text) as SonioxResponse;
  } catch {
    parsed = null;
  }
  return parsed;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Handle one parsed server response, emitting `error`, `final`, and
 * `partial` events onto `emitter`.
 *
 * - A response carrying `error_code` emits a single `stt_stream_error` and
 *   nothing else.
 * - Final token text is appended to `finalBuf.value`. The caller owns
 *   `finalBuf` so it survives across messages and can be flushed on close.
 * - The buffered final text is emitted as one `final` event (and the buffer
 *   cleared) when this response contains non-final text or is marked
 *   `finished`.
 * - Non-final text, if any, is emitted afterwards as a `partial` event.
 *
 * @param res - Parsed server message.
 * @param emitter - Event sink for the STT session.
 * @param finalBuf - Mutable accumulator for not-yet-emitted final text.
 */
function handleResponse(
  res: SonioxResponse,
  emitter: Emitter<SttEvents>,
  finalBuf: { value: string },
): void {
  if (res.error_code !== undefined) {
    emitter.emit(
      "error",
      makeSttError(
        "stt_stream_error",
        `Soniox error ${res.error_code}: ${res.error_message ?? "unknown"}`,
      ),
    );
    return;
  }
  const tokens = res.tokens ?? [];
  // A token-less message is a no-op unless it is the terminal `finished`
  // message: that one must still reach the flush below, otherwise finals
  // buffered from earlier messages would never be emitted.
  if (tokens.length === 0 && !res.finished) return;
  const nonFinal = consumeTokens(tokens, (text) => {
    finalBuf.value += text;
  });
  // Flush an accumulated final whenever a non-final preview begins (or when
  // the session finishes). This batches contiguous final tokens into a
  // single `final` event, matching what downstream pipeline session code
  // expects.
  if (finalBuf.value.length > 0 && (nonFinal.length > 0 || res.finished)) {
    emitter.emit("final", finalBuf.value);
    finalBuf.value = "";
  }
  if (nonFinal.length > 0) {
    emitter.emit("partial", nonFinal);
  }
}
|
|
149
|
+
|
|
150
|
+
/**
 * Build an {@link SttOpener} from resolved Soniox descriptor options.
 *
 * The returned opener's `open()`:
 * 1. resolves the API key from `openOpts.apiKey`, falling back to the
 *    `SONIOX_API_KEY` environment variable, and throws `stt_auth_failed`
 *    when neither is set;
 * 2. connects to the Soniox WebSocket endpoint, throwing
 *    `stt_connect_failed` if the socket errors before opening;
 * 3. sends the JSON config frame, then wires socket events onto the
 *    session's `partial` / `final` / `error` events;
 * 4. closes the session when `openOpts.signal` aborts (immediately if it is
 *    already aborted).
 *
 * @param opts - Soniox descriptor options (model, language hints).
 * @returns An opener whose sessions stream PCM audio to Soniox.
 */
export function openSoniox(opts: SonioxOptions = {}): SttOpener {
  return {
    name: "soniox",
    async open(openOpts: SttOpenOptions): Promise<SttSession> {
      const apiKey = openOpts.apiKey || process.env.SONIOX_API_KEY;
      if (!apiKey) {
        throw makeSttError(
          "stt_auth_failed",
          "Soniox STT: missing API key. Set SONIOX_API_KEY in the agent env.",
        );
      }

      const ws = new WebSocket(SONIOX_WS_URL);
      const emitter: Emitter<SttEvents> = createNanoEvents<SttEvents>();
      let closed = false;
      // Soniox emits final tokens once and non-final tokens repeatedly. We
      // accumulate finals into a buffer flushed on each non-final boundary
      // and forward non-finals as the rolling partial. Mirrors how the
      // existing AssemblyAI/Deepgram openers map provider-specific token
      // streams onto the SttEvents `partial`/`final` contract.
      const finalBuf = { value: "" };

      try {
        await waitForOpen(ws);
      } catch (cause) {
        throw makeSttError(
          "stt_connect_failed",
          `Soniox STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`,
        );
      }

      // Initial config frame (text). Sent first; audio binary frames follow.
      ws.send(JSON.stringify(buildConfigFrame(apiKey, opts, openOpts.sampleRate)));

      ws.on("message", (raw: WebSocket.RawData) => {
        if (closed) return;
        const res = parseFrame(raw);
        if (res) handleResponse(res, emitter, finalBuf);
      });

      ws.on("error", (err: Error) => {
        if (closed) return;
        emitter.emit("error", makeSttError("stt_stream_error", err.message ?? String(err)));
      });

      ws.on("close", (code: number) => {
        if (closed) return;
        // 1000 = normal closure.
        if (code !== 1000) {
          emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
        }
      });

      const close = async (): Promise<void> => {
        if (closed) return;
        closed = true;
        // Detach from the abort signal so a manually closed session does not
        // leave a listener (and this closure) attached to a long-lived signal.
        // Removing a listener that was never added is a no-op.
        openOpts.signal.removeEventListener("abort", onAbort);
        // Flush any trailing final tokens that arrived right before close.
        if (finalBuf.value.length > 0) {
          emitter.emit("final", finalBuf.value);
          finalBuf.value = "";
        }
        try {
          ws.close();
        } catch {
          // Swallow: caller has already decided to tear down.
        }
      };

      // Named so `close` can unregister exactly this listener.
      const onAbort = (): void => {
        void close();
      };

      if (openOpts.signal.aborted) {
        void close();
      } else {
        openOpts.signal.addEventListener("abort", onAbort, { once: true });
      }

      return {
        sendAudio(pcm: Int16Array) {
          // Drop audio silently once closed or while the socket is not open.
          if (closed || ws.readyState !== WebSocket.OPEN) return;
          // Send a byte view over the same underlying buffer rather than
          // copying the samples here.
          ws.send(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength), { binary: true });
        },
        on(event, fn) {
          return emitter.on(event, fn);
        },
        close,
      };
    },
  };
}
|
|
@@ -210,6 +210,97 @@ describe("PipelineTransport", () => {
|
|
|
210
210
|
await t.stop();
|
|
211
211
|
});
|
|
212
212
|
|
|
213
|
+
test("inserts a separator between text segments split by a mid-turn tool call", async () => {
|
|
214
|
+
// Multi-step turn: step 1 ends with a text segment + tool-call, step 2
|
|
215
|
+
// begins with a fresh text segment. Without the fix, the deltas fuse
|
|
216
|
+
// into "...up.Got it" — both in the transcript and in TTS input.
|
|
217
|
+
const stt = createFakeSttProvider();
|
|
218
|
+
const tts = createFakeTtsProvider();
|
|
219
|
+
const callbacks = makeCallbacks();
|
|
220
|
+
const executeTool = vi.fn(async () => "result");
|
|
221
|
+
const { opts } = makeOpts(
|
|
222
|
+
{
|
|
223
|
+
llm: createFakeLanguageModel({
|
|
224
|
+
steps: [
|
|
225
|
+
[
|
|
226
|
+
{ type: "text", text: "Let me look that up." },
|
|
227
|
+
{ type: "tool-call", toolCallId: "tc-1", toolName: "lookup", input: "{}" },
|
|
228
|
+
],
|
|
229
|
+
[{ type: "text", text: "Got it. Here's the answer." }],
|
|
230
|
+
],
|
|
231
|
+
}),
|
|
232
|
+
executeTool,
|
|
233
|
+
toolSchemas: [
|
|
234
|
+
{
|
|
235
|
+
type: "function" as const,
|
|
236
|
+
name: "lookup",
|
|
237
|
+
description: "Look something up.",
|
|
238
|
+
parameters: { type: "object" as const, properties: {}, required: [] },
|
|
239
|
+
},
|
|
240
|
+
],
|
|
241
|
+
sessionConfig: { systemPrompt: "s", greeting: "" },
|
|
242
|
+
},
|
|
243
|
+
{ stt, tts, callbacks },
|
|
244
|
+
);
|
|
245
|
+
const t = createPipelineTransport(opts);
|
|
246
|
+
await t.start();
|
|
247
|
+
stt.last()?.fireFinal("look it up");
|
|
248
|
+
await vi.waitFor(() => {
|
|
249
|
+
expect(callbacks.onAgentTranscript).toHaveBeenCalled();
|
|
250
|
+
});
|
|
251
|
+
expect(callbacks.onAgentTranscript).toHaveBeenCalledWith(
|
|
252
|
+
"Let me look that up. Got it. Here's the answer.",
|
|
253
|
+
false,
|
|
254
|
+
);
|
|
255
|
+
expect(tts.last()?.textChunks.join("")).toBe(
|
|
256
|
+
"Let me look that up. Got it. Here's the answer.",
|
|
257
|
+
);
|
|
258
|
+
await t.stop();
|
|
259
|
+
});
|
|
260
|
+
|
|
261
|
+
test("does not double-space when a segment boundary already carries whitespace", async () => {
|
|
262
|
+
// Trailing space on segment 1 — we must not insert an extra space.
|
|
263
|
+
const stt = createFakeSttProvider();
|
|
264
|
+
const tts = createFakeTtsProvider();
|
|
265
|
+
const callbacks = makeCallbacks();
|
|
266
|
+
const executeTool = vi.fn(async () => "result");
|
|
267
|
+
const { opts } = makeOpts(
|
|
268
|
+
{
|
|
269
|
+
llm: createFakeLanguageModel({
|
|
270
|
+
steps: [
|
|
271
|
+
[
|
|
272
|
+
{ type: "text", text: "First sentence. " },
|
|
273
|
+
{ type: "tool-call", toolCallId: "tc-1", toolName: "lookup", input: "{}" },
|
|
274
|
+
],
|
|
275
|
+
[{ type: "text", text: "Second sentence." }],
|
|
276
|
+
],
|
|
277
|
+
}),
|
|
278
|
+
executeTool,
|
|
279
|
+
toolSchemas: [
|
|
280
|
+
{
|
|
281
|
+
type: "function" as const,
|
|
282
|
+
name: "lookup",
|
|
283
|
+
description: "Look something up.",
|
|
284
|
+
parameters: { type: "object" as const, properties: {}, required: [] },
|
|
285
|
+
},
|
|
286
|
+
],
|
|
287
|
+
sessionConfig: { systemPrompt: "s", greeting: "" },
|
|
288
|
+
},
|
|
289
|
+
{ stt, tts, callbacks },
|
|
290
|
+
);
|
|
291
|
+
const t = createPipelineTransport(opts);
|
|
292
|
+
await t.start();
|
|
293
|
+
stt.last()?.fireFinal("look it up");
|
|
294
|
+
await vi.waitFor(() => {
|
|
295
|
+
expect(callbacks.onAgentTranscript).toHaveBeenCalled();
|
|
296
|
+
});
|
|
297
|
+
expect(callbacks.onAgentTranscript).toHaveBeenCalledWith(
|
|
298
|
+
"First sentence. Second sentence.",
|
|
299
|
+
false,
|
|
300
|
+
);
|
|
301
|
+
await t.stop();
|
|
302
|
+
});
|
|
303
|
+
|
|
213
304
|
test("TTS audio event is forwarded to callbacks.onAudioChunk as Uint8Array", async () => {
|
|
214
305
|
const stt = createFakeSttProvider();
|
|
215
306
|
const tts = createFakeTtsProvider();
|
|
@@ -217,9 +217,10 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
|
|
|
217
217
|
stopWhen: stepCountIs(maxSteps),
|
|
218
218
|
abortSignal: ctl.signal,
|
|
219
219
|
});
|
|
220
|
+
const handlePart = makeStreamPartHandler(onDelta);
|
|
220
221
|
for await (const part of result.fullStream) {
|
|
221
222
|
if (ctl.signal.aborted) break;
|
|
222
|
-
|
|
223
|
+
handlePart(part);
|
|
223
224
|
}
|
|
224
225
|
} catch (err: unknown) {
|
|
225
226
|
if (!ctl.signal.aborted) {
|
|
@@ -230,8 +231,33 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
|
|
|
230
231
|
}
|
|
231
232
|
}
|
|
232
233
|
|
|
233
|
-
|
|
234
|
-
|
|
234
|
+
/**
|
|
235
|
+
* Stateful per-turn handler for `streamText` `fullStream` parts.
|
|
236
|
+
*
|
|
237
|
+
* Tracks text-segment boundaries so that consecutive segments — which the
|
|
238
|
+
* Vercel SDK emits across tool-call hops as `text-end` followed later by a
|
|
239
|
+
* fresh `text-start` — don't fuse into "...up.Got it" when concatenated for
|
|
240
|
+
* the transcript or streamed to TTS. When a boundary is crossed and neither
|
|
241
|
+
* side carries whitespace, a single space is injected into both streams.
|
|
242
|
+
*/
|
|
243
|
+
function makeStreamPartHandler(onDelta: (delta: string) => void) {
|
|
244
|
+
let pendingSeparator = false;
|
|
245
|
+
let lastChar = "";
|
|
246
|
+
|
|
247
|
+
function emitText(delta: string): void {
|
|
248
|
+
if (delta.length === 0) return;
|
|
249
|
+
let out = delta;
|
|
250
|
+
if (pendingSeparator) {
|
|
251
|
+
pendingSeparator = false;
|
|
252
|
+
const boundaryHasSpace = lastChar === "" || /\s/.test(lastChar) || /^\s/.test(out);
|
|
253
|
+
if (!boundaryHasSpace) out = ` ${out}`;
|
|
254
|
+
}
|
|
255
|
+
lastChar = out.slice(-1);
|
|
256
|
+
onDelta(out);
|
|
257
|
+
ttsSession?.sendText(out);
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
return function handlePart(part: {
|
|
235
261
|
readonly type: string;
|
|
236
262
|
readonly text?: string;
|
|
237
263
|
readonly input?: unknown;
|
|
@@ -239,34 +265,31 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
|
|
|
239
265
|
readonly toolCallId?: string;
|
|
240
266
|
readonly toolName?: string;
|
|
241
267
|
readonly error?: unknown;
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
return;
|
|
268
|
+
}): void {
|
|
269
|
+
switch (part.type) {
|
|
270
|
+
case "text-delta":
|
|
271
|
+
emitText(part.text ?? "");
|
|
272
|
+
return;
|
|
273
|
+
case "text-end":
|
|
274
|
+
pendingSeparator = true;
|
|
275
|
+
return;
|
|
276
|
+
case "tool-call": {
|
|
277
|
+
// Option A: fire callbacks.onToolCall for observability only.
|
|
278
|
+
// Actual execution happens inline via toVercelTools.
|
|
279
|
+
const input = (part.input ?? {}) as Record<string, unknown>;
|
|
280
|
+
callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
283
|
+
case "error": {
|
|
284
|
+
const msg = errorMessage(part.error);
|
|
285
|
+
log.error("LLM stream error", { message: msg, sid: opts.sid });
|
|
286
|
+
emitError("llm", msg);
|
|
287
|
+
return;
|
|
288
|
+
}
|
|
289
|
+
default:
|
|
290
|
+
return;
|
|
266
291
|
}
|
|
267
|
-
|
|
268
|
-
return;
|
|
269
|
-
}
|
|
292
|
+
};
|
|
270
293
|
}
|
|
271
294
|
|
|
272
295
|
// ---- TTS flush ------------------------------------------------------------
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
import { describe, expect, test, vi } from "vitest";
|
|
2
|
-
import {
|
|
1
|
+
import { afterEach, describe, expect, test, vi } from "vitest";
|
|
2
|
+
import { makeMockHandle, silentLogger } from "../_test-utils.ts";
|
|
3
|
+
import type { S2sCallbacks, S2sHandle } from "../s2s.ts";
|
|
4
|
+
import { _internals, createS2sTransport } from "./s2s-transport.ts";
|
|
3
5
|
import type { TransportCallbacks } from "./types.ts";
|
|
4
6
|
|
|
5
7
|
function makeCallbacks(): TransportCallbacks {
|
|
@@ -15,6 +17,7 @@ function makeCallbacks(): TransportCallbacks {
|
|
|
15
17
|
onError: vi.fn(),
|
|
16
18
|
onSpeechStarted: vi.fn(),
|
|
17
19
|
onSpeechStopped: vi.fn(),
|
|
20
|
+
onSessionReady: vi.fn(),
|
|
18
21
|
};
|
|
19
22
|
}
|
|
20
23
|
|
|
@@ -54,3 +57,220 @@ describe("S2sTransport", () => {
|
|
|
54
57
|
expect(close).toHaveBeenCalled();
|
|
55
58
|
});
|
|
56
59
|
});
|
|
60
|
+
|
|
61
|
+
// ─── Reconnect tests ────────────────────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
/**
 * Replace `_internals.connectS2s` with a spy that records every connection
 * attempt instead of opening a socket.
 *
 * Each call to the spy appends the `S2sCallbacks` it was given to
 * `capturedCallbacks` and a fresh mock handle to `handles` (same index), so
 * a test can drive upstream events for the Nth connection and inspect the
 * handle the transport received for it. Also returns a fresh set of
 * transport-level callback mocks and the spy itself.
 */
function setupSpiedTransport(): {
  callbacks: TransportCallbacks;
  handles: S2sHandle[];
  capturedCallbacks: S2sCallbacks[];
  spy: ReturnType<typeof vi.spyOn>;
} {
  const handles: S2sHandle[] = [];
  const capturedCallbacks: S2sCallbacks[] = [];

  async function fakeConnect(connectOpts: import("../s2s.ts").ConnectS2sOptions) {
    capturedCallbacks.push(connectOpts.callbacks);
    const handle = makeMockHandle();
    handles.push(handle);
    return handle;
  }

  const spy = vi.spyOn(_internals, "connectS2s").mockImplementation(fakeConnect);
  const callbacks = makeCallbacks();
  return { callbacks, handles, capturedCallbacks, spy };
}
|
|
87
|
+
|
|
88
|
+
describe("S2sTransport reconnect", () => {
  afterEach(() => {
    vi.restoreAllMocks();
  });

  /**
   * Create a transport with the fixture options shared by every test in this
   * suite, start it, and return it. Must be called after
   * `setupSpiedTransport()` so `start()` goes through the spied connectS2s.
   */
  async function startTransport(callbacks: TransportCallbacks) {
    const t = createS2sTransport({
      apiKey: "k",
      s2sConfig: { wssUrl: "wss://fake", inputSampleRate: 16_000, outputSampleRate: 24_000 },
      sessionConfig: { systemPrompt: "test", tools: [] },
      toolSchemas: [],
      callbacks,
      sid: "sid-1",
      agent: "a",
      logger: silentLogger,
    });
    await t.start();
    return t;
  }

  test("attempts session.resume on transient close (1005) inside the resume window", async () => {
    const { callbacks, handles, capturedCallbacks } = setupSpiedTransport();
    await startTransport(callbacks);

    // Establish session, start a reply, then drop the socket.
    const cb1 = capturedCallbacks[0];
    if (!cb1) throw new Error("expected first callbacks");
    cb1.onSessionReady("sess_abc");
    cb1.onReplyStarted("rep_1");
    cb1.onClose(1005, "");

    // Wait for the async resume() to fire connectS2s a second time.
    await vi.waitFor(() => {
      expect(handles.length).toBe(2);
    });

    // The new handle should have received resumeSession with the prior id.
    const newHandle = handles[1];
    if (!newHandle) throw new Error("expected new handle");
    expect(newHandle.resumeSession).toHaveBeenCalledWith("sess_abc");

    // The in-flight reply was unblocked via onCancelled, NOT a fatal error.
    expect(callbacks.onCancelled).toHaveBeenCalledOnce();
    expect(callbacks.onError).not.toHaveBeenCalled();
  });

  test("does NOT reconnect on fatal close codes (1008 unauthorized)", async () => {
    const { callbacks, handles, capturedCallbacks } = setupSpiedTransport();
    await startTransport(callbacks);

    const cb1 = capturedCallbacks[0];
    if (!cb1) throw new Error("expected first callbacks");
    cb1.onSessionReady("sess_abc");
    cb1.onReplyStarted("rep_1");
    cb1.onClose(1008, "unauthorized");

    // No reconnect — only one connectS2s call total.
    await new Promise((resolve) => setTimeout(resolve, 5));
    expect(handles.length).toBe(1);
    // Fatal error surfaces, since a reply was in flight.
    expect(callbacks.onError).toHaveBeenCalledWith(
      "connection",
      expect.stringContaining("S2S closed mid-reply"),
    );
  });

  test("does NOT reconnect when stop() was called", async () => {
    const { callbacks, handles, capturedCallbacks } = setupSpiedTransport();
    const t = await startTransport(callbacks);

    const cb1 = capturedCallbacks[0];
    if (!cb1) throw new Error("expected first callbacks");
    cb1.onSessionReady("sess_abc");
    await t.stop();

    // Simulate the upstream's close arriving after stop() — it should be
    // treated as a clean shutdown, not a transient drop worth resuming.
    cb1.onClose(1005, "");

    await new Promise((resolve) => setTimeout(resolve, 5));
    expect(handles.length).toBe(1);
    expect(callbacks.onError).not.toHaveBeenCalled();
  });

  test("surfaces resume failure when the resumed socket also closes", async () => {
    const { callbacks, handles, capturedCallbacks } = setupSpiedTransport();
    await startTransport(callbacks);

    capturedCallbacks[0]?.onSessionReady("sess_abc");
    capturedCallbacks[0]?.onReplyStarted("rep_1");
    capturedCallbacks[0]?.onClose(1005, "");

    await vi.waitFor(() => expect(handles.length).toBe(2));

    // The resume socket also drops before its session.ready arrives.
    const cb2 = capturedCallbacks[1];
    if (!cb2) throw new Error("expected resume callbacks");
    cb2.onClose(1006, "");

    expect(callbacks.onError).toHaveBeenCalledWith(
      "connection",
      expect.stringContaining("resume failed"),
    );
  });

  test("surfaces resume failure when server reports session_not_found", async () => {
    const { callbacks, handles, capturedCallbacks } = setupSpiedTransport();
    await startTransport(callbacks);

    capturedCallbacks[0]?.onSessionReady("sess_abc");
    capturedCallbacks[0]?.onClose(1005, "");

    await vi.waitFor(() => expect(handles.length).toBe(2));

    capturedCallbacks[1]?.onSessionExpired();

    expect(callbacks.onError).toHaveBeenCalledWith(
      "connection",
      expect.stringContaining("session expired"),
    );
  });

  test("after a successful resume, a later transient drop also resumes", async () => {
    const { callbacks, handles, capturedCallbacks } = setupSpiedTransport();
    await startTransport(callbacks);

    // First connection establishes, drops, resumes, becomes ready again.
    capturedCallbacks[0]?.onSessionReady("sess_abc");
    capturedCallbacks[0]?.onClose(1005, "");
    await vi.waitFor(() => expect(handles.length).toBe(2));
    capturedCallbacks[1]?.onSessionReady("sess_abc");

    // Second drop — should trigger another resume attempt.
    capturedCallbacks[1]?.onClose(1006, "");
    await vi.waitFor(() => expect(handles.length).toBe(3));
    expect(handles[2]?.resumeSession).toHaveBeenCalledWith("sess_abc");
  });
});
|