@alexkroman1/aai 1.5.1 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/.turbo/turbo-build.log +17 -17
  2. package/CHANGELOG.md +18 -0
  3. package/dist/host/providers/resolve.d.ts +2 -2
  4. package/dist/host/providers/stt/elevenlabs.d.ts +16 -0
  5. package/dist/host/providers/stt/soniox.d.ts +25 -0
  6. package/dist/host/runtime-barrel.js +491 -81
  7. package/dist/sdk/providers/llm/google.d.ts +22 -0
  8. package/dist/sdk/providers/llm/groq.d.ts +21 -0
  9. package/dist/sdk/providers/llm/mistral.d.ts +21 -0
  10. package/dist/sdk/providers/llm/openai.d.ts +21 -0
  11. package/dist/sdk/providers/llm/xai.d.ts +21 -0
  12. package/dist/sdk/providers/llm-barrel.d.ts +5 -0
  13. package/dist/sdk/providers/llm-barrel.js +2 -2
  14. package/dist/sdk/providers/stt/elevenlabs.d.ts +36 -0
  15. package/dist/sdk/providers/stt/soniox.d.ts +37 -0
  16. package/dist/sdk/providers/stt-barrel.d.ts +2 -0
  17. package/dist/sdk/providers/stt-barrel.js +2 -2
  18. package/dist/soniox-DCQ3GqJq.js +69 -0
  19. package/dist/xai-jfQsxxPZ.js +55 -0
  20. package/host/providers/resolve.test.ts +110 -0
  21. package/host/providers/resolve.ts +60 -10
  22. package/host/providers/stt/elevenlabs.test.ts +200 -0
  23. package/host/providers/stt/elevenlabs.ts +145 -0
  24. package/host/providers/stt/soniox.test.ts +338 -0
  25. package/host/providers/stt/soniox.ts +239 -0
  26. package/host/transports/pipeline-transport.test.ts +91 -0
  27. package/host/transports/pipeline-transport.ts +53 -30
  28. package/host/transports/s2s-transport.test.ts +222 -2
  29. package/host/transports/s2s-transport.ts +176 -40
  30. package/package.json +7 -2
  31. package/sdk/providers/llm/google.ts +30 -0
  32. package/sdk/providers/llm/groq.ts +29 -0
  33. package/sdk/providers/llm/mistral.ts +29 -0
  34. package/sdk/providers/llm/openai.ts +29 -0
  35. package/sdk/providers/llm/xai.ts +29 -0
  36. package/sdk/providers/llm-barrel.ts +10 -0
  37. package/sdk/providers/stt/elevenlabs.ts +44 -0
  38. package/sdk/providers/stt/soniox.ts +45 -0
  39. package/sdk/providers/stt-barrel.ts +4 -0
  40. package/dist/anthropic-CcLZygAr.js +0 -10
  41. package/dist/assemblyai-C969QGi4.js +0 -35
@@ -0,0 +1,239 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ /**
3
+ * Soniox real-time STT opener (host-only).
4
+ *
5
+ * The user-facing descriptor factory (`soniox(...)`) lives in
6
+ * `sdk/providers/stt/soniox.ts`. This module is the host-side
7
+ * counterpart: it takes the descriptor options + an API key and
8
+ * returns an {@link SttOpener} that the pipeline session drives.
9
+ *
10
+ * Soniox's published JS client (`@soniox/speech-to-text-web`) is
11
+ * browser-only — it depends on `MediaRecorder` and `getUserMedia`. For
12
+ * server-side use we talk to the WebSocket directly:
13
+ * `wss://stt-rt.soniox.com/transcribe-websocket`
14
+ *
15
+ * Wire format:
16
+ * - First text frame: JSON config with api_key, model, audio_format,
17
+ * sample_rate, num_channels (and optional language hints).
18
+ * - Subsequent binary frames: 16-bit signed little-endian PCM audio.
19
+ * - Server replies: JSON `{ tokens: [{ text, is_final }] }` messages.
20
+ * Final tokens accumulate; non-final tokens are a rolling preview.
21
+ * - On error: `{ error_code, error_message }`.
22
+ */
23
+
24
+ import { createNanoEvents, type Emitter } from "nanoevents";
25
+ import WebSocket from "ws";
26
+ import type { SonioxOptions } from "../../../sdk/providers/stt/soniox.ts";
27
+ import {
28
+ makeSttError,
29
+ type SttEvents,
30
+ type SttOpener,
31
+ type SttOpenOptions,
32
+ type SttSession,
33
+ } from "../../../sdk/providers.ts";
34
+
35
+ const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
36
+
/**
 * One token as it appears inside a Soniox server message.
 *
 * Both fields are declared optional, so consumers must tolerate their absence.
 */
interface SonioxToken {
  /** Token text. Tokens whose text is missing or empty carry nothing to transcribe. */
  text?: string;
  /** Truthy once the token is final; otherwise it is part of the rolling preview. */
  is_final?: boolean;
}
42
+
/**
 * Shape of a JSON message received from the Soniox WebSocket.
 *
 * A message either carries a token batch (optionally flagged `finished`) or
 * reports a failure through `error_code` / `error_message`.
 */
interface SonioxResponse {
  /** Token batch for this message; may be absent or empty. */
  tokens?: SonioxToken[];
  /** Set when the server marks the session as finished. */
  finished?: boolean;
  /** Present on error messages. */
  error_code?: number;
  /** Human-readable description accompanying `error_code`. */
  error_message?: string;
}
49
+
/**
 * Split one batch of Soniox tokens into finals and preview text.
 *
 * Every final token's text is handed to `appendFinal` in batch order; the
 * text of all non-final tokens is joined and returned as the rolling preview.
 * Tokens with missing or empty text are ignored.
 *
 * @param tokens Token batch from a single server message.
 * @param appendFinal Sink invoked once per non-empty final token.
 * @returns The concatenated non-final text ("" when there is none).
 */
function consumeTokens(tokens: SonioxToken[], appendFinal: (text: string) => void): string {
  const previewParts: string[] = [];
  tokens.forEach((token) => {
    const piece = token.text ?? "";
    if (piece.length === 0) return;
    if (token.is_final) {
      appendFinal(piece);
      return;
    }
    previewParts.push(piece);
  });
  return previewParts.join("");
}
67
+
/**
 * Wait for a WebSocket handshake to settle.
 *
 * Resolves when the socket emits `open`, rejects with the emitted error when
 * it emits `error` first. Whichever listener fires removes the other, so no
 * listener installed here outlives the returned promise.
 */
function waitForOpen(ws: WebSocket): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    function handleOpen(): void {
      ws.off("error", handleError);
      resolve();
    }
    function handleError(err: Error): void {
      ws.off("open", handleOpen);
      reject(err);
    }
    ws.once("open", handleOpen);
    ws.once("error", handleError);
  });
}
83
+
/**
 * Assemble the JSON config object sent as the first text frame of a session.
 *
 * Audio is always declared as mono `pcm_s16le`; the model defaults to
 * `stt-rt-v3` when the descriptor does not name one. `language_hints` is only
 * included when at least one hint was supplied, and is a copy of the caller's
 * list.
 *
 * @param apiKey Soniox API key placed in the frame.
 * @param opts Descriptor options (`model`, `languageHints`).
 * @param sampleRate Sample rate reported to the server.
 */
function buildConfigFrame(
  apiKey: string,
  opts: SonioxOptions,
  sampleRate: number,
): Record<string, unknown> {
  const hints = opts.languageHints;
  return {
    api_key: apiKey,
    model: opts.model ?? "stt-rt-v3",
    audio_format: "pcm_s16le",
    sample_rate: sampleRate,
    num_channels: 1,
    ...(hints && hints.length > 0 ? { language_hints: Array.from(hints) } : {}),
  };
}
102
+
/**
 * Decode one WebSocket frame into a {@link SonioxResponse}.
 *
 * Returns `null` for anything that is not a usable response: invalid JSON,
 * JSON that is not a plain object (`42`, `null`, arrays, strings), or an
 * object whose `tokens` field is present but is not an array of objects.
 * Rejecting those here keeps the token loop downstream from throwing inside
 * the socket's `message` handler.
 *
 * All `RawData` variants are decoded as UTF-8: a single `Buffer`, a list of
 * `Buffer` fragments (concatenated, not comma-joined), or an `ArrayBuffer`.
 */
function parseFrame(raw: WebSocket.RawData): SonioxResponse | null {
  let text: string;
  if (Array.isArray(raw)) {
    text = Buffer.concat(raw).toString("utf8");
  } else if (raw instanceof ArrayBuffer) {
    text = Buffer.from(raw).toString("utf8");
  } else {
    text = raw.toString("utf8");
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    return null;
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return null;

  // `tokens` may be absent/null, but when present it must be iterable objects.
  const tokens = (parsed as { tokens?: unknown }).tokens;
  if (tokens != null) {
    if (!Array.isArray(tokens)) return null;
    if (!tokens.every((tok) => typeof tok === "object" && tok !== null)) return null;
  }
  return parsed as SonioxResponse;
}
111
+
/**
 * Translate one server response into `error`, `final`, and `partial` events.
 *
 * - An error response (`error_code` set) emits a single `stt_stream_error`
 *   and nothing else.
 * - Final tokens are appended to `finalBuf`; non-final tokens form the
 *   rolling preview.
 * - The buffered final text is emitted as one `final` event as soon as a
 *   non-final preview appears in the same message, or when the message is
 *   flagged `finished` — including a `finished` message that carries no
 *   tokens at all, so trailing finals are not left stranded in the buffer.
 * - A non-empty preview is emitted as `partial` after any `final`.
 *
 * @param res Parsed server message.
 * @param emitter Event sink for the STT session.
 * @param finalBuf Caller-owned accumulator; it survives across messages so
 *   the caller can also flush it on close.
 */
function handleResponse(
  res: SonioxResponse,
  emitter: Emitter<SttEvents>,
  finalBuf: { value: string },
): void {
  if (res.error_code !== undefined) {
    emitter.emit(
      "error",
      makeSttError(
        "stt_stream_error",
        `Soniox error ${res.error_code}: ${res.error_message ?? "unknown"}`,
      ),
    );
    return;
  }

  // Treat a missing or malformed token list as empty rather than bailing out:
  // the `finished` flag below must still be honoured.
  const tokens = Array.isArray(res.tokens) ? res.tokens : [];
  const nonFinal =
    tokens.length > 0
      ? consumeTokens(tokens, (text) => {
          finalBuf.value += text;
        })
      : "";

  // Contiguous final tokens are batched into a single `final` event, which
  // is what downstream pipeline session code expects.
  if (finalBuf.value.length > 0 && (nonFinal.length > 0 || res.finished)) {
    emitter.emit("final", finalBuf.value);
    finalBuf.value = "";
  }
  if (nonFinal.length > 0) {
    emitter.emit("partial", nonFinal);
  }
}
149
+
/**
 * Create the host-side {@link SttOpener} for Soniox.
 *
 * The returned opener's `open()`:
 *  1. resolves the API key from `openOpts.apiKey`, falling back to the
 *     `SONIOX_API_KEY` environment variable, and throws `stt_auth_failed`
 *     when neither is set;
 *  2. connects to the Soniox WebSocket and throws `stt_connect_failed` if the
 *     handshake errors;
 *  3. sends the JSON config frame, then wires socket events onto the
 *     session's `partial` / `final` / `error` events;
 *  4. closes the session when `openOpts.signal` aborts (immediately if it is
 *     already aborted).
 *
 * @param opts Resolved Soniox descriptor options.
 */
export function openSoniox(opts: SonioxOptions = {}): SttOpener {
  async function open(openOpts: SttOpenOptions): Promise<SttSession> {
    const apiKey = openOpts.apiKey || process.env.SONIOX_API_KEY;
    if (!apiKey) {
      throw makeSttError(
        "stt_auth_failed",
        "Soniox STT: missing API key. Set SONIOX_API_KEY in the agent env.",
      );
    }

    const socket = new WebSocket(SONIOX_WS_URL);
    const events: Emitter<SttEvents> = createNanoEvents<SttEvents>();
    let isClosed = false;
    // Final tokens arrive once, non-final tokens repeatedly. Finals collect
    // here until handleResponse (or close) flushes them as one `final` event.
    const pendingFinal = { value: "" };

    try {
      await waitForOpen(socket);
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      throw makeSttError("stt_connect_failed", `Soniox STT: connect failed: ${reason}`);
    }

    // The config frame (text) must precede any binary audio frame.
    const configFrame = buildConfigFrame(apiKey, opts, openOpts.sampleRate);
    socket.send(JSON.stringify(configFrame));

    socket.on("message", (raw: WebSocket.RawData) => {
      if (isClosed) return;
      const parsed = parseFrame(raw);
      if (parsed) handleResponse(parsed, events, pendingFinal);
    });

    socket.on("error", (err: Error) => {
      if (isClosed) return;
      events.emit("error", makeSttError("stt_stream_error", err.message ?? String(err)));
    });

    socket.on("close", (code: number) => {
      if (isClosed) return;
      // Anything other than 1000 (normal closure) is surfaced as a stream error.
      if (code !== 1000) {
        events.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
      }
    });

    const shutdown = async (): Promise<void> => {
      if (isClosed) return;
      isClosed = true;
      // Emit whatever final text was still buffered when teardown started.
      if (pendingFinal.value.length > 0) {
        events.emit("final", pendingFinal.value);
        pendingFinal.value = "";
      }
      try {
        socket.close();
      } catch {
        // Swallow: caller has already decided to tear down.
      }
    };

    const { signal } = openOpts;
    if (signal.aborted) {
      void shutdown();
    } else {
      signal.addEventListener("abort", () => void shutdown(), { once: true });
    }

    return {
      sendAudio(pcm: Int16Array) {
        const writable = !isClosed && socket.readyState === WebSocket.OPEN;
        if (!writable) return;
        // A view over the caller's buffer avoids copying the samples before
        // they go out as a binary frame.
        const bytes = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
        socket.send(bytes, { binary: true });
      },
      on(event, fn) {
        return events.on(event, fn);
      },
      close: shutdown,
    };
  }

  return { name: "soniox", open };
}
@@ -210,6 +210,97 @@ describe("PipelineTransport", () => {
210
210
  await t.stop();
211
211
  });
212
212
 
test("inserts a separator between text segments split by a mid-turn tool call", async () => {
  // A two-step turn: step 1 yields text followed by a tool call, step 2 opens
  // with a new text segment. Unseparated, the two segments would run together
  // as "...up.Got it" in both the transcript and the TTS input.
  const expectedText = "Let me look that up. Got it. Here's the answer.";
  const stt = createFakeSttProvider();
  const tts = createFakeTtsProvider();
  const callbacks = makeCallbacks();
  const executeTool = vi.fn(async () => "result");
  const llm = createFakeLanguageModel({
    steps: [
      [
        { type: "text", text: "Let me look that up." },
        { type: "tool-call", toolCallId: "tc-1", toolName: "lookup", input: "{}" },
      ],
      [{ type: "text", text: "Got it. Here's the answer." }],
    ],
  });
  const lookupSchema = {
    type: "function" as const,
    name: "lookup",
    description: "Look something up.",
    parameters: { type: "object" as const, properties: {}, required: [] },
  };
  const { opts } = makeOpts(
    {
      llm,
      executeTool,
      toolSchemas: [lookupSchema],
      sessionConfig: { systemPrompt: "s", greeting: "" },
    },
    { stt, tts, callbacks },
  );

  const transport = createPipelineTransport(opts);
  await transport.start();
  stt.last()?.fireFinal("look it up");
  await vi.waitFor(() => {
    expect(callbacks.onAgentTranscript).toHaveBeenCalled();
  });

  expect(callbacks.onAgentTranscript).toHaveBeenCalledWith(expectedText, false);
  expect(tts.last()?.textChunks.join("")).toBe(expectedText);
  await transport.stop();
});
260
+
test("does not double-space when a segment boundary already carries whitespace", async () => {
  // Segment 1 already ends with a space, so no extra separator may be
  // injected at the boundary — in the transcript or in the text sent to TTS.
  const expectedText = "First sentence. Second sentence.";
  const stt = createFakeSttProvider();
  const tts = createFakeTtsProvider();
  const callbacks = makeCallbacks();
  const executeTool = vi.fn(async () => "result");
  const { opts } = makeOpts(
    {
      llm: createFakeLanguageModel({
        steps: [
          [
            { type: "text", text: "First sentence. " },
            { type: "tool-call", toolCallId: "tc-1", toolName: "lookup", input: "{}" },
          ],
          [{ type: "text", text: "Second sentence." }],
        ],
      }),
      executeTool,
      toolSchemas: [
        {
          type: "function" as const,
          name: "lookup",
          description: "Look something up.",
          parameters: { type: "object" as const, properties: {}, required: [] },
        },
      ],
      sessionConfig: { systemPrompt: "s", greeting: "" },
    },
    { stt, tts, callbacks },
  );
  const t = createPipelineTransport(opts);
  await t.start();
  stt.last()?.fireFinal("look it up");
  await vi.waitFor(() => {
    expect(callbacks.onAgentTranscript).toHaveBeenCalled();
  });
  expect(callbacks.onAgentTranscript).toHaveBeenCalledWith(expectedText, false);
  // The same text is streamed to TTS, so the TTS side must not gain a
  // doubled space either (mirrors the assertion in the separator test).
  expect(tts.last()?.textChunks.join("")).toBe(expectedText);
  await t.stop();
});
303
+
213
304
  test("TTS audio event is forwarded to callbacks.onAudioChunk as Uint8Array", async () => {
214
305
  const stt = createFakeSttProvider();
215
306
  const tts = createFakeTtsProvider();
@@ -217,9 +217,10 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
217
217
  stopWhen: stepCountIs(maxSteps),
218
218
  abortSignal: ctl.signal,
219
219
  });
220
+ const handlePart = makeStreamPartHandler(onDelta);
220
221
  for await (const part of result.fullStream) {
221
222
  if (ctl.signal.aborted) break;
222
- handleStreamPart(part, ctl, onDelta);
223
+ handlePart(part);
223
224
  }
224
225
  } catch (err: unknown) {
225
226
  if (!ctl.signal.aborted) {
@@ -230,8 +231,33 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
230
231
  }
231
232
  }
232
233
 
233
- function handleStreamPart(
234
- part: {
234
+ /**
235
+ * Stateful per-turn handler for `streamText` `fullStream` parts.
236
+ *
237
+ * Tracks text-segment boundaries so that consecutive segments — which the
238
+ * Vercel SDK emits across tool-call hops as `text-end` followed later by a
239
+ * fresh `text-start` — don't fuse into "...up.Got it" when concatenated for
240
+ * the transcript or streamed to TTS. When a boundary is crossed and neither
241
+ * side carries whitespace, a single space is injected into both streams.
242
+ */
243
+ function makeStreamPartHandler(onDelta: (delta: string) => void) {
244
+ let pendingSeparator = false;
245
+ let lastChar = "";
246
+
247
+ function emitText(delta: string): void {
248
+ if (delta.length === 0) return;
249
+ let out = delta;
250
+ if (pendingSeparator) {
251
+ pendingSeparator = false;
252
+ const boundaryHasSpace = lastChar === "" || /\s/.test(lastChar) || /^\s/.test(out);
253
+ if (!boundaryHasSpace) out = ` ${out}`;
254
+ }
255
+ lastChar = out.slice(-1);
256
+ onDelta(out);
257
+ ttsSession?.sendText(out);
258
+ }
259
+
260
+ return function handlePart(part: {
235
261
  readonly type: string;
236
262
  readonly text?: string;
237
263
  readonly input?: unknown;
@@ -239,34 +265,31 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
239
265
  readonly toolCallId?: string;
240
266
  readonly toolName?: string;
241
267
  readonly error?: unknown;
242
- },
243
- _ctl: AbortController,
244
- onDelta: (delta: string) => void,
245
- ): void {
246
- switch (part.type) {
247
- case "text-delta": {
248
- const delta = part.text ?? "";
249
- if (delta.length === 0) return;
250
- onDelta(delta);
251
- ttsSession?.sendText(delta);
252
- return;
253
- }
254
- case "tool-call": {
255
- // Option A: fire callbacks.onToolCall for observability only.
256
- // Actual execution happens inline via toVercelTools.
257
- const input = (part.input ?? {}) as Record<string, unknown>;
258
- callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
259
- return;
260
- }
261
- case "error": {
262
- const msg = errorMessage(part.error);
263
- log.error("LLM stream error", { message: msg, sid: opts.sid });
264
- emitError("llm", msg);
265
- return;
268
+ }): void {
269
+ switch (part.type) {
270
+ case "text-delta":
271
+ emitText(part.text ?? "");
272
+ return;
273
+ case "text-end":
274
+ pendingSeparator = true;
275
+ return;
276
+ case "tool-call": {
277
+ // Option A: fire callbacks.onToolCall for observability only.
278
+ // Actual execution happens inline via toVercelTools.
279
+ const input = (part.input ?? {}) as Record<string, unknown>;
280
+ callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
281
+ return;
282
+ }
283
+ case "error": {
284
+ const msg = errorMessage(part.error);
285
+ log.error("LLM stream error", { message: msg, sid: opts.sid });
286
+ emitError("llm", msg);
287
+ return;
288
+ }
289
+ default:
290
+ return;
266
291
  }
267
- default:
268
- return;
269
- }
292
+ };
270
293
  }
271
294
 
272
295
  // ---- TTS flush ------------------------------------------------------------
@@ -1,5 +1,7 @@
1
- import { describe, expect, test, vi } from "vitest";
2
- import { createS2sTransport } from "./s2s-transport.ts";
1
+ import { afterEach, describe, expect, test, vi } from "vitest";
2
+ import { makeMockHandle, silentLogger } from "../_test-utils.ts";
3
+ import type { S2sCallbacks, S2sHandle } from "../s2s.ts";
4
+ import { _internals, createS2sTransport } from "./s2s-transport.ts";
3
5
  import type { TransportCallbacks } from "./types.ts";
4
6
 
5
7
  function makeCallbacks(): TransportCallbacks {
@@ -15,6 +17,7 @@ function makeCallbacks(): TransportCallbacks {
15
17
  onError: vi.fn(),
16
18
  onSpeechStarted: vi.fn(),
17
19
  onSpeechStopped: vi.fn(),
20
+ onSessionReady: vi.fn(),
18
21
  };
19
22
  }
20
23
 
@@ -54,3 +57,220 @@ describe("S2sTransport", () => {
54
57
  expect(close).toHaveBeenCalled();
55
58
  });
56
59
  });
60
+
61
+ // ─── Reconnect tests ────────────────────────────────────────────────────────
62
+
/**
 * Replace `_internals.connectS2s` with a stub that records every connection.
 *
 * Each stubbed call stores the `S2sCallbacks` it was given and returns a
 * fresh mock handle, so tests can drive upstream events by hand and inspect
 * the handle created for each (re)connect.
 *
 * @returns Fresh transport callbacks, the recorded handles and callbacks
 *   (index = connection order), and the spy itself.
 */
function setupSpiedTransport(): {
  callbacks: TransportCallbacks;
  handles: S2sHandle[];
  capturedCallbacks: S2sCallbacks[];
  spy: ReturnType<typeof vi.spyOn>;
} {
  const handles: S2sHandle[] = [];
  const capturedCallbacks: S2sCallbacks[] = [];

  const connectStub = async (connectOpts: import("../s2s.ts").ConnectS2sOptions) => {
    capturedCallbacks.push(connectOpts.callbacks);
    const handle = makeMockHandle();
    handles.push(handle);
    return handle;
  };
  const spy = vi.spyOn(_internals, "connectS2s").mockImplementation(connectStub);

  return { callbacks: makeCallbacks(), handles, capturedCallbacks, spy };
}
87
+
describe("S2sTransport reconnect", () => {
  afterEach(() => {
    vi.restoreAllMocks();
  });

  /**
   * Spy on `connectS2s`, build a transport with the fixed test configuration,
   * and start it. Shared by every test so the configuration lives in one place.
   */
  async function startSpiedTransport() {
    const spied = setupSpiedTransport();
    const transport = createS2sTransport({
      apiKey: "k",
      s2sConfig: { wssUrl: "wss://fake", inputSampleRate: 16_000, outputSampleRate: 24_000 },
      sessionConfig: { systemPrompt: "test", tools: [] },
      toolSchemas: [],
      callbacks: spied.callbacks,
      sid: "sid-1",
      agent: "a",
      logger: silentLogger,
    });
    await transport.start();
    return { ...spied, transport };
  }

  /**
   * Return the callbacks captured for the `index`-th connection, throwing if
   * that connection never happened so a test cannot pass vacuously.
   */
  function connectionCallbacks(captured: S2sCallbacks[], index: number): S2sCallbacks {
    const cb = captured[index];
    if (!cb) throw new Error(`expected callbacks for connection #${index + 1}`);
    return cb;
  }

  test("attempts session.resume on transient close (1005) inside the resume window", async () => {
    const { callbacks, handles, capturedCallbacks } = await startSpiedTransport();

    // Establish session, start a reply, then drop the socket.
    const first = connectionCallbacks(capturedCallbacks, 0);
    first.onSessionReady("sess_abc");
    first.onReplyStarted("rep_1");
    first.onClose(1005, "");

    // Wait for the async resume() to fire connectS2s a second time.
    await vi.waitFor(() => {
      expect(handles.length).toBe(2);
    });

    // The new handle should have received resumeSession with the prior id.
    const newHandle = handles[1];
    if (!newHandle) throw new Error("expected new handle");
    expect(newHandle.resumeSession).toHaveBeenCalledWith("sess_abc");

    // The in-flight reply was unblocked via onCancelled, NOT a fatal error.
    expect(callbacks.onCancelled).toHaveBeenCalledOnce();
    expect(callbacks.onError).not.toHaveBeenCalled();
  });

  test("does NOT reconnect on fatal close codes (1008 unauthorized)", async () => {
    const { callbacks, handles, capturedCallbacks } = await startSpiedTransport();

    const first = connectionCallbacks(capturedCallbacks, 0);
    first.onSessionReady("sess_abc");
    first.onReplyStarted("rep_1");
    first.onClose(1008, "unauthorized");

    // No reconnect — only one connectS2s call total.
    await new Promise((resolve) => setTimeout(resolve, 5));
    expect(handles.length).toBe(1);
    // Fatal error surfaces, since a reply was in flight.
    expect(callbacks.onError).toHaveBeenCalledWith(
      "connection",
      expect.stringContaining("S2S closed mid-reply"),
    );
  });

  test("does NOT reconnect when stop() was called", async () => {
    const { callbacks, handles, capturedCallbacks, transport } = await startSpiedTransport();

    const first = connectionCallbacks(capturedCallbacks, 0);
    first.onSessionReady("sess_abc");
    await transport.stop();

    // Simulate the upstream's close arriving after stop() — it should be
    // treated as a clean shutdown, not a transient drop worth resuming.
    first.onClose(1005, "");

    await new Promise((resolve) => setTimeout(resolve, 5));
    expect(handles.length).toBe(1);
    expect(callbacks.onError).not.toHaveBeenCalled();
  });

  test("surfaces resume failure when the resumed socket also closes", async () => {
    const { callbacks, handles, capturedCallbacks } = await startSpiedTransport();

    const first = connectionCallbacks(capturedCallbacks, 0);
    first.onSessionReady("sess_abc");
    first.onReplyStarted("rep_1");
    first.onClose(1005, "");

    await vi.waitFor(() => expect(handles.length).toBe(2));

    // The resume socket also drops before its session.ready arrives.
    connectionCallbacks(capturedCallbacks, 1).onClose(1006, "");

    expect(callbacks.onError).toHaveBeenCalledWith(
      "connection",
      expect.stringContaining("resume failed"),
    );
  });

  test("surfaces resume failure when server reports session_not_found", async () => {
    const { callbacks, handles, capturedCallbacks } = await startSpiedTransport();

    const first = connectionCallbacks(capturedCallbacks, 0);
    first.onSessionReady("sess_abc");
    first.onClose(1005, "");

    await vi.waitFor(() => expect(handles.length).toBe(2));

    connectionCallbacks(capturedCallbacks, 1).onSessionExpired();

    expect(callbacks.onError).toHaveBeenCalledWith(
      "connection",
      expect.stringContaining("session expired"),
    );
  });

  test("after a successful resume, a later transient drop also resumes", async () => {
    const { handles, capturedCallbacks } = await startSpiedTransport();

    // First connection establishes, drops, resumes, becomes ready again.
    const first = connectionCallbacks(capturedCallbacks, 0);
    first.onSessionReady("sess_abc");
    first.onClose(1005, "");
    await vi.waitFor(() => expect(handles.length).toBe(2));
    const second = connectionCallbacks(capturedCallbacks, 1);
    second.onSessionReady("sess_abc");

    // Second drop — should trigger another resume attempt.
    second.onClose(1006, "");
    await vi.waitFor(() => expect(handles.length).toBe(3));
    expect(handles[2]?.resumeSession).toHaveBeenCalledWith("sess_abc");
  });
});