@alexkroman1/aai 1.7.1 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/.turbo/turbo-build.log +11 -9
  2. package/CHANGELOG.md +10 -0
  3. package/dist/{_internal-types-CrnTi9Ew.js → _internal-types-CfOAbK6V.js} +22 -35
  4. package/dist/constants-y68COEGj.js +29 -0
  5. package/dist/host/_base64.d.ts +2 -0
  6. package/dist/host/_mock-ws.d.ts +0 -61
  7. package/dist/host/_pipeline-test-fakes.d.ts +7 -4
  8. package/dist/host/_run-code.d.ts +0 -25
  9. package/dist/host/_runtime-conformance.d.ts +3 -34
  10. package/dist/host/memory-vector.d.ts +0 -11
  11. package/dist/host/providers/resolve-kv.d.ts +0 -7
  12. package/dist/host/providers/resolve-vector.d.ts +0 -8
  13. package/dist/host/providers/stt/assemblyai.d.ts +0 -14
  14. package/dist/host/providers/stt/deepgram.d.ts +2 -14
  15. package/dist/host/providers/stt/soniox.d.ts +0 -22
  16. package/dist/host/providers/tts/rime.d.ts +10 -31
  17. package/dist/host/runtime-barrel.js +619 -630
  18. package/dist/host/runtime-config.d.ts +9 -6
  19. package/dist/host/runtime.d.ts +3 -0
  20. package/dist/host/to-vercel-tools.d.ts +3 -33
  21. package/dist/host/transports/openai-realtime-transport.d.ts +43 -0
  22. package/dist/host/unstorage-kv.d.ts +0 -26
  23. package/dist/index.js +3 -3
  24. package/dist/openai-realtime-cjPAHMMx.js +10 -0
  25. package/dist/sdk/_internal-types.d.ts +6 -55
  26. package/dist/sdk/allowed-hosts.d.ts +4 -3
  27. package/dist/sdk/constants.d.ts +4 -29
  28. package/dist/sdk/define.d.ts +7 -4
  29. package/dist/sdk/kv.d.ts +13 -37
  30. package/dist/sdk/manifest-barrel.js +1 -1
  31. package/dist/sdk/manifest.d.ts +8 -2
  32. package/dist/sdk/protocol.js +1 -1
  33. package/dist/sdk/providers/s2s/openai-realtime.d.ts +17 -0
  34. package/dist/sdk/providers/s2s-barrel.d.ts +9 -0
  35. package/dist/sdk/providers/s2s-barrel.js +2 -0
  36. package/dist/sdk/providers/tts/rime.d.ts +1 -1
  37. package/dist/sdk/providers.d.ts +6 -2
  38. package/dist/sdk/types.d.ts +7 -1
  39. package/dist/{types-KUgezM6u.js → types-DOWVZhb9.js} +1 -7
  40. package/dist/{ws-upgrade-BeOQ7fXL.js → ws-upgrade-CG8-by1n.js} +2 -3
  41. package/host/_base64.ts +9 -0
  42. package/host/_mock-ws.ts +0 -65
  43. package/host/_pipeline-test-fakes.ts +19 -31
  44. package/host/_run-code.ts +10 -53
  45. package/host/_runtime-conformance.ts +3 -44
  46. package/host/_test-utils.ts +20 -42
  47. package/host/builtin-tools.test.ts +127 -222
  48. package/host/builtin-tools.ts +6 -10
  49. package/host/cleanup.test.ts +30 -73
  50. package/host/integration/pipeline-reference.integration.test.ts +12 -17
  51. package/host/integration.test.ts +0 -7
  52. package/host/memory-vector.test.ts +3 -1
  53. package/host/memory-vector.ts +16 -21
  54. package/host/pinecone-vector.test.ts +14 -17
  55. package/host/pinecone-vector.ts +10 -19
  56. package/host/providers/providers.test-d.ts +5 -3
  57. package/host/providers/resolve-kv.ts +23 -41
  58. package/host/providers/resolve-vector.ts +3 -12
  59. package/host/providers/resolve.test.ts +15 -28
  60. package/host/providers/resolve.ts +24 -24
  61. package/host/providers/stt/assemblyai.test.ts +2 -14
  62. package/host/providers/stt/assemblyai.ts +12 -35
  63. package/host/providers/stt/deepgram.test.ts +23 -83
  64. package/host/providers/stt/deepgram.ts +15 -40
  65. package/host/providers/stt/elevenlabs.test.ts +26 -38
  66. package/host/providers/stt/elevenlabs.ts +10 -9
  67. package/host/providers/stt/soniox.test.ts +35 -85
  68. package/host/providers/stt/soniox.ts +8 -53
  69. package/host/providers/tts/cartesia.test.ts +19 -58
  70. package/host/providers/tts/cartesia.ts +36 -66
  71. package/host/providers/tts/rime.test.ts +12 -38
  72. package/host/providers/tts/rime.ts +23 -86
  73. package/host/runtime-config.test.ts +9 -9
  74. package/host/runtime-config.ts +16 -22
  75. package/host/runtime.test.ts +111 -73
  76. package/host/runtime.ts +138 -86
  77. package/host/s2s.test.ts +92 -191
  78. package/host/s2s.ts +55 -49
  79. package/host/server-shutdown.test.ts +9 -30
  80. package/host/server.test.ts +2 -13
  81. package/host/server.ts +85 -100
  82. package/host/session-core.test.ts +15 -30
  83. package/host/session-core.ts +10 -13
  84. package/host/session-prompt.test.ts +1 -5
  85. package/host/to-vercel-tools.test.ts +53 -72
  86. package/host/to-vercel-tools.ts +9 -39
  87. package/host/tool-executor.test.ts +25 -51
  88. package/host/tool-executor.ts +18 -12
  89. package/host/transports/openai-realtime-transport.test.ts +371 -0
  90. package/host/transports/openai-realtime-transport.ts +319 -0
  91. package/host/transports/pipeline-transport.test.ts +125 -298
  92. package/host/transports/pipeline-transport.ts +20 -68
  93. package/host/transports/s2s-transport-fixtures.test.ts +31 -92
  94. package/host/transports/s2s-transport.test.ts +65 -134
  95. package/host/transports/s2s-transport.ts +15 -43
  96. package/host/transports/types.test.ts +4 -8
  97. package/host/unstorage-kv.test.ts +3 -2
  98. package/host/unstorage-kv.ts +5 -35
  99. package/host/ws-handler.test.ts +72 -176
  100. package/host/ws-handler.ts +6 -12
  101. package/package.json +6 -1
  102. package/sdk/__snapshots__/exports.test.ts.snap +7 -0
  103. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  104. package/sdk/_internal-types.test.ts +6 -9
  105. package/sdk/_internal-types.ts +16 -57
  106. package/sdk/_test-matchers.ts +25 -15
  107. package/sdk/allowed-hosts.test.ts +50 -114
  108. package/sdk/allowed-hosts.ts +8 -14
  109. package/sdk/constants.ts +5 -52
  110. package/sdk/define.test.ts +7 -6
  111. package/sdk/define.ts +7 -3
  112. package/sdk/exports.test.ts +6 -1
  113. package/sdk/kv.ts +13 -37
  114. package/sdk/manifest.test-d.ts +5 -0
  115. package/sdk/manifest.test.ts +61 -9
  116. package/sdk/manifest.ts +11 -11
  117. package/sdk/protocol-compat.test.ts +66 -98
  118. package/sdk/protocol-snapshot.test.ts +2 -16
  119. package/sdk/protocol.test.ts +13 -22
  120. package/sdk/providers/s2s/openai-realtime.ts +36 -0
  121. package/sdk/providers/s2s-barrel.ts +12 -0
  122. package/sdk/providers/tts/rime.ts +1 -1
  123. package/sdk/providers.ts +24 -5
  124. package/sdk/schema-alignment.test.ts +25 -73
  125. package/sdk/schema-shapes.test.ts +1 -29
  126. package/sdk/system-prompt.test.ts +0 -1
  127. package/sdk/system-prompt.ts +17 -19
  128. package/sdk/types-inference.test.ts +10 -36
  129. package/sdk/types.ts +7 -0
  130. package/sdk/ws-upgrade.test.ts +24 -23
  131. package/sdk/ws-upgrade.ts +2 -3
  132. package/tsdown.config.ts +8 -11
  133. package/dist/constants-C2nirZUI.js +0 -54
@@ -85,16 +85,17 @@ export function openElevenLabs(opts: ElevenLabsOptions = {}): SttOpener {
85
85
  const emitter: Emitter<SttEvents> = createNanoEvents<SttEvents>();
86
86
  let closed = false;
87
87
 
88
- connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (msg) => {
88
+ function emitTranscript(event: "partial" | "final", text: string | undefined) {
89
89
  if (closed) return;
90
- const text = msg.text ?? "";
91
- if (text.length > 0) emitter.emit("partial", text);
90
+ if (text && text.length > 0) emitter.emit(event, text);
91
+ }
92
+
93
+ connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (msg) => {
94
+ emitTranscript("partial", msg.text);
92
95
  });
93
96
 
94
97
  connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (msg) => {
95
- if (closed) return;
96
- const text = msg.text ?? "";
97
- if (text.length > 0) emitter.emit("final", text);
98
+ emitTranscript("final", msg.text);
98
99
  });
99
100
 
100
101
  connection.on(RealtimeEvents.ERROR, (payload) => {
@@ -111,15 +112,15 @@ export function openElevenLabs(opts: ElevenLabsOptions = {}): SttOpener {
111
112
  emitter.emit("error", makeSttError("stt_auth_failed", msg.error));
112
113
  });
113
114
 
114
- const close = async (): Promise<void> => {
115
+ async function close(): Promise<void> {
115
116
  if (closed) return;
116
117
  closed = true;
117
118
  try {
118
119
  connection.close();
119
120
  } catch {
120
- // Swallow: caller has already decided to tear down.
121
+ // Already tearing down; ignore close errors.
121
122
  }
122
- };
123
+ }
123
124
 
124
125
  if (openOpts.signal.aborted) {
125
126
  void close();
@@ -5,20 +5,6 @@ import { describe, expect, test, vi } from "vitest";
5
5
  import { flush } from "../../_test-utils.ts";
6
6
  import { openSoniox } from "./soniox.ts";
7
7
 
8
- // ---------------------------------------------------------------------------
9
- // Mock the `ws` package. Each FakeWS:
10
- // - extends EventEmitter for `on`/`off`/`once` semantics that match the
11
- // real `ws.WebSocket` API
12
- // - exposes `readyState` initialised to OPEN once "open" fires
13
- // - records sent frames so tests can assert on them
14
- // - exposes `_fire(ev, data)` so tests inject incoming server frames
15
- //
16
- // Vitest hoists `vi.mock` to module top, so the factory can't reference
17
- // outer top-level declarations. `vi.hoisted` runs even earlier and lets
18
- // us share `FakeWS` + the `latest` capture between the factory and the
19
- // test bodies.
20
- // ---------------------------------------------------------------------------
21
-
22
8
  interface FakeWSInstance {
23
9
  readyState: number;
24
10
  sent: Array<string | Uint8Array>;
@@ -32,6 +18,7 @@ interface FakeWSInstance {
32
18
 
33
19
  type Listener = (...args: unknown[]) => void;
34
20
 
21
+ // `vi.mock` is hoisted above top-level decls, so share state via `vi.hoisted`.
35
22
  const { latest, FakeWS } = vi.hoisted(() => {
36
23
  const latestRef: { ws: FakeWSInstance | undefined } = { ws: undefined };
37
24
  class FakeWSImpl implements FakeWSInstance {
@@ -87,37 +74,37 @@ const { latest, FakeWS } = vi.hoisted(() => {
87
74
 
88
75
  vi.mock("ws", () => ({ default: FakeWS, WebSocket: FakeWS }));
89
76
 
90
- async function openSession(
91
- opts: { apiKey?: string; languageHints?: string[]; model?: string } = {},
92
- ): Promise<{
77
+ interface OpenSessionOpts {
78
+ apiKey?: string;
79
+ languageHints?: string[];
80
+ model?: string;
81
+ }
82
+
83
+ async function openSession(opts: OpenSessionOpts = {}): Promise<{
93
84
  session: import("../../../sdk/providers.ts").SttSession;
94
85
  ws: FakeWSInstance;
95
86
  controller: AbortController;
96
87
  }> {
97
88
  latest.ws = undefined;
98
- const opener = openSoniox(
99
- opts.languageHints || opts.model
100
- ? {
101
- ...(opts.model ? { model: opts.model } : {}),
102
- ...(opts.languageHints ? { languageHints: opts.languageHints } : {}),
103
- }
104
- : {},
105
- );
89
+ const openerOpts: { model?: string; languageHints?: string[] } = {};
90
+ if (opts.model) openerOpts.model = opts.model;
91
+ if (opts.languageHints) openerOpts.languageHints = opts.languageHints;
92
+ const opener = openSoniox(openerOpts);
106
93
  const controller = new AbortController();
107
94
  const session = await opener.open({
108
95
  sampleRate: 16_000,
109
96
  apiKey: opts.apiKey ?? "test-key",
110
97
  signal: controller.signal,
111
98
  });
112
- // The constructor schedules an `open` event via setImmediate; the
113
- // adapter's `await waitForOpen(ws)` already drained it, so `latest.ws`
114
- // is fully wired by now. Capture into a local const so TS narrows the
115
- // type — direct property access on a mutable ref keeps the union.
116
- const ws: FakeWSInstance | undefined = latest.ws;
99
+ const ws = latest.ws;
117
100
  if (!ws) throw new Error("no fake ws captured");
118
101
  return { session, ws, controller };
119
102
  }
120
103
 
104
+ function frame(payload: unknown): Buffer {
105
+ return Buffer.from(JSON.stringify(payload));
106
+ }
107
+
121
108
  describe("Soniox real-time STT adapter", () => {
122
109
  test("openSoniox() returns an opener with name 'soniox'", () => {
123
110
  expect(openSoniox({}).name).toBe("soniox");
@@ -166,14 +153,12 @@ describe("Soniox real-time STT adapter", () => {
166
153
 
167
154
  ws._fire(
168
155
  "message",
169
- Buffer.from(
170
- JSON.stringify({
171
- tokens: [
172
- { text: "hel", is_final: false },
173
- { text: "lo", is_final: false },
174
- ],
175
- }),
176
- ),
156
+ frame({
157
+ tokens: [
158
+ { text: "hel", is_final: false },
159
+ { text: "lo", is_final: false },
160
+ ],
161
+ }),
177
162
  );
178
163
 
179
164
  await flush();
@@ -188,31 +173,19 @@ describe("Soniox real-time STT adapter", () => {
188
173
  session.on("final", (t) => finals.push(t));
189
174
  session.on("partial", (t) => partials.push(t));
190
175
 
191
- // Frame 1: only final tokens — buffered, NOT yet emitted.
192
176
  ws._fire(
193
177
  "message",
194
- Buffer.from(
195
- JSON.stringify({
196
- tokens: [
197
- { text: "hello", is_final: true },
198
- { text: " world", is_final: true },
199
- ],
200
- }),
201
- ),
178
+ frame({
179
+ tokens: [
180
+ { text: "hello", is_final: true },
181
+ { text: " world", is_final: true },
182
+ ],
183
+ }),
202
184
  );
203
185
  await flush();
204
186
  expect(finals).toEqual([]);
205
187
 
206
- // Frame 2: a non-final preview token flushes the buffered final
207
- // and emits the new partial.
208
- ws._fire(
209
- "message",
210
- Buffer.from(
211
- JSON.stringify({
212
- tokens: [{ text: "how", is_final: false }],
213
- }),
214
- ),
215
- );
188
+ ws._fire("message", frame({ tokens: [{ text: "how", is_final: false }] }));
216
189
  await flush();
217
190
  expect(finals).toEqual(["hello world"]);
218
191
  expect(partials).toEqual(["how"]);
@@ -224,15 +197,7 @@ describe("Soniox real-time STT adapter", () => {
224
197
  const finals: string[] = [];
225
198
  session.on("final", (t) => finals.push(t));
226
199
 
227
- ws._fire(
228
- "message",
229
- Buffer.from(
230
- JSON.stringify({
231
- tokens: [{ text: "bye", is_final: true }],
232
- finished: true,
233
- }),
234
- ),
235
- );
200
+ ws._fire("message", frame({ tokens: [{ text: "bye", is_final: true }], finished: true }));
236
201
 
237
202
  await flush();
238
203
  expect(finals).toEqual(["bye"]);
@@ -244,16 +209,9 @@ describe("Soniox real-time STT adapter", () => {
244
209
  const finals: string[] = [];
245
210
  session.on("final", (t) => finals.push(t));
246
211
 
247
- ws._fire(
248
- "message",
249
- Buffer.from(
250
- JSON.stringify({
251
- tokens: [{ text: "trailing", is_final: true }],
252
- }),
253
- ),
254
- );
212
+ ws._fire("message", frame({ tokens: [{ text: "trailing", is_final: true }] }));
255
213
  await flush();
256
- expect(finals).toEqual([]); // not flushed yet — no boundary
214
+ expect(finals).toEqual([]);
257
215
 
258
216
  await session.close();
259
217
  expect(finals).toEqual(["trailing"]);
@@ -264,10 +222,7 @@ describe("Soniox real-time STT adapter", () => {
264
222
  const errors: { code: string; message: string }[] = [];
265
223
  session.on("error", (e) => errors.push({ code: e.code, message: e.message }));
266
224
 
267
- ws._fire(
268
- "message",
269
- Buffer.from(JSON.stringify({ error_code: 503, error_message: "service unavailable" })),
270
- );
225
+ ws._fire("message", frame({ error_code: 503, error_message: "service unavailable" }));
271
226
 
272
227
  await flush();
273
228
  expect(errors).toHaveLength(1);
@@ -325,12 +280,7 @@ describe("Soniox real-time STT adapter", () => {
325
280
  await session.close();
326
281
  await session.close();
327
282
 
328
- ws._fire(
329
- "message",
330
- Buffer.from(
331
- JSON.stringify({ tokens: [{ text: "ignored", is_final: true }], finished: true }),
332
- ),
333
- );
283
+ ws._fire("message", frame({ tokens: [{ text: "ignored", is_final: true }], finished: true }));
334
284
 
335
285
  await flush();
336
286
  expect(finals).toEqual([]);
@@ -1,25 +1,4 @@
1
1
  // Copyright 2026 the AAI authors. MIT license.
2
- /**
3
- * Soniox real-time STT opener (host-only).
4
- *
5
- * The user-facing descriptor factory (`soniox(...)`) lives in
6
- * `sdk/providers/stt/soniox.ts`. This module is the host-side
7
- * counterpart: it takes the descriptor options + an API key and
8
- * returns an {@link SttOpener} that the pipeline session drives.
9
- *
10
- * Soniox's published JS client (`@soniox/speech-to-text-web`) is
11
- * browser-only — it depends on `MediaRecorder` and `getUserMedia`. For
12
- * server-side use we talk to the WebSocket directly:
13
- * `wss://stt-rt.soniox.com/transcribe-websocket`
14
- *
15
- * Wire format:
16
- * - First text frame: JSON config with api_key, model, audio_format,
17
- * sample_rate, num_channels (and optional language hints).
18
- * - Subsequent binary frames: 16-bit signed little-endian PCM audio.
19
- * - Server replies: JSON `{ tokens: [{ text, is_final }] }` messages.
20
- * Final tokens accumulate; non-final tokens are a rolling preview.
21
- * - On error: `{ error_code, error_message }`.
22
- */
23
2
 
24
3
  import { createNanoEvents, type Emitter } from "nanoevents";
25
4
  import WebSocket from "ws";
@@ -32,9 +11,10 @@ import {
32
11
  type SttSession,
33
12
  } from "../../../sdk/providers.ts";
34
13
 
14
+ // `@soniox/speech-to-text-web` is browser-only (MediaRecorder/getUserMedia),
15
+ // so we speak the WebSocket protocol directly.
35
16
  const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
36
17
 
37
- /** Soniox token shape from the wire protocol. */
38
18
  interface SonioxToken {
39
19
  text?: string;
40
20
  is_final?: boolean;
@@ -47,10 +27,6 @@ interface SonioxResponse {
47
27
  error_message?: string;
48
28
  }
49
29
 
50
- /**
51
- * Walk a batch of Soniox tokens, sending finals into `appendFinal` and
52
- * returning the concatenated non-finals as a rolling preview string.
53
- */
54
30
  function consumeTokens(tokens: SonioxToken[], appendFinal: (text: string) => void): string {
55
31
  let nonFinal = "";
56
32
  for (const tok of tokens) {
@@ -65,14 +41,13 @@ function consumeTokens(tokens: SonioxToken[], appendFinal: (text: string) => voi
65
41
  return nonFinal;
66
42
  }
67
43
 
68
- /** Resolve once the WebSocket opens; reject on the first error. */
69
44
  function waitForOpen(ws: WebSocket): Promise<void> {
70
45
  return new Promise((resolve, reject) => {
71
- const onOpen = () => {
46
+ const onOpen = (): void => {
72
47
  ws.off("error", onErr);
73
48
  resolve();
74
49
  };
75
- const onErr = (err: Error) => {
50
+ const onErr = (err: Error): void => {
76
51
  ws.off("open", onOpen);
77
52
  reject(err);
78
53
  };
@@ -81,7 +56,6 @@ function waitForOpen(ws: WebSocket): Promise<void> {
81
56
  });
82
57
  }
83
58
 
84
- /** Build the initial JSON config frame for a Soniox session. */
85
59
  function buildConfigFrame(
86
60
  apiKey: string,
87
61
  opts: SonioxOptions,
@@ -100,7 +74,6 @@ function buildConfigFrame(
100
74
  return config;
101
75
  }
102
76
 
103
- /** Parse a Soniox text frame into a {@link SonioxResponse}; returns null on garbage. */
104
77
  function parseFrame(raw: WebSocket.RawData): SonioxResponse | null {
105
78
  try {
106
79
  return JSON.parse(raw.toString()) as SonioxResponse;
@@ -109,12 +82,6 @@ function parseFrame(raw: WebSocket.RawData): SonioxResponse | null {
109
82
  }
110
83
  }
111
84
 
112
- /**
113
- * Handle one server response. Emits `error`, `final`, and `partial` events
114
- * onto `emitter` based on the token batch and the running `finalBuf`. The
115
- * caller owns `finalBuf` so it survives across messages and can be flushed
116
- * on close.
117
- */
118
85
  function handleResponse(
119
86
  res: SonioxResponse,
120
87
  emitter: Emitter<SttEvents>,
@@ -134,10 +101,8 @@ function handleResponse(
134
101
  const nonFinal = consumeTokens(res.tokens, (text) => {
135
102
  finalBuf.value += text;
136
103
  });
137
- // Flush an accumulated final whenever the next batch's non-final preview
138
- // begins (or when the session finishes). This batches contiguous final
139
- // tokens into a single `final` event, matching what downstream pipeline
140
- // session code expects.
104
+ // Batch contiguous finals into one `final` event by flushing only when
105
+ // a new non-final preview starts (or the session finishes).
141
106
  if (finalBuf.value.length > 0 && (nonFinal.length > 0 || res.finished)) {
142
107
  emitter.emit("final", finalBuf.value);
143
108
  finalBuf.value = "";
@@ -147,7 +112,6 @@ function handleResponse(
147
112
  }
148
113
  }
149
114
 
150
- /** Build an {@link SttOpener} from resolved Soniox descriptor options. */
151
115
  export function openSoniox(opts: SonioxOptions = {}): SttOpener {
152
116
  return {
153
117
  name: "soniox",
@@ -163,11 +127,6 @@ export function openSoniox(opts: SonioxOptions = {}): SttOpener {
163
127
  const ws = new WebSocket(SONIOX_WS_URL);
164
128
  const emitter: Emitter<SttEvents> = createNanoEvents<SttEvents>();
165
129
  let closed = false;
166
- // Soniox emits final tokens once and non-final tokens repeatedly. We
167
- // accumulate finals into a buffer flushed on each non-final boundary
168
- // and forward non-finals as the rolling partial. Mirrors how the
169
- // existing AssemblyAI/Deepgram openers map provider-specific token
170
- // streams onto the SttEvents `partial`/`final` contract.
171
130
  const finalBuf = { value: "" };
172
131
 
173
132
  try {
@@ -179,7 +138,6 @@ export function openSoniox(opts: SonioxOptions = {}): SttOpener {
179
138
  );
180
139
  }
181
140
 
182
- // Initial config frame (text). Sent first; audio binary frames follow.
183
141
  ws.send(JSON.stringify(buildConfigFrame(apiKey, opts, openOpts.sampleRate)));
184
142
 
185
143
  ws.on("message", (raw: WebSocket.RawData) => {
@@ -195,7 +153,6 @@ export function openSoniox(opts: SonioxOptions = {}): SttOpener {
195
153
 
196
154
  ws.on("close", (code: number) => {
197
155
  if (closed) return;
198
- // 1000 = normal closure.
199
156
  if (code !== 1000) {
200
157
  emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
201
158
  }
@@ -204,7 +161,6 @@ export function openSoniox(opts: SonioxOptions = {}): SttOpener {
204
161
  const close = async (): Promise<void> => {
205
162
  if (closed) return;
206
163
  closed = true;
207
- // Flush any trailing final tokens that arrived right before close.
208
164
  if (finalBuf.value.length > 0) {
209
165
  emitter.emit("final", finalBuf.value);
210
166
  finalBuf.value = "";
@@ -212,7 +168,7 @@ export function openSoniox(opts: SonioxOptions = {}): SttOpener {
212
168
  try {
213
169
  ws.close();
214
170
  } catch {
215
- // Swallow: caller has already decided to tear down.
171
+ // Caller is tearing down; ws.close errors are not actionable.
216
172
  }
217
173
  };
218
174
 
@@ -225,8 +181,7 @@ export function openSoniox(opts: SonioxOptions = {}): SttOpener {
225
181
  return {
226
182
  sendAudio(pcm: Int16Array) {
227
183
  if (closed || ws.readyState !== WebSocket.OPEN) return;
228
- // Sending the underlying buffer directly avoids a copy. ws will
229
- // hand it to the OS as a binary frame.
184
+ // Pass the underlying buffer to avoid a copy.
230
185
  ws.send(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength), { binary: true });
231
186
  },
232
187
  on(event, fn) {
@@ -1,11 +1,9 @@
1
1
  // Copyright 2025 the AAI authors. MIT license.
2
- /** Unit test for the Cartesia TTS adapter. Mocks `@cartesia/cartesia-js`. */
3
2
 
4
3
  import { beforeEach, describe, expect, test, vi } from "vitest";
5
4
  import { flush } from "../../_test-utils.ts";
6
5
  import { type CartesiaSession, openCartesia } from "./cartesia.ts";
7
6
 
8
- // Recorded interactions on the fake `TTSWSContext` — one entry per method call.
9
7
  interface RecordedSend {
10
8
  kind: "send" | "cancel";
11
9
  contextId: string;
@@ -17,7 +15,6 @@ interface RecordedSend {
17
15
 
18
16
  const sends: RecordedSend[] = [];
19
17
 
20
- /** Minimal shape of the request the adapter sends to Cartesia. */
21
18
  interface FakeGenerationRequest {
22
19
  transcript: string;
23
20
  continue: boolean;
@@ -25,17 +22,12 @@ interface FakeGenerationRequest {
25
22
  model_id?: string;
26
23
  }
27
24
 
28
- /**
29
- * Fake `TTSWSContext`. Mirrors the fields the adapter touches:
30
- * `contextId`, `send`, `cancel`.
31
- */
32
25
  interface FakeContext {
33
26
  contextId: string;
34
27
  send(req: FakeGenerationRequest): Promise<void>;
35
28
  cancel(): Promise<void>;
36
29
  }
37
30
 
38
- /** Fake `TTSWS`. EventEmitter-ish with a `_fire` test hook. */
39
31
  interface FakeTTSWS {
40
32
  contexts: FakeContext[];
41
33
  context(opts: { contextId: string }): FakeContext;
@@ -45,7 +37,7 @@ interface FakeTTSWS {
45
37
  }
46
38
 
47
39
  vi.mock("@cartesia/cartesia-js", () => {
48
- const makeWs = (): FakeTTSWS => {
40
+ function makeWs(): FakeTTSWS {
49
41
  const listeners = new Map<string, Array<(...args: unknown[]) => void>>();
50
42
  const ws: FakeTTSWS = {
51
43
  contexts: [],
@@ -75,7 +67,7 @@ vi.mock("@cartesia/cartesia-js", () => {
75
67
  listeners.set(event, arr);
76
68
  return ws;
77
69
  },
78
- close(_props) {
70
+ close() {
79
71
  /* no-op */
80
72
  },
81
73
  _fire(event, payload) {
@@ -83,7 +75,7 @@ vi.mock("@cartesia/cartesia-js", () => {
83
75
  },
84
76
  };
85
77
  return ws;
86
- };
78
+ }
87
79
  return {
88
80
  Cartesia: class {
89
81
  tts = {
@@ -93,6 +85,17 @@ vi.mock("@cartesia/cartesia-js", () => {
93
85
  };
94
86
  });
95
87
 
88
+ function expectedSend(contextId: string, transcript: string, cont: boolean): RecordedSend {
89
+ return {
90
+ kind: "send",
91
+ contextId,
92
+ transcript,
93
+ continue: cont,
94
+ language: "en",
95
+ model_id: "sonic-2",
96
+ };
97
+ }
98
+
96
99
  beforeEach(() => {
97
100
  sends.length = 0;
98
101
  });
@@ -121,34 +124,11 @@ describe("cartesia TTS adapter", () => {
121
124
  session.flush();
122
125
  await flush();
123
126
 
124
- // All three sends for turn 1 carry the same contextId — two deltas with
125
- // continue: true, then an empty-transcript send with continue: false.
126
127
  const turn1Sends = sends.filter((s) => s.contextId === turn1);
127
128
  expect(turn1Sends).toEqual([
128
- {
129
- kind: "send",
130
- contextId: turn1,
131
- transcript: "hello",
132
- continue: true,
133
- language: "en",
134
- model_id: "sonic-2",
135
- },
136
- {
137
- kind: "send",
138
- contextId: turn1,
139
- transcript: " world",
140
- continue: true,
141
- language: "en",
142
- model_id: "sonic-2",
143
- },
144
- {
145
- kind: "send",
146
- contextId: turn1,
147
- transcript: "",
148
- continue: false,
149
- language: "en",
150
- model_id: "sonic-2",
151
- },
129
+ expectedSend(turn1, "hello", true),
130
+ expectedSend(turn1, " world", true),
131
+ expectedSend(turn1, "", false),
152
132
  ]);
153
133
 
154
134
  // Rotation is deferred until the next sendText so Cartesia's late
@@ -156,21 +136,11 @@ describe("cartesia TTS adapter", () => {
156
136
  // pass the context-id filter.
157
137
  expect(session._currentContextId()).toBe(turn1);
158
138
 
159
- // Subsequent sendText rotates to a fresh context.
160
139
  session.sendText("next");
161
140
  const turn2 = session._currentContextId();
162
141
  expect(turn2).not.toBe(turn1);
163
142
  await flush();
164
- expect(sends.filter((s) => s.contextId === turn2)).toEqual([
165
- {
166
- kind: "send",
167
- contextId: turn2,
168
- transcript: "next",
169
- continue: true,
170
- language: "en",
171
- model_id: "sonic-2",
172
- },
173
- ]);
143
+ expect(sends.filter((s) => s.contextId === turn2)).toEqual([expectedSend(turn2, "next", true)]);
174
144
 
175
145
  controller.abort();
176
146
  await session.close();
@@ -191,16 +161,8 @@ describe("cartesia TTS adapter", () => {
191
161
 
192
162
  await flush();
193
163
 
194
- // We expect: send("hello", continue:true) on turn1, then cancel(turn1).
195
164
  expect(sends).toEqual([
196
- {
197
- kind: "send",
198
- contextId: turn1,
199
- transcript: "hello",
200
- continue: true,
201
- language: "en",
202
- model_id: "sonic-2",
203
- },
165
+ expectedSend(turn1, "hello", true),
204
166
  { kind: "cancel", contextId: turn1 },
205
167
  ]);
206
168
 
@@ -209,7 +171,6 @@ describe("cartesia TTS adapter", () => {
209
171
  // keep passing the filter until the next turn actually begins.
210
172
  expect(session._currentContextId()).toBe(turn1);
211
173
 
212
- // A subsequent sendText mints a fresh context for turn2.
213
174
  session.sendText("again");
214
175
  const turn2 = session._currentContextId();
215
176
  expect(turn2).not.toBe(turn1);