@alexkroman1/aai 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.turbo/turbo-build.log +10 -10
  2. package/CHANGELOG.md +19 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/assemblyai-C969QGi4.js +35 -0
  5. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  6. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  7. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  8. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  9. package/dist/host/providers/tts/rime.d.ts +44 -0
  10. package/dist/host/runtime-barrel.d.ts +4 -2
  11. package/dist/host/runtime-barrel.js +1434 -1209
  12. package/dist/host/runtime.d.ts +2 -2
  13. package/dist/host/s2s.d.ts +16 -16
  14. package/dist/host/session-core.d.ts +37 -0
  15. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  16. package/dist/host/transports/s2s-transport.d.ts +19 -0
  17. package/dist/host/transports/types.d.ts +45 -0
  18. package/dist/host/ws-handler.d.ts +14 -10
  19. package/dist/sdk/_internal-types.d.ts +2 -0
  20. package/dist/sdk/manifest-barrel.js +1 -1
  21. package/dist/sdk/protocol.d.ts +6 -5
  22. package/dist/sdk/providers/llm-barrel.js +1 -1
  23. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  24. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  25. package/dist/sdk/providers/stt-barrel.js +2 -2
  26. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  27. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -2
  30. package/host/_pipeline-test-fakes.ts +6 -3
  31. package/host/_test-utils.ts +209 -128
  32. package/host/builtin-tools.ts +1 -0
  33. package/host/cleanup.test.ts +25 -298
  34. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  35. package/host/providers/resolve.ts +10 -2
  36. package/host/providers/stt/deepgram.test.ts +229 -0
  37. package/host/providers/stt/deepgram.ts +172 -0
  38. package/host/providers/tts/cartesia.ts +7 -3
  39. package/host/providers/tts/rime.test.ts +251 -0
  40. package/host/providers/tts/rime.ts +322 -0
  41. package/host/runtime-barrel.ts +4 -2
  42. package/host/runtime.test.ts +16 -47
  43. package/host/runtime.ts +131 -23
  44. package/host/s2s.test.ts +122 -131
  45. package/host/s2s.ts +44 -52
  46. package/host/session-core.test.ts +257 -0
  47. package/host/session-core.ts +262 -0
  48. package/host/to-vercel-tools.test.ts +9 -1
  49. package/host/transports/pipeline-transport.test.ts +653 -0
  50. package/host/transports/pipeline-transport.ts +532 -0
  51. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  52. package/host/transports/s2s-transport.test.ts +56 -0
  53. package/host/transports/s2s-transport.ts +116 -0
  54. package/host/transports/types.test.ts +22 -0
  55. package/host/transports/types.ts +51 -0
  56. package/host/ws-handler.test.ts +324 -242
  57. package/host/ws-handler.ts +56 -59
  58. package/package.json +2 -1
  59. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  60. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  61. package/sdk/_internal-types.ts +3 -0
  62. package/sdk/protocol-compat.test.ts +8 -0
  63. package/sdk/protocol.ts +6 -5
  64. package/sdk/providers/stt/deepgram.ts +43 -0
  65. package/sdk/providers/stt-barrel.ts +2 -0
  66. package/sdk/providers/tts/cartesia.ts +15 -5
  67. package/sdk/providers/tts/rime.ts +52 -0
  68. package/sdk/providers/tts-barrel.ts +2 -0
  69. package/sdk/schema-alignment.test.ts +18 -6
  70. package/dist/assemblyai-Cxg9eobY.js +0 -18
  71. package/dist/cartesia-DwDk2tEu.js +0 -10
  72. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  73. package/dist/host/pipeline-session.d.ts +0 -52
  74. package/dist/host/session-ctx.d.ts +0 -73
  75. package/dist/host/session.d.ts +0 -62
  76. package/host/pipeline-session-ctx.test.ts +0 -31
  77. package/host/pipeline-session-ctx.ts +0 -36
  78. package/host/pipeline-session.test.ts +0 -672
  79. package/host/pipeline-session.ts +0 -533
  80. package/host/s2s-fixtures.test.ts +0 -237
  81. package/host/session-ctx.test.ts +0 -387
  82. package/host/session-ctx.ts +0 -134
  83. package/host/session-fixture-replay.test.ts +0 -128
  84. package/host/session.test.ts +0 -634
  85. package/host/session.ts +0 -412
  86. /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
@@ -0,0 +1,229 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ /** Unit test for the Deepgram STT adapter (mocked SDK). */
3
+
4
+ import { describe, expect, test, vi } from "vitest";
5
+ import { flush } from "../../_test-utils.ts";
6
+ import { type DeepgramSession, openDeepgram } from "./deepgram.ts";
7
+
8
+ // ---------------------------------------------------------------------------
9
+ // Mock the `@deepgram/sdk` so no real sockets are opened.
10
+ //
11
+ // Each fake `V1Socket` keeps one listener per event (matching the real SDK's
12
+ // `on()` which replaces rather than appends) and exposes `_fire(event, data)`
13
+ // for tests to inject events. The adapter's `open()` returns a
14
+ // `DeepgramSession` with a `_connection` pointer (which in tests is the fake)
15
+ // giving the test a handle to `_fire`.
16
+ // ---------------------------------------------------------------------------
17
+
18
+ interface FakeSocket {
19
+ on(ev: string, fn: (...args: unknown[]) => void): void;
20
+ connect(): FakeSocket;
21
+ waitForOpen(): Promise<void>;
22
+ close(): void;
23
+ sendMedia(_data: ArrayBufferView): void;
24
+ _fire(ev: string, ...args: unknown[]): void;
25
+ }
26
+
27
+ vi.mock("@deepgram/sdk", () => {
28
+ const makeFakeSocket = (): FakeSocket => {
29
+ const listeners = new Map<string, (...args: unknown[]) => void>();
30
+ const fake: FakeSocket = {
31
+ on(ev, fn) {
32
+ // V1Socket replaces — not appends — the listener per event.
33
+ listeners.set(ev, fn);
34
+ },
35
+ connect() {
36
+ return fake;
37
+ },
38
+ async waitForOpen() {
39
+ // Immediately resolves in tests.
40
+ },
41
+ close() {
42
+ /* no-op */
43
+ },
44
+ sendMedia(_data: ArrayBufferView) {
45
+ /* no-op */
46
+ },
47
+ _fire(ev, ...args) {
48
+ const fn = listeners.get(ev);
49
+ if (fn) fn(...args);
50
+ },
51
+ };
52
+ return fake;
53
+ };
54
+
55
+ return {
56
+ DeepgramClient: class {
57
+ listen = {
58
+ v1: {
59
+ connect: (_args: unknown): Promise<FakeSocket> => Promise.resolve(makeFakeSocket()),
60
+ },
61
+ };
62
+ },
63
+ };
64
+ });
65
+
66
+ // ---------------------------------------------------------------------------
67
+ // Helpers
68
+ // ---------------------------------------------------------------------------
69
+
70
+ function makeResult(transcript: string, isFinal: boolean) {
71
+ return {
72
+ type: "Results" as const,
73
+ channel_index: [0],
74
+ duration: 1,
75
+ start: 0,
76
+ is_final: isFinal,
77
+ channel: { alternatives: [{ transcript, confidence: 0.9, words: [] }] },
78
+ metadata: { request_id: "mock" },
79
+ };
80
+ }
81
+
82
+ // ---------------------------------------------------------------------------
83
+ // Tests
84
+ // ---------------------------------------------------------------------------
85
+
86
+ describe("Deepgram STT adapter", () => {
87
+ test("openDeepgram({}) returns an opener with name 'deepgram'", () => {
88
+ const opener = openDeepgram({});
89
+ expect(opener.name).toBe("deepgram");
90
+ });
91
+
92
+ test("throws stt_auth_failed when API key is missing", async () => {
93
+ // Clear the env var for this test; restore it in `finally` (assigning `undefined` back would store the string "undefined").
94
+ const saved = process.env.DEEPGRAM_API_KEY;
95
+ delete process.env.DEEPGRAM_API_KEY;
96
+ try {
97
+ const opener = openDeepgram({});
98
+ await expect(
99
+ opener.open({ sampleRate: 16_000, apiKey: "", signal: new AbortController().signal }),
100
+ ).rejects.toMatchObject({ code: "stt_auth_failed" });
101
+ } finally {
102
+ if (saved === undefined) delete process.env.DEEPGRAM_API_KEY;
103
+ else process.env.DEEPGRAM_API_KEY = saved;
104
+ }
105
+ });
106
+
107
+ test("final transcript fires 'final' event with text", async () => {
108
+ const opener = openDeepgram({ model: "nova-3" });
109
+ const controller = new AbortController();
110
+ const session = (await opener.open({
111
+ sampleRate: 16_000,
112
+ apiKey: "test-key",
113
+ signal: controller.signal,
114
+ })) as DeepgramSession;
115
+
116
+ const finals: string[] = [];
117
+ session.on("final", (t) => finals.push(t));
118
+
119
+ const fake = session._connection as unknown as FakeSocket;
120
+ fake._fire("message", makeResult("hello world", true));
121
+
122
+ await flush();
123
+ expect(finals).toEqual(["hello world"]);
124
+
125
+ await session.close();
126
+ });
127
+
128
+ test("interim transcript fires 'partial' event with text", async () => {
129
+ const opener = openDeepgram({ model: "nova-3" });
130
+ const controller = new AbortController();
131
+ const session = (await opener.open({
132
+ sampleRate: 16_000,
133
+ apiKey: "test-key",
134
+ signal: controller.signal,
135
+ })) as DeepgramSession;
136
+
137
+ const partials: string[] = [];
138
+ session.on("partial", (t) => partials.push(t));
139
+
140
+ const fake = session._connection as unknown as FakeSocket;
141
+ fake._fire("message", makeResult("hel", false));
142
+ fake._fire("message", makeResult("hello", false));
143
+
144
+ await flush();
145
+ expect(partials).toEqual(["hel", "hello"]);
146
+
147
+ await session.close();
148
+ });
149
+
150
+ test("empty transcript is NOT emitted (neither partial nor final)", async () => {
151
+ const opener = openDeepgram({});
152
+ const controller = new AbortController();
153
+ const session = (await opener.open({
154
+ sampleRate: 16_000,
155
+ apiKey: "test-key",
156
+ signal: controller.signal,
157
+ })) as DeepgramSession;
158
+
159
+ const partials: string[] = [];
160
+ const finals: string[] = [];
161
+ session.on("partial", (t) => partials.push(t));
162
+ session.on("final", (t) => finals.push(t));
163
+
164
+ const fake = session._connection as unknown as FakeSocket;
165
+ // Fire results with empty transcript — neither should be emitted.
166
+ fake._fire("message", makeResult("", false));
167
+ fake._fire("message", makeResult("", true));
168
+
169
+ await flush();
170
+ expect(partials).toEqual([]);
171
+ expect(finals).toEqual([]);
172
+
173
+ await session.close();
174
+ });
175
+
176
+ test("close fires close() and subsequent events are ignored (no double-close crash)", async () => {
177
+ const opener = openDeepgram({});
178
+ const controller = new AbortController();
179
+ const session = (await opener.open({
180
+ sampleRate: 16_000,
181
+ apiKey: "test-key",
182
+ signal: controller.signal,
183
+ })) as DeepgramSession;
184
+
185
+ const finals: string[] = [];
186
+ session.on("final", (t) => finals.push(t));
187
+
188
+ await session.close();
189
+
190
+ // Subsequent close should not throw.
191
+ await session.close();
192
+
193
+ // Events after close should be dropped.
194
+ const fake = session._connection as unknown as FakeSocket;
195
+ fake._fire("message", makeResult("should be ignored", true));
196
+
197
+ await flush();
198
+ expect(finals).toEqual([]);
199
+ });
200
+
201
+ test("sendAudio(Int16Array) forwards PCM bytes to the connection", async () => {
202
+ const opener = openDeepgram({});
203
+ const controller = new AbortController();
204
+ const session = (await opener.open({
205
+ sampleRate: 16_000,
206
+ apiKey: "test-key",
207
+ signal: controller.signal,
208
+ })) as DeepgramSession;
209
+
210
+ const fake = session._connection as unknown as FakeSocket;
211
+ const sent: ArrayBufferView[] = [];
212
+ fake.sendMedia = (data: ArrayBufferView) => sent.push(data);
213
+
214
+ const pcm = new Int16Array([100, 200, 300]);
215
+ session.sendAudio(pcm);
216
+
217
+ expect(sent).toHaveLength(1);
218
+ // The sent buffer should contain the same bytes as the Int16Array.
219
+ const sentBytes = new Uint8Array(
220
+ (sent[0] as Uint8Array).buffer,
221
+ (sent[0] as Uint8Array).byteOffset,
222
+ (sent[0] as Uint8Array).byteLength,
223
+ );
224
+ const expectedBytes = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
225
+ expect(sentBytes).toEqual(expectedBytes);
226
+
227
+ await session.close();
228
+ });
229
+ });
@@ -0,0 +1,172 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ /**
3
+ * Deepgram Nova streaming STT opener (host-only).
4
+ *
5
+ * The user-facing descriptor factory (`deepgram(...)`) lives in
6
+ * `sdk/providers/stt/deepgram.ts`. This module is the host-side
7
+ * counterpart: it takes the descriptor options + an API key and
8
+ * returns an {@link SttOpener} that the pipeline session drives.
9
+ *
10
+ * Default model: `"nova-3"`. Any string is forwarded verbatim to the SDK.
11
+ *
12
+ * This adapter targets the Deepgram SDK v5 (`@deepgram/sdk@^5`). The v5
13
+ * streaming API is:
14
+ * `client.listen.v1.connect(args)` → `Promise<V1Socket>`
15
+ * followed by:
16
+ * `socket.connect()` + `socket.waitForOpen()` to establish the connection.
17
+ */
18
+
19
+ import { DeepgramClient, type listen } from "@deepgram/sdk";
20
+ import { createNanoEvents, type Emitter } from "nanoevents";
21
+ import type { DeepgramOptions } from "../../../sdk/providers/stt/deepgram.ts";
22
+ import {
23
+ makeSttError,
24
+ type SttEvents,
25
+ type SttOpener,
26
+ type SttOpenOptions,
27
+ type SttSession,
28
+ } from "../../../sdk/providers.ts";
29
+
30
+ // V1Socket type from the Deepgram SDK (accessed through the listen namespace).
31
+ type V1Socket = Awaited<ReturnType<InstanceType<typeof DeepgramClient>["listen"]["v1"]["connect"]>>;
32
+
33
+ /** Internal: SttSession with a test-only handle to the raw SDK socket. */
34
+ export interface DeepgramSession extends SttSession {
35
+ /** @internal Test-only: exposes the underlying SDK socket for fixture replay. */
36
+ readonly _connection: V1Socket;
37
+ }
38
+
39
+ type MessagePayload =
40
+ | listen.ListenV1Results
41
+ | listen.ListenV1Metadata
42
+ | listen.ListenV1UtteranceEnd
43
+ | listen.ListenV1SpeechStarted;
44
+
45
+ /**
46
+ * Translate one Deepgram message into an STT event. Only non-empty `Results`
47
+ * transcripts are forwarded: `final` when `is_final` is set, `partial` otherwise.
48
+ */
49
+ function handleMessage(data: MessagePayload, closed: boolean, emitter: Emitter<SttEvents>): void {
50
+ if (closed || data.type !== "Results") return;
51
+ const { channel, is_final: isFinal } = data as listen.ListenV1Results;
52
+ const transcript = channel?.alternatives?.[0]?.transcript ?? "";
53
+ if (transcript.length === 0) return;
54
+ if (isFinal) {
55
+ emitter.emit("final", transcript);
56
+ } else {
57
+ emitter.emit("partial", transcript);
58
+ }
59
+ }
60
+
61
+ /** Forward the socket's `message`, `error` and `close` events to the emitter; events seen after the session is closed are dropped. */
62
+ function wireSocketEvents(
63
+ connection: V1Socket,
64
+ emitter: Emitter<SttEvents>,
65
+ getIsClosed: () => boolean,
66
+ ): void {
67
+ connection.on("message", (payload: MessagePayload) => handleMessage(payload, getIsClosed(), emitter));
68
+ connection.on("error", (err: Error) => {
69
+ if (!getIsClosed()) {
70
+ emitter.emit("error", makeSttError("stt_stream_error", err?.message ?? String(err)));
71
+ }
72
+ });
73
+ connection.on("close", (event: { code?: number }) => {
74
+ if (getIsClosed()) return;
75
+ const { code } = event ?? {};
76
+ // Only a defined close code other than 1000 (normal closure) is reported as an error.
77
+ if (code === undefined || code === 1000) return;
78
+ emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
79
+ });
80
+ }
81
+
82
+ /** Tie teardown to `signal`: call `close` right away if it has already aborted, otherwise on its first `abort` event. */
83
+ function wireAbortSignal(signal: AbortSignal, close: () => Promise<void>): void {
84
+ const teardown = (): void => {
85
+ void close();
86
+ };
87
+ if (signal.aborted) teardown();
88
+ else signal.addEventListener("abort", teardown, { once: true });
89
+ }
90
+
91
+ /**
92
+ * Build an {@link SttOpener} from resolved Deepgram descriptor options.
93
+ * `open()` rejects with `stt_auth_failed` when no API key is available and with
94
+ * `stt_connect_failed` when the socket cannot be created or opened; a socket that
95
+ * fails to open is closed first so the failed attempt does not leak it.
96
+ */
97
+ export function openDeepgram(opts: DeepgramOptions = {}): SttOpener {
98
+ return {
99
+ name: "deepgram",
100
+ async open(openOpts: SttOpenOptions): Promise<SttSession> {
101
+ const apiKey = openOpts.apiKey || process.env.DEEPGRAM_API_KEY;
102
+ if (!apiKey) {
103
+ throw makeSttError(
104
+ "stt_auth_failed",
105
+ "Deepgram STT: missing API key. Set DEEPGRAM_API_KEY in the agent env.",
106
+ );
107
+ }
108
+ const client = new DeepgramClient({ apiKey });
109
+ let connection: V1Socket;
110
+ try {
111
+ connection = await client.listen.v1.connect({
112
+ model: opts.model ?? "nova-3",
113
+ language: opts.language ?? "en",
114
+ encoding: "linear16",
115
+ sample_rate: openOpts.sampleRate,
116
+ channels: 1,
117
+ interim_results: "true",
118
+ smart_format: "true",
119
+ endpointing: 300,
120
+ utterance_end_ms: "1000",
121
+ // Pass the API key explicitly as the Authorization header so the WebSocket authenticates without an env var fallback.
122
+ Authorization: apiKey,
123
+ });
124
+ } catch (cause) {
125
+ throw makeSttError(
126
+ "stt_connect_failed",
127
+ `Deepgram STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`,
128
+ );
129
+ }
130
+ const emitter: Emitter<SttEvents> = createNanoEvents<SttEvents>();
131
+ let closed = false;
132
+ wireSocketEvents(connection, emitter, () => closed);
133
+
134
+ // Idempotent teardown; defined before the handshake so a failed open can reuse it.
135
+ const close = async (): Promise<void> => {
136
+ if (closed) return;
137
+ closed = true;
138
+ try {
139
+ connection.close();
140
+ } catch {
141
+ // Swallow: the caller has already decided to tear down.
142
+ }
143
+ };
144
+
145
+ try {
146
+ // Start the WebSocket handshake and wait until the socket is open.
147
+ connection.connect();
148
+ await connection.waitForOpen();
149
+ } catch (cause) {
150
+ await close();
151
+ throw makeSttError(
152
+ "stt_connect_failed",
153
+ `Deepgram STT: WebSocket open failed: ${cause instanceof Error ? cause.message : String(cause)}`,
154
+ );
155
+ }
156
+
157
+ wireAbortSignal(openOpts.signal, close);
158
+ const session: DeepgramSession = {
159
+ sendAudio(pcm: Int16Array) {
160
+ if (closed) return;
161
+ connection.sendMedia(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
162
+ },
163
+ on(event, fn) {
164
+ return emitter.on(event, fn);
165
+ },
166
+ close,
167
+ _connection: connection,
168
+ };
169
+
170
+ return session;
171
+ },
172
+ };
173
+ }
@@ -24,7 +24,10 @@ import { randomUUID } from "node:crypto";
24
24
  import { Cartesia } from "@cartesia/cartesia-js";
25
25
  import type { TTSWS, TTSWSContext } from "@cartesia/cartesia-js/resources/tts";
26
26
  import { createNanoEvents, type Emitter } from "nanoevents";
27
- import type { CartesiaOptions } from "../../../sdk/providers/tts/cartesia.ts";
27
+ import {
28
+ CARTESIA_DEFAULT_VOICE,
29
+ type CartesiaOptions,
30
+ } from "../../../sdk/providers/tts/cartesia.ts";
28
31
  import {
29
32
  makeTtsError,
30
33
  type TtsEvents,
@@ -73,6 +76,7 @@ export function openCartesia(opts: CartesiaOptions): TtsOpener {
73
76
  const sampleRate = assertSupportedSampleRate(openOpts.sampleRate);
74
77
  const model = opts.model ?? "sonic-2";
75
78
  const language = opts.language ?? "en";
79
+ const voice = opts.voice ?? CARTESIA_DEFAULT_VOICE;
76
80
 
77
81
  const client = new Cartesia({ apiKey });
78
82
  let ws: TTSWS;
@@ -92,7 +96,7 @@ export function openCartesia(opts: CartesiaOptions): TtsOpener {
92
96
  const mintContext = (): TTSWSContext =>
93
97
  ws.context({
94
98
  model_id: model,
95
- voice: { mode: "id", id: opts.voice },
99
+ voice: { mode: "id", id: voice },
96
100
  output_format: {
97
101
  container: "raw",
98
102
  encoding: "pcm_s16le",
@@ -175,7 +179,7 @@ export function openCartesia(opts: CartesiaOptions): TtsOpener {
175
179
 
176
180
  const baseRequest = {
177
181
  model_id: model,
178
- voice: { mode: "id" as const, id: opts.voice },
182
+ voice: { mode: "id" as const, id: voice },
179
183
  output_format: {
180
184
  container: "raw" as const,
181
185
  encoding: "pcm_s16le" as const,
@@ -0,0 +1,251 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ /** Unit test for the Rime TTS adapter. Mocks the `ws` package. */
3
+
4
+ import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
5
+ import { openRime, type RimeSession } from "./rime.ts";
6
+
7
+ // ──────────────────────────────────────────────────────────────────────────────
8
+ // Fake WebSocket — hoisted so `vi.mock` factory can reference it
9
+ // ──────────────────────────────────────────────────────────────────────────────
10
+
11
+ type WsEvent = "open" | "message" | "error" | "close";
12
+ type WsListener = (...args: unknown[]) => void;
13
+
14
+ const { FakeWebSocket } = vi.hoisted(() => {
15
+ class FakeWebSocket {
16
+ static OPEN = 1;
17
+ static CLOSED = 3;
18
+
19
+ readyState = FakeWebSocket.OPEN;
20
+ sent: string[] = [];
21
+ private readonly listeners = new Map<string, WsListener[]>();
22
+
23
+ static instances: FakeWebSocket[] = [];
24
+
25
+ readonly url: string;
26
+
27
+ constructor(url: string, _opts?: unknown) {
28
+ this.url = url;
29
+ FakeWebSocket.instances.push(this);
30
+ // Simulate async open on next microtask (matches real ws behaviour).
31
+ queueMicrotask(() => this._fire("open"));
32
+ }
33
+
34
+ on(event: string, fn: WsListener) {
35
+ const arr = this.listeners.get(event) ?? [];
36
+ arr.push(fn);
37
+ this.listeners.set(event, arr);
38
+ }
39
+
40
+ once(event: string, fn: WsListener) {
41
+ const wrapper = (...args: unknown[]) => {
42
+ this.off(event, wrapper);
43
+ fn(...args);
44
+ };
45
+ this.on(event, wrapper);
46
+ }
47
+
48
+ removeListener(event: string, fn: WsListener) {
49
+ this.off(event, fn);
50
+ }
51
+
52
+ private off(event: string, fn: WsListener) {
53
+ const arr = this.listeners.get(event) ?? [];
54
+ this.listeners.set(
55
+ event,
56
+ arr.filter((l) => l !== fn),
57
+ );
58
+ }
59
+
60
+ send(data: string) {
61
+ this.sent.push(data);
62
+ }
63
+
64
+ close() {
65
+ this.readyState = FakeWebSocket.CLOSED;
66
+ this._fire("close");
67
+ }
68
+
69
+ /** Test helper: fire an event on this socket. */
70
+ _fire(event: WsEvent, ...args: unknown[]) {
71
+ for (const fn of this.listeners.get(event) ?? []) fn(...args);
72
+ }
73
+
74
+ /** Test helper: simulate a JSON message from the server. */
75
+ _msg(payload: unknown) {
76
+ this._fire("message", JSON.stringify(payload));
77
+ }
78
+ }
79
+
80
+ return { FakeWebSocket };
81
+ });
82
+
83
+ vi.mock("ws", () => ({
84
+ default: FakeWebSocket,
85
+ WebSocket: FakeWebSocket,
86
+ }));
87
+
88
+ // ──────────────────────────────────────────────────────────────────────────────
89
+ // Helpers
90
+ // ──────────────────────────────────────────────────────────────────────────────
91
+
92
+ beforeEach(() => {
93
+ FakeWebSocket.instances.length = 0;
94
+ vi.useFakeTimers();
95
+ });
96
+
97
+ afterEach(() => {
98
+ vi.useRealTimers();
99
+ });
100
+
101
+ async function openSession(apiKey = "test-key"): Promise<{
102
+ session: RimeSession;
103
+ ws: InstanceType<typeof FakeWebSocket>;
104
+ controller: AbortController;
105
+ }> {
106
+ const opener = openRime({ voice: "cove" });
107
+ const controller = new AbortController();
108
+
109
+ const openPromise = opener.open({
110
+ sampleRate: 16_000,
111
+ apiKey,
112
+ signal: controller.signal,
113
+ }) as Promise<RimeSession>;
114
+
115
+ // Let the microtask that fires FakeWebSocket "open" run.
116
+ await Promise.resolve();
117
+
118
+ const session = await openPromise;
119
+ // biome-ignore lint/style/noNonNullAssertion: at(-1) is always set after open() resolves
120
+ const ws = FakeWebSocket.instances.at(-1)!;
121
+ return { session, ws, controller };
122
+ }
123
+
124
+ // ──────────────────────────────────────────────────────────────────────────────
125
+ // Tests
126
+ // ──────────────────────────────────────────────────────────────────────────────
127
+
128
+ describe("rime TTS adapter", () => {
129
+ test("openRime returns an opener with name 'rime'", () => {
130
+ const opener = openRime({ voice: "cove" });
131
+ expect(opener.name).toBe("rime");
132
+ });
133
+
134
+ test("open() throws tts_auth_failed when API key is missing", async () => {
135
+ const opener = openRime({ voice: "cove" });
136
+ const controller = new AbortController();
137
+
138
+ const openPromise = opener.open({
139
+ sampleRate: 16_000,
140
+ apiKey: "",
141
+ signal: controller.signal,
142
+ });
143
+
144
+ await expect(openPromise).rejects.toMatchObject({ code: "tts_auth_failed" });
145
+ });
146
+
147
+ test("incoming chunk message emits audio as Int16Array", async () => {
148
+ const { session, ws } = await openSession();
149
+
150
+ const audioEvents: Int16Array[] = [];
151
+ session.on("audio", (pcm) => audioEvents.push(pcm));
152
+
153
+ // Encode 4 PCM16 samples (8 bytes) as base64.
154
+ const samples = new Int16Array([100, 200, 300, 400]);
155
+ const base64 = Buffer.from(samples.buffer).toString("base64");
156
+
157
+ ws._msg({ type: "chunk", data: base64, contextId: null });
158
+
159
+ expect(audioEvents.length).toBe(1);
160
+ const firstChunk = audioEvents[0];
161
+ expect(firstChunk).toBeInstanceOf(Int16Array);
162
+ // Each sample pair decodes correctly.
163
+ // biome-ignore lint/style/noNonNullAssertion: length was asserted to be 1 on the line above
164
+ const pcm = firstChunk!;
165
+ expect(pcm.length).toBe(4);
166
+ expect(pcm[0]).toBe(100);
167
+ expect(pcm[3]).toBe(400);
168
+ });
169
+
170
+ test("sendText forwards the text as a JSON {text} frame", async () => {
171
+ const { session, ws } = await openSession();
172
+
173
+ session.sendText("Hello, world!");
174
+
175
+ expect(ws.sent).toContain(JSON.stringify({ text: "Hello, world!" }));
176
+ });
177
+
178
+ test("flush() sends a trailing '.' and emits done after quiescence post-audio", async () => {
179
+ const { session, ws } = await openSession();
180
+
181
+ const doneEvents: number[] = [];
182
+ session.on("done", () => doneEvents.push(Date.now()));
183
+
184
+ session.sendText("Hi there");
185
+ session.flush();
186
+
187
+ // Trailing punctuation forces Rime to synthesize the buffer without
188
+ // closing the WS (which `eos` would do).
189
+ expect(ws.sent).toContain(JSON.stringify({ text: "." }));
190
+
191
+ // First-audio timer is 5 s — short window must not fire `done` yet.
192
+ vi.advanceTimersByTime(500);
193
+ expect(doneEvents.length).toBe(0);
194
+
195
+ // First chunk arrives → switch to short quiescence window.
196
+ const samples = new Int16Array([100, 200, 300, 400]);
197
+ ws._msg({
198
+ type: "chunk",
199
+ data: Buffer.from(samples.buffer).toString("base64"),
200
+ contextId: null,
201
+ });
202
+
203
+ vi.advanceTimersByTime(499);
204
+ expect(doneEvents.length).toBe(0);
205
+ vi.advanceTimersByTime(1);
206
+ expect(doneEvents.length).toBe(1);
207
+ });
208
+
209
+ test("flush() falls back to first-audio timeout when no chunk arrives", async () => {
210
+ const { session } = await openSession();
211
+
212
+ const doneEvents: number[] = [];
213
+ session.on("done", () => doneEvents.push(Date.now()));
214
+
215
+ session.sendText("Hi there");
216
+ session.flush();
217
+
218
+ // No chunk arrives — done must wait the full FIRST_AUDIO_TIMEOUT_MS (5 s).
219
+ vi.advanceTimersByTime(4999);
220
+ expect(doneEvents.length).toBe(0);
221
+ vi.advanceTimersByTime(1);
222
+ expect(doneEvents.length).toBe(1);
223
+ });
224
+
225
+ test("cancel() sends clear operation and emits done synchronously", async () => {
226
+ const { session, ws } = await openSession();
227
+
228
+ const doneEvents: number[] = [];
229
+ session.on("done", () => doneEvents.push(Date.now()));
230
+
231
+ session.sendText("Hello");
232
+ // cancel() must emit `done` synchronously — barge-in cannot be deferred.
233
+ session.cancel();
234
+
235
+ expect(ws.sent).toContain(JSON.stringify({ operation: "clear" }));
236
+ // done was emitted synchronously (before any await / timer).
237
+ expect(doneEvents.length).toBe(1);
238
+ });
239
+
240
+ test("close() closes the WebSocket and is idempotent", async () => {
241
+ const { session, ws } = await openSession();
242
+
243
+ expect(ws.readyState).toBe(FakeWebSocket.OPEN);
244
+
245
+ await session.close();
246
+ expect(ws.readyState).toBe(FakeWebSocket.CLOSED);
247
+
248
+ // Second close should not throw.
249
+ await expect(session.close()).resolves.toBeUndefined();
250
+ });
251
+ });