@alexkroman1/aai 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +9 -9
- package/CHANGELOG.md +13 -0
- package/dist/assemblyai-C969QGi4.js +35 -0
- package/dist/cartesia-BfQPOQ7Y.js +37 -0
- package/dist/host/_pipeline-test-fakes.d.ts +3 -1
- package/dist/host/providers/stt/deepgram.d.ts +28 -0
- package/dist/host/providers/tts/cartesia.d.ts +1 -1
- package/dist/host/providers/tts/rime.d.ts +44 -0
- package/dist/host/runtime-barrel.d.ts +4 -2
- package/dist/host/runtime-barrel.js +1432 -1208
- package/dist/host/runtime.d.ts +2 -2
- package/dist/host/s2s.d.ts +16 -16
- package/dist/host/session-core.d.ts +37 -0
- package/dist/host/transports/pipeline-transport.d.ts +48 -0
- package/dist/host/transports/s2s-transport.d.ts +19 -0
- package/dist/host/transports/types.d.ts +45 -0
- package/dist/host/ws-handler.d.ts +14 -10
- package/dist/sdk/protocol.d.ts +6 -5
- package/dist/sdk/providers/llm-barrel.js +1 -1
- package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
- package/dist/sdk/providers/stt-barrel.d.ts +1 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
- package/dist/sdk/providers/tts/rime.d.ts +42 -0
- package/dist/sdk/providers/tts-barrel.d.ts +1 -0
- package/dist/sdk/providers/tts-barrel.js +2 -2
- package/host/_pipeline-test-fakes.ts +6 -3
- package/host/_test-utils.ts +209 -128
- package/host/cleanup.test.ts +25 -298
- package/host/integration/pipeline-reference.integration.test.ts +30 -35
- package/host/providers/resolve.ts +10 -2
- package/host/providers/stt/deepgram.test.ts +229 -0
- package/host/providers/stt/deepgram.ts +172 -0
- package/host/providers/tts/cartesia.ts +7 -3
- package/host/providers/tts/rime.test.ts +251 -0
- package/host/providers/tts/rime.ts +322 -0
- package/host/runtime-barrel.ts +4 -2
- package/host/runtime.test.ts +13 -46
- package/host/runtime.ts +131 -23
- package/host/s2s.test.ts +122 -131
- package/host/s2s.ts +44 -52
- package/host/session-core.test.ts +257 -0
- package/host/session-core.ts +262 -0
- package/host/transports/pipeline-transport.test.ts +651 -0
- package/host/transports/pipeline-transport.ts +532 -0
- package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
- package/host/transports/s2s-transport.test.ts +56 -0
- package/host/transports/s2s-transport.ts +116 -0
- package/host/transports/types.test.ts +22 -0
- package/host/transports/types.ts +51 -0
- package/host/ws-handler.test.ts +324 -242
- package/host/ws-handler.ts +56 -59
- package/package.json +2 -1
- package/sdk/__snapshots__/exports.test.ts.snap +3 -3
- package/sdk/protocol-compat.test.ts +8 -0
- package/sdk/protocol.ts +6 -5
- package/sdk/providers/stt/deepgram.ts +43 -0
- package/sdk/providers/stt-barrel.ts +2 -0
- package/sdk/providers/tts/cartesia.ts +15 -5
- package/sdk/providers/tts/rime.ts +52 -0
- package/sdk/providers/tts-barrel.ts +2 -0
- package/dist/assemblyai-Cxg9eobY.js +0 -18
- package/dist/cartesia-DwDk2tEu.js +0 -10
- package/dist/host/pipeline-session-ctx.d.ts +0 -24
- package/dist/host/pipeline-session.d.ts +0 -52
- package/dist/host/session-ctx.d.ts +0 -73
- package/dist/host/session.d.ts +0 -62
- package/host/pipeline-session-ctx.test.ts +0 -31
- package/host/pipeline-session-ctx.ts +0 -36
- package/host/pipeline-session.test.ts +0 -672
- package/host/pipeline-session.ts +0 -533
- package/host/s2s-fixtures.test.ts +0 -237
- package/host/session-ctx.test.ts +0 -387
- package/host/session-ctx.ts +0 -134
- package/host/session-fixture-replay.test.ts +0 -128
- package/host/session.test.ts +0 -634
- package/host/session.ts +0 -412
- /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
/** Unit test for the Deepgram STT adapter (mocked SDK). */
|
|
3
|
+
|
|
4
|
+
import { describe, expect, test, vi } from "vitest";
|
|
5
|
+
import { flush } from "../../_test-utils.ts";
|
|
6
|
+
import { type DeepgramSession, openDeepgram } from "./deepgram.ts";
|
|
7
|
+
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Mock the `@deepgram/sdk` so no real sockets are opened.
|
|
10
|
+
//
|
|
11
|
+
// Each fake `V1Socket` keeps one listener per event (matching the real SDK's
|
|
12
|
+
// `on()` which replaces rather than appends) and exposes `_fire(event, data)`
|
|
13
|
+
// for tests to inject events. The adapter's `open()` returns a
|
|
14
|
+
// `DeepgramSession` with a `_connection` pointer (which in tests is the fake)
|
|
15
|
+
// giving the test a handle to `_fire`.
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
/**
 * Shape of the in-memory socket the mocked `@deepgram/sdk` hands to the
 * adapter: the socket members the adapter calls, plus a `_fire` hook the
 * tests use to inject events.
 */
interface FakeSocket {
  /** Register `handler` for `event`. */
  on(event: string, handler: (...args: unknown[]) => void): void;
  /** Start the (fake) connection; returns the socket itself. */
  connect(): FakeSocket;
  /** Resolves once the (fake) connection counts as open. */
  waitForOpen(): Promise<void>;
  close(): void;
  sendMedia(_payload: ArrayBufferView): void;
  /** Test hook: invoke the handler registered for `event` with `payload`. */
  _fire(event: string, ...payload: unknown[]): void;
}
|
|
26
|
+
|
|
27
|
+
vi.mock("@deepgram/sdk", () => {
  /**
   * Build one fake socket. It stores a single handler per event name, so a
   * second `on()` for the same event overwrites the first.
   */
  function createSocket(): FakeSocket {
    const handlers = new Map<string, (...args: unknown[]) => void>();

    const socket: FakeSocket = {
      on: (event, handler) => {
        // Overwrite rather than append: one handler per event.
        handlers.set(event, handler);
      },
      connect: () => socket,
      waitForOpen: async () => {
        // Nothing to wait for in tests.
      },
      close: () => {
        /* no-op */
      },
      sendMedia: (_data: ArrayBufferView) => {
        /* no-op */
      },
      _fire: (event, ...payload) => {
        const handler = handlers.get(event);
        if (handler) handler(...payload);
      },
    };

    return socket;
  }

  /** Stand-in client exposing only `listen.v1.connect`, which yields a fresh fake socket. */
  class DeepgramClient {
    listen = {
      v1: {
        connect: (_args: unknown): Promise<FakeSocket> => Promise.resolve(createSocket()),
      },
    };
  }

  return { DeepgramClient };
});
|
|
65
|
+
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
// Helpers
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
|
|
70
|
+
/**
 * Build a `Results`-typed message object carrying a single alternative.
 *
 * @param transcript Text placed in the first (only) alternative.
 * @param isFinal Value for the message's `is_final` flag.
 */
function makeResult(transcript: string, isFinal: boolean) {
  const alternative = { transcript, confidence: 0.9, words: [] };
  const channel = { alternatives: [alternative] };
  return {
    type: "Results" as const,
    channel_index: [0],
    duration: 1,
    start: 0,
    is_final: isFinal,
    channel,
    metadata: { request_id: "mock" },
  };
}
|
|
81
|
+
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
// Tests
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
describe("Deepgram STT adapter", () => {
  /**
   * Open a session against the mocked SDK.
   *
   * @param opts Descriptor options forwarded to `openDeepgram`.
   * @returns The session plus the fake socket behind it, so a test can
   *   inject events with `fake._fire(...)`.
   */
  async function openTestSession(
    opts: Parameters<typeof openDeepgram>[0] = {},
  ): Promise<{ session: DeepgramSession; fake: FakeSocket }> {
    const controller = new AbortController();
    const session = (await openDeepgram(opts).open({
      sampleRate: 16_000,
      apiKey: "test-key",
      signal: controller.signal,
    })) as DeepgramSession;
    // Under the `@deepgram/sdk` mock, `_connection` is the fake socket.
    return { session, fake: session._connection as unknown as FakeSocket };
  }

  test("openDeepgram({}) returns an opener with name 'deepgram'", () => {
    const opener = openDeepgram({});
    expect(opener.name).toBe("deepgram");
  });

  test("throws stt_auth_failed when API key is missing", async () => {
    // Clear the env var for this test so the adapter has no fallback key.
    const saved = process.env.DEEPGRAM_API_KEY;
    delete process.env.DEEPGRAM_API_KEY;

    try {
      const opener = openDeepgram({});
      const controller = new AbortController();

      await expect(
        opener.open({ sampleRate: 16_000, apiKey: "", signal: controller.signal }),
      ).rejects.toMatchObject({ code: "stt_auth_failed" });
    } finally {
      // Restore even when the assertion fails. Assigning `undefined` to a
      // `process.env` key stores the string "undefined", so an originally
      // unset variable must be deleted rather than assigned.
      if (saved === undefined) {
        delete process.env.DEEPGRAM_API_KEY;
      } else {
        process.env.DEEPGRAM_API_KEY = saved;
      }
    }
  });

  test("final transcript fires 'final' event with text", async () => {
    const { session, fake } = await openTestSession({ model: "nova-3" });

    const finals: string[] = [];
    session.on("final", (t) => finals.push(t));

    fake._fire("message", makeResult("hello world", true));

    await flush();
    expect(finals).toEqual(["hello world"]);

    await session.close();
  });

  test("interim transcript fires 'partial' event with text", async () => {
    const { session, fake } = await openTestSession({ model: "nova-3" });

    const partials: string[] = [];
    session.on("partial", (t) => partials.push(t));

    fake._fire("message", makeResult("hel", false));
    fake._fire("message", makeResult("hello", false));

    await flush();
    expect(partials).toEqual(["hel", "hello"]);

    await session.close();
  });

  test("empty transcript is NOT emitted (neither partial nor final)", async () => {
    const { session, fake } = await openTestSession();

    const partials: string[] = [];
    const finals: string[] = [];
    session.on("partial", (t) => partials.push(t));
    session.on("final", (t) => finals.push(t));

    // Fire results with empty transcript — neither should be emitted.
    fake._fire("message", makeResult("", false));
    fake._fire("message", makeResult("", true));

    await flush();
    expect(partials).toEqual([]);
    expect(finals).toEqual([]);

    await session.close();
  });

  test("close fires close() and subsequent events are ignored (no double-close crash)", async () => {
    const { session, fake } = await openTestSession();

    const finals: string[] = [];
    session.on("final", (t) => finals.push(t));

    await session.close();

    // Subsequent close should not throw.
    await session.close();

    // Events after close should be dropped.
    fake._fire("message", makeResult("should be ignored", true));

    await flush();
    expect(finals).toEqual([]);
  });

  test("sendAudio(Int16Array) forwards PCM bytes to the connection", async () => {
    const { session, fake } = await openTestSession();

    // Capture what the adapter hands to the socket.
    const sent: ArrayBufferView[] = [];
    fake.sendMedia = (data: ArrayBufferView) => sent.push(data);

    const pcm = new Int16Array([100, 200, 300]);
    session.sendAudio(pcm);

    expect(sent).toHaveLength(1);
    // The sent buffer should contain the same bytes as the Int16Array.
    const frame = sent[0] as Uint8Array;
    const sentBytes = new Uint8Array(frame.buffer, frame.byteOffset, frame.byteLength);
    const expectedBytes = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
    expect(sentBytes).toEqual(expectedBytes);

    await session.close();
  });
});
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Deepgram Nova streaming STT opener (host-only).
|
|
4
|
+
*
|
|
5
|
+
* The user-facing descriptor factory (`deepgram(...)`) lives in
|
|
6
|
+
* `sdk/providers/stt/deepgram.ts`. This module is the host-side
|
|
7
|
+
* counterpart: it takes the descriptor options + an API key and
|
|
8
|
+
* returns an {@link SttOpener} that the pipeline session drives.
|
|
9
|
+
*
|
|
10
|
+
* Default model: `"nova-3"`. Any string is forwarded verbatim to the SDK.
|
|
11
|
+
*
|
|
12
|
+
* This adapter targets the Deepgram SDK v5 (`@deepgram/sdk@^5`). The v5
|
|
13
|
+
* streaming API is:
|
|
14
|
+
* `client.listen.v1.connect(args)` → `Promise<V1Socket>`
|
|
15
|
+
* followed by:
|
|
16
|
+
* `socket.connect()` + `socket.waitForOpen()` to establish the connection.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { DeepgramClient, type listen } from "@deepgram/sdk";
|
|
20
|
+
import { createNanoEvents, type Emitter } from "nanoevents";
|
|
21
|
+
import type { DeepgramOptions } from "../../../sdk/providers/stt/deepgram.ts";
|
|
22
|
+
import {
|
|
23
|
+
makeSttError,
|
|
24
|
+
type SttEvents,
|
|
25
|
+
type SttOpener,
|
|
26
|
+
type SttOpenOptions,
|
|
27
|
+
type SttSession,
|
|
28
|
+
} from "../../../sdk/providers.ts";
|
|
29
|
+
|
|
30
|
+
// V1Socket type from the Deepgram SDK (accessed through the listen namespace).
// Derived from the resolved return type of `client.listen.v1.connect(...)`
// instead of being imported by name.
type V1Socket = Awaited<ReturnType<InstanceType<typeof DeepgramClient>["listen"]["v1"]["connect"]>>;
|
|
32
|
+
|
|
33
|
+
/**
 * Internal: SttSession with a test-only handle to the raw SDK socket.
 *
 * `openDeepgram(...).open()` is typed as returning a plain `SttSession`;
 * callers that need `_connection` cast the result to this interface.
 */
export interface DeepgramSession extends SttSession {
  /** @internal Test-only: exposes the underlying SDK socket for fixture replay. */
  readonly _connection: V1Socket;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Union of the payload shapes the adapter's `"message"` handler is typed to
 * receive. Only the `Results` variant is acted on; the others are ignored by
 * `handleMessage`.
 */
type MessagePayload =
  | listen.ListenV1Results
  | listen.ListenV1Metadata
  | listen.ListenV1UtteranceEnd
  | listen.ListenV1SpeechStarted;
|
|
44
|
+
|
|
45
|
+
/**
 * Translate one Deepgram socket message into an STT event.
 *
 * Only `Results` messages are considered. The transcript of the first
 * alternative is emitted as `final` when `is_final` is set and as `partial`
 * otherwise. Messages with an empty or missing transcript emit nothing.
 *
 * @param data Payload received on the socket's `"message"` event.
 * @param closed Whether the session is already closed; when true nothing is emitted.
 * @param emitter Target emitter for the `partial` / `final` events.
 */
function handleMessage(data: MessagePayload, closed: boolean, emitter: Emitter<SttEvents>): void {
  if (closed || data.type !== "Results") return;

  const { channel, is_final: isFinal } = data as listen.ListenV1Results;
  const transcript = channel?.alternatives?.[0]?.transcript ?? "";
  if (transcript.length === 0) return;

  if (isFinal) {
    emitter.emit("final", transcript);
  } else {
    emitter.emit("partial", transcript);
  }
}
|
|
60
|
+
|
|
61
|
+
/**
 * Subscribe to the socket's `message`, `error` and `close` events and forward
 * them to the nanoevents emitter.
 *
 * - `message` is delegated to `handleMessage`.
 * - `error` becomes an `stt_stream_error` carrying the error's message.
 * - `close` becomes an `stt_stream_error` only when a close code is present
 *   and differs from 1000 (normal closure).
 *
 * Once `getIsClosed()` reports true, nothing is emitted.
 *
 * @param connection Socket to listen on.
 * @param emitter Emitter that receives the translated events.
 * @param getIsClosed Returns the session's current closed flag; read on every event.
 */
function wireSocketEvents(
  connection: V1Socket,
  emitter: Emitter<SttEvents>,
  getIsClosed: () => boolean,
): void {
  const reportStreamError = (message: string): void => {
    emitter.emit("error", makeSttError("stt_stream_error", message));
  };

  connection.on("message", (data: MessagePayload) => {
    handleMessage(data, getIsClosed(), emitter);
  });

  connection.on("error", (err: Error) => {
    if (!getIsClosed()) reportStreamError(err?.message ?? String(err));
  });

  connection.on("close", (event: { code?: number }) => {
    if (getIsClosed()) return;
    const code = event?.code;
    // A missing code or 1000 (normal closure) is not treated as an error.
    if (code === undefined || code === 1000) return;
    reportStreamError(`socket closed ${code}`);
  });
}
|
|
81
|
+
|
|
82
|
+
/**
 * Tie a session's lifetime to an AbortSignal.
 *
 * If the signal is already aborted, `close` is invoked right away; otherwise
 * it is invoked once, when the signal's `abort` event fires. The promise
 * returned by `close` is intentionally not awaited.
 *
 * @param signal Signal to observe.
 * @param close Teardown function to run on abort.
 */
function wireAbortSignal(signal: AbortSignal, close: () => Promise<void>): void {
  const onAbort = (): void => {
    void close();
  };

  if (!signal.aborted) {
    signal.addEventListener("abort", onAbort, { once: true });
    return;
  }
  onAbort();
}
|
|
90
|
+
|
|
91
|
+
/**
 * Build an {@link SttOpener} from resolved Deepgram descriptor options.
 *
 * The returned opener's `open()`:
 * 1. resolves the API key (`openOpts.apiKey`, else `DEEPGRAM_API_KEY`),
 * 2. creates the SDK socket via `client.listen.v1.connect(...)`,
 * 3. wires socket events onto a nanoevents emitter,
 * 4. connects and waits for the socket to open,
 * 5. returns a session whose `close()` is idempotent and is also triggered
 *    by `openOpts.signal` aborting.
 *
 * @param opts Descriptor options; `model` defaults to `"nova-3"` and
 *   `language` to `"en"`.
 * @returns An opener named `"deepgram"`.
 * @throws `stt_auth_failed` (from `open()`) when no API key is available.
 * @throws `stt_connect_failed` (from `open()`) when creating or opening the
 *   socket fails.
 */
export function openDeepgram(opts: DeepgramOptions = {}): SttOpener {
  return {
    name: "deepgram",
    async open(openOpts: SttOpenOptions): Promise<SttSession> {
      // `||` (not `??`) on purpose: an empty-string key falls back to the env var.
      const apiKey = openOpts.apiKey || process.env.DEEPGRAM_API_KEY;
      if (!apiKey) {
        throw makeSttError(
          "stt_auth_failed",
          "Deepgram STT: missing API key. Set DEEPGRAM_API_KEY in the agent env.",
        );
      }

      const client = new DeepgramClient({ apiKey });
      let connection: V1Socket;
      try {
        connection = await client.listen.v1.connect({
          model: opts.model ?? "nova-3",
          language: opts.language ?? "en",
          encoding: "linear16",
          sample_rate: openOpts.sampleRate,
          channels: 1,
          interim_results: "true",
          smart_format: "true",
          endpointing: 300,
          utterance_end_ms: "1000",
          // Pass the API key explicitly as the Authorization header so the
          // WebSocket connection authenticates even without env var fallback.
          Authorization: apiKey,
        });
      } catch (cause) {
        throw makeSttError(
          "stt_connect_failed",
          `Deepgram STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`,
        );
      }

      const emitter: Emitter<SttEvents> = createNanoEvents<SttEvents>();
      let closed = false;

      // Handlers read `closed` lazily so they go quiet as soon as close() runs.
      wireSocketEvents(connection, emitter, () => closed);

      // Actually open the WebSocket connection (registers internal handlers
      // and initiates the TCP/TLS handshake).
      connection.connect();
      try {
        await connection.waitForOpen();
      } catch (cause) {
        // The caller never receives a session for this socket, so nobody else
        // can close it. Tear it down here, best-effort, before reporting.
        closed = true;
        try {
          connection.close();
        } catch {
          // Swallow: the open failure below is the error worth surfacing.
        }
        throw makeSttError(
          "stt_connect_failed",
          `Deepgram STT: WebSocket open failed: ${cause instanceof Error ? cause.message : String(cause)}`,
        );
      }

      /** Idempotent teardown: flips the closed flag, then closes the socket. */
      const close = async (): Promise<void> => {
        if (closed) return;
        closed = true;
        try {
          connection.close();
        } catch {
          // Swallow: the caller has already decided to tear down.
        }
      };

      wireAbortSignal(openOpts.signal, close);

      const session: DeepgramSession = {
        sendAudio(pcm: Int16Array) {
          if (closed) return;
          // Send a byte view over exactly the samples in `pcm` (no copy).
          connection.sendMedia(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
        },
        on(event, fn) {
          return emitter.on(event, fn);
        },
        close,
        _connection: connection,
      };

      return session;
    },
  };
}
|
|
@@ -24,7 +24,10 @@ import { randomUUID } from "node:crypto";
|
|
|
24
24
|
import { Cartesia } from "@cartesia/cartesia-js";
|
|
25
25
|
import type { TTSWS, TTSWSContext } from "@cartesia/cartesia-js/resources/tts";
|
|
26
26
|
import { createNanoEvents, type Emitter } from "nanoevents";
|
|
27
|
-
import
|
|
27
|
+
import {
|
|
28
|
+
CARTESIA_DEFAULT_VOICE,
|
|
29
|
+
type CartesiaOptions,
|
|
30
|
+
} from "../../../sdk/providers/tts/cartesia.ts";
|
|
28
31
|
import {
|
|
29
32
|
makeTtsError,
|
|
30
33
|
type TtsEvents,
|
|
@@ -73,6 +76,7 @@ export function openCartesia(opts: CartesiaOptions): TtsOpener {
|
|
|
73
76
|
const sampleRate = assertSupportedSampleRate(openOpts.sampleRate);
|
|
74
77
|
const model = opts.model ?? "sonic-2";
|
|
75
78
|
const language = opts.language ?? "en";
|
|
79
|
+
const voice = opts.voice ?? CARTESIA_DEFAULT_VOICE;
|
|
76
80
|
|
|
77
81
|
const client = new Cartesia({ apiKey });
|
|
78
82
|
let ws: TTSWS;
|
|
@@ -92,7 +96,7 @@ export function openCartesia(opts: CartesiaOptions): TtsOpener {
|
|
|
92
96
|
const mintContext = (): TTSWSContext =>
|
|
93
97
|
ws.context({
|
|
94
98
|
model_id: model,
|
|
95
|
-
voice: { mode: "id", id:
|
|
99
|
+
voice: { mode: "id", id: voice },
|
|
96
100
|
output_format: {
|
|
97
101
|
container: "raw",
|
|
98
102
|
encoding: "pcm_s16le",
|
|
@@ -175,7 +179,7 @@ export function openCartesia(opts: CartesiaOptions): TtsOpener {
|
|
|
175
179
|
|
|
176
180
|
const baseRequest = {
|
|
177
181
|
model_id: model,
|
|
178
|
-
voice: { mode: "id" as const, id:
|
|
182
|
+
voice: { mode: "id" as const, id: voice },
|
|
179
183
|
output_format: {
|
|
180
184
|
container: "raw" as const,
|
|
181
185
|
encoding: "pcm_s16le" as const,
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
/** Unit test for the Rime TTS adapter. Mocks the `ws` package. */
|
|
3
|
+
|
|
4
|
+
import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
|
|
5
|
+
import { openRime, type RimeSession } from "./rime.ts";
|
|
6
|
+
|
|
7
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
8
|
+
// Fake WebSocket — hoisted so `vi.mock` factory can reference it
|
|
9
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
/** Event names the fake socket's `_fire` test helper accepts. */
type WsEvent = "open" | "message" | "error" | "close";
/** Listener signature stored by the fake socket; arguments are untyped. */
type WsListener = (...args: unknown[]) => void;
|
|
13
|
+
|
|
14
|
+
const { FakeWebSocket } = vi.hoisted(() => {
  /**
   * In-memory substitute for the `ws` WebSocket class. It records every
   * constructed instance and every sent frame, keeps a list of listeners per
   * event, and fires "open" on the microtask after construction.
   */
  class FakeWebSocket {
    static OPEN = 1;
    static CLOSED = 3;

    /** Every socket constructed so far, in creation order. */
    static instances: FakeWebSocket[] = [];

    readyState = FakeWebSocket.OPEN;
    /** Frames passed to `send()`, in order. */
    sent: string[] = [];
    readonly url: string;

    private readonly listeners = new Map<string, WsListener[]>();

    constructor(url: string, _opts?: unknown) {
      this.url = url;
      FakeWebSocket.instances.push(this);
      // Simulate async open on next microtask (matches real ws behaviour).
      queueMicrotask(() => {
        this._fire("open");
      });
    }

    on(event: string, fn: WsListener) {
      const registered = this.listeners.get(event);
      if (registered) {
        registered.push(fn);
      } else {
        this.listeners.set(event, [fn]);
      }
    }

    once(event: string, fn: WsListener) {
      // Unregister first, then run the wrapped listener exactly once.
      const oneShot: WsListener = (...args) => {
        this.off(event, oneShot);
        fn(...args);
      };
      this.on(event, oneShot);
    }

    removeListener(event: string, fn: WsListener) {
      this.off(event, fn);
    }

    private off(event: string, fn: WsListener) {
      const remaining = (this.listeners.get(event) ?? []).filter((candidate) => candidate !== fn);
      this.listeners.set(event, remaining);
    }

    send(data: string) {
      this.sent.push(data);
    }

    close() {
      this.readyState = FakeWebSocket.CLOSED;
      this._fire("close");
    }

    /** Test helper: fire an event on this socket. */
    _fire(event: WsEvent, ...args: unknown[]) {
      const targets = this.listeners.get(event) ?? [];
      for (const listener of targets) {
        listener(...args);
      }
    }

    /** Test helper: simulate a JSON message from the server. */
    _msg(payload: unknown) {
      const frame = JSON.stringify(payload);
      this._fire("message", frame);
    }
  }

  return { FakeWebSocket };
});
|
|
82
|
+
|
|
83
|
+
// Replace the `ws` module with the fake: both the default export and the
// named `WebSocket` export resolve to `FakeWebSocket`.
vi.mock("ws", () => ({
  default: FakeWebSocket,
  WebSocket: FakeWebSocket,
}));
|
|
87
|
+
|
|
88
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
89
|
+
// Helpers
|
|
90
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
// Each test starts with an empty socket registry and fake timers, so the
// timer-driven assertions can be advanced with `vi.advanceTimersByTime`.
beforeEach(() => {
  FakeWebSocket.instances.length = 0;
  vi.useFakeTimers();
});

// Put real timers back after each test.
afterEach(() => {
  vi.useRealTimers();
});
|
|
100
|
+
|
|
101
|
+
/**
 * Open a Rime session (voice "cove", 16 kHz) against the fake WebSocket.
 *
 * @param apiKey API key passed to `open()`.
 * @returns The opened session, the most recently constructed fake socket,
 *   and the AbortController whose signal was passed to `open()`.
 */
async function openSession(apiKey = "test-key"): Promise<{
  session: RimeSession;
  ws: InstanceType<typeof FakeWebSocket>;
  controller: AbortController;
}> {
  const controller = new AbortController();
  const pending = openRime({ voice: "cove" }).open({
    sampleRate: 16_000,
    apiKey,
    signal: controller.signal,
  }) as Promise<RimeSession>;

  // Let the microtask that fires FakeWebSocket "open" run.
  await Promise.resolve();
  const session = await pending;

  // biome-ignore lint/style/noNonNullAssertion: at(-1) is always set after open() resolves
  const ws = FakeWebSocket.instances.at(-1)!;
  return { session, ws, controller };
}
|
|
123
|
+
|
|
124
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
125
|
+
// Tests
|
|
126
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
describe("rime TTS adapter", () => {
  /** Build a server "chunk" frame whose `data` is the base64 of the given PCM16 samples. */
  const chunkFrame = (samples: Int16Array) => ({
    type: "chunk",
    data: Buffer.from(samples.buffer).toString("base64"),
    contextId: null,
  });

  test("openRime returns an opener with name 'rime'", () => {
    expect(openRime({ voice: "cove" }).name).toBe("rime");
  });

  test("open() throws tts_auth_failed when API key is missing", async () => {
    const controller = new AbortController();
    const attempt = openRime({ voice: "cove" }).open({
      sampleRate: 16_000,
      apiKey: "",
      signal: controller.signal,
    });

    await expect(attempt).rejects.toMatchObject({ code: "tts_auth_failed" });
  });

  test("incoming chunk message emits audio as Int16Array", async () => {
    const { session, ws } = await openSession();

    const received: Int16Array[] = [];
    session.on("audio", (pcm) => received.push(pcm));

    // Four PCM16 samples (8 bytes), delivered base64-encoded.
    ws._msg(chunkFrame(new Int16Array([100, 200, 300, 400])));

    expect(received).toHaveLength(1);
    const [pcm] = received;
    expect(pcm).toBeInstanceOf(Int16Array);
    // Samples survive the base64 round trip.
    expect(pcm?.length).toBe(4);
    expect(pcm?.[0]).toBe(100);
    expect(pcm?.[3]).toBe(400);
  });

  test("sendText forwards the text as a JSON {text} frame", async () => {
    const { session, ws } = await openSession();

    session.sendText("Hello, world!");

    const expectedFrame = JSON.stringify({ text: "Hello, world!" });
    expect(ws.sent).toContain(expectedFrame);
  });

  test("flush() sends a trailing '.' and emits done after quiescence post-audio", async () => {
    const { session, ws } = await openSession();

    let doneCount = 0;
    session.on("done", () => {
      doneCount += 1;
    });

    session.sendText("Hi there");
    session.flush();

    // Trailing punctuation forces Rime to synthesize the buffer without
    // closing the WS (which `eos` would do).
    expect(ws.sent).toContain(JSON.stringify({ text: "." }));

    // First-audio timer is 5 s — short window must not fire `done` yet.
    vi.advanceTimersByTime(500);
    expect(doneCount).toBe(0);

    // First chunk arrives → switch to short quiescence window.
    ws._msg(chunkFrame(new Int16Array([100, 200, 300, 400])));

    // `done` fires exactly 500 ms after the chunk, not a millisecond earlier.
    vi.advanceTimersByTime(499);
    expect(doneCount).toBe(0);
    vi.advanceTimersByTime(1);
    expect(doneCount).toBe(1);
  });

  test("flush() falls back to first-audio timeout when no chunk arrives", async () => {
    const { session } = await openSession();

    let doneCount = 0;
    session.on("done", () => {
      doneCount += 1;
    });

    session.sendText("Hi there");
    session.flush();

    // No chunk arrives — done must wait the full FIRST_AUDIO_TIMEOUT_MS (5 s).
    vi.advanceTimersByTime(4999);
    expect(doneCount).toBe(0);
    vi.advanceTimersByTime(1);
    expect(doneCount).toBe(1);
  });

  test("cancel() sends clear operation and emits done synchronously", async () => {
    const { session, ws } = await openSession();

    let doneCount = 0;
    session.on("done", () => {
      doneCount += 1;
    });

    session.sendText("Hello");
    // cancel() must emit `done` synchronously — barge-in cannot be deferred.
    session.cancel();

    expect(ws.sent).toContain(JSON.stringify({ operation: "clear" }));
    // No await and no timer advance has happened since cancel().
    expect(doneCount).toBe(1);
  });

  test("close() closes the WebSocket and is idempotent", async () => {
    const { session, ws } = await openSession();
    expect(ws.readyState).toBe(FakeWebSocket.OPEN);

    await session.close();
    expect(ws.readyState).toBe(FakeWebSocket.CLOSED);

    // Second close should not throw.
    await expect(session.close()).resolves.toBeUndefined();
  });
});
|