@alexkroman1/aai 1.2.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/.turbo/turbo-build.log +14 -12
  2. package/CHANGELOG.md +20 -0
  3. package/dist/{constants-VTFoymJ-.js → constants-BL3nvg4I.js} +8 -1
  4. package/dist/host/_pipeline-test-fakes.d.ts +117 -0
  5. package/dist/host/pipeline-session-ctx.d.ts +24 -0
  6. package/dist/host/pipeline-session.d.ts +48 -0
  7. package/dist/host/providers/llm.d.ts +2 -0
  8. package/dist/host/providers/stt/assemblyai.d.ts +31 -0
  9. package/dist/host/providers/stt-barrel.d.ts +8 -0
  10. package/dist/host/providers/stt-barrel.js +92 -0
  11. package/dist/host/providers/stt.d.ts +2 -0
  12. package/dist/host/providers/tts/cartesia.d.ts +39 -0
  13. package/dist/host/providers/tts-barrel.d.ts +8 -0
  14. package/dist/host/providers/tts-barrel.js +182 -0
  15. package/dist/host/providers/tts.d.ts +2 -0
  16. package/dist/host/runtime-barrel.js +565 -81
  17. package/dist/host/runtime.d.ts +17 -0
  18. package/dist/host/s2s.d.ts +5 -0
  19. package/dist/host/session-ctx.d.ts +22 -4
  20. package/dist/host/to-vercel-tools.d.ts +45 -0
  21. package/dist/index.js +7 -2
  22. package/dist/sdk/_internal-types.d.ts +15 -1
  23. package/dist/sdk/constants.d.ts +7 -0
  24. package/dist/sdk/define.d.ts +21 -0
  25. package/dist/sdk/manifest.d.ts +22 -0
  26. package/dist/sdk/protocol.d.ts +3 -3
  27. package/dist/sdk/protocol.js +1 -1
  28. package/dist/sdk/providers.d.ts +70 -0
  29. package/dist/sdk/types.d.ts +16 -0
  30. package/exports-no-dev-deps.test.ts +39 -14
  31. package/host/_pipeline-test-fakes.ts +357 -0
  32. package/host/_test-utils.ts +1 -0
  33. package/host/integration/fixtures/README.md +49 -0
  34. package/host/integration/pipeline-reference.integration.test.ts +124 -0
  35. package/host/pipeline-session-ctx.test.ts +31 -0
  36. package/host/pipeline-session-ctx.ts +36 -0
  37. package/host/pipeline-session.test.ts +572 -0
  38. package/host/pipeline-session.ts +489 -0
  39. package/host/providers/llm.ts +3 -0
  40. package/host/providers/providers.test-d.ts +31 -0
  41. package/host/providers/stt/assemblyai.test.ts +100 -0
  42. package/host/providers/stt/assemblyai.ts +154 -0
  43. package/host/providers/stt/fixtures/assemblyai/basic-turn.json +30 -0
  44. package/host/providers/stt-barrel.ts +13 -0
  45. package/host/providers/stt.ts +3 -0
  46. package/host/providers/tts/cartesia.test.ts +210 -0
  47. package/host/providers/tts/cartesia.ts +251 -0
  48. package/host/providers/tts-barrel.ts +13 -0
  49. package/host/providers/tts.ts +3 -0
  50. package/host/runtime.test.ts +81 -1
  51. package/host/runtime.ts +61 -0
  52. package/host/s2s.test.ts +19 -0
  53. package/host/s2s.ts +10 -0
  54. package/host/session-ctx.ts +35 -8
  55. package/host/to-vercel-tools.test.ts +187 -0
  56. package/host/to-vercel-tools.ts +74 -0
  57. package/package.json +15 -1
  58. package/sdk/__snapshots__/exports.test.ts.snap +2 -0
  59. package/sdk/_internal-types.ts +16 -0
  60. package/sdk/constants.ts +8 -0
  61. package/sdk/define.test-d.ts +21 -0
  62. package/sdk/define.test.ts +33 -0
  63. package/sdk/define.ts +21 -0
  64. package/sdk/manifest.test-d.ts +14 -0
  65. package/sdk/manifest.test.ts +51 -0
  66. package/sdk/manifest.ts +39 -0
  67. package/sdk/providers.ts +90 -0
  68. package/sdk/types.ts +16 -0
  69. package/vitest.config.ts +1 -0
@@ -0,0 +1,489 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * Pipeline session — pluggable STT → LLM → TTS orchestrator.
4
+ *
5
+ * Alternative to the S2S session (see `session.ts`) that drives three
6
+ * independent providers. A new partial STT event while the agent is replying
7
+ * triggers barge-in (aborts the LLM stream and cancels TTS).
8
+ */
9
+
10
+ import type { LanguageModel, ModelMessage } from "ai";
11
+ import { stepCountIs, streamText } from "ai";
12
+ import type { AgentConfig, ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
13
+ import { DEFAULT_STT_SAMPLE_RATE, PIPELINE_FLUSH_TIMEOUT_MS } from "../sdk/constants.ts";
14
+ import type { ClientSink, SessionErrorCode } from "../sdk/protocol.ts";
15
+ import type {
16
+ SttError,
17
+ SttProvider,
18
+ SttSession,
19
+ TtsError,
20
+ TtsProvider,
21
+ TtsSession,
22
+ Unsubscribe,
23
+ } from "../sdk/providers.ts";
24
+ import { buildSystemPrompt } from "../sdk/system-prompt.ts";
25
+ import type { Message } from "../sdk/types.ts";
26
+ import { errorMessage } from "../sdk/utils.ts";
27
+ import { buildPipelineCtx, type PipelineSessionCtx } from "./pipeline-session-ctx.ts";
28
+ import { consoleLogger, type Logger } from "./runtime-config.ts";
29
+ import type { Session } from "./session.ts";
30
+ import { toVercelTools } from "./to-vercel-tools.ts";
31
+
32
/**
 * Everything {@link createPipelineSession} needs to build a session: the
 * session identity, the client sink, the agent definition with its tools,
 * and the STT / LLM / TTS providers together with their credentials.
 */
export interface PipelineSessionOptions {
  /** Identifier that uniquely names this session. */
  id: string;
  /** Slug of the agent being run. */
  agent: string;
  /** Where wire events and audio are sent on their way back to the browser client. */
  client: ClientSink;
  /** Serializable agent configuration (name, system prompt, maxSteps, ...). */
  agentConfig: AgentConfig;
  /** JSON Schema descriptions of the tools the agent may call. */
  toolSchemas: ReadonlyArray<ToolSchema>;
  /** Optional natural-language hints that get appended to the system prompt. */
  toolGuidance?: ReadonlyArray<string> | undefined;
  /** Callback that runs a tool given its name. */
  executeTool: ExecuteTool;
  /** Speech-to-text provider (injected via manifest in pipeline mode). */
  stt: SttProvider;
  /** Language model, expressed as a Vercel AI SDK `LanguageModel`. */
  llm: LanguageModel;
  /** Text-to-speech provider (injected via manifest in pipeline mode). */
  tts: TtsProvider;
  /** API key passed to the STT provider when it is opened. */
  sttApiKey: string;
  /** API key passed to the TTS provider when it is opened. */
  ttsApiKey: string;
  /** PCM16 sample rate in Hz; falls back to {@link DEFAULT_STT_SAMPLE_RATE} when omitted. */
  sampleRate?: number | undefined;
  /** Logger to use; the console logger is used when omitted. */
  logger?: Logger | undefined;
  /** Size of the sliding window applied to conversation history. */
  maxHistory?: number | undefined;
}
65
+
66
/**
 * Translate a stored conversation message into the `ModelMessage` shape handed
 * to the LLM. A `"user"` message keeps its role; every other role is
 * forwarded as an `"assistant"` message with the same content.
 */
function toModelMessage(m: Message): ModelMessage {
  switch (m.role) {
    case "user":
      return { role: "user", content: m.content };
    default:
      return { role: "assistant", content: m.content };
  }
}
71
+
72
/** Send an `error` wire event carrying `code` and `message` to the client sink. */
function emitError(client: ClientSink, code: SessionErrorCode, message: string): void {
  const errorEvent = { type: "error", code, message } as const;
  client.event(errorEvent);
}
75
+
76
/** Collaborators that `handleStreamPart` needs in order to act on one LLM stream part. */
interface StreamPartHandlerDeps {
  /** Sink that receives transcript, tool and error events. */
  client: ClientSink;
  /** TTS session that text deltas are spoken through, or `null` when there is none. */
  tts: TtsSession | null;
  /** Logger used to report stream errors. */
  log: Logger;
  /** Session id attached to log entries. */
  sessionId: string;
  /** Invoked with every non-empty text delta. */
  onTextDelta: (delta: string) => void;
}
83
+
84
/**
 * Render a tool output as the string carried by a `tool_call_done` event.
 *
 * Strings pass through untouched. Everything else is JSON-encoded, with two
 * guards: `JSON.stringify` returns `undefined` (not a string) for `undefined`,
 * functions and symbols, and it throws on circular structures and BigInt
 * values. Neither case may leak out, because the event's `result` must be a
 * string and a throw here would abort the whole LLM stream loop.
 */
function stringifyToolOutput(output: unknown): string {
  if (typeof output === "string") return output;
  try {
    return JSON.stringify(output) ?? "null";
  } catch {
    return typeof output === "bigint" ? output.toString() : "[unserializable tool output]";
  }
}

/**
 * Dispatch a single part of the LLM full stream.
 *
 * - `text-delta`: non-empty text is reported through `deps.onTextDelta`, sent
 *   to TTS (when a session exists) and emitted as an `agent_transcript` event.
 * - `tool-call`: emitted as a `tool_call` event; missing ids/names become `""`
 *   and a missing input becomes `{}`.
 * - `tool-result`: emitted as a `tool_call_done` event with the output
 *   rendered as a string.
 * - `error`: logged and emitted to the client as an `llm` error.
 * - any other part type is ignored.
 *
 * @param part One stream part; only the fields relevant to its `type` are read.
 * @param deps Client sink, TTS session, logger and callbacks to act through.
 */
function handleStreamPart(
  part: {
    readonly type: string;
    readonly text?: string;
    readonly input?: unknown;
    readonly output?: unknown;
    readonly toolCallId?: string;
    readonly toolName?: string;
    readonly error?: unknown;
  },
  deps: StreamPartHandlerDeps,
): void {
  switch (part.type) {
    case "text-delta": {
      const delta = part.text ?? "";
      // Empty deltas carry no information; skip them entirely.
      if (delta.length === 0) return;
      deps.onTextDelta(delta);
      deps.tts?.sendText(delta);
      deps.client.event({ type: "agent_transcript", text: delta });
      return;
    }
    case "tool-call": {
      const input = (part.input ?? {}) as Readonly<Record<string, unknown>>;
      deps.client.event({
        type: "tool_call",
        toolCallId: part.toolCallId ?? "",
        toolName: part.toolName ?? "",
        args: input,
      });
      return;
    }
    case "tool-result": {
      deps.client.event({
        type: "tool_call_done",
        toolCallId: part.toolCallId ?? "",
        result: stringifyToolOutput(part.output),
      });
      return;
    }
    case "error": {
      const msg = errorMessage(part.error);
      deps.log.error("LLM stream error", { message: msg, sessionId: deps.sessionId });
      emitError(deps.client, "llm", msg);
      return;
    }
    default:
      return;
  }
}
135
+
136
/**
 * Create a pluggable-provider voice session.
 *
 * Wires an STT provider, a Vercel AI SDK language model and a TTS provider
 * into one {@link Session}. A final STT transcript starts a turn (LLM stream →
 * TTS → `reply_done`); an STT partial while a turn is in flight is treated as
 * barge-in and cancels that turn. At most one turn is in flight at a time: a
 * new final transcript replaces the running turn instead of queueing.
 *
 * @param opts Session identity, client sink, agent definition and providers.
 * @returns The session; call `start()` to open the providers and `stop()` to
 *   tear everything down.
 */
export function createPipelineSession(opts: PipelineSessionOptions): Session {
  const log = opts.logger ?? consoleLogger;
  const sampleRate = opts.sampleRate ?? DEFAULT_STT_SAMPLE_RATE;
  const { client, agentConfig, toolSchemas, executeTool } = opts;

  const hasTools = toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0;
  const systemPrompt = buildSystemPrompt(agentConfig, {
    hasTools,
    voice: true,
    toolGuidance: opts.toolGuidance,
  });

  const ctx: PipelineSessionCtx = buildPipelineCtx({
    id: opts.id,
    agent: opts.agent,
    client,
    agentConfig,
    executeTool,
    log,
    maxHistory: opts.maxHistory,
  });

  // Aborted by terminate() or stop(); its signal is handed to both providers.
  const sessionAbort = new AbortController();
  // Incoming audio is dropped until the client reports it is ready.
  let audioReady = false;
  // Once true, STT events, audio frames and client commands are ignored.
  let terminated = false;
  // Abort handle of the turn currently in flight, or null when idle.
  let turnController: AbortController | null = null;
  let nextReplyId = 0;
  const sttSubs: Unsubscribe[] = [];
  const ttsSubs: Unsubscribe[] = [];

  /**
   * Abort the in-flight turn (if any), cancel pending TTS output and cancel
   * the current reply on the session context. Shared by barge-in, turn
   * replacement, client cancel/reset and terminate().
   */
  function abortActiveTurn(): void {
    turnController?.abort();
    turnController = null;
    ctx.tts?.cancel();
    ctx.cancelReply();
  }

  /**
   * Tear down the session after an unrecoverable provider error. Aborts the
   * in-flight turn, cancels TTS, signals providers to close via sessionAbort,
   * and flips `terminated` so future STT events and audio frames become
   * no-ops. Idempotent.
   */
  function terminate(): void {
    if (terminated) return;
    terminated = true;
    abortActiveTurn();
    sessionAbort.abort();
  }

  /** STT partial: if a turn is running, the user is talking over it — barge in. */
  function onSttPartial(_text: string): void {
    if (terminated) return;
    if (turnController === null) return;
    log.info("Pipeline barge-in", { sessionId: opts.id });
    abortActiveTurn();
    client.event({ type: "cancelled" });
  }

  /** STT final: report the transcript and start a turn for non-blank text. */
  function onSttFinal(text: string): void {
    if (terminated) return;
    const trimmed = text.trim();
    if (trimmed.length === 0) return;
    // If a prior turn is still running (duplicate/late STT final or a
    // second utterance without an intervening partial), abort it before
    // launching a new one. Matches LiveKit's `current_speech.interrupt()`
    // and Pipecat's `InterruptionFrame` broadcast: single-slot current
    // turn, replace in-flight rather than queue.
    if (turnController !== null) {
      log.info("Pipeline replacing in-flight turn", { sessionId: opts.id });
      abortActiveTurn();
      client.event({ type: "cancelled" });
    }
    client.event({ type: "user_transcript", text });
    // runTurn() starts synchronously; the catch keeps a crashed turn from
    // surfacing as an unhandled rejection.
    const turn = runTurn(trimmed).catch((err: unknown) => {
      log.error("Pipeline turn crashed", { error: errorMessage(err), sessionId: opts.id });
    });
    ctx.chainTurn(turn);
  }

  /** STT provider error: report to the client and terminate the session. */
  function onSttError(err: SttError): void {
    if (terminated) return;
    log.error("STT error", { code: err.code, message: err.message, sessionId: opts.id });
    emitError(client, "stt", err.message);
    terminate();
  }

  /** TTS provider error: report to the client and terminate the session. */
  function onTtsError(err: TtsError): void {
    if (terminated) return;
    log.error("TTS error", { code: err.code, message: err.message, sessionId: opts.id });
    emitError(client, "tts", err.message);
    terminate();
  }

  /**
   * Run the LLM for one turn and dispatch every stream part. Never rejects:
   * failures are logged and emitted as `llm` errors unless the turn was
   * aborted, in which case they are swallowed silently.
   */
  async function consumeLlmStream(
    ctl: AbortController,
    messages: ModelMessage[],
    tools: ReturnType<typeof toVercelTools>,
    onDelta: (delta: string) => void,
  ): Promise<void> {
    const deps: StreamPartHandlerDeps = {
      client,
      tts: ctx.tts,
      log,
      sessionId: opts.id,
      onTextDelta: onDelta,
    };
    try {
      // Vercel AI SDK v6 defaults to a single step — without `stopWhen`, the
      // stream terminates after the first tool result and the agent can't
      // follow up on its own tool calls.
      const maxSteps = agentConfig.maxSteps ?? 5;
      const result = streamText({
        model: opts.llm,
        system: systemPrompt,
        messages,
        tools,
        stopWhen: stepCountIs(maxSteps),
        abortSignal: ctl.signal,
      });
      for await (const part of result.fullStream) {
        if (ctl.signal.aborted) break;
        handleStreamPart(part, deps);
      }
    } catch (err: unknown) {
      if (!ctl.signal.aborted) {
        const msg = errorMessage(err);
        log.error("LLM streamText failed", { error: msg, sessionId: opts.id });
        emitError(client, "llm", msg);
      }
    }
  }

  /**
   * Flush TTS and wait for drain. Resolves on any of:
   *   - TTS emits `done`
   *   - `signal` aborts (barge-in, provider error, session stop)
   *   - `PIPELINE_FLUSH_TIMEOUT_MS` elapses
   * Resolves immediately if no TTS session.
   */
  function flushTtsAndWait(signal: AbortSignal): Promise<void> {
    const tts = ctx.tts;
    if (!tts) return Promise.resolve();
    return new Promise<void>((resolve) => {
      let off: Unsubscribe | null = null;
      let timer: ReturnType<typeof setTimeout> | null = null;
      const cleanup = () => {
        if (off) {
          off();
          off = null;
        }
        if (timer) {
          clearTimeout(timer);
          timer = null;
        }
        signal.removeEventListener("abort", onAbort);
      };
      const finish = () => {
        cleanup();
        resolve();
      };
      const onAbort = () => finish();
      if (signal.aborted) {
        resolve();
        return;
      }
      signal.addEventListener("abort", onAbort, { once: true });
      // Subscribe before flushing so a `done` fired during flush() is not missed.
      off = tts.on("done", finish);
      timer = setTimeout(() => {
        log.warn("TTS flush timeout", {
          sessionId: opts.id,
          timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS,
        });
        finish();
      }, PIPELINE_FLUSH_TIMEOUT_MS);
      tts.flush();
    });
  }

  /**
   * Run one user turn: record the user message, stream the LLM reply into
   * TTS, wait for TTS to drain, then record the assistant text and signal
   * `reply_done`. An aborted turn returns early without recording a reply.
   */
  async function runTurn(userText: string): Promise<void> {
    const replyId = `pipeline-${++nextReplyId}`;
    ctx.beginReply(replyId);
    ctx.pushMessages({ role: "user", content: userText });

    const ctl = new AbortController();
    turnController = ctl;

    try {
      const tools = toVercelTools(toolSchemas, {
        executeTool,
        sessionId: opts.id,
        messages: () => ctx.conversationMessages,
        signal: ctl.signal,
      });

      const messages: ModelMessage[] = ctx.conversationMessages.map(toModelMessage);
      let accumulated = "";
      await consumeLlmStream(ctl, messages, tools, (delta) => {
        accumulated += delta;
      });
      if (ctl.signal.aborted) return;

      await flushTtsAndWait(ctl.signal);
      if (ctl.signal.aborted) return;

      if (accumulated.length > 0) {
        ctx.pushMessages({ role: "assistant", content: accumulated });
      }
      client.playAudioDone();
      client.event({ type: "reply_done" });
    } finally {
      // Release the slot on every exit path, including a throw: a stale
      // controller would make the next STT partial look like a barge-in.
      // Only clear it if a newer turn has not already taken the slot.
      if (turnController === ctl) turnController = null;
    }
  }

  /** Log a failed provider `open()` and report it to the client. */
  function reportOpenRejection(which: "stt" | "tts", reason: unknown): void {
    const msg = errorMessage(reason);
    log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
      error: msg,
      sessionId: opts.id,
    });
    emitError(client, which, msg);
  }

  /** Keep an opened STT session and subscribe to it, or close it when tearing down. */
  async function adoptStt(sttSession: SttSession, teardown: boolean): Promise<void> {
    if (teardown) {
      await sttSession.close().catch(() => undefined);
      return;
    }
    ctx.stt = sttSession;
    sttSubs.push(sttSession.on("partial", onSttPartial));
    sttSubs.push(sttSession.on("final", onSttFinal));
    sttSubs.push(sttSession.on("error", onSttError));
  }

  /** Keep an opened TTS session and subscribe to it, or close it when tearing down. */
  async function adoptTts(ttsSession: TtsSession, teardown: boolean): Promise<void> {
    if (teardown) {
      await ttsSession.close().catch(() => undefined);
      return;
    }
    ctx.tts = ttsSession;
    ttsSubs.push(
      ttsSession.on("audio", (pcm) => {
        // Forward the same bytes as a Uint8Array view; no copy is made.
        client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
      }),
    );
    ttsSubs.push(ttsSession.on("error", onTtsError));
  }

  /**
   * Open STT and TTS concurrently. If either fails, or the session was
   * aborted meanwhile, any provider that did open is closed again instead of
   * being adopted.
   */
  async function openProviders(): Promise<void> {
    const [sttResult, ttsResult] = await Promise.allSettled([
      opts.stt.open({
        sampleRate,
        apiKey: opts.sttApiKey,
        sttPrompt: agentConfig.sttPrompt,
        signal: sessionAbort.signal,
      }),
      opts.tts.open({
        sampleRate,
        apiKey: opts.ttsApiKey,
        signal: sessionAbort.signal,
      }),
    ]);

    if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
    if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);

    const aborted = sessionAbort.signal.aborted;
    const sttFailed = sttResult.status === "rejected";
    const ttsFailed = ttsResult.status === "rejected";
    const teardown = aborted || sttFailed || ttsFailed;

    if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
    if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);

    // If either provider failed (but the session wasn't itself aborted),
    // mark the session terminated so subsequent events become no-ops.
    // Aborted-by-stop() sessions don't need terminate() — stop() handles cleanup.
    if (!aborted && (sttFailed || ttsFailed)) terminate();
  }

  return {
    async start(): Promise<void> {
      await openProviders();
    },
    async stop(): Promise<void> {
      if (sessionAbort.signal.aborted) return;
      // Mark the session dead first so late audio frames and client commands
      // are ignored instead of reaching providers that are being closed.
      terminated = true;
      sessionAbort.abort();
      turnController?.abort();
      for (const off of sttSubs) off();
      for (const off of ttsSubs) off();
      sttSubs.length = 0;
      ttsSubs.length = 0;
      // Let the aborted turn unwind before closing the providers it uses.
      if (ctx.turnPromise !== null) await ctx.turnPromise;
      await ctx.stt?.close().catch(() => {
        /* already closed */
      });
      await ctx.tts?.close().catch(() => {
        /* already closed */
      });
    },
    onAudio(data: Uint8Array): void {
      if (terminated || !audioReady) return;
      const offset = data.byteOffset;
      const length = data.byteLength;
      let pcm: Int16Array;
      if (offset % 2 === 0 && length % 2 === 0) {
        // Already 2-byte aligned with an even length: view the bytes in place.
        pcm = new Int16Array(data.buffer, offset, length / 2);
      } else {
        // Int16Array needs an aligned, even-length range: copy into a fresh
        // buffer and drop a trailing odd byte.
        const copy = new Uint8Array(length - (length % 2));
        copy.set(data.subarray(0, copy.byteLength));
        pcm = new Int16Array(copy.buffer);
      }
      ctx.stt?.sendAudio(pcm);
    },
    onAudioReady(): void {
      audioReady = true;
    },
    onCancel(): void {
      if (terminated) return;
      abortActiveTurn();
      client.event({ type: "cancelled" });
    },
    onReset(): void {
      if (terminated) return;
      abortActiveTurn();
      ctx.conversationMessages = [];
      ctx.turnPromise = null;
      client.event({ type: "reset" });
    },
    onHistory(incoming: readonly { role: "user" | "assistant"; content: string }[]): void {
      if (terminated) return;
      ctx.pushMessages(...incoming.map((m) => ({ role: m.role, content: m.content })));
    },
    waitForTurn(): Promise<void> {
      return ctx.turnPromise ?? Promise.resolve();
    },
  };
}
@@ -0,0 +1,3 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /** LLM provider type — re-exported from sdk/ for host-side consumption. */
3
+ export type * from "../../sdk/providers.ts";
@@ -0,0 +1,31 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ import type { LanguageModel } from "ai";
3
+ import { expectTypeOf, test } from "vitest";
4
+ import type { LlmProvider } from "./llm.ts";
5
+ import type { SttEvents, SttProvider, SttSession, Unsubscribe } from "./stt.ts";
6
+ import type { TtsEvents, TtsSession } from "./tts.ts";
7
+
8
// Type-level contract checks for the provider interfaces. Each test pins one
// signature detail; the assertions are evaluated by the type checker.

test("SttProvider.open returns Promise<SttSession>", () => {
  type Open = SttProvider["open"];
  expectTypeOf<Open>().returns.toEqualTypeOf<Promise<SttSession>>();
});

test("SttEvents.partial takes a string", () => {
  type PartialHandler = SttEvents["partial"];
  expectTypeOf<PartialHandler>().parameters.toEqualTypeOf<[string]>();
});

test("TtsSession.cancel is synchronous", () => {
  type Cancel = TtsSession["cancel"];
  expectTypeOf<Cancel>().returns.toEqualTypeOf<void>();
});

test("TtsEvents.audio takes Int16Array", () => {
  type AudioHandler = TtsEvents["audio"];
  expectTypeOf<AudioHandler>().parameters.toEqualTypeOf<[Int16Array]>();
});

test("LlmProvider is Vercel AI SDK's LanguageModel", () => {
  expectTypeOf<LlmProvider>().toEqualTypeOf<LanguageModel>();
});

test("Stt/Tts on() returns Unsubscribe", () => {
  type SttOn = SttSession["on"];
  type TtsOn = TtsSession["on"];
  expectTypeOf<SttOn>().returns.toEqualTypeOf<Unsubscribe>();
  expectTypeOf<TtsOn>().returns.toEqualTypeOf<Unsubscribe>();
});
@@ -0,0 +1,100 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /** Fixture-replay unit test for the AssemblyAI STT adapter. */
3
+
4
+ import { readFile } from "node:fs/promises";
5
+ import { dirname, join } from "node:path";
6
+ import { fileURLToPath } from "node:url";
7
+ import type { TurnEvent } from "assemblyai";
8
+ import { describe, expect, test, vi } from "vitest";
9
+ import { flush } from "../../_test-utils.ts";
10
+ import { type AssemblyAISession, assemblyAI } from "./assemblyai.ts";
11
+
12
+ const here = dirname(fileURLToPath(import.meta.url));
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Mock the `assemblyai` SDK so no real sockets are opened.
16
+ //
17
+ // Each fake `StreamingTranscriber` keeps its own listener map and exposes
18
+ // `_fire(event, payload)` for tests to inject events. The adapter's
19
+ // `open()` returns an `AssemblyAISession` with a `_transcriber` pointer,
20
+ // which in the test is the fake — giving the test a handle to `_fire`.
21
+ // ---------------------------------------------------------------------------
22
+
23
/**
 * Shape of the stand-in used in place of the SDK's streaming transcriber in
 * this test file: the methods the fake implements, plus `_fire` so a test can
 * inject events.
 */
interface FakeTranscriber {
  /** Register `listener` for `event`. */
  on(event: string, listener: (...args: unknown[]) => void): void;
  /** Pretend to connect; never opens a socket. */
  connect(): Promise<void>;
  /** Pretend to close the connection. */
  close(): Promise<void>;
  /** Accept an audio buffer and discard it. */
  sendAudio(_data: ArrayBufferLike): void;
  /** Test hook: invoke every listener registered for `event` with `payload`. */
  _fire(event: string, ...payload: unknown[]): void;
}
30
+
31
vi.mock("assemblyai", () => {
  type Listener = (...args: unknown[]) => void;

  /** Build a transcriber stand-in that owns its private listener registry. */
  function createFakeTranscriber(): FakeTranscriber {
    const registry = new Map<string, Listener[]>();
    return {
      on(ev, fn) {
        const registered = registry.get(ev);
        if (registered === undefined) {
          registry.set(ev, [fn]);
        } else {
          registered.push(fn);
        }
      },
      async connect() {
        // Emit the session-begin event as part of connecting.
        this._fire("open", { type: "Begin", id: "mock-sess", expires_at: 0 });
      },
      async close() {
        /* no-op */
      },
      sendAudio(_data: ArrayBufferLike) {
        /* no-op */
      },
      _fire(ev, ...args) {
        const targets = registry.get(ev) ?? [];
        for (const listener of targets) listener(...args);
      },
    };
  }

  // Replacement for the SDK client: every `streaming.transcriber(...)` call
  // hands back a brand-new fake.
  class AssemblyAI {
    streaming = {
      transcriber: (_params: unknown): FakeTranscriber => createFakeTranscriber(),
    };
  }

  return { AssemblyAI };
});
62
+
63
+ describe("assemblyAI STT adapter — fixture replay", () => {
64
+ test("maps turn events onto partial/final SttEvents", async () => {
65
+ const fixture = JSON.parse(
66
+ await readFile(join(here, "fixtures/assemblyai/basic-turn.json"), "utf8"),
67
+ ) as Record<string, unknown>[];
68
+
69
+ const provider = assemblyAI({ model: "u3pro-rt", apiKey: "k" });
70
+ const controller = new AbortController();
71
+ const session = (await provider.open({
72
+ sampleRate: 16_000,
73
+ apiKey: "k",
74
+ signal: controller.signal,
75
+ })) as AssemblyAISession;
76
+
77
+ const partials: string[] = [];
78
+ const finals: string[] = [];
79
+ const errors: string[] = [];
80
+ session.on("partial", (t) => partials.push(t));
81
+ session.on("final", (t) => finals.push(t));
82
+ session.on("error", (e) => errors.push(e.message));
83
+
84
+ // Replay fixture through the fake transcriber. The JSON's "type" field
85
+ // distinguishes Begin from Turn; we only dispatch turn messages since
86
+ // Begin is consumed inside `connect()` by the real SDK.
87
+ const fake = session._transcriber as unknown as FakeTranscriber;
88
+ for (const msg of fixture) {
89
+ if (msg.type === "Turn") fake._fire("turn", msg as TurnEvent);
90
+ }
91
+
92
+ await flush();
93
+
94
+ expect(partials).toEqual(["what", "what's the"]);
95
+ expect(finals).toEqual(["what's the weather?"]);
96
+ expect(errors).toEqual([]);
97
+
98
+ await session.close();
99
+ });
100
+ });