@alexkroman1/aai 1.2.3 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +14 -12
- package/CHANGELOG.md +20 -0
- package/dist/{constants-VTFoymJ-.js → constants-BL3nvg4I.js} +8 -1
- package/dist/host/_pipeline-test-fakes.d.ts +117 -0
- package/dist/host/pipeline-session-ctx.d.ts +24 -0
- package/dist/host/pipeline-session.d.ts +48 -0
- package/dist/host/providers/llm.d.ts +2 -0
- package/dist/host/providers/stt/assemblyai.d.ts +31 -0
- package/dist/host/providers/stt-barrel.d.ts +8 -0
- package/dist/host/providers/stt-barrel.js +92 -0
- package/dist/host/providers/stt.d.ts +2 -0
- package/dist/host/providers/tts/cartesia.d.ts +39 -0
- package/dist/host/providers/tts-barrel.d.ts +8 -0
- package/dist/host/providers/tts-barrel.js +182 -0
- package/dist/host/providers/tts.d.ts +2 -0
- package/dist/host/runtime-barrel.js +565 -81
- package/dist/host/runtime.d.ts +17 -0
- package/dist/host/s2s.d.ts +5 -0
- package/dist/host/session-ctx.d.ts +22 -4
- package/dist/host/to-vercel-tools.d.ts +45 -0
- package/dist/index.js +7 -2
- package/dist/sdk/_internal-types.d.ts +15 -1
- package/dist/sdk/constants.d.ts +7 -0
- package/dist/sdk/define.d.ts +21 -0
- package/dist/sdk/manifest.d.ts +22 -0
- package/dist/sdk/protocol.d.ts +3 -3
- package/dist/sdk/protocol.js +1 -1
- package/dist/sdk/providers.d.ts +70 -0
- package/dist/sdk/types.d.ts +16 -0
- package/exports-no-dev-deps.test.ts +39 -14
- package/host/_pipeline-test-fakes.ts +357 -0
- package/host/_test-utils.ts +1 -0
- package/host/integration/fixtures/README.md +49 -0
- package/host/integration/pipeline-reference.integration.test.ts +124 -0
- package/host/pipeline-session-ctx.test.ts +31 -0
- package/host/pipeline-session-ctx.ts +36 -0
- package/host/pipeline-session.test.ts +572 -0
- package/host/pipeline-session.ts +489 -0
- package/host/providers/llm.ts +3 -0
- package/host/providers/providers.test-d.ts +31 -0
- package/host/providers/stt/assemblyai.test.ts +100 -0
- package/host/providers/stt/assemblyai.ts +154 -0
- package/host/providers/stt/fixtures/assemblyai/basic-turn.json +30 -0
- package/host/providers/stt-barrel.ts +13 -0
- package/host/providers/stt.ts +3 -0
- package/host/providers/tts/cartesia.test.ts +210 -0
- package/host/providers/tts/cartesia.ts +251 -0
- package/host/providers/tts-barrel.ts +13 -0
- package/host/providers/tts.ts +3 -0
- package/host/runtime.test.ts +81 -1
- package/host/runtime.ts +61 -0
- package/host/s2s.test.ts +19 -0
- package/host/s2s.ts +10 -0
- package/host/session-ctx.ts +35 -8
- package/host/to-vercel-tools.test.ts +187 -0
- package/host/to-vercel-tools.ts +74 -0
- package/package.json +15 -1
- package/sdk/__snapshots__/exports.test.ts.snap +2 -0
- package/sdk/_internal-types.ts +16 -0
- package/sdk/constants.ts +8 -0
- package/sdk/define.test-d.ts +21 -0
- package/sdk/define.test.ts +33 -0
- package/sdk/define.ts +21 -0
- package/sdk/manifest.test-d.ts +14 -0
- package/sdk/manifest.test.ts +51 -0
- package/sdk/manifest.ts +39 -0
- package/sdk/providers.ts +90 -0
- package/sdk/types.ts +16 -0
- package/vitest.config.ts +1 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* In-memory fake providers + fake `LanguageModel` for pipeline-session tests.
|
|
4
|
+
*
|
|
5
|
+
* These fakes do not touch the network. Each `createFake*Provider()` returns a
|
|
6
|
+
* provider whose `open()` records the most recently opened session so tests
|
|
7
|
+
* can reach into it via `.last()` and drive events (partial/final transcripts,
|
|
8
|
+
* TTS chunks) or observe calls (`sendText`, `flush`, `cancel`).
|
|
9
|
+
*
|
|
10
|
+
* The fake `LanguageModel` implements the minimum of {@link LanguageModelV3}
|
|
11
|
+
* required by `streamText` — `doStream()` returns a `ReadableStream` of
|
|
12
|
+
* {@link LanguageModelV3StreamPart}s produced from a scripted sequence.
|
|
13
|
+
*
|
|
14
|
+
* @internal Not part of the public API.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import type { LanguageModel } from "ai";
|
|
18
|
+
import { createNanoEvents, type Emitter } from "nanoevents";
|
|
19
|
+
import { vi } from "vitest";
|
|
20
|
+
import type {
|
|
21
|
+
SttEvents,
|
|
22
|
+
SttOpenOptions,
|
|
23
|
+
SttProvider,
|
|
24
|
+
SttSession,
|
|
25
|
+
TtsEvents,
|
|
26
|
+
TtsOpenOptions,
|
|
27
|
+
TtsProvider,
|
|
28
|
+
TtsSession,
|
|
29
|
+
} from "../sdk/providers.ts";
|
|
30
|
+
|
|
31
|
+
// ─── Fake STT ───────────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
export type FakeSttSession = SttSession & {
|
|
34
|
+
readonly emitter: Emitter<SttEvents>;
|
|
35
|
+
readonly opts: SttOpenOptions;
|
|
36
|
+
readonly audioFrames: Int16Array[];
|
|
37
|
+
readonly closed: { value: boolean };
|
|
38
|
+
firePartial(text: string): void;
|
|
39
|
+
fireFinal(text: string): void;
|
|
40
|
+
fireError(
|
|
41
|
+
code: "stt_stream_error" | "stt_connect_failed" | "stt_auth_failed",
|
|
42
|
+
message: string,
|
|
43
|
+
): void;
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
export type FakeSttProvider = SttProvider & {
|
|
47
|
+
/** The most recently opened session, or undefined if `open()` hasn't been called. */
|
|
48
|
+
last(): FakeSttSession | undefined;
|
|
49
|
+
readonly sessions: FakeSttSession[];
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
/**
 * Build an in-memory STT provider for tests.
 *
 * Each `open()` call creates a fresh session backed by its own nanoevents
 * emitter, appends it to `sessions`, and resolves with it. `last()` returns
 * the most recently opened session, or `undefined` before the first `open()`.
 *
 * @returns A provider named `"fake-stt"` that never touches the network.
 */
export function createFakeSttProvider(): FakeSttProvider {
  const opened: FakeSttSession[] = [];

  const open = async (opts: SttOpenOptions): Promise<SttSession> => {
    const bus = createNanoEvents<SttEvents>();
    const frames: Int16Array[] = [];
    const closedFlag = { value: false };

    const fake: FakeSttSession = {
      emitter: bus,
      opts,
      audioFrames: frames,
      closed: closedFlag,
      // Record every PCM frame so tests can inspect what was streamed in.
      sendAudio: vi.fn((pcm: Int16Array) => {
        frames.push(pcm);
      }),
      on: bus.on.bind(bus) as SttSession["on"],
      // Closing only flips the shared flag; there is no underlying resource.
      close: vi.fn(async () => {
        closedFlag.value = true;
      }),
      firePartial: (text: string) => {
        bus.emit("partial", text);
      },
      fireFinal: (text: string) => {
        bus.emit("final", text);
      },
      fireError: (code, message) => {
        // Shape the error like the payload of the provider's `error` event.
        type SttErrorPayload = Parameters<SttEvents["error"]>[0];
        const failure = Object.assign(new Error(message), { code }) as SttErrorPayload;
        bus.emit("error", failure);
      },
    };

    opened.push(fake);
    return fake;
  };

  return {
    name: "fake-stt",
    sessions: opened,
    last: () => opened.at(-1),
    open,
  };
}
|
|
92
|
+
|
|
93
|
+
// ─── Fake TTS ───────────────────────────────────────────────────────────────
|
|
94
|
+
|
|
95
|
+
export type FakeTtsSession = TtsSession & {
|
|
96
|
+
readonly emitter: Emitter<TtsEvents>;
|
|
97
|
+
readonly opts: TtsOpenOptions;
|
|
98
|
+
readonly textChunks: string[];
|
|
99
|
+
readonly closed: { value: boolean };
|
|
100
|
+
readonly sendText: ReturnType<typeof vi.fn<(text: string) => void>>;
|
|
101
|
+
readonly flush: ReturnType<typeof vi.fn<() => void>>;
|
|
102
|
+
readonly cancel: ReturnType<typeof vi.fn<() => void>>;
|
|
103
|
+
fireAudio(pcm: Int16Array): void;
|
|
104
|
+
fireError(
|
|
105
|
+
code: "tts_stream_error" | "tts_connect_failed" | "tts_auth_failed",
|
|
106
|
+
message: string,
|
|
107
|
+
): void;
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
export type FakeTtsProvider = TtsProvider & {
|
|
111
|
+
/** The most recently opened session, or undefined if `open()` hasn't been called. */
|
|
112
|
+
last(): FakeTtsSession | undefined;
|
|
113
|
+
readonly sessions: FakeTtsSession[];
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
/**
 * Build an in-memory TTS provider for tests.
 *
 * With the default `autoDoneOnFlush: true`, a session's `flush()` emits a
 * single `done` event synchronously, so tests need not script the drain
 * themselves. Pass `{ autoDoneOnFlush: false }` to emit `done` manually via
 * the session's `emitter`. `cancel()` emits `done` regardless of that option.
 *
 * @param options.autoDoneOnFlush Whether `flush()` emits `done`; defaults to `true`.
 * @returns A provider named `"fake-tts"` that records every opened session.
 */
export function createFakeTtsProvider(
  options: { autoDoneOnFlush?: boolean } = {},
): FakeTtsProvider {
  const emitDoneOnFlush = options.autoDoneOnFlush ?? true;
  const opened: FakeTtsSession[] = [];

  const open = async (opts: TtsOpenOptions): Promise<TtsSession> => {
    const bus = createNanoEvents<TtsEvents>();
    const spoken: string[] = [];
    const closedFlag = { value: false };

    // Spies are created up front so the session exposes the same mock
    // instances the pipeline calls.
    const sendText = vi.fn((text: string) => {
      spoken.push(text);
    });
    const flush = vi.fn(() => {
      if (emitDoneOnFlush) bus.emit("done");
    });
    const cancel = vi.fn(() => {
      bus.emit("done");
    });
    const close = vi.fn(async () => {
      closedFlag.value = true;
    });

    const fake: FakeTtsSession = {
      emitter: bus,
      opts,
      textChunks: spoken,
      closed: closedFlag,
      sendText,
      flush,
      cancel,
      on: bus.on.bind(bus) as TtsSession["on"],
      close,
      fireAudio: (pcm: Int16Array) => {
        bus.emit("audio", pcm);
      },
      fireError: (code, message) => {
        // Shape the error like the payload of the provider's `error` event.
        type TtsErrorPayload = Parameters<TtsEvents["error"]>[0];
        const failure = Object.assign(new Error(message), { code }) as TtsErrorPayload;
        bus.emit("error", failure);
      },
    };

    opened.push(fake);
    return fake;
  };

  return {
    name: "fake-tts",
    sessions: opened,
    last: () => opened.at(-1),
    open,
  };
}
|
|
170
|
+
|
|
171
|
+
/**
 * STT provider whose `open()` always rejects.
 *
 * The rejection is an `Error` carrying `message` plus a `code` property, built
 * fresh on every call. Intended for atomic-open tests: when STT fails to open,
 * TTS should never be opened.
 *
 * @param code Error code attached to the rejection as `code`.
 * @param message Message of the thrown `Error`.
 * @returns A provider named `"failing-stt"`.
 */
export function createFailingSttProvider(
  code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error",
  message: string,
): SttProvider {
  const open = async (): Promise<SttSession> => {
    throw Object.assign(new Error(message), { code });
  };
  return { name: "failing-stt", open };
}
|
|
187
|
+
|
|
188
|
+
/**
 * TTS provider whose `open()` always rejects.
 *
 * The rejection is an `Error` carrying `message` plus a `code` property, built
 * fresh on every call. Intended for atomic-open tests: when TTS fails to open,
 * the already-opened STT session should be closed.
 *
 * @param code Error code attached to the rejection as `code`.
 * @param message Message of the thrown `Error`.
 * @returns A provider named `"failing-tts"`.
 */
export function createFailingTtsProvider(
  code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error",
  message: string,
): TtsProvider {
  const open = async (): Promise<TtsSession> => {
    throw Object.assign(new Error(message), { code });
  };
  return { name: "failing-tts", open };
}
|
|
204
|
+
|
|
205
|
+
// ─── Fake LLM ───────────────────────────────────────────────────────────────
|
|
206
|
+
|
|
207
|
+
/**
|
|
208
|
+
* A scripted stream part. `text` yields a `text-delta` in the LLM provider's
|
|
209
|
+
* raw wire format; `tool-call` / `tool-result` emit the corresponding parts
|
|
210
|
+
* (v3 provider spec: `toolCallId`, `toolName`, `input` as JSON string for
|
|
211
|
+
* calls, `result` as JSON-serialisable value for results).
|
|
212
|
+
*/
|
|
213
|
+
export type ScriptedPart =
|
|
214
|
+
| { type: "text"; text: string }
|
|
215
|
+
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
216
|
+
| { type: "tool-result"; toolCallId: string; toolName: string; result: unknown }
|
|
217
|
+
| { type: "error"; error: unknown };
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Shape of the single stream part yielded by an LLM provider's `doStream()`.
|
|
221
|
+
* This is a loose local definition — the real type lives in `@ai-sdk/provider`
|
|
222
|
+
* as `LanguageModelV3StreamPart`, but we don't want a direct dependency on
|
|
223
|
+
* that package. The test fakes only need enough of the shape that the
|
|
224
|
+
* `ai` package's `streamText` will forward through to consumers.
|
|
225
|
+
*/
|
|
226
|
+
type StreamPart =
|
|
227
|
+
| { type: "stream-start"; warnings: never[] }
|
|
228
|
+
| { type: "text-start"; id: string }
|
|
229
|
+
| { type: "text-delta"; id: string; delta: string }
|
|
230
|
+
| { type: "text-end"; id: string }
|
|
231
|
+
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
232
|
+
| { type: "tool-result"; toolCallId: string; toolName: string; result: NonNullable<unknown> }
|
|
233
|
+
| { type: "error"; error: unknown }
|
|
234
|
+
| {
|
|
235
|
+
type: "finish";
|
|
236
|
+
usage: { inputTokens: number; outputTokens: number; totalTokens: number };
|
|
237
|
+
finishReason: string;
|
|
238
|
+
};
|
|
239
|
+
|
|
240
|
+
/**
 * Translate one scripted part into the stream part enqueued by `doStream()`.
 *
 * - `text` becomes a `text-delta` tagged with `textId`.
 * - `tool-call`, `tool-result` and `error` are copied field by field.
 *
 * @param part The scripted part to convert.
 * @param textId Id of the enclosing text block, used for `text-delta` parts.
 * @returns The equivalent stream part.
 */
function scriptedPartToStreamPart(part: ScriptedPart, textId: string): StreamPart {
  if (part.type === "text") {
    return { type: "text-delta", id: textId, delta: part.text };
  }
  if (part.type === "tool-call") {
    const { toolCallId, toolName, input } = part;
    return { type: "tool-call", toolCallId, toolName, input };
  }
  if (part.type === "tool-result") {
    const { toolCallId, toolName, result } = part;
    return {
      type: "tool-result",
      toolCallId,
      toolName,
      result: result as NonNullable<unknown>,
    };
  }
  if (part.type === "error") {
    return { type: "error", error: part.error };
  }
  // Exhaustiveness guard: adding a new ScriptedPart variant without handling
  // it above makes this assignment a compile-time error.
  const unhandled: never = part;
  return unhandled;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Wait `ms` milliseconds, or resolve early when `signal` aborts.
 *
 * The promise never rejects. If `signal` is already aborted on entry it
 * resolves immediately: an aborted signal does not fire `abort` again, so
 * without this check the call would wait the full `ms`. When the timer wins,
 * the abort listener is removed so repeated calls with one long-lived signal
 * do not accumulate listeners.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional abort signal that cuts the wait short.
 */
function delayOrAbort(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve) => {
    if (signal?.aborted) {
      resolve();
      return;
    }
    const onAbort = (): void => {
      clearTimeout(timer);
      resolve();
    };
    const timer = setTimeout(() => {
      // Timer won the race: detach the listener so it does not linger.
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
281
|
+
|
|
282
|
+
/**
 * Replay `script` into `controller` as a complete provider stream.
 *
 * Emits `stream-start` and `text-start`, then one converted part per script
 * entry (optionally spaced by `delayMs`), and always finishes with `text-end`,
 * a zero-usage `finish` part and `close()`. The `finish` reason is `"other"`
 * when `signal` is aborted and `"stop"` otherwise.
 *
 * `enqueue()` and `close()` throw once the stream is no longer readable, for
 * example after the consumer cancels it. This function is started
 * fire-and-forget, so such a throw would become an unhandled rejection. Writes
 * are therefore guarded, and the replay stops quietly once the controller
 * refuses further parts.
 *
 * @param controller Controller of the stream being fed.
 * @param script Parts to replay, in order.
 * @param delayMs Optional pause before each part; ignored unless > 0.
 * @param signal Optional abort signal that stops the replay early.
 */
async function streamScript(
  controller: ReadableStreamDefaultController<StreamPart>,
  script: ScriptedPart[],
  delayMs: number | undefined,
  signal: AbortSignal | undefined,
): Promise<void> {
  const textId = "text-1";
  // Flips to false the first time the controller rejects a write.
  let writable = true;
  const emit = (part: StreamPart): void => {
    if (!writable) return;
    try {
      controller.enqueue(part);
    } catch {
      writable = false;
    }
  };

  emit({ type: "stream-start", warnings: [] });
  emit({ type: "text-start", id: textId });
  try {
    for (const part of script) {
      if (!writable || signal?.aborted) break;
      if (delayMs !== undefined && delayMs > 0) await delayOrAbort(delayMs, signal);
      // Re-check: the abort may have happened while we were waiting.
      if (signal?.aborted) break;
      emit(scriptedPartToStreamPart(part, textId));
    }
  } finally {
    emit({ type: "text-end", id: textId });
    emit({
      type: "finish",
      usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
      finishReason: signal?.aborted ? "other" : "stop",
    });
    if (writable) {
      try {
        controller.close();
      } catch {
        // Stream was closed or errored by the consumer; nothing left to do.
      }
    }
  }
}
|
|
308
|
+
|
|
309
|
+
/**
 * Build a fake {@link LanguageModel} that replays scripted stream parts
 * whenever `streamText` calls `doStream()`. Prompt and tools are ignored.
 *
 * - `{ script }` — a single step, replayed on the first `doStream()` call.
 * - `{ steps }` — one entry per `doStream()` call, for multi-step tool loops
 *   driven by `stopWhen`.
 * - `delayMs` — pause inserted before each part via `setTimeout`, so barge-in
 *   tests can abort mid-stream.
 *
 * Calls beyond the last scripted step replay an empty step, so an unexpected
 * extra call still completes cleanly. `doGenerate()` always rejects.
 *
 * The object implements the provider shape structurally and is cast to
 * `LanguageModel`, avoiding a direct dependency on `@ai-sdk/provider` types.
 */
export function createFakeLanguageModel(
  options:
    | { script: ScriptedPart[]; delayMs?: number }
    | { steps: ScriptedPart[][]; delayMs?: number },
): LanguageModel {
  const { delayMs } = options;
  const scriptedSteps: ScriptedPart[][] = "steps" in options ? options.steps : [options.script];
  let nextStep = 0;

  const doGenerate = async (): Promise<never> => {
    throw new Error("fake LLM: doGenerate not implemented");
  };

  const doStream = async (opts: {
    abortSignal?: AbortSignal;
  }): Promise<{ stream: ReadableStream<StreamPart> }> => {
    // Consume exactly one step per call; fall back to an empty step once the
    // script is exhausted.
    const parts = scriptedSteps[nextStep] ?? [];
    nextStep += 1;
    return {
      stream: new ReadableStream<StreamPart>({
        start: (controller) => {
          void streamScript(controller, parts, delayMs, opts.abortSignal);
        },
      }),
    };
  };

  const fake = {
    specificationVersion: "v3" as const,
    provider: "fake-llm",
    modelId: "fake-llm-1",
    supportedUrls: {} as Record<string, RegExp[]>,
    doGenerate,
    doStream,
  };
  return fake as unknown as LanguageModel;
}
|
package/host/_test-utils.ts
CHANGED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Integration Test Fixtures
|
|
2
|
+
|
|
3
|
+
## hello-how-are-you.pcm16
|
|
4
|
+
|
|
5
|
+
Required by `pipeline-reference.integration.test.ts`. A ~2-second monaural
|
|
6
|
+
16 kHz signed 16-bit little-endian PCM file of a voice saying "hello, how
|
|
7
|
+
are you?" (or any similar short question the LLM can respond to).
|
|
8
|
+
|
|
9
|
+
### Generating it
|
|
10
|
+
|
|
11
|
+
Easiest: record yourself via the macOS Terminal using `sox`:
|
|
12
|
+
|
|
13
|
+
```sh
|
|
14
|
+
sox -d -r 16000 -c 1 -b 16 -e signed-integer hello-how-are-you.pcm16 trim 0 2
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Or export from a DAW as raw PCM16 mono @ 16 kHz.
|
|
18
|
+
|
|
19
|
+
Or use a CLI TTS to synthesize it (e.g. the same Cartesia voice we use at
|
|
20
|
+
runtime, piped through `ffmpeg -ar 16000 -ac 1 -f s16le`):
|
|
21
|
+
|
|
22
|
+
```sh
|
|
23
|
+
# sketch — adjust to your preferred TTS
|
|
24
|
+
curl -X POST https://api.cartesia.ai/tts/bytes ... \
|
|
25
|
+
| ffmpeg -i pipe:0 -ar 16000 -ac 1 -f s16le hello-how-are-you.pcm16
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Why it's not checked in
|
|
29
|
+
|
|
30
|
+
Audio fixtures are binary blobs — we keep them out of the repo. Contributors
|
|
31
|
+
with API keys + local fixtures can run the integration test; CI should
|
|
32
|
+
provide both as secrets and generate/restore the fixture from a secure
|
|
33
|
+
artifact store before the integration job runs.
|
|
34
|
+
|
|
35
|
+
### Running the test
|
|
36
|
+
|
|
37
|
+
```sh
|
|
38
|
+
export ASSEMBLYAI_API_KEY=...
|
|
39
|
+
export OPENAI_API_KEY=...
|
|
40
|
+
export CARTESIA_API_KEY=...
|
|
41
|
+
VITEST_PROFILE=integration \
|
|
42
|
+
VITEST_INCLUDE='host/integration/**/*.integration.test.ts' \
|
|
43
|
+
pnpm --filter @alexkroman1/aai exec vitest run \
|
|
44
|
+
-c ../../vitest.slow.config.ts
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
If the fixture is missing, the test throws a clear error pointing here. If
|
|
48
|
+
any of the env vars are missing, the whole suite is skipped via
|
|
49
|
+
`describe.skipIf`.
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Integration test for the pluggable-providers pipeline reference stack.
|
|
4
|
+
*
|
|
5
|
+
* Skipped unless VITEST_PROFILE=integration is set AND all three API keys
|
|
6
|
+
* are present; fails if the input audio fixture is missing. Exercises the full STT → LLM → TTS
|
|
7
|
+
* path with real providers (AssemblyAI u3pro-rt + OpenAI gpt-4o-mini +
|
|
8
|
+
* Cartesia) so latency and wire-format issues are caught before release.
|
|
9
|
+
*
|
|
10
|
+
* To run locally:
|
|
11
|
+
*
|
|
12
|
+
* export ASSEMBLYAI_API_KEY=...
|
|
13
|
+
* export OPENAI_API_KEY=...
|
|
14
|
+
* export CARTESIA_API_KEY=...
|
|
15
|
+
* VITEST_PROFILE=integration \
|
|
16
|
+
* VITEST_INCLUDE=host/integration/**\/*.integration.test.ts \
|
|
17
|
+
* pnpm --filter @alexkroman1/aai exec vitest run -c ../../vitest.slow.config.ts
|
|
18
|
+
*
|
|
19
|
+
* See fixtures/README.md for how to generate the required audio input.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { readFile, stat } from "node:fs/promises";
|
|
23
|
+
import { dirname, join } from "node:path";
|
|
24
|
+
import { fileURLToPath } from "node:url";
|
|
25
|
+
import { openai } from "@ai-sdk/openai";
|
|
26
|
+
import { describe, expect, test } from "vitest";
|
|
27
|
+
import type { AgentConfig, ExecuteTool } from "../../sdk/_internal-types.ts";
|
|
28
|
+
import type { ClientEvent, ClientSink } from "../../sdk/protocol.ts";
|
|
29
|
+
import { createPipelineSession } from "../pipeline-session.ts";
|
|
30
|
+
import { assemblyAI } from "../providers/stt/assemblyai.ts";
|
|
31
|
+
import { cartesia } from "../providers/tts/cartesia.ts";
|
|
32
|
+
import { consoleLogger } from "../runtime-config.ts";
|
|
33
|
+
|
|
34
|
+
const here = dirname(fileURLToPath(import.meta.url));
|
|
35
|
+
const fixturePath = join(here, "fixtures/hello-how-are-you.pcm16");
|
|
36
|
+
|
|
37
|
+
/**
 * Report whether the audio fixture at `fixturePath` is usable.
 *
 * @returns `true` only when the path is a regular, non-empty file; `false`
 *   when `stat` fails for any reason (e.g. the file does not exist).
 */
async function fixtureExists(): Promise<boolean> {
  let info: Awaited<ReturnType<typeof stat>>;
  try {
    info = await stat(fixturePath);
  } catch {
    // Missing or unreadable fixture is reported as "absent", not an error.
    return false;
  }
  if (!info.isFile()) return false;
  return info.size > 0;
}
|
|
45
|
+
|
|
46
|
+
const envReady = Boolean(
|
|
47
|
+
process.env.VITEST_PROFILE === "integration" &&
|
|
48
|
+
process.env.ASSEMBLYAI_API_KEY &&
|
|
49
|
+
process.env.OPENAI_API_KEY &&
|
|
50
|
+
process.env.CARTESIA_API_KEY,
|
|
51
|
+
);
|
|
52
|
+
|
|
53
|
+
describe.skipIf(!envReady)("pipeline integration — reference stack", () => {
|
|
54
|
+
test("audio in → transcript, LLM reply, TTS audio out", async () => {
|
|
55
|
+
if (!(await fixtureExists())) {
|
|
56
|
+
throw new Error(
|
|
57
|
+
`Fixture not found at ${fixturePath}. ` +
|
|
58
|
+
"See fixtures/README.md for instructions on generating it.",
|
|
59
|
+
);
|
|
60
|
+
}
|
|
61
|
+
const pcm = await readFile(fixturePath);
|
|
62
|
+
const events: ClientEvent[] = [];
|
|
63
|
+
const audioOut: Uint8Array[] = [];
|
|
64
|
+
const client: ClientSink = {
|
|
65
|
+
open: true,
|
|
66
|
+
event: (e) => {
|
|
67
|
+
events.push(e);
|
|
68
|
+
},
|
|
69
|
+
playAudioChunk: (chunk) => {
|
|
70
|
+
audioOut.push(chunk);
|
|
71
|
+
},
|
|
72
|
+
playAudioDone: () => {
|
|
73
|
+
/* no-op: test asserts on audioOut chunks directly */
|
|
74
|
+
},
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
const agentConfig: AgentConfig = {
|
|
78
|
+
name: "int",
|
|
79
|
+
systemPrompt: "You reply in one short sentence.",
|
|
80
|
+
greeting: "",
|
|
81
|
+
maxSteps: 1,
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
const executeTool: ExecuteTool = async () => "";
|
|
85
|
+
|
|
86
|
+
const session = createPipelineSession({
|
|
87
|
+
id: "int-1",
|
|
88
|
+
agent: "pipeline-reference",
|
|
89
|
+
client,
|
|
90
|
+
agentConfig,
|
|
91
|
+
toolSchemas: [],
|
|
92
|
+
executeTool,
|
|
93
|
+
stt: assemblyAI({ model: "u3pro-rt" }),
|
|
94
|
+
llm: openai("gpt-4o-mini"),
|
|
95
|
+
tts: cartesia({ voice: "694f9389-aac1-45b6-b726-9d9369183238" }),
|
|
96
|
+
// biome-ignore lint/style/noNonNullAssertion: envReady guard ensures presence
|
|
97
|
+
sttApiKey: process.env.ASSEMBLYAI_API_KEY!,
|
|
98
|
+
// biome-ignore lint/style/noNonNullAssertion: envReady guard ensures presence
|
|
99
|
+
ttsApiKey: process.env.CARTESIA_API_KEY!,
|
|
100
|
+
logger: consoleLogger,
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
await session.start();
|
|
104
|
+
session.onAudioReady();
|
|
105
|
+
|
|
106
|
+
// Stream the PCM fixture in ~100ms chunks (16 kHz PCM16 → 3200 bytes/100ms).
|
|
107
|
+
const chunkBytes = 3200;
|
|
108
|
+
for (let i = 0; i < pcm.length; i += chunkBytes) {
|
|
109
|
+
const chunk = pcm.subarray(i, Math.min(i + chunkBytes, pcm.length));
|
|
110
|
+
session.onAudio(new Uint8Array(chunk));
|
|
111
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
112
|
+
}
|
|
113
|
+
await session.waitForTurn();
|
|
114
|
+
await session.stop();
|
|
115
|
+
|
|
116
|
+
const userTranscript = events.find((e) => e.type === "user_transcript");
|
|
117
|
+
expect(userTranscript).toBeDefined();
|
|
118
|
+
expect(String((userTranscript as { text: string }).text).toLowerCase()).toContain(
|
|
119
|
+
"how are you",
|
|
120
|
+
);
|
|
121
|
+
expect(events.some((e) => e.type === "reply_done")).toBe(true);
|
|
122
|
+
expect(audioOut.length).toBeGreaterThan(0);
|
|
123
|
+
}, 60_000);
|
|
124
|
+
});
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
import { describe, expect, test } from "vitest";
|
|
3
|
+
import { buildPipelineCtx } from "./pipeline-session-ctx.ts";
|
|
4
|
+
import { consoleLogger } from "./runtime-config.ts";
|
|
5
|
+
|
|
6
|
+
const baseDeps = {
|
|
7
|
+
id: "sess-1",
|
|
8
|
+
agent: "demo",
|
|
9
|
+
client: { event: () => undefined, playAudioChunk: () => undefined } as never,
|
|
10
|
+
agentConfig: { name: "demo", systemPrompt: "", maxSteps: 3 } as never,
|
|
11
|
+
executeTool: (async () => "") as never,
|
|
12
|
+
log: consoleLogger,
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
describe("buildPipelineCtx", () => {
|
|
16
|
+
test("starts with null provider sessions", () => {
|
|
17
|
+
const ctx = buildPipelineCtx(baseDeps);
|
|
18
|
+
expect(ctx.stt).toBeNull();
|
|
19
|
+
expect(ctx.tts).toBeNull();
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
test("pushMessages + beginReply + cancelReply match S2S semantics", () => {
|
|
23
|
+
const ctx = buildPipelineCtx(baseDeps);
|
|
24
|
+
ctx.pushMessages({ role: "user", content: "hi" });
|
|
25
|
+
ctx.beginReply("r1");
|
|
26
|
+
expect(ctx.reply.currentReplyId).toBe("r1");
|
|
27
|
+
ctx.cancelReply();
|
|
28
|
+
expect(ctx.reply.currentReplyId).toBeNull();
|
|
29
|
+
expect(ctx.conversationMessages).toEqual([{ role: "user", content: "hi" }]);
|
|
30
|
+
});
|
|
31
|
+
});
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/** Pipeline session context — base ctx + STT/TTS session slots. */
|
|
3
|
+
|
|
4
|
+
import type { AgentConfig, ExecuteTool } from "../sdk/_internal-types.ts";
|
|
5
|
+
import type { ClientSink } from "../sdk/protocol.ts";
|
|
6
|
+
import type { SttSession, TtsSession } from "../sdk/providers.ts";
|
|
7
|
+
import type { Logger } from "./runtime-config.ts";
|
|
8
|
+
import { _buildBaseCtx, type BaseSessionCtx } from "./session-ctx.ts";
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Pipeline session context — {@link BaseSessionCtx} plus STT/TTS provider
|
|
12
|
+
* session handles. Replaces the S2S `s2s` field with decoupled `stt` + `tts`
|
|
13
|
+
* slots so the pipeline orchestrator can drive independent providers.
|
|
14
|
+
*/
|
|
15
|
+
export type PipelineSessionCtx = BaseSessionCtx & {
|
|
16
|
+
stt: SttSession | null;
|
|
17
|
+
tts: TtsSession | null;
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/**
 * Create a pipeline session context with empty STT/TTS slots.
 *
 * The base context returned by `_buildBaseCtx` is extended in place rather
 * than copied: its helper methods close over that exact object, so a spread
 * copy would leave them mutating an orphan.
 *
 * @param opts Options forwarded unchanged to `_buildBaseCtx`.
 * @returns The same object `_buildBaseCtx` produced, with `stt` and `tts` set to `null`.
 */
export function buildPipelineCtx(opts: {
  id: string;
  agent: string;
  client: ClientSink;
  agentConfig: AgentConfig;
  executeTool: ExecuteTool;
  log: Logger;
  maxHistory?: number | undefined;
}): PipelineSessionCtx {
  const ctx = _buildBaseCtx(opts) as PipelineSessionCtx;
  // Provider sessions are attached later; both slots start empty.
  Object.assign(ctx, { stt: null, tts: null });
  return ctx;
}
|