@alexkroman1/aai 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +14 -12
- package/CHANGELOG.md +14 -0
- package/dist/host/_pipeline-test-fakes.d.ts +107 -0
- package/dist/host/pipeline-session-ctx.d.ts +24 -0
- package/dist/host/pipeline-session.d.ts +48 -0
- package/dist/host/providers/llm.d.ts +2 -0
- package/dist/host/providers/stt/assemblyai.d.ts +31 -0
- package/dist/host/providers/stt-barrel.d.ts +8 -0
- package/dist/host/providers/stt-barrel.js +92 -0
- package/dist/host/providers/stt.d.ts +2 -0
- package/dist/host/providers/tts/cartesia.d.ts +39 -0
- package/dist/host/providers/tts-barrel.d.ts +8 -0
- package/dist/host/providers/tts-barrel.js +182 -0
- package/dist/host/providers/tts.d.ts +2 -0
- package/dist/host/runtime-barrel.js +498 -80
- package/dist/host/runtime.d.ts +17 -0
- package/dist/host/s2s.d.ts +5 -0
- package/dist/host/session-ctx.d.ts +22 -4
- package/dist/host/to-vercel-tools.d.ts +44 -0
- package/dist/index.js +5 -0
- package/dist/sdk/_internal-types.d.ts +15 -1
- package/dist/sdk/define.d.ts +21 -0
- package/dist/sdk/manifest.d.ts +22 -0
- package/dist/sdk/protocol.d.ts +3 -3
- package/dist/sdk/providers.d.ts +70 -0
- package/dist/sdk/types.d.ts +16 -0
- package/exports-no-dev-deps.test.ts +39 -14
- package/host/_pipeline-test-fakes.ts +323 -0
- package/host/_test-utils.ts +1 -0
- package/host/integration/fixtures/README.md +49 -0
- package/host/integration/pipeline-reference.integration.test.ts +124 -0
- package/host/pipeline-session-ctx.test.ts +31 -0
- package/host/pipeline-session-ctx.ts +36 -0
- package/host/pipeline-session.test.ts +337 -0
- package/host/pipeline-session.ts +405 -0
- package/host/providers/llm.ts +3 -0
- package/host/providers/providers.test-d.ts +31 -0
- package/host/providers/stt/assemblyai.test.ts +100 -0
- package/host/providers/stt/assemblyai.ts +154 -0
- package/host/providers/stt/fixtures/assemblyai/basic-turn.json +30 -0
- package/host/providers/stt-barrel.ts +13 -0
- package/host/providers/stt.ts +3 -0
- package/host/providers/tts/cartesia.test.ts +210 -0
- package/host/providers/tts/cartesia.ts +251 -0
- package/host/providers/tts-barrel.ts +13 -0
- package/host/providers/tts.ts +3 -0
- package/host/runtime.test.ts +81 -1
- package/host/runtime.ts +61 -0
- package/host/s2s.test.ts +19 -0
- package/host/s2s.ts +10 -0
- package/host/session-ctx.ts +35 -8
- package/host/to-vercel-tools.test.ts +153 -0
- package/host/to-vercel-tools.ts +70 -0
- package/package.json +15 -1
- package/sdk/__snapshots__/exports.test.ts.snap +1 -0
- package/sdk/_internal-types.ts +16 -0
- package/sdk/define.test-d.ts +21 -0
- package/sdk/define.test.ts +33 -0
- package/sdk/define.ts +21 -0
- package/sdk/manifest.test-d.ts +14 -0
- package/sdk/manifest.test.ts +51 -0
- package/sdk/manifest.ts +39 -0
- package/sdk/providers.ts +90 -0
- package/sdk/types.ts +16 -0
- package/vitest.config.ts +1 -0
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/** Tests for the pipeline-session orchestrator (see pipeline-session.ts). */
|
|
3
|
+
|
|
4
|
+
import { describe, expect, test, vi } from "vitest";
|
|
5
|
+
import type { AgentConfig } from "../sdk/_internal-types.ts";
|
|
6
|
+
import type { ClientEvent } from "../sdk/protocol.ts";
|
|
7
|
+
import { DEFAULT_SYSTEM_PROMPT } from "../sdk/types.ts";
|
|
8
|
+
import {
|
|
9
|
+
createFakeLanguageModel,
|
|
10
|
+
createFakeSttProvider,
|
|
11
|
+
createFakeTtsProvider,
|
|
12
|
+
type ScriptedPart,
|
|
13
|
+
} from "./_pipeline-test-fakes.ts";
|
|
14
|
+
import { makeClient, silentLogger } from "./_test-utils.ts";
|
|
15
|
+
import { createPipelineSession, type PipelineSessionOptions } from "./pipeline-session.ts";
|
|
16
|
+
|
|
17
|
+
/**
 * Baseline agent configuration shared by every test in this file: the
 * default system prompt and an empty greeting. Individual tests spread it
 * to add fields (e.g. `maxSteps`).
 */
const CONFIG: AgentConfig = {
  name: "pipeline-agent",
  systemPrompt: DEFAULT_SYSTEM_PROMPT,
  greeting: "",
};
|
|
22
|
+
|
|
23
|
+
/**
 * Builds a full `PipelineSessionOptions` wired to fresh fake STT/TTS
 * providers and a fake client, then layers `overrides` on top.
 *
 * @param overrides - Option fields that replace the defaults.
 * @returns The merged options plus the default fake `stt`, `tts` and
 *   `client` handles. The handles are always the ones created here, even if
 *   `overrides` supplies a different `stt`, `tts` or `client`.
 */
function makeOpts(overrides: Partial<PipelineSessionOptions> = {}): {
  opts: PipelineSessionOptions;
  stt: ReturnType<typeof createFakeSttProvider>;
  tts: ReturnType<typeof createFakeTtsProvider>;
  client: ReturnType<typeof makeClient>;
} {
  const stt = createFakeSttProvider();
  const tts = createFakeTtsProvider();
  const client = makeClient();

  // Defaults describe a session with no tools and an LLM whose script is empty.
  const defaults: PipelineSessionOptions = {
    id: "sess-1",
    agent: "pipeline-agent",
    client,
    agentConfig: CONFIG,
    toolSchemas: [],
    executeTool: vi.fn(async () => "ok"),
    stt,
    llm: createFakeLanguageModel({ script: [] }),
    tts,
    sttApiKey: "stt-key",
    ttsApiKey: "tts-key",
    sampleRate: 16_000,
    logger: silentLogger,
  };

  // Later spread wins, so any override replaces the matching default.
  const opts: PipelineSessionOptions = { ...defaults, ...overrides };
  return { opts, stt, tts, client };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Projects a list of recorded wire events onto their `type` tags,
 * preserving order.
 *
 * @param events - Recorded events; each is read as a `ClientEvent`.
 * @returns The `type` of every event, in the same order as `events`.
 */
function eventTypes(events: readonly unknown[]): string[] {
  const kinds: string[] = [];
  for (const event of events) {
    kinds.push((event as ClientEvent).type);
  }
  return kinds;
}
|
|
54
|
+
|
|
55
|
+
describe("createPipelineSession — happy path", () => {
  /**
   * Drives one complete turn: an STT partial followed by a final, a
   * two-part scripted LLM reply, and checks that each text part reached TTS,
   * that TTS was flushed once, and that the client saw the expected wire
   * events in order.
   */
  test("STT final → LLM stream → TTS sendText/flush → reply_done", async () => {
    const script: ScriptedPart[] = [
      { type: "text", text: "Hello" },
      { type: "text", text: " there" },
    ];
    const { opts, stt, tts, client } = makeOpts({
      llm: createFakeLanguageModel({ script }),
    });

    const session = createPipelineSession(opts);
    await session.start();

    const sttSession = stt.last();
    expect(sttSession).toBeDefined();
    const ttsSession = tts.last();
    expect(ttsSession).toBeDefined();
    // Narrowing guard. Throw (as the other tests in this file do) instead of
    // returning, so a missing provider session can never end the test silently.
    if (!(sttSession && ttsSession)) throw new Error("providers didn't open");

    sttSession.firePartial("Hello");
    sttSession.fireFinal("Hello there, how are you?");
    await session.waitForTurn();

    // TTS received each scripted text part, followed by exactly one flush.
    expect(ttsSession.textChunks).toEqual(["Hello", " there"]);
    expect(ttsSession.flush).toHaveBeenCalledTimes(1);

    // Wire events arrive in this exact order.
    const types = eventTypes(client.events);
    expect(types).toEqual([
      "user_transcript",
      "agent_transcript", // "Hello"
      "agent_transcript", // " there"
      "reply_done",
    ]);

    // The user_transcript carries the final STT text, not the partial.
    expect(client.events[0]).toMatchObject({
      type: "user_transcript",
      text: "Hello there, how are you?",
    });

    await session.stop();
  });
});
|
|
100
|
+
|
|
101
|
+
describe("createPipelineSession — empty utterance", () => {
  /**
   * A final transcript consisting only of whitespace must not start a reply:
   * the model's `doStream` is never invoked, TTS receives neither text nor a
   * flush, and nothing is sent to the client.
   */
  test("whitespace-only final skips reply (no TTS, no LLM, no wire events)", async () => {
    // The model is scripted with text that must never be used in this test.
    const model = createFakeLanguageModel({ script: [{ type: "text", text: "unexpected" }] });
    const streamSpy = vi.spyOn(
      model as unknown as { doStream: (...a: unknown[]) => unknown },
      "doStream",
    );

    const { opts, stt, tts, client } = makeOpts({ llm: model });
    const session = createPipelineSession(opts);
    await session.start();

    const sttFake = stt.last();
    const ttsFake = tts.last();
    if (!(sttFake && ttsFake)) throw new Error("providers didn't open");

    // Both the partial and the final carry only whitespace.
    sttFake.firePartial(" ");
    sttFake.fireFinal(" ");
    await session.waitForTurn();

    expect(streamSpy).not.toHaveBeenCalled();
    expect(ttsFake.sendText).not.toHaveBeenCalled();
    expect(ttsFake.flush).not.toHaveBeenCalled();
    expect(client.events).toEqual([]);

    await session.stop();
  });
});
|
|
129
|
+
|
|
130
|
+
describe("createPipelineSession — barge-in", () => {
  /**
   * Starts a three-part reply from a delayed fake model, waits until TTS has
   * received at least one text chunk, then fires a fresh STT partial to
   * interrupt. Expects exactly one TTS cancel, a `cancelled` wire event that
   * follows `user_transcript`, and no `reply_done`.
   */
  test("stt.partial during AGENT_REPLYING aborts LLM, cancels TTS, emits cancelled", async () => {
    // The fake model is given delayMs so the interrupting partial can be
    // fired between scripted parts.
    const replyParts: ScriptedPart[] = [
      { type: "text", text: "Hello " },
      { type: "text", text: "how can " },
      { type: "text", text: "I help?" },
    ];
    const llm = createFakeLanguageModel({ script: replyParts, delayMs: 20 });
    const { opts, stt, tts, client } = makeOpts({ llm });

    const session = createPipelineSession(opts);
    await session.start();

    const sttFake = stt.last();
    const ttsFake = tts.last();
    if (!(sttFake && ttsFake)) throw new Error("providers didn't open");

    // First user utterance: starts the agent reply.
    sttFake.firePartial("hi");
    sttFake.fireFinal("hi there");

    // Hold until TTS has been handed at least one text delta, so the reply
    // is definitely in progress when the interrupting partial arrives.
    await vi.waitFor(() => {
      const forwarded = ttsFake.sendText.mock.calls.length;
      expect(forwarded).toBeGreaterThan(0);
    });

    // The user speaks again while the agent is still replying.
    sttFake.firePartial("wait");
    await session.waitForTurn();

    // Exactly one cancel reached TTS.
    expect(ttsFake.cancel).toHaveBeenCalledTimes(1);

    // On the wire: user_transcript, possibly some agent_transcript events,
    // then cancelled. reply_done must be absent because the reply was cut off.
    const seen = eventTypes(client.events);
    expect(seen).toContain("user_transcript");
    expect(seen).toContain("cancelled");
    expect(seen).not.toContain("reply_done");
    const cancelledAt = seen.indexOf("cancelled");
    const transcriptAt = seen.indexOf("user_transcript");
    expect(cancelledAt).toBeGreaterThan(transcriptAt);

    // NOTE: the session is expected to be back in USER_SPEAKING at this
    // point, with a new final starting a fresh turn. This test does not
    // assert that follow-up behaviour.
    await session.stop();
  });
});
|
|
177
|
+
|
|
178
|
+
describe("createPipelineSession — tool calls", () => {
  /**
   * Scripts a reply that contains text, a tool call, its tool result, and
   * more text. Checks that the tool parts surface as `tool_call` /
   * `tool_call_done` wire events between the transcripts and that the turn
   * still ends with `reply_done`.
   */
  test("tool-call and tool-result parts emit wire events; reply_done still fires", async () => {
    const weatherCall: ScriptedPart = {
      type: "tool-call",
      toolCallId: "tc-1",
      toolName: "get_weather",
      input: JSON.stringify({ city: "SF" }),
    };
    const weatherResult: ScriptedPart = {
      type: "tool-result",
      toolCallId: "tc-1",
      toolName: "get_weather",
      result: "sunny, 72F",
    };
    const script: ScriptedPart[] = [
      { type: "text", text: "Let me check" },
      weatherCall,
      weatherResult,
      { type: "text", text: " — it's sunny." },
    ];
    const { opts, stt, tts, client } = makeOpts({
      llm: createFakeLanguageModel({ script }),
    });

    const session = createPipelineSession(opts);
    await session.start();

    const sttFake = stt.last();
    const ttsFake = tts.last();
    if (!(sttFake && ttsFake)) throw new Error("providers didn't open");

    sttFake.fireFinal("how's the weather?");
    await session.waitForTurn();

    // Full wire sequence, in order.
    expect(eventTypes(client.events)).toEqual([
      "user_transcript",
      "agent_transcript", // "Let me check"
      "tool_call",
      "tool_call_done",
      "agent_transcript", // " — it's sunny."
      "reply_done",
    ]);

    // First recorded event with the given type tag, if any.
    const firstOfType = (wanted: string) =>
      client.events.find((e) => (e as ClientEvent).type === wanted);

    expect(firstOfType("tool_call")).toMatchObject({
      type: "tool_call",
      toolCallId: "tc-1",
      toolName: "get_weather",
    });
    expect(firstOfType("tool_call_done")).toMatchObject({
      type: "tool_call_done",
      toolCallId: "tc-1",
      result: "sunny, 72F",
    });

    await session.stop();
  });
});
|
|
236
|
+
|
|
237
|
+
describe("createPipelineSession — multi-step tool loop", () => {
  /**
   * Feeds the fake model four steps (three single tool calls, then a final
   * text completion) with `maxSteps: 5`, and checks that every tool call is
   * executed and that the turn ends with the final text and `reply_done`.
   */
  test("streamText loops across multiple tool calls when maxSteps > 1", async () => {
    // Steps 1-3 each hold one get_weather call; step 4 is the closing text.
    // As noted by the test's author: without `stopWhen: stepCountIs(n)` the
    // AI SDK v6 default is a single step, so the loop would end after the
    // first tool-result and `executeTool` would fire only once.
    const cities = ["SF", "LA", "NY"];
    const toolSteps = cities.map((city, index): ScriptedPart[] => [
      {
        type: "tool-call",
        toolCallId: `tc-${index + 1}`,
        toolName: "get_weather",
        input: JSON.stringify({ city }),
      },
    ]);
    const steps: ScriptedPart[][] = [
      ...toolSteps,
      [{ type: "text", text: "Weather for all three cities retrieved." }],
    ];

    // Tool stub: echoes the tool name and the requested city back as a string.
    const executeTool = vi.fn(async (name: string, args: Readonly<Record<string, unknown>>) => {
      const city = (args as { city?: string }).city ?? "?";
      return `result-${name}-${city}`;
    });

    const weatherSchema = {
      name: "get_weather",
      description: "Look up the weather for a city.",
      parameters: {
        type: "object",
        properties: { city: { type: "string" } },
        required: ["city"],
      },
    };
    const { opts, stt, tts, client } = makeOpts({
      llm: createFakeLanguageModel({ steps }),
      executeTool,
      toolSchemas: [weatherSchema],
      agentConfig: { ...CONFIG, maxSteps: 5 },
    });

    const session = createPipelineSession(opts);
    await session.start();

    const sttFake = stt.last();
    const ttsFake = tts.last();
    if (!(sttFake && ttsFake)) throw new Error("providers didn't open");

    sttFake.fireFinal("weather everywhere?");
    await session.waitForTurn();

    // Every scripted tool call was executed and announced on the wire.
    expect(executeTool).toHaveBeenCalledTimes(3);
    const announcedCalls = client.events.filter((e) => (e as ClientEvent).type === "tool_call");
    expect(announcedCalls).toHaveLength(3);

    // The turn completed normally: reply_done was sent and only the final
    // text step was spoken.
    expect(eventTypes(client.events)).toContain("reply_done");
    expect(ttsFake.textChunks).toEqual(["Weather for all three cities retrieved."]);

    await session.stop();
  });
});
|
|
315
|
+
|
|
316
|
+
describe("createPipelineSession — STT error", () => {
  /**
   * An error fired by the STT fake must reach the client as exactly one
   * `error` wire event whose code is "stt" and whose message is the one
   * passed to `fireError`.
   */
  test("stt error emits single error wire event with code stt", async () => {
    const { opts, stt, client } = makeOpts();
    const session = createPipelineSession(opts);
    await session.start();

    const sttFake = stt.last();
    if (!sttFake) throw new Error("STT didn't open");

    sttFake.fireError("stt_stream_error", "oops");

    // The events are inspected immediately after fireError, with no await.
    const errorEvents = client.events.filter((e) => (e as ClientEvent).type === "error");
    expect(errorEvents).toHaveLength(1);
    const [onlyError] = errorEvents;
    expect(onlyError).toMatchObject({
      type: "error",
      code: "stt",
      message: "oops",
    });

    await session.stop();
  });
});
|