@alexkroman1/aai 0.12.3 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/.turbo/turbo-build.log +20 -0
  2. package/CHANGELOG.md +174 -0
  3. package/dist/constants-VTFoymJ-.js +47 -0
  4. package/dist/host/_run-code.d.ts +1 -1
  5. package/dist/host/_runtime-conformance.d.ts +4 -5
  6. package/dist/host/builtin-tools.d.ts +11 -9
  7. package/dist/host/runtime-barrel.d.ts +15 -0
  8. package/dist/{direct-executor-DRRrZUp0.js → host/runtime-barrel.js} +453 -348
  9. package/dist/host/runtime-config.d.ts +42 -0
  10. package/dist/host/runtime.d.ts +119 -35
  11. package/dist/host/s2s.d.ts +14 -38
  12. package/dist/host/server.d.ts +16 -8
  13. package/dist/host/session-ctx.d.ts +55 -0
  14. package/dist/host/session.d.ts +20 -70
  15. package/dist/host/tool-executor.d.ts +20 -0
  16. package/dist/host/unstorage-kv.d.ts +1 -1
  17. package/dist/host/ws-handler.d.ts +4 -2
  18. package/dist/index.d.ts +9 -20
  19. package/dist/index.js +63 -2
  20. package/dist/{isolate → sdk}/_internal-types.d.ts +5 -9
  21. package/dist/{isolate → sdk}/constants.d.ts +6 -4
  22. package/dist/sdk/define.d.ts +66 -0
  23. package/dist/{isolate → sdk}/kv.d.ts +1 -49
  24. package/dist/sdk/manifest-barrel.d.ts +8 -0
  25. package/dist/sdk/manifest-barrel.js +52 -0
  26. package/dist/sdk/manifest.d.ts +50 -0
  27. package/dist/{isolate → sdk}/protocol.d.ts +59 -36
  28. package/dist/sdk/protocol.js +163 -0
  29. package/dist/{isolate → sdk}/system-prompt.d.ts +2 -2
  30. package/dist/sdk/types.d.ts +201 -0
  31. package/dist/sdk/ws-upgrade.d.ts +5 -0
  32. package/dist/{system-prompt-DYAYFW99.js → system-prompt-nik_iavo.js} +10 -10
  33. package/dist/types-Cfx_4QDK.js +39 -0
  34. package/dist/ws-upgrade-BeOQ7fXL.js +30 -0
  35. package/exports-no-dev-deps.test.ts +62 -0
  36. package/host/_mock-ws.ts +185 -0
  37. package/host/_run-code.ts +217 -0
  38. package/host/_runtime-conformance.ts +143 -0
  39. package/host/_test-utils.ts +276 -0
  40. package/host/builtin-tools.test.ts +774 -0
  41. package/host/builtin-tools.ts +255 -0
  42. package/host/cleanup.test.ts +422 -0
  43. package/host/fixture-replay.test.ts +463 -0
  44. package/host/fixtures/README.md +40 -0
  45. package/host/fixtures/greeting-session-sequence.json +40 -0
  46. package/host/fixtures/reply-audio-samples.json +42 -0
  47. package/host/fixtures/reply-lifecycle.json +21 -0
  48. package/host/fixtures/session-ready.json +48 -0
  49. package/host/fixtures/session-updated.json +45 -0
  50. package/host/fixtures/simple-question-sequence.json +73 -0
  51. package/host/fixtures/tool-call-sequence.json +114 -0
  52. package/host/fixtures/tool-calls.json +11 -0
  53. package/host/fixtures/tool-config-session-sequence.json +51 -0
  54. package/host/fixtures/user-speech-recognition.json +30 -0
  55. package/host/fixtures/web-search-sequence.json +122 -0
  56. package/host/integration.test.ts +222 -0
  57. package/host/runtime-barrel.ts +25 -0
  58. package/host/runtime-config.test.ts +71 -0
  59. package/host/runtime-config.ts +99 -0
  60. package/host/runtime.test.ts +641 -0
  61. package/host/runtime.ts +308 -0
  62. package/host/s2s-fixtures.test.ts +237 -0
  63. package/host/s2s.test.ts +562 -0
  64. package/host/s2s.ts +310 -0
  65. package/host/server-shutdown.test.ts +76 -0
  66. package/host/server.test.ts +116 -0
  67. package/host/server.ts +223 -0
  68. package/host/session-ctx.ts +107 -0
  69. package/host/session-fixture-replay.test.ts +136 -0
  70. package/host/session-prompt.test.ts +77 -0
  71. package/host/session.test.ts +590 -0
  72. package/host/session.ts +370 -0
  73. package/host/tool-executor.test.ts +124 -0
  74. package/host/tool-executor.ts +80 -0
  75. package/host/unstorage-kv.test.ts +99 -0
  76. package/host/unstorage-kv.ts +69 -0
  77. package/host/ws-handler.test.ts +739 -0
  78. package/host/ws-handler.ts +255 -0
  79. package/index.ts +16 -0
  80. package/package.json +24 -72
  81. package/sdk/_internal-types.test.ts +34 -0
  82. package/sdk/_internal-types.ts +115 -0
  83. package/sdk/compat-fixtures/README.md +26 -0
  84. package/sdk/compat-fixtures/v1.json +68 -0
  85. package/sdk/constants.ts +77 -0
  86. package/sdk/define.test.ts +57 -0
  87. package/sdk/define.ts +88 -0
  88. package/sdk/kv.ts +60 -0
  89. package/sdk/manifest-barrel.ts +12 -0
  90. package/sdk/manifest.test.ts +56 -0
  91. package/sdk/manifest.ts +89 -0
  92. package/sdk/protocol-compat.test.ts +187 -0
  93. package/sdk/protocol-snapshot.test.ts +199 -0
  94. package/sdk/protocol.test.ts +170 -0
  95. package/sdk/protocol.ts +223 -0
  96. package/sdk/schema-alignment.test.ts +191 -0
  97. package/sdk/system-prompt.test.ts +111 -0
  98. package/sdk/system-prompt.ts +74 -0
  99. package/sdk/tsconfig.json +12 -0
  100. package/sdk/types-inference.test.ts +122 -0
  101. package/sdk/types.test.ts +14 -0
  102. package/sdk/types.ts +226 -0
  103. package/sdk/utils.test.ts +52 -0
  104. package/sdk/utils.ts +20 -0
  105. package/sdk/ws-upgrade.test.ts +48 -0
  106. package/sdk/ws-upgrade.ts +13 -0
  107. package/tsconfig.build.json +14 -0
  108. package/tsconfig.json +10 -0
  109. package/tsdown.config.ts +26 -0
  110. package/vitest.config.ts +17 -0
  111. package/dist/host/_test-utils.d.ts +0 -73
  112. package/dist/host/direct-executor.d.ts +0 -130
  113. package/dist/host/index.d.ts +0 -19
  114. package/dist/host/index.js +0 -165
  115. package/dist/host/matchers.d.ts +0 -20
  116. package/dist/host/matchers.js +0 -41
  117. package/dist/host/server.js +0 -164
  118. package/dist/host/testing.d.ts +0 -294
  119. package/dist/host/testing.js +0 -2
  120. package/dist/host/vite-plugin.d.ts +0 -15
  121. package/dist/host/vite-plugin.js +0 -83
  122. package/dist/isolate/_kv-utils.d.ts +0 -10
  123. package/dist/isolate/_utils.js +0 -17
  124. package/dist/isolate/hooks.d.ts +0 -44
  125. package/dist/isolate/hooks.js +0 -58
  126. package/dist/isolate/index.d.ts +0 -18
  127. package/dist/isolate/index.js +0 -6
  128. package/dist/isolate/kv.js +0 -1
  129. package/dist/isolate/protocol.js +0 -2
  130. package/dist/isolate/types.d.ts +0 -418
  131. package/dist/isolate/types.js +0 -175
  132. package/dist/protocol-rcOrz7T3.js +0 -183
  133. package/dist/testing-BreLdpq-.js +0 -513
  134. package/dist/types.test-d.d.ts +0 -7
  135. /package/dist/{isolate/_utils.d.ts → sdk/utils.d.ts} +0 -0
@@ -0,0 +1,463 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * Fixture replay tests with a REAL Runtime.
4
+ *
5
+ * Replays recorded AssemblyAI S2S messages (from Kokoro TTS audio) through
6
+ * a real agent session — real tool execution, real Zod arg validation, real
7
+ * hook invocation.
8
+ *
9
+ * This exercises: AgentDef → toAgentConfig → tool schemas → Zod validation
10
+ * → executeToolCall → session orchestration (reply guards, tool buffering,
11
+ * turnPromise chaining, conversation history).
12
+ */
13
+
14
+ import { afterEach, describe, expect, test, vi } from "vitest";
15
+ import { z } from "zod";
16
+ import type { AgentDef } from "../sdk/types.ts";
17
+ import { createFixtureSession, flush } from "./_test-utils.ts";
18
+
19
+ // ─── Test agents with deterministic tools ────────────────────────────────────
20
+
21
+ const weatherAgent: AgentDef = {
22
+ name: "weather-agent",
23
+ systemPrompt: "You are a weather assistant.",
24
+ greeting: "Ask me about the weather!",
25
+ maxSteps: 5,
26
+ tools: {
27
+ get_weather: {
28
+ description: "Get the current weather for a city",
29
+ parameters: z.object({
30
+ city: z.string().describe("City name"),
31
+ }),
32
+ execute: ({ city }: { city: string }) => ({
33
+ city,
34
+ temperature: "72°F",
35
+ condition: "sunny",
36
+ humidity: "45%",
37
+ }),
38
+ },
39
+ },
40
+ };
41
+
42
+ const simpleAgent: AgentDef = {
43
+ name: "simple-agent",
44
+ systemPrompt: "You are a helpful assistant.",
45
+ greeting: "Hi!",
46
+ maxSteps: 5,
47
+ tools: {},
48
+ };
49
+
50
+ const statefulAgent: AgentDef<{ callCount: number }> = {
51
+ name: "stateful-agent",
52
+ systemPrompt: "You are helpful.",
53
+ greeting: "Hi!",
54
+ maxSteps: 5,
55
+ state: () => ({ callCount: 0 }),
56
+ tools: {
57
+ get_weather: {
58
+ description: "Get weather",
59
+ parameters: z.object({ city: z.string() }),
60
+ execute: ({ city }: { city: string }, ctx) => {
61
+ ctx.state.callCount++;
62
+ return { city, calls: ctx.state.callCount };
63
+ },
64
+ },
65
+ },
66
+ };
67
+
68
+ // ─── Tests ───────────────────────────────────────────────────────────────────
69
+
70
+ describe("fixture replay with real executor", () => {
71
+ let cleanup: () => void;
72
+
73
+ afterEach(() => {
74
+ cleanup?.();
75
+ });
76
+
77
+ // ── Tool call: real Zod validation + real tool execution ───────────────
78
+
79
+ test("tool call fixture: Zod validates args, real tool executes, result sent to S2S", async () => {
80
+ const ctx = createFixtureSession(weatherAgent);
81
+ cleanup = ctx.cleanup;
82
+ await ctx.session.start();
83
+
84
+ ctx.replay("tool-call-sequence.json");
85
+
86
+ // Wait for the async tool execution pipeline to complete
87
+ await vi.waitFor(() => expect(ctx.mockHandle.sendToolResult).toHaveBeenCalled());
88
+
89
+ // Verify the real tool was called and produced correct output
90
+ const [callId, resultStr] = vi.mocked(ctx.mockHandle.sendToolResult).mock.calls[0] as [
91
+ string,
92
+ string,
93
+ ];
94
+ expect(callId).toBeTruthy();
95
+ const result = JSON.parse(resultStr);
96
+ expect(result.city).toBe("San Francisco");
97
+ expect(result.temperature).toBe("72°F");
98
+ expect(result.condition).toBe("sunny");
99
+ });
100
+
101
+ test("tool call fixture: client receives tool_call with validated args", async () => {
102
+ const ctx = createFixtureSession(weatherAgent);
103
+ cleanup = ctx.cleanup;
104
+ await ctx.session.start();
105
+
106
+ ctx.replay("tool-call-sequence.json");
107
+ await vi.waitFor(() => expect(ctx.mockHandle.sendToolResult).toHaveBeenCalled());
108
+
109
+ const toolStart = ctx.client.events.find((e) => (e as { type: string }).type === "tool_call") as
110
+ | { toolName: string; args: Record<string, unknown> }
111
+ | undefined;
112
+ expect(toolStart?.toolName).toBe("get_weather");
113
+ expect(toolStart?.args).toEqual({ city: "San Francisco" });
114
+ });
115
+
116
+ test("tool call fixture: user transcript is forwarded to client", async () => {
117
+ const ctx = createFixtureSession(weatherAgent);
118
+ cleanup = ctx.cleanup;
119
+ await ctx.session.start();
120
+
121
+ ctx.replay("tool-call-sequence.json");
122
+ await vi.waitFor(() => expect(ctx.mockHandle.sendToolResult).toHaveBeenCalled());
123
+ await flush();
124
+
125
+ // The client must have received the user's transcribed weather question
126
+ const turns = ctx.client.events.filter(
127
+ (e) => (e as { type: string }).type === "user_transcript",
128
+ );
129
+ expect(turns.length).toBeGreaterThan(0);
130
+ const userText = (turns.at(-1) as { text: string }).text;
131
+ expect(userText.toLowerCase()).toContain("weather");
132
+ });
133
+
134
+ // ── Simple question: no tools, just session lifecycle ──────────────────
135
+
136
+ test("simple question fixture: greeting + agent response reach client", async () => {
137
+ const ctx = createFixtureSession(simpleAgent);
138
+ cleanup = ctx.cleanup;
139
+ await ctx.session.start();
140
+
141
+ ctx.replay("simple-question-sequence.json");
142
+ await flush();
143
+
144
+ const chats = ctx.client.events.filter(
145
+ (e) => (e as { type: string }).type === "agent_transcript",
146
+ );
147
+ expect(chats.length).toBeGreaterThanOrEqual(2); // greeting + answer
148
+ });
149
+
150
+ test("simple question fixture: user speech events forwarded to client", async () => {
151
+ const ctx = createFixtureSession(simpleAgent);
152
+ cleanup = ctx.cleanup;
153
+ await ctx.session.start();
154
+
155
+ ctx.replay("simple-question-sequence.json");
156
+ await flush();
157
+
158
+ const types = ctx.client.events.map((e) => (e as { type: string }).type);
159
+ expect(types).toContain("speech_started");
160
+ expect(types).toContain("speech_stopped");
161
+ expect(types).toContain("user_transcript");
162
+ });
163
+
164
+ // ── Stateful agent: session state persists across tool calls ───────────
165
+
166
+ test("stateful agent: tool accesses and mutates session state", async () => {
167
+ const ctx = createFixtureSession(statefulAgent);
168
+ cleanup = ctx.cleanup;
169
+ await ctx.session.start();
170
+
171
+ ctx.replay("tool-call-sequence.json");
172
+ await vi.waitFor(() => expect(ctx.mockHandle.sendToolResult).toHaveBeenCalled());
173
+
174
+ const [, resultStr] = vi.mocked(ctx.mockHandle.sendToolResult).mock.calls[0] as [
175
+ string,
176
+ string,
177
+ ];
178
+ const result = JSON.parse(resultStr);
179
+ expect(result.calls).toBe(1); // state.callCount was incremented
180
+ });
181
+
182
+ // ── Greeting only: session lifecycle without user audio ────────────────
183
+
184
+ test("greeting fixture: session setup completes with reply_done", async () => {
185
+ const ctx = createFixtureSession(simpleAgent);
186
+ cleanup = ctx.cleanup;
187
+ await ctx.session.start();
188
+
189
+ ctx.replay("greeting-session-sequence.json");
190
+ await flush();
191
+
192
+ const types = ctx.client.events.map((e) => (e as { type: string }).type);
193
+ expect(types).toContain("agent_transcript");
194
+ expect(types).toContain("reply_done");
195
+ });
196
+
197
+ // ── Tool schemas: real agent produces correct S2S tool schemas ─────────
198
+
199
+ test("real executor builds correct tool schemas from AgentDef", () => {
200
+ const ctx = createFixtureSession(weatherAgent);
201
+ cleanup = ctx.cleanup;
202
+
203
+ const schema = ctx.executor.toolSchemas.find((s) => s.name === "get_weather");
204
+ expect(schema).toBeDefined();
205
+ expect(schema?.description).toBe("Get the current weather for a city");
206
+ expect(schema?.parameters).toMatchObject({
207
+ type: "object",
208
+ properties: { city: { type: "string" } },
209
+ required: ["city"],
210
+ });
211
+ });
212
+
213
+ // ── Tool errors are surfaced as tool results ───────────────────────────
214
+
215
+ test("tool throw is surfaced as error result", async () => {
216
+ const agent: AgentDef = {
217
+ name: "error-agent",
218
+ systemPrompt: "Weather assistant.",
219
+ greeting: "Ask about weather!",
220
+ maxSteps: 5,
221
+ tools: {
222
+ get_weather: {
223
+ description: "Get weather",
224
+ parameters: z.object({ city: z.string() }),
225
+ execute: () => {
226
+ throw new Error("API key expired");
227
+ },
228
+ },
229
+ },
230
+ };
231
+
232
+ const ctx = createFixtureSession(agent);
233
+ cleanup = ctx.cleanup;
234
+ await ctx.session.start();
235
+
236
+ ctx.replay("tool-call-sequence.json");
237
+ await vi.waitFor(() => expect(ctx.mockHandle.sendToolResult).toHaveBeenCalled());
238
+
239
+ // Tool result should contain the error
240
+ const [, resultStr] = vi.mocked(ctx.mockHandle.sendToolResult).mock.calls[0] as [
241
+ string,
242
+ string,
243
+ ];
244
+ expect(resultStr).toContain("API key expired");
245
+ });
246
+
247
+ // ── Zod validation: bad args rejected ──────────────────────────────────
248
+
249
+ test("Zod validation rejects malformed tool args", async () => {
250
+ const agent: AgentDef = {
251
+ name: "strict-agent",
252
+ systemPrompt: "Weather assistant.",
253
+ greeting: "Ask about weather!",
254
+ maxSteps: 5,
255
+ tools: {
256
+ get_weather: {
257
+ description: "Get weather",
258
+ parameters: z.object({
259
+ city: z.string(),
260
+ country: z.string(), // required but not in fixture
261
+ }),
262
+ execute: () => "should not run",
263
+ },
264
+ },
265
+ };
266
+
267
+ const ctx = createFixtureSession(agent);
268
+ cleanup = ctx.cleanup;
269
+ await ctx.session.start();
270
+
271
+ ctx.replay("tool-call-sequence.json");
272
+ await vi.waitFor(() => expect(ctx.mockHandle.sendToolResult).toHaveBeenCalled());
273
+
274
+ // The result should contain a Zod validation error
275
+ const [, resultStr] = vi.mocked(ctx.mockHandle.sendToolResult).mock.calls[0] as [
276
+ string,
277
+ string,
278
+ ];
279
+ expect(resultStr).toContain("Invalid arguments");
280
+ expect(resultStr).toContain("country");
281
+ });
282
+
283
+ // ── Interrupted transcript NOT added to conversation history ────────────
284
+
285
+ test("interrupted agent transcript is not pushed to conversation history", async () => {
286
+ // Use a tool that captures messages to inspect conversation history
287
+ let capturedMessages: readonly { role: string; content: string }[] = [];
288
+ const agent: AgentDef = {
289
+ name: "interrupt-history-agent",
290
+ systemPrompt: "You are helpful.",
291
+ greeting: "Hi!",
292
+ maxSteps: 5,
293
+ tools: {
294
+ check_history: {
295
+ description: "Check history",
296
+ parameters: z.object({ q: z.string() }),
297
+ execute: (_args: unknown, ctx) => {
298
+ capturedMessages = [...ctx.messages];
299
+ return "ok";
300
+ },
301
+ },
302
+ },
303
+ };
304
+
305
+ const ctx = createFixtureSession(agent);
306
+ cleanup = ctx.cleanup;
307
+ await ctx.session.start();
308
+
309
+ const h = ctx.mockHandle;
310
+
311
+ // Fire an interrupted transcript — should NOT go into conversation history
312
+ h._fire("replyStarted", { replyId: "r1" });
313
+ h._fire("event", {
314
+ type: "agent_transcript",
315
+ text: "This was interrupted",
316
+ _interrupted: true,
317
+ });
318
+ h._fire("event", { type: "cancelled" });
319
+ await flush();
320
+
321
+ // Client sees both agent_transcript and cancelled events
322
+ const types = ctx.client.events.map((e) => (e as { type: string }).type);
323
+ expect(types).toContain("agent_transcript");
324
+ expect(types).toContain("cancelled");
325
+
326
+ // Fire a non-interrupted transcript — SHOULD go into conversation history
327
+ h._fire("replyStarted", { replyId: "r2" });
328
+ h._fire("event", {
329
+ type: "agent_transcript",
330
+ text: "This was completed",
331
+ _interrupted: false,
332
+ });
333
+ h._fire("event", { type: "reply_done" });
334
+ await flush();
335
+
336
+ // Trigger a tool call to inspect conversation history.
337
+ h._fire("event", { type: "user_transcript", text: "check" });
338
+ await flush();
339
+ h._fire("replyStarted", { replyId: "r3" });
340
+ h._fire("event", {
341
+ type: "tool_call",
342
+ toolCallId: "c1",
343
+ toolName: "check_history",
344
+ args: { q: "test" },
345
+ });
346
+ // Wait for tool to execute (captures messages)
347
+ await vi.waitFor(() => expect(capturedMessages.length).toBeGreaterThan(0));
348
+
349
+ // Conversation history should contain the completed text but NOT the interrupted text
350
+ const assistantMsgs = capturedMessages.filter((m) => m.role === "assistant");
351
+ expect(assistantMsgs.some((m) => m.content === "This was completed")).toBe(true);
352
+ expect(assistantMsgs.every((m) => m.content !== "This was interrupted")).toBe(true);
353
+ });
354
+
355
+ // ── Conversation history correctness after full tool-call flow ──────────
356
+
357
+ test("conversation history has user + assistant messages after tool-call flow", async () => {
358
+ // Use a tool that captures the messages it receives
359
+ let capturedMessages: readonly { role: string; content: string }[] = [];
360
+ const agent: AgentDef = {
361
+ name: "history-agent",
362
+ systemPrompt: "Weather assistant.",
363
+ greeting: "Ask about weather!",
364
+ maxSteps: 5,
365
+ tools: {
366
+ get_weather: {
367
+ description: "Get weather",
368
+ parameters: z.object({ city: z.string() }),
369
+ execute: ({ city }: { city: string }, ctx) => {
370
+ capturedMessages = [...ctx.messages];
371
+ return { city, temp: "72°F" };
372
+ },
373
+ },
374
+ },
375
+ };
376
+
377
+ const ctx = createFixtureSession(agent);
378
+ cleanup = ctx.cleanup;
379
+ await ctx.session.start();
380
+
381
+ ctx.replay("tool-call-sequence.json");
382
+ await vi.waitFor(() => expect(ctx.mockHandle.sendToolResult).toHaveBeenCalled());
383
+
384
+ // The tool should have seen the user's weather question in messages
385
+ const userMsgs = capturedMessages.filter((m) => m.role === "user");
386
+ expect(userMsgs.some((m) => m.content.toLowerCase().includes("weather"))).toBe(true);
387
+ });
388
+
389
+ // ── Audio chunks forwarded to client.playAudioChunk ────────────────────
390
+
391
+ test("reply.audio events forwarded to client.playAudioChunk", async () => {
392
+ const ctx = createFixtureSession(simpleAgent);
393
+ cleanup = ctx.cleanup;
394
+ await ctx.session.start();
395
+
396
+ // Fire audio events directly, because replay() skips them
397
+ const audioBytes = new Uint8Array([10, 20, 30, 40]);
398
+ ctx.mockHandle._fire("audio", { audio: audioBytes });
399
+ ctx.mockHandle._fire("audio", { audio: new Uint8Array([50, 60]) });
400
+
401
+ expect(ctx.client.audioChunks.length).toBe(2);
402
+ expect(Array.from(ctx.client.audioChunks[0] ?? [])).toEqual([10, 20, 30, 40]);
403
+ expect(Array.from(ctx.client.audioChunks[1] ?? [])).toEqual([50, 60]);
404
+ });
405
+
406
+ // ── Multiple tool calls in one reply: results buffered and sent together ─
407
+
408
+ test("multiple tool calls in one reply: all results buffered and sent after replyDone", async () => {
409
+ const agent: AgentDef = {
410
+ name: "multi-tool-agent",
411
+ systemPrompt: "Weather assistant.",
412
+ greeting: "Hi!",
413
+ maxSteps: 5,
414
+ tools: {
415
+ get_weather: {
416
+ description: "Get weather",
417
+ parameters: z.object({ city: z.string() }),
418
+ execute: ({ city }: { city: string }) => ({ city, temp: "72°F" }),
419
+ },
420
+ },
421
+ };
422
+
423
+ const ctx = createFixtureSession(agent);
424
+ cleanup = ctx.cleanup;
425
+ await ctx.session.start();
426
+
427
+ const h = ctx.mockHandle;
428
+ h._fire("replyStarted", { replyId: "r1" });
429
+ h._fire("event", {
430
+ type: "tool_call",
431
+ toolCallId: "c1",
432
+ toolName: "get_weather",
433
+ args: { city: "NYC" },
434
+ });
435
+ h._fire("event", {
436
+ type: "tool_call",
437
+ toolCallId: "c2",
438
+ toolName: "get_weather",
439
+ args: { city: "LA" },
440
+ });
441
+
442
+ // Wait for both tool calls to execute
443
+ await vi.waitFor(() => {
444
+ const starts = ctx.client.events.filter((e) => (e as { type: string }).type === "tool_call");
445
+ expect(starts.length).toBe(2);
446
+ });
447
+
448
+ // Results NOT sent yet — reply_done hasn't fired
449
+ expect(ctx.mockHandle.sendToolResult).not.toHaveBeenCalled();
450
+
451
+ // Fire reply_done — should flush both results
452
+ h._fire("event", { type: "reply_done" });
453
+ await vi.waitFor(() => {
454
+ expect(ctx.mockHandle.sendToolResult).toHaveBeenCalledTimes(2);
455
+ });
456
+
457
+ // Verify both results are correct
458
+ const calls = vi.mocked(ctx.mockHandle.sendToolResult).mock.calls as [string, string][];
459
+ const results = calls.map(([, r]) => JSON.parse(r));
460
+ expect(results.some((r) => r.city === "NYC")).toBe(true);
461
+ expect(results.some((r) => r.city === "LA")).toBe(true);
462
+ });
463
+ });
@@ -0,0 +1,40 @@
1
+ # API Response Fixtures
2
+
3
+ Real AssemblyAI S2S WebSocket messages recorded from the live API.
4
+ User audio was generated with Kokoro TTS (24kHz resampled to 16kHz
5
+ PCM16).
6
+
7
+ ## Files
8
+
9
+ **Session lifecycle:**
10
+
11
+ - `session-ready.json` — `session.ready` messages (config echo-back)
12
+ - `session-updated.json` — `session.updated` acknowledgements
13
+
14
+ **Greeting (no user audio):**
15
+
16
+ - `greeting-session-sequence.json` — Complete greeting session
17
+ - `reply-lifecycle.json` — Reply: started, transcript, done
18
+ - `reply-audio-samples.json` — `reply.audio` chunks (base64 truncated)
19
+
20
+ **Simple question (Kokoro: "Tell me a fun fact about space"):**
21
+
22
+ - `simple-question-sequence.json` — Greeting, STT, agent response
23
+ - `user-speech-recognition.json` — Speech start/stop, transcript.user
24
+
25
+ **Tool call (Kokoro: "What is the weather in San Francisco?"):**
26
+
27
+ - `tool-call-sequence.json` — Greeting, STT, tool.call, response
28
+ - `tool-calls.json` — Just the tool.call messages with parsed args
29
+
30
+ **Builtin tool call (Kokoro: "Search for Mars rover findings"):**
31
+
32
+ - `web-search-sequence.json` — Greeting, STT, web_search, response
33
+
34
+ ## Notes
35
+
36
+ - Audio data is truncated; `_truncated` and `_originalBase64Length` mark it.
37
+ - Real messages include extra fields (`timestamp`, `config`,
38
+ `start_ms`, `end_ms`) that the parser must tolerate.
39
+ - Session IDs and timestamps are from the recording — tests should
40
+ not depend on specific values.
@@ -0,0 +1,40 @@
1
+ [
2
+ {
3
+ "type": "session.updated",
4
+ "timestamp": 1774669383.6972675,
5
+ "config": {
6
+ "system_prompt": "You are a helpful assistant. Keep responses very short, under 10 words.",
7
+ "tools": [],
8
+ "greeting": "Hello! How can I help you today?"
9
+ }
10
+ },
11
+ {
12
+ "type": "session.ready",
13
+ "timestamp": 1774669383.7014148,
14
+ "session_id": "sess_2575293792fe47c790905010a6bb4a5e",
15
+ "config": {
16
+ "system_prompt": "You are a helpful assistant. Keep responses very short, under 10 words.",
17
+ "tools": [],
18
+ "greeting": "Hello! How can I help you today?"
19
+ }
20
+ },
21
+ {
22
+ "type": "reply.started",
23
+ "timestamp": 1774669383.702049,
24
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a"
25
+ },
26
+ {
27
+ "type": "transcript.agent",
28
+ "timestamp": 1774669386.8797214,
29
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
30
+ "item_id": "msg_5a2edc3add16401ca99c81eb8fda72e3",
31
+ "text": "Hello! How can I help you today?",
32
+ "interrupted": false
33
+ },
34
+ {
35
+ "type": "reply.done",
36
+ "timestamp": 1774669387.1929219,
37
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
38
+ "status": "completed"
39
+ }
40
+ ]
@@ -0,0 +1,42 @@
1
+ [
2
+ {
3
+ "type": "reply.audio",
4
+ "timestamp": 1774669383.7329695,
5
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
6
+ "data": "AAAAAAAA//8AAAAAAAAAAAAAAAD//wAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAP//AQD/////AAAAAAAAAAD//wAAAAAAAAAAAAAAAAAAAAAAAAAA//8AAAEAAAABAAAAAAAAAAAAAAAAAAAAAAD//wEAAAAAAAAA",
7
+ "_truncated": true,
8
+ "_originalBase64Length": 2312
9
+ },
10
+ {
11
+ "type": "reply.audio",
12
+ "timestamp": 1774669383.7631137,
13
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
14
+ "data": "AAAAAP//AAAAAAAAAAD//wAAAAAAAP////8AAP//AAAAAAAAAAAAAAAA//8AAAAAAAD//wAA//8AAP7/AAAAAAAAAAAAAAAAAQAAAP//AAABAAAAAAABAAEAAAAAAAEAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAP//AAAAAAAA//8AAAAAAQAAAAAAAAAAAAEAAQAAAAAA",
15
+ "_truncated": true,
16
+ "_originalBase64Length": 2312
17
+ },
18
+ {
19
+ "type": "reply.audio",
20
+ "timestamp": 1774669383.803568,
21
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
22
+ "data": "//8AAAAAAAAAAAAAAAAAAAAA//8AAAAAAAAAAAAAAAD//wAAAAABAAAAAAD//wAAAQAAAAAAAAABAAAAAQAAAAAAAAAAAP//AAAAAAAAAAAAAP//AAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAA//8AAAAA",
23
+ "_truncated": true,
24
+ "_originalBase64Length": 2312
25
+ },
26
+ {
27
+ "type": "reply.audio",
28
+ "timestamp": 1774669383.8428738,
29
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
30
+ "data": "//8AAAAAAAAAAAAAAAAAAP////8AAP//AAAAAP//AAAAAAAAAAAAAAAAAAAAAAAAAAABAAEAAAAAAAEAAAAAAAAAAQAAAAAAAQAAAAAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAAAAAAAAAAAAD//wAAAQAAAAEA//8AAAAAAAAAAAAA//8AAAAAAAAAAAAAAQAAAAAA",
31
+ "_truncated": true,
32
+ "_originalBase64Length": 2312
33
+ },
34
+ {
35
+ "type": "reply.audio",
36
+ "timestamp": 1774669383.8731623,
37
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
38
+ "data": "AAABAAAA//8AAAEAAAABAP//AAAAAAEAAQABAAAAAAAAAP//AAAAAAEAAAABAAAAAAAAAAAAAAD//wAAAAAAAAAAAAD//wEAAAAAAP////8AAAAA/////wAAAAAAAAEAAAAAAAEAAAAAAAAAAAABAAAA//8AAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAA",
39
+ "_truncated": true,
40
+ "_originalBase64Length": 2312
41
+ }
42
+ ]
@@ -0,0 +1,21 @@
1
+ [
2
+ {
3
+ "type": "reply.started",
4
+ "timestamp": 1774669383.702049,
5
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a"
6
+ },
7
+ {
8
+ "type": "transcript.agent",
9
+ "timestamp": 1774669386.8797214,
10
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
11
+ "item_id": "msg_5a2edc3add16401ca99c81eb8fda72e3",
12
+ "text": "Hello! How can I help you today?",
13
+ "interrupted": false
14
+ },
15
+ {
16
+ "type": "reply.done",
17
+ "timestamp": 1774669387.1929219,
18
+ "reply_id": "resp_02ca83aad786449cadc7aab6c4e2737a",
19
+ "status": "completed"
20
+ }
21
+ ]
@@ -0,0 +1,48 @@
1
+ [
2
+ {
3
+ "type": "session.ready",
4
+ "timestamp": 1774669383.7014148,
5
+ "session_id": "sess_2575293792fe47c790905010a6bb4a5e",
6
+ "config": {
7
+ "system_prompt": "You are a helpful assistant. Keep responses very short, under 10 words.",
8
+ "tools": [],
9
+ "greeting": "Hello! How can I help you today?"
10
+ }
11
+ },
12
+ {
13
+ "type": "session.ready",
14
+ "timestamp": 1774669390.5586526,
15
+ "session_id": "sess_2246ce7e2eb146e08e62c9476278fbef",
16
+ "config": {
17
+ "system_prompt": "You are a helpful assistant. Keep responses to one short sentence.",
18
+ "tools": [],
19
+ "greeting": "Hi!"
20
+ }
21
+ },
22
+ {
23
+ "type": "session.ready",
24
+ "timestamp": 1774669401.5069516,
25
+ "session_id": "sess_b01d5d5f1865418b955dc30b00b3d52b",
26
+ "config": {
27
+ "system_prompt": "You are a weather assistant. Always use the get_weather tool for weather questions. After getting the result, tell the user the weather in one sentence.",
28
+ "tools": [
29
+ {
30
+ "type": "function",
31
+ "name": "get_weather",
32
+ "description": "Get the current weather for a city",
33
+ "parameters": {
34
+ "type": "object",
35
+ "properties": {
36
+ "city": {
37
+ "type": "string",
38
+ "description": "City name"
39
+ }
40
+ },
41
+ "required": ["city"]
42
+ }
43
+ }
44
+ ],
45
+ "greeting": "Ask me about the weather!"
46
+ }
47
+ }
48
+ ]