@alexkroman1/aai 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/.turbo/turbo-build.log +11 -9
  2. package/CHANGELOG.md +16 -0
  3. package/dist/{_internal-types-CrnTi9Ew.js → _internal-types-CfOAbK6V.js} +22 -35
  4. package/dist/constants-y68COEGj.js +29 -0
  5. package/dist/host/_base64.d.ts +2 -0
  6. package/dist/host/_mock-ws.d.ts +0 -61
  7. package/dist/host/_pipeline-test-fakes.d.ts +7 -4
  8. package/dist/host/_run-code.d.ts +0 -25
  9. package/dist/host/_runtime-conformance.d.ts +3 -34
  10. package/dist/host/memory-vector.d.ts +0 -11
  11. package/dist/host/providers/resolve-kv.d.ts +0 -7
  12. package/dist/host/providers/resolve-vector.d.ts +0 -8
  13. package/dist/host/providers/stt/assemblyai.d.ts +0 -14
  14. package/dist/host/providers/stt/deepgram.d.ts +2 -14
  15. package/dist/host/providers/stt/soniox.d.ts +0 -22
  16. package/dist/host/providers/tts/rime.d.ts +10 -31
  17. package/dist/host/runtime-barrel.js +628 -642
  18. package/dist/host/runtime-config.d.ts +9 -6
  19. package/dist/host/runtime.d.ts +3 -0
  20. package/dist/host/to-vercel-tools.d.ts +3 -33
  21. package/dist/host/transports/openai-realtime-transport.d.ts +43 -0
  22. package/dist/host/unstorage-kv.d.ts +0 -26
  23. package/dist/index.js +3 -3
  24. package/dist/openai-realtime-cjPAHMMx.js +10 -0
  25. package/dist/sdk/_internal-types.d.ts +6 -55
  26. package/dist/sdk/allowed-hosts.d.ts +4 -3
  27. package/dist/sdk/constants.d.ts +4 -29
  28. package/dist/sdk/define.d.ts +7 -4
  29. package/dist/sdk/kv.d.ts +13 -37
  30. package/dist/sdk/manifest-barrel.js +1 -1
  31. package/dist/sdk/manifest.d.ts +8 -2
  32. package/dist/sdk/protocol.js +1 -1
  33. package/dist/sdk/providers/s2s/openai-realtime.d.ts +17 -0
  34. package/dist/sdk/providers/s2s-barrel.d.ts +9 -0
  35. package/dist/sdk/providers/s2s-barrel.js +2 -0
  36. package/dist/sdk/providers/tts/rime.d.ts +1 -1
  37. package/dist/sdk/providers.d.ts +6 -2
  38. package/dist/sdk/types.d.ts +7 -1
  39. package/dist/{types-KUgezM6u.js → types-DOWVZhb9.js} +1 -7
  40. package/dist/{ws-upgrade-BeOQ7fXL.js → ws-upgrade-CG8-by1n.js} +2 -3
  41. package/host/_base64.ts +9 -0
  42. package/host/_mock-ws.ts +0 -65
  43. package/host/_pipeline-test-fakes.ts +19 -31
  44. package/host/_run-code.ts +10 -53
  45. package/host/_runtime-conformance.ts +3 -44
  46. package/host/_test-utils.ts +20 -42
  47. package/host/builtin-tools.test.ts +127 -222
  48. package/host/builtin-tools.ts +6 -10
  49. package/host/cleanup.test.ts +30 -73
  50. package/host/integration/pipeline-reference.integration.test.ts +12 -17
  51. package/host/integration.test.ts +0 -7
  52. package/host/memory-vector.test.ts +3 -1
  53. package/host/memory-vector.ts +16 -21
  54. package/host/pinecone-vector.test.ts +14 -17
  55. package/host/pinecone-vector.ts +10 -19
  56. package/host/providers/providers.test-d.ts +5 -3
  57. package/host/providers/resolve-kv.ts +23 -41
  58. package/host/providers/resolve-vector.ts +3 -12
  59. package/host/providers/resolve.test.ts +15 -28
  60. package/host/providers/resolve.ts +24 -24
  61. package/host/providers/stt/assemblyai.test.ts +2 -14
  62. package/host/providers/stt/assemblyai.ts +12 -35
  63. package/host/providers/stt/deepgram.test.ts +23 -83
  64. package/host/providers/stt/deepgram.ts +15 -40
  65. package/host/providers/stt/elevenlabs.test.ts +26 -38
  66. package/host/providers/stt/elevenlabs.ts +10 -9
  67. package/host/providers/stt/soniox.test.ts +35 -85
  68. package/host/providers/stt/soniox.ts +8 -53
  69. package/host/providers/tts/cartesia.test.ts +19 -58
  70. package/host/providers/tts/cartesia.ts +36 -66
  71. package/host/providers/tts/rime.test.ts +12 -38
  72. package/host/providers/tts/rime.ts +23 -86
  73. package/host/runtime-config.test.ts +9 -9
  74. package/host/runtime-config.ts +16 -22
  75. package/host/runtime.test.ts +111 -73
  76. package/host/runtime.ts +138 -86
  77. package/host/s2s.test.ts +92 -191
  78. package/host/s2s.ts +56 -53
  79. package/host/server-shutdown.test.ts +9 -30
  80. package/host/server.test.ts +2 -13
  81. package/host/server.ts +85 -100
  82. package/host/session-core.test.ts +15 -30
  83. package/host/session-core.ts +10 -13
  84. package/host/session-prompt.test.ts +1 -5
  85. package/host/to-vercel-tools.test.ts +53 -72
  86. package/host/to-vercel-tools.ts +9 -39
  87. package/host/tool-executor.test.ts +25 -51
  88. package/host/tool-executor.ts +18 -12
  89. package/host/transports/openai-realtime-transport.test.ts +371 -0
  90. package/host/transports/openai-realtime-transport.ts +319 -0
  91. package/host/transports/pipeline-transport.test.ts +125 -298
  92. package/host/transports/pipeline-transport.ts +20 -68
  93. package/host/transports/s2s-transport-fixtures.test.ts +31 -92
  94. package/host/transports/s2s-transport.test.ts +65 -134
  95. package/host/transports/s2s-transport.ts +15 -43
  96. package/host/transports/types.test.ts +4 -8
  97. package/host/unstorage-kv.test.ts +3 -2
  98. package/host/unstorage-kv.ts +5 -35
  99. package/host/ws-handler.test.ts +72 -176
  100. package/host/ws-handler.ts +6 -12
  101. package/package.json +6 -1
  102. package/sdk/__snapshots__/exports.test.ts.snap +7 -0
  103. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  104. package/sdk/_internal-types.test.ts +6 -9
  105. package/sdk/_internal-types.ts +16 -57
  106. package/sdk/_test-matchers.ts +25 -15
  107. package/sdk/allowed-hosts.test.ts +50 -114
  108. package/sdk/allowed-hosts.ts +8 -14
  109. package/sdk/constants.ts +5 -52
  110. package/sdk/define.test.ts +7 -6
  111. package/sdk/define.ts +7 -3
  112. package/sdk/exports.test.ts +6 -1
  113. package/sdk/kv.ts +13 -37
  114. package/sdk/manifest.test-d.ts +5 -0
  115. package/sdk/manifest.test.ts +61 -9
  116. package/sdk/manifest.ts +11 -11
  117. package/sdk/protocol-compat.test.ts +66 -98
  118. package/sdk/protocol-snapshot.test.ts +2 -16
  119. package/sdk/protocol.test.ts +13 -22
  120. package/sdk/providers/s2s/openai-realtime.ts +36 -0
  121. package/sdk/providers/s2s-barrel.ts +12 -0
  122. package/sdk/providers/tts/rime.ts +1 -1
  123. package/sdk/providers.ts +24 -5
  124. package/sdk/schema-alignment.test.ts +25 -73
  125. package/sdk/schema-shapes.test.ts +1 -29
  126. package/sdk/system-prompt.test.ts +0 -1
  127. package/sdk/system-prompt.ts +17 -19
  128. package/sdk/types-inference.test.ts +10 -36
  129. package/sdk/types.ts +7 -0
  130. package/sdk/ws-upgrade.test.ts +24 -23
  131. package/sdk/ws-upgrade.ts +2 -3
  132. package/tsdown.config.ts +8 -11
  133. package/dist/constants-C2nirZUI.js +0 -54
@@ -1,15 +1,11 @@
1
1
  // Copyright 2026 the AAI authors. MIT license.
2
2
  // Pipeline transport — STT → LLM → TTS orchestration behind the Transport interface.
3
-
3
+ //
4
4
  // Pipeline mode executes tools inline via streamText's `tools.execute`.
5
5
  // `callbacks.onToolCall` is observability-only; runtime.ts routes it to
6
6
  // `client.toolCall` directly (bypassing SessionCore's tool-dispatch path,
7
7
  // which is S2S-only). `sendToolResult` is a no-op because results are
8
8
  // already handled by streamText.
9
- //
10
- // `conversationMessages` below is transport-local and currently uncapped —
11
- // SessionCore's `maxHistory` does not yet feed through. Long pipeline
12
- // sessions may accumulate unbounded context; revisit if it matters.
13
9
 
14
10
  import type { LanguageModel, ModelMessage } from "ai";
15
11
  import { stepCountIs, streamText } from "ai";
@@ -97,14 +93,8 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
97
93
  });
98
94
 
99
95
  const { callbacks, sessionConfig } = opts;
100
-
101
- // Derive the system prompt — pipeline mode always uses voice=true.
102
- // In the refactored transport, we receive the final systemPrompt directly
103
- // from sessionConfig (built by the caller). We use it as-is but also keep
104
- // the hasTools logic available if the caller passes raw schemas.
105
96
  const systemPrompt = sessionConfig.systemPrompt;
106
97
 
107
- // ---- State ----------------------------------------------------------------
108
98
  const sessionAbort = new AbortController();
109
99
  let audioReady = false;
110
100
  let terminated = false;
@@ -112,16 +102,13 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
112
102
  let ttsSession: TtsSession | null = null;
113
103
  let turnController: AbortController | null = null;
114
104
  let nextReplyId = 0;
115
- // Conversation history seeded from sessionConfig.history if provided.
116
- // Pipeline transport manages its own history since SessionCore doesn't own
117
- // the conversation in pipeline mode (history is needed to build the LLM
118
- // messages array for each turn).
105
+ // Pipeline transport manages its own history; SessionCore does not own the
106
+ // conversation in pipeline mode (we need it to build LLM messages per turn).
119
107
  const conversationMessages: Message[] = sessionConfig.history ? [...sessionConfig.history] : [];
120
108
  let turnPromise: Promise<void> | null = null;
121
109
  const sttSubs: Unsubscribe[] = [];
122
110
  const ttsSubs: Unsubscribe[] = [];
123
111
 
124
- // ---- History helpers ------------------------------------------------------
125
112
  function pushMessages(...msgs: Message[]): void {
126
113
  conversationMessages.push(...msgs);
127
114
  if (conversationMessages.length > DEFAULT_MAX_HISTORY) {
@@ -133,16 +120,11 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
133
120
  turnPromise = (turnPromise ?? Promise.resolve()).then(() => p);
134
121
  }
135
122
 
136
- // ---- Error helpers --------------------------------------------------------
137
123
  function emitError(code: SessionErrorCode, message: string): void {
138
124
  callbacks.onError(code, message);
139
125
  }
140
126
 
141
- // ---- Termination ----------------------------------------------------------
142
- /**
143
- * Tear down after an unrecoverable provider error. Aborts the in-flight
144
- * turn, cancels TTS, signals providers to close. Idempotent.
145
- */
127
+ // Idempotent teardown after an unrecoverable provider error.
146
128
  function terminate(): void {
147
129
  if (terminated) return;
148
130
  terminated = true;
@@ -155,7 +137,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
155
137
  sessionAbort.abort();
156
138
  }
157
139
 
158
- // ---- STT event handlers ---------------------------------------------------
159
140
  function onSttPartial(_text: string): void {
160
141
  if (terminated) return;
161
142
  if (turnController === null) return;
@@ -170,7 +151,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
170
151
  if (terminated) return;
171
152
  const trimmed = text.trim();
172
153
  if (trimmed.length === 0) return;
173
- // Replace in-flight turn if one is running (duplicate/late STT final).
174
154
  if (turnController !== null) {
175
155
  log.info("Pipeline replacing in-flight turn", { sid: opts.sid });
176
156
  turnController.abort();
@@ -192,7 +172,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
192
172
  terminate();
193
173
  }
194
174
 
195
- // ---- TTS event handlers ---------------------------------------------------
196
175
  function onTtsError(err: TtsError): void {
197
176
  if (terminated) return;
198
177
  log.error("TTS error", { code: err.code, message: err.message, sid: opts.sid });
@@ -200,7 +179,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
200
179
  terminate();
201
180
  }
202
181
 
203
- // ---- LLM streaming --------------------------------------------------------
204
182
  async function consumeLlmStream(
205
183
  ctl: AbortController,
206
184
  messages: ModelMessage[],
@@ -274,8 +252,7 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
274
252
  pendingSeparator = true;
275
253
  return;
276
254
  case "tool-call": {
277
- // Option A: fire callbacks.onToolCall for observability only.
278
- // Actual execution happens inline via toVercelTools.
255
+ // Observability only — actual execution happens inline via toVercelTools.
279
256
  const input = (part.input ?? {}) as Record<string, unknown>;
280
257
  callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
281
258
  return;
@@ -292,17 +269,11 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
292
269
  };
293
270
  }
294
271
 
295
- // ---- TTS flush ------------------------------------------------------------
296
- /**
297
- * Flush TTS and wait for drain. Resolves on:
298
- * - TTS emits `done`
299
- * - `signal` aborts (barge-in / provider error / session stop)
300
- * - PIPELINE_FLUSH_TIMEOUT_MS elapses
301
- * Resolves immediately if no TTS session.
302
- */
272
+ // Resolves on TTS `done`, signal abort, or PIPELINE_FLUSH_TIMEOUT_MS elapsed.
303
273
  function flushTtsAndWait(signal: AbortSignal): Promise<void> {
304
274
  const tts = ttsSession;
305
275
  if (!tts) return Promise.resolve();
276
+ if (signal.aborted) return Promise.resolve();
306
277
  return new Promise<void>((resolve) => {
307
278
  let off: Unsubscribe | null = null;
308
279
  let timer: ReturnType<typeof setTimeout> | null = null;
@@ -322,24 +293,16 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
322
293
  resolve();
323
294
  };
324
295
  const onAbort = () => finish();
325
- if (signal.aborted) {
326
- resolve();
327
- return;
328
- }
329
296
  signal.addEventListener("abort", onAbort, { once: true });
330
297
  off = tts.on("done", finish);
331
298
  timer = setTimeout(() => {
332
- log.warn("TTS flush timeout", {
333
- sid: opts.sid,
334
- timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS,
335
- });
299
+ log.warn("TTS flush timeout", { sid: opts.sid, timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS });
336
300
  finish();
337
301
  }, PIPELINE_FLUSH_TIMEOUT_MS);
338
302
  tts.flush();
339
303
  });
340
304
  }
341
305
 
342
- // ---- Turn orchestration ---------------------------------------------------
343
306
  async function runTurn(userText: string): Promise<void> {
344
307
  const replyId = `pipeline-${++nextReplyId}`;
345
308
  callbacks.onReplyStarted(replyId);
@@ -366,7 +329,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
366
329
  return;
367
330
  }
368
331
 
369
- // Emit the complete transcript once the LLM finishes streaming.
370
332
  if (accumulated.length > 0) {
371
333
  callbacks.onAgentTranscript(accumulated, false);
372
334
  pushMessages({ role: "assistant", content: accumulated });
@@ -404,14 +366,12 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
404
366
  return;
405
367
  }
406
368
 
407
- // Do NOT call callbacks.onAudioDone() here — session-core's flushReply
408
- // (triggered by onReplyDone) emits audioDone + replyDone together, matching
409
- // the S2S transport contract. Calling it here would double-fire audio_done.
369
+ // See runTurn: onReplyDone triggers session-core's flushReply which emits
370
+ // audioDone + replyDone together; firing onAudioDone here would double-fire.
410
371
  callbacks.onReplyDone();
411
372
  if (turnController === ctl) turnController = null;
412
373
  }
413
374
 
414
- // ---- Provider lifecycle ---------------------------------------------------
415
375
  function reportOpenRejection(which: "stt" | "tts", reason: unknown): void {
416
376
  const msg = errorMessage(reason);
417
377
  log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
@@ -443,10 +403,8 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
443
403
  callbacks.onAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
444
404
  }),
445
405
  );
446
- // Note: `done` is NOT subscribed here. flushTtsAndWait() attaches a
447
- // one-shot listener per-turn so it knows when synthesis drains. Calling
448
- // callbacks.onAudioDone() is done explicitly at the end of runTurn /
449
- // runGreeting — not via a persistent subscription — to avoid double-firing.
406
+ // `done` is intentionally NOT subscribed persistently — flushTtsAndWait
407
+ // attaches a one-shot listener per-turn to avoid double-firing audio_done.
450
408
  ttsSubs.push(session.on("error", onTtsError));
451
409
  }
452
410
 
@@ -479,7 +437,6 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
479
437
  if (!aborted && (sttFailed || ttsFailed)) terminate();
480
438
  }
481
439
 
482
- // ---- Greeting on audio ready ----------------------------------------------
483
440
  function onAudioReady(): void {
484
441
  if (audioReady || terminated) return;
485
442
  audioReady = true;
@@ -492,13 +449,11 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
492
449
  chainTurn(turn);
493
450
  }
494
451
 
495
- // ---- Transport interface --------------------------------------------------
496
452
  return {
497
453
  async start(): Promise<void> {
498
454
  await openProviders();
499
- // In S2S mode, onSessionReady fires when the provider acknowledges the
500
- // session. In pipeline mode, we fire it immediately after providers open
501
- // (which is the equivalent "ready" signal), then trigger greeting.
455
+ // S2S fires onSessionReady when the provider acks; in pipeline mode the
456
+ // equivalent "ready" signal is providers having opened.
502
457
  callbacks.onSessionReady?.(opts.sid);
503
458
  onAudioReady();
504
459
  },
@@ -522,8 +477,7 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
522
477
 
523
478
  sendUserAudio(bytes: Uint8Array): void {
524
479
  if (terminated || !audioReady) return;
525
- const offset = bytes.byteOffset;
526
- const length = bytes.byteLength;
480
+ const { byteOffset: offset, byteLength: length } = bytes;
527
481
  let pcm: Int16Array;
528
482
  if (offset % 2 === 0 && length % 2 === 0) {
529
483
  pcm = new Int16Array(bytes.buffer, offset, length / 2);
@@ -535,8 +489,8 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
535
489
  sttSession?.sendAudio(pcm);
536
490
  },
537
491
 
538
- // Option A: tool execution stays inside toVercelTools/streamText.
539
- // sendToolResult is a no-op for pipeline mode.
492
+ // Tool execution stays inside toVercelTools/streamText; results aren't
493
+ // routed through the transport.
540
494
  // biome-ignore lint/suspicious/noEmptyBlockStatements: intentional no-op for pipeline mode
541
495
  sendToolResult(_callId: string, _result: string): void {},
542
496
 
@@ -545,11 +499,9 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
545
499
  turnController?.abort();
546
500
  turnController = null;
547
501
  ttsSession?.cancel();
548
- // Do NOT call callbacks.onCancelled() here. This method is invoked from
549
- // session-core.onCancel (client-initiated cancel), which calls
550
- // client.cancelled() itself — firing onCancelled here would double-cancel.
551
- // Barge-in (STT partial) fires callbacks.onCancelled() directly in
552
- // onSttPartial, where the cancel originates inside the transport.
502
+ // Do NOT call callbacks.onCancelled() here — session-core.onCancel
503
+ // (client-initiated) calls client.cancelled() itself. Barge-in fires
504
+ // onCancelled directly in onSttPartial where the cancel originates here.
553
505
  },
554
506
  };
555
507
  }
@@ -20,7 +20,11 @@ import { z } from "zod";
20
20
  import type { AgentDef } from "../../sdk/types.ts";
21
21
  import { createFixtureSession, flush } from "../_test-utils.ts";
22
22
 
23
- // ─── Test agents with deterministic tools ────────────────────────────────────
23
+ type FixtureSession = ReturnType<typeof createFixtureSession>;
24
+
25
+ function firstToolResult(ctx: FixtureSession): [string, string] {
26
+ return vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [string, string];
27
+ }
24
28
 
25
29
  const weatherAgent: AgentDef = {
26
30
  name: "weather-agent",
@@ -69,8 +73,6 @@ const statefulAgent: AgentDef<{ callCount: number }> = {
69
73
  },
70
74
  };
71
75
 
72
- // ─── Tests ───────────────────────────────────────────────────────────────────
73
-
74
76
  describe("fixture replay with real executor (transport layer)", () => {
75
77
  let cleanup: () => void;
76
78
 
@@ -78,23 +80,20 @@ describe("fixture replay with real executor (transport layer)", () => {
78
80
  cleanup?.();
79
81
  });
80
82
 
81
- // ── Tool call: real Zod validation + real tool execution ───────────────
83
+ function makeCtx(agent: AgentDef): FixtureSession {
84
+ const ctx = createFixtureSession(agent);
85
+ cleanup = ctx.cleanup;
86
+ return ctx;
87
+ }
82
88
 
83
89
  test("tool call fixture: Zod validates args, real tool executes, result sent to S2S", async () => {
84
- const ctx = createFixtureSession(weatherAgent);
85
- cleanup = ctx.cleanup;
90
+ const ctx = makeCtx(weatherAgent);
86
91
  await ctx.start();
87
92
 
88
93
  ctx.replay("tool-call-sequence.json");
89
-
90
- // Wait for the async tool execution pipeline to complete
91
94
  await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
92
95
 
93
- // Verify the real tool was called and produced correct output
94
- const [callId, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
95
- string,
96
- string,
97
- ];
96
+ const [callId, resultStr] = firstToolResult(ctx);
98
97
  expect(callId).toBeTruthy();
99
98
  const result = JSON.parse(resultStr);
100
99
  expect(result.city).toBe("San Francisco");
@@ -103,8 +102,7 @@ describe("fixture replay with real executor (transport layer)", () => {
103
102
  });
104
103
 
105
104
  test("tool call fixture: client receives tool_call with validated args", async () => {
106
- const ctx = createFixtureSession(weatherAgent);
107
- cleanup = ctx.cleanup;
105
+ const ctx = makeCtx(weatherAgent);
108
106
  await ctx.start();
109
107
 
110
108
  ctx.replay("tool-call-sequence.json");
@@ -117,36 +115,30 @@ describe("fixture replay with real executor (transport layer)", () => {
117
115
  });
118
116
 
119
117
  test("tool call fixture: conversation history accumulates user + assistant messages", async () => {
120
- const ctx = createFixtureSession(weatherAgent);
121
- cleanup = ctx.cleanup;
118
+ const ctx = makeCtx(weatherAgent);
122
119
  await ctx.start();
123
120
 
124
121
  ctx.replay("tool-call-sequence.json");
125
122
  await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
126
123
  await flush();
127
124
 
128
- // Client received user transcript
129
125
  expect(ctx.client.userTranscripts.length).toBeGreaterThan(0);
130
126
  const lastUserText = ctx.client.userTranscripts.at(-1) ?? "";
131
127
  expect(lastUserText.toLowerCase()).toContain("weather");
132
128
  });
133
129
 
134
- // ── Simple question: no tools, just session lifecycle ──────────────────
135
-
136
130
  test("simple question fixture: greeting + agent response reach client", async () => {
137
- const ctx = createFixtureSession(simpleAgent);
138
- cleanup = ctx.cleanup;
131
+ const ctx = makeCtx(simpleAgent);
139
132
  await ctx.start();
140
133
 
141
134
  ctx.replay("simple-question-sequence.json");
142
135
  await flush();
143
136
 
144
- expect(ctx.client.agentTranscripts.length).toBeGreaterThanOrEqual(2); // greeting + answer
137
+ expect(ctx.client.agentTranscripts.length).toBeGreaterThanOrEqual(2);
145
138
  });
146
139
 
147
140
  test("simple question fixture: user speech events forwarded to client", async () => {
148
- const ctx = createFixtureSession(simpleAgent);
149
- cleanup = ctx.cleanup;
141
+ const ctx = makeCtx(simpleAgent);
150
142
  await ctx.start();
151
143
 
152
144
  ctx.replay("simple-question-sequence.json");
@@ -157,29 +149,20 @@ describe("fixture replay with real executor (transport layer)", () => {
157
149
  expect(ctx.client.userTranscripts.length).toBeGreaterThan(0);
158
150
  });
159
151
 
160
- // ── Stateful agent: session state persists across tool calls ───────────
161
-
162
152
  test("stateful agent: tool accesses and mutates session state", async () => {
163
- const ctx = createFixtureSession(statefulAgent);
164
- cleanup = ctx.cleanup;
153
+ const ctx = makeCtx(statefulAgent);
165
154
  await ctx.start();
166
155
 
167
156
  ctx.replay("tool-call-sequence.json");
168
157
  await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
169
158
 
170
- const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
171
- string,
172
- string,
173
- ];
159
+ const [, resultStr] = firstToolResult(ctx);
174
160
  const result = JSON.parse(resultStr);
175
- expect(result.calls).toBe(1); // state.callCount was incremented
161
+ expect(result.calls).toBe(1);
176
162
  });
177
163
 
178
- // ── Greeting only: session lifecycle without user audio ────────────────
179
-
180
164
  test("greeting fixture: session setup completes with reply_done", async () => {
181
- const ctx = createFixtureSession(simpleAgent);
182
- cleanup = ctx.cleanup;
165
+ const ctx = makeCtx(simpleAgent);
183
166
  await ctx.start();
184
167
 
185
168
  ctx.replay("greeting-session-sequence.json");
@@ -189,11 +172,8 @@ describe("fixture replay with real executor (transport layer)", () => {
189
172
  expect(ctx.client.replyDoneCount).toBeGreaterThan(0);
190
173
  });
191
174
 
192
- // ── Tool schemas: real agent produces correct S2S tool schemas ─────────
193
-
194
175
  test("real executor builds correct tool schemas from AgentDef", () => {
195
- const ctx = createFixtureSession(weatherAgent);
196
- cleanup = ctx.cleanup;
176
+ const ctx = makeCtx(weatherAgent);
197
177
 
198
178
  const schema = ctx.executor.toolSchemas.find((s) => s.name === "get_weather");
199
179
  expect(schema).toBeDefined();
@@ -205,8 +185,6 @@ describe("fixture replay with real executor (transport layer)", () => {
205
185
  });
206
186
  });
207
187
 
208
- // ── Tool errors are surfaced as tool results ───────────────────────────
209
-
210
188
  test("tool throw is surfaced as error result", async () => {
211
189
  const agent: AgentDef = {
212
190
  name: "error-agent",
@@ -224,23 +202,16 @@ describe("fixture replay with real executor (transport layer)", () => {
224
202
  },
225
203
  };
226
204
 
227
- const ctx = createFixtureSession(agent);
228
- cleanup = ctx.cleanup;
205
+ const ctx = makeCtx(agent);
229
206
  await ctx.start();
230
207
 
231
208
  ctx.replay("tool-call-sequence.json");
232
209
  await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
233
210
 
234
- // Tool result should contain the error
235
- const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
236
- string,
237
- string,
238
- ];
211
+ const [, resultStr] = firstToolResult(ctx);
239
212
  expect(resultStr).toContain("API key expired");
240
213
  });
241
214
 
242
- // ── Zod validation: bad args rejected ──────────────────────────────────
243
-
244
215
  test("Zod validation rejects malformed tool args", async () => {
245
216
  const agent: AgentDef = {
246
217
  name: "strict-agent",
@@ -259,26 +230,18 @@ describe("fixture replay with real executor (transport layer)", () => {
259
230
  },
260
231
  };
261
232
 
262
- const ctx = createFixtureSession(agent);
263
- cleanup = ctx.cleanup;
233
+ const ctx = makeCtx(agent);
264
234
  await ctx.start();
265
235
 
266
236
  ctx.replay("tool-call-sequence.json");
267
237
  await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
268
238
 
269
- // The result should contain a Zod validation error
270
- const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
271
- string,
272
- string,
273
- ];
239
+ const [, resultStr] = firstToolResult(ctx);
274
240
  expect(resultStr).toContain("Invalid arguments");
275
241
  expect(resultStr).toContain("country");
276
242
  });
277
243
 
278
- // ── Interrupted transcript NOT added to conversation history ────────────
279
-
280
244
  test("interrupted agent transcript is not pushed to conversation history", async () => {
281
- // Use a tool that captures messages to inspect conversation history
282
245
  let capturedMessages: readonly { role: string; content: string }[] = [];
283
246
  const agent: AgentDef = {
284
247
  name: "interrupt-history-agent",
@@ -297,46 +260,36 @@ describe("fixture replay with real executor (transport layer)", () => {
297
260
  },
298
261
  };
299
262
 
300
- const ctx = createFixtureSession(agent);
301
- cleanup = ctx.cleanup;
263
+ const ctx = makeCtx(agent);
302
264
  await ctx.start();
303
265
 
304
266
  const cbs = ctx.mockCallbacks;
305
267
 
306
- // Fire an interrupted transcript — should NOT go into conversation history
307
268
  cbs.onReplyStarted("r1");
308
269
  cbs.onAgentTranscript("This was interrupted", true);
309
270
  cbs.onCancelled();
310
271
  await flush();
311
272
 
312
- // Client sees both agent_transcript and cancelled events
313
273
  expect(ctx.client.agentTranscripts).toContain("This was interrupted");
314
274
  expect(ctx.client.cancelledCount).toBeGreaterThan(0);
315
275
 
316
- // Fire a non-interrupted transcript — SHOULD go into conversation history
317
276
  cbs.onReplyStarted("r2");
318
277
  cbs.onAgentTranscript("This was completed", false);
319
278
  cbs.onReplyDone();
320
279
  await flush();
321
280
 
322
- // Trigger a tool call to inspect conversation history.
323
281
  cbs.onUserTranscript("check");
324
282
  await flush();
325
283
  cbs.onReplyStarted("r3");
326
284
  cbs.onToolCall("c1", "check_history", { q: "test" });
327
- // Wait for tool to execute (captures messages)
328
285
  await vi.waitFor(() => expect(capturedMessages.length).toBeGreaterThan(0));
329
286
 
330
- // Conversation history should contain the completed text but NOT the interrupted text
331
287
  const assistantMsgs = capturedMessages.filter((m) => m.role === "assistant");
332
288
  expect(assistantMsgs.some((m) => m.content === "This was completed")).toBe(true);
333
289
  expect(assistantMsgs.every((m) => m.content !== "This was interrupted")).toBe(true);
334
290
  });
335
291
 
336
- // ── Conversation history correctness after full tool-call flow ──────────
337
-
338
292
  test("conversation history has user + assistant messages after tool-call flow", async () => {
339
- // Use a tool that captures the messages it receives
340
293
  let capturedMessages: readonly { role: string; content: string }[] = [];
341
294
  const agent: AgentDef = {
342
295
  name: "history-agent",
@@ -355,28 +308,21 @@ describe("fixture replay with real executor (transport layer)", () => {
355
308
  },
356
309
  };
357
310
 
358
- const ctx = createFixtureSession(agent);
359
- cleanup = ctx.cleanup;
311
+ const ctx = makeCtx(agent);
360
312
  await ctx.start();
361
313
 
362
314
  ctx.replay("tool-call-sequence.json");
363
315
  await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
364
316
 
365
- // The tool should have seen the user's weather question in messages
366
317
  const userMsgs = capturedMessages.filter((m) => m.role === "user");
367
318
  expect(userMsgs.some((m) => m.content.toLowerCase().includes("weather"))).toBe(true);
368
319
  });
369
320
 
370
- // ── Audio chunks forwarded to client.audio ─────────────────────────────
371
-
372
321
  test("reply.audio events forwarded to client.audio", async () => {
373
- const ctx = createFixtureSession(simpleAgent);
374
- cleanup = ctx.cleanup;
322
+ const ctx = makeCtx(simpleAgent);
375
323
  await ctx.start();
376
324
 
377
- // Fire audio events directly via callbacks (replay skips reply.audio)
378
- const audioBytes = new Uint8Array([10, 20, 30, 40]);
379
- ctx.mockCallbacks.onAudio(audioBytes);
325
+ ctx.mockCallbacks.onAudio(new Uint8Array([10, 20, 30, 40]));
380
326
  ctx.mockCallbacks.onAudio(new Uint8Array([50, 60]));
381
327
 
382
328
  expect(ctx.client.audioChunks.length).toBe(2);
@@ -384,8 +330,6 @@ describe("fixture replay with real executor (transport layer)", () => {
384
330
  expect(Array.from(ctx.client.audioChunks[1] ?? [])).toEqual([50, 60]);
385
331
  });
386
332
 
387
- // ── Multiple tool calls in one reply: results buffered and sent together ─
388
-
389
333
  test("multiple tool calls in one reply: all results buffered and sent after replyDone", async () => {
390
334
  const agent: AgentDef = {
391
335
  name: "multi-tool-agent",
@@ -401,8 +345,7 @@ describe("fixture replay with real executor (transport layer)", () => {
401
345
  },
402
346
  };
403
347
 
404
- const ctx = createFixtureSession(agent);
405
- cleanup = ctx.cleanup;
348
+ const ctx = makeCtx(agent);
406
349
  await ctx.start();
407
350
 
408
351
  const cbs = ctx.mockCallbacks;
@@ -410,21 +353,17 @@ describe("fixture replay with real executor (transport layer)", () => {
410
353
  cbs.onToolCall("c1", "get_weather", { city: "NYC" });
411
354
  cbs.onToolCall("c2", "get_weather", { city: "LA" });
412
355
 
413
- // Wait for both tool calls to be dispatched to the client
414
356
  await vi.waitFor(() => {
415
357
  expect(ctx.client.toolCallEvents.length).toBe(2);
416
358
  });
417
359
 
418
- // Results NOT sent yet — reply.done hasn't fired
419
360
  expect(ctx.fakeHandle.sendToolResult).not.toHaveBeenCalled();
420
361
 
421
- // Fire reply.done — should flush both results
422
362
  cbs.onReplyDone();
423
363
  await vi.waitFor(() => {
424
364
  expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalledTimes(2);
425
365
  });
426
366
 
427
- // Verify both results are correct
428
367
  const calls = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls as [string, string][];
429
368
  const results = calls.map(([, r]) => JSON.parse(r));
430
369
  expect(results.some((r) => r.city === "NYC")).toBe(true);