@alexkroman1/aai 1.7.1 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/.turbo/turbo-build.log +11 -9
  2. package/CHANGELOG.md +10 -0
  3. package/dist/{_internal-types-CrnTi9Ew.js → _internal-types-CfOAbK6V.js} +22 -35
  4. package/dist/constants-y68COEGj.js +29 -0
  5. package/dist/host/_base64.d.ts +2 -0
  6. package/dist/host/_mock-ws.d.ts +0 -61
  7. package/dist/host/_pipeline-test-fakes.d.ts +7 -4
  8. package/dist/host/_run-code.d.ts +0 -25
  9. package/dist/host/_runtime-conformance.d.ts +3 -34
  10. package/dist/host/memory-vector.d.ts +0 -11
  11. package/dist/host/providers/resolve-kv.d.ts +0 -7
  12. package/dist/host/providers/resolve-vector.d.ts +0 -8
  13. package/dist/host/providers/stt/assemblyai.d.ts +0 -14
  14. package/dist/host/providers/stt/deepgram.d.ts +2 -14
  15. package/dist/host/providers/stt/soniox.d.ts +0 -22
  16. package/dist/host/providers/tts/rime.d.ts +10 -31
  17. package/dist/host/runtime-barrel.js +619 -630
  18. package/dist/host/runtime-config.d.ts +9 -6
  19. package/dist/host/runtime.d.ts +3 -0
  20. package/dist/host/to-vercel-tools.d.ts +3 -33
  21. package/dist/host/transports/openai-realtime-transport.d.ts +43 -0
  22. package/dist/host/unstorage-kv.d.ts +0 -26
  23. package/dist/index.js +3 -3
  24. package/dist/openai-realtime-cjPAHMMx.js +10 -0
  25. package/dist/sdk/_internal-types.d.ts +6 -55
  26. package/dist/sdk/allowed-hosts.d.ts +4 -3
  27. package/dist/sdk/constants.d.ts +4 -29
  28. package/dist/sdk/define.d.ts +7 -4
  29. package/dist/sdk/kv.d.ts +13 -37
  30. package/dist/sdk/manifest-barrel.js +1 -1
  31. package/dist/sdk/manifest.d.ts +8 -2
  32. package/dist/sdk/protocol.js +1 -1
  33. package/dist/sdk/providers/s2s/openai-realtime.d.ts +17 -0
  34. package/dist/sdk/providers/s2s-barrel.d.ts +9 -0
  35. package/dist/sdk/providers/s2s-barrel.js +2 -0
  36. package/dist/sdk/providers/tts/rime.d.ts +1 -1
  37. package/dist/sdk/providers.d.ts +6 -2
  38. package/dist/sdk/types.d.ts +7 -1
  39. package/dist/{types-KUgezM6u.js → types-DOWVZhb9.js} +1 -7
  40. package/dist/{ws-upgrade-BeOQ7fXL.js → ws-upgrade-CG8-by1n.js} +2 -3
  41. package/host/_base64.ts +9 -0
  42. package/host/_mock-ws.ts +0 -65
  43. package/host/_pipeline-test-fakes.ts +19 -31
  44. package/host/_run-code.ts +10 -53
  45. package/host/_runtime-conformance.ts +3 -44
  46. package/host/_test-utils.ts +20 -42
  47. package/host/builtin-tools.test.ts +127 -222
  48. package/host/builtin-tools.ts +6 -10
  49. package/host/cleanup.test.ts +30 -73
  50. package/host/integration/pipeline-reference.integration.test.ts +12 -17
  51. package/host/integration.test.ts +0 -7
  52. package/host/memory-vector.test.ts +3 -1
  53. package/host/memory-vector.ts +16 -21
  54. package/host/pinecone-vector.test.ts +14 -17
  55. package/host/pinecone-vector.ts +10 -19
  56. package/host/providers/providers.test-d.ts +5 -3
  57. package/host/providers/resolve-kv.ts +23 -41
  58. package/host/providers/resolve-vector.ts +3 -12
  59. package/host/providers/resolve.test.ts +15 -28
  60. package/host/providers/resolve.ts +24 -24
  61. package/host/providers/stt/assemblyai.test.ts +2 -14
  62. package/host/providers/stt/assemblyai.ts +12 -35
  63. package/host/providers/stt/deepgram.test.ts +23 -83
  64. package/host/providers/stt/deepgram.ts +15 -40
  65. package/host/providers/stt/elevenlabs.test.ts +26 -38
  66. package/host/providers/stt/elevenlabs.ts +10 -9
  67. package/host/providers/stt/soniox.test.ts +35 -85
  68. package/host/providers/stt/soniox.ts +8 -53
  69. package/host/providers/tts/cartesia.test.ts +19 -58
  70. package/host/providers/tts/cartesia.ts +36 -66
  71. package/host/providers/tts/rime.test.ts +12 -38
  72. package/host/providers/tts/rime.ts +23 -86
  73. package/host/runtime-config.test.ts +9 -9
  74. package/host/runtime-config.ts +16 -22
  75. package/host/runtime.test.ts +111 -73
  76. package/host/runtime.ts +138 -86
  77. package/host/s2s.test.ts +92 -191
  78. package/host/s2s.ts +55 -49
  79. package/host/server-shutdown.test.ts +9 -30
  80. package/host/server.test.ts +2 -13
  81. package/host/server.ts +85 -100
  82. package/host/session-core.test.ts +15 -30
  83. package/host/session-core.ts +10 -13
  84. package/host/session-prompt.test.ts +1 -5
  85. package/host/to-vercel-tools.test.ts +53 -72
  86. package/host/to-vercel-tools.ts +9 -39
  87. package/host/tool-executor.test.ts +25 -51
  88. package/host/tool-executor.ts +18 -12
  89. package/host/transports/openai-realtime-transport.test.ts +371 -0
  90. package/host/transports/openai-realtime-transport.ts +319 -0
  91. package/host/transports/pipeline-transport.test.ts +125 -298
  92. package/host/transports/pipeline-transport.ts +20 -68
  93. package/host/transports/s2s-transport-fixtures.test.ts +31 -92
  94. package/host/transports/s2s-transport.test.ts +65 -134
  95. package/host/transports/s2s-transport.ts +15 -43
  96. package/host/transports/types.test.ts +4 -8
  97. package/host/unstorage-kv.test.ts +3 -2
  98. package/host/unstorage-kv.ts +5 -35
  99. package/host/ws-handler.test.ts +72 -176
  100. package/host/ws-handler.ts +6 -12
  101. package/package.json +6 -1
  102. package/sdk/__snapshots__/exports.test.ts.snap +7 -0
  103. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  104. package/sdk/_internal-types.test.ts +6 -9
  105. package/sdk/_internal-types.ts +16 -57
  106. package/sdk/_test-matchers.ts +25 -15
  107. package/sdk/allowed-hosts.test.ts +50 -114
  108. package/sdk/allowed-hosts.ts +8 -14
  109. package/sdk/constants.ts +5 -52
  110. package/sdk/define.test.ts +7 -6
  111. package/sdk/define.ts +7 -3
  112. package/sdk/exports.test.ts +6 -1
  113. package/sdk/kv.ts +13 -37
  114. package/sdk/manifest.test-d.ts +5 -0
  115. package/sdk/manifest.test.ts +61 -9
  116. package/sdk/manifest.ts +11 -11
  117. package/sdk/protocol-compat.test.ts +66 -98
  118. package/sdk/protocol-snapshot.test.ts +2 -16
  119. package/sdk/protocol.test.ts +13 -22
  120. package/sdk/providers/s2s/openai-realtime.ts +36 -0
  121. package/sdk/providers/s2s-barrel.ts +12 -0
  122. package/sdk/providers/tts/rime.ts +1 -1
  123. package/sdk/providers.ts +24 -5
  124. package/sdk/schema-alignment.test.ts +25 -73
  125. package/sdk/schema-shapes.test.ts +1 -29
  126. package/sdk/system-prompt.test.ts +0 -1
  127. package/sdk/system-prompt.ts +17 -19
  128. package/sdk/types-inference.test.ts +10 -36
  129. package/sdk/types.ts +7 -0
  130. package/sdk/ws-upgrade.test.ts +24 -23
  131. package/sdk/ws-upgrade.ts +2 -3
  132. package/tsdown.config.ts +8 -11
  133. package/dist/constants-C2nirZUI.js +0 -54
@@ -1,13 +1,14 @@
1
- import { r as DEFAULT_SYSTEM_PROMPT } from "../types-KUgezM6u.js";
2
- import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-C2nirZUI.js";
3
- import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-BeOQ7fXL.js";
1
+ import { r as DEFAULT_SYSTEM_PROMPT } from "../types-DOWVZhb9.js";
2
+ import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-y68COEGj.js";
3
+ import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-CG8-by1n.js";
4
4
  import { ClientMessageSchema, VectorRequestSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
5
- import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-CrnTi9Ew.js";
5
+ import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-CfOAbK6V.js";
6
6
  import { a as MISTRAL_KIND, d as ANTHROPIC_KIND, l as GOOGLE_KIND, r as OPENAI_KIND, s as GROQ_KIND } from "../xai-BDI61Y2M.js";
7
7
  import { a as DEEPGRAM_KIND, r as ELEVENLABS_KIND, s as ASSEMBLYAI_KIND, t as SONIOX_KIND } from "../soniox-BQdL0mB5.js";
8
8
  import { a as CARTESIA_KIND, n as RIME_KIND } from "../rime-58p9mDR8.js";
9
9
  import { a as MEMORY_KV_KIND, r as REDIS_KV_KIND } from "../s3-BtCMvCod.js";
10
10
  import { r as IN_MEMORY_VECTOR_KIND, t as PINECONE_VECTOR_KIND } from "../pinecone-CeJ69aRs.js";
11
+ import "../openai-realtime-cjPAHMMx.js";
11
12
  import { createRequire } from "node:module";
12
13
  import { z } from "zod";
13
14
  import { convert } from "html-to-text";
@@ -35,20 +36,12 @@ import path from "node:path";
35
36
  import escapeHtml from "escape-html";
36
37
  import { lookup } from "mime-types";
37
38
  //#region host/_run-code.ts
38
- /**
39
- * run_code built-in tool — executes user JavaScript in a fresh `node:vm`
40
- * context with no network, filesystem, or process access.
41
- */
42
39
  const SKIPPED_CLASS_KEYS = new Set([
43
40
  "constructor",
44
41
  "prototype",
45
42
  "length",
46
43
  "name"
47
44
  ]);
48
- /**
49
- * Copy static members from a class constructor to a wrapper function,
50
- * skipping built-in keys that must not be forwarded.
51
- */
52
45
  function copyStaticMembers(src, dst) {
53
46
  for (const key of Object.getOwnPropertyNames(src)) {
54
47
  if (SKIPPED_CLASS_KEYS.has(key)) continue;
@@ -59,16 +52,10 @@ function copyStaticMembers(src, dst) {
59
52
  }
60
53
  }
61
54
  /**
62
- * Neuter the `.constructor` chain on a host function or class constructor.
63
- *
64
- * For plain functions: wraps the function so calling `.constructor` or
65
- * `.constructor.constructor` no longer exposes the host `Function`.
66
- *
67
- * For class constructors: additionally copies static methods and neutralizes
68
- * `prototype.constructor` so instances created via `new` also cannot escape.
69
- *
70
- * This prevents sandbox code from reaching the host `Function` constructor
71
- * via patterns like `fn.constructor.constructor('return process')()`.
55
+ * Prevents sandbox code from reaching the host `Function` constructor via
56
+ * `fn.constructor.constructor('return process')()`. For class constructors
57
+ * we also copy static members and neuter `prototype.constructor` so
58
+ * instances created via `new` cannot escape either.
72
59
  */
73
60
  function neutralizeConstructor(fn) {
74
61
  const hasPrototype = typeof fn.prototype === "object" && fn.prototype !== null;
@@ -92,19 +79,6 @@ function neutralizeConstructor(fn) {
92
79
  return Wrapper;
93
80
  }
94
81
  const runCodeParams = z.object({ code: z.string().describe("JavaScript code to execute. Use console.log() for output.") });
95
- /**
96
- * Execute JavaScript code inside a fresh `node:vm` context.
97
- *
98
- * Each invocation creates a disposable VM context with:
99
- * - No filesystem access (`node:fs` and other built-ins unavailable)
100
- * - No network access (`fetch`, `http` unavailable)
101
- * - No child process spawning
102
- * - No environment variable access (`process` unavailable)
103
- * - Execution timeout (default 5 s)
104
- *
105
- * The context is discarded after execution, so no state leaks between
106
- * invocations or across sessions.
107
- */
108
82
  function createRunCode() {
109
83
  return {
110
84
  guidance: "You MUST use the run_code tool for ANY question involving math, counting, calculations, data processing, or code. NEVER do mental math or recite code verbally. run_code executes JavaScript (not Python). Always write JavaScript.",
@@ -115,14 +89,6 @@ function createRunCode() {
115
89
  }
116
90
  };
117
91
  }
118
- /**
119
- * Execute user code in a fresh `node:vm` context.
120
- *
121
- * @remarks
122
- * The VM context only exposes standard ECMAScript globals and a console
123
- * object that captures output. Node.js APIs (`process`, `require`,
124
- * `import()`) are not available inside the sandbox.
125
- */
126
92
  async function executeInIsolate(code) {
127
93
  const output = [];
128
94
  const capture = (...args) => output.push(args.map(String).join(" "));
@@ -258,12 +224,11 @@ function createVisitWebpage(fetchFn = globalThis.fetch) {
258
224
  error: `Failed to fetch: ${resp.status} ${resp.statusText}`,
259
225
  url
260
226
  };
261
- const htmlContent = await resp.text();
262
- const text = htmlToText(htmlContent.length > 2e5 ? htmlContent.slice(0, MAX_HTML_BYTES) : htmlContent);
227
+ const text = htmlToText((await resp.text()).slice(0, MAX_HTML_BYTES));
263
228
  const truncated = text.length > MAX_PAGE_CHARS;
264
229
  return {
265
230
  url,
266
- content: truncated ? text.slice(0, MAX_PAGE_CHARS) : text,
231
+ content: text.slice(0, MAX_PAGE_CHARS),
267
232
  ...truncated ? {
268
233
  truncated: true,
269
234
  totalChars: text.length
@@ -323,7 +288,6 @@ function createFetchJson(fetchFn = globalThis.fetch) {
323
288
  }
324
289
  };
325
290
  }
326
- /** Resolve a builtin name to an array of [toolName, ToolDef] pairs. */
327
291
  function resolveBuiltin(name, opts) {
328
292
  switch (name) {
329
293
  case "web_search": return [["web_search", createWebSearch(opts?.fetch)]];
@@ -349,8 +313,7 @@ function resolveAllBuiltins(names, opts) {
349
313
  description: def.description,
350
314
  parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
351
315
  });
352
- const g = def.guidance;
353
- if (g) guidance.push(g);
316
+ if (def.guidance) guidance.push(def.guidance);
354
317
  }
355
318
  return {
356
319
  defs,
@@ -360,16 +323,7 @@ function resolveAllBuiltins(names, opts) {
360
323
  }
361
324
  //#endregion
362
325
  //#region host/memory-vector.ts
363
- /**
364
- * In-memory Vector implementation.
365
- *
366
- * INTENTIONALLY BAD QUALITY. Pseudo-embedding hashes the text into a
367
- * 64-dim Float32Array of values in [-1, ~0.99], then L2-normalizes
368
- * the result. Because both stored and probe vectors are unit-length,
369
- * cosine similarity reduces to a plain dot product — that's what
370
- * `cosine()` computes. Used only for `aai dev` and tests — the goal
371
- * is proving tool wiring, not retrieval ranking.
372
- */
326
+ const DIM = 64;
373
327
  const stores = /* @__PURE__ */ new Map();
374
328
  function getStore(ns) {
375
329
  let store = stores.get(ns);
@@ -379,13 +333,14 @@ function getStore(ns) {
379
333
  }
380
334
  return store;
381
335
  }
382
- const DIM = 64;
383
336
  function pseudoEmbed(text) {
384
337
  const out = new Float32Array(DIM);
385
338
  const h1 = createHash("sha256").update(text).digest();
386
339
  const h2 = createHash("sha256").update(h1).digest();
387
- for (let i = 0; i < 32; i++) out[i] = (h1[i] - 128) / 128;
388
- for (let i = 0; i < 32; i++) out[i + 32] = (h2[i] - 128) / 128;
340
+ for (let i = 0; i < 32; i++) {
341
+ out[i] = (h1[i] - 128) / 128;
342
+ out[i + 32] = (h2[i] - 128) / 128;
343
+ }
389
344
  let norm = 0;
390
345
  for (let i = 0; i < DIM; i++) norm += out[i] * out[i];
391
346
  norm = Math.sqrt(norm) || 1;
@@ -421,12 +376,13 @@ function createMemoryVector(opts) {
421
376
  const scored = [];
422
377
  for (const [id, rec] of getStore(ns)) {
423
378
  if (filter && !matches(rec.metadata, filter)) continue;
424
- scored.push({
379
+ const match = {
425
380
  id,
426
381
  score: cosine(probe, rec.vec),
427
- text: rec.text,
428
- ...rec.metadata !== void 0 ? { metadata: rec.metadata } : {}
429
- });
382
+ text: rec.text
383
+ };
384
+ if (rec.metadata !== void 0) match.metadata = rec.metadata;
385
+ scored.push(match);
430
386
  }
431
387
  scored.sort((a, b) => b.score - a.score);
432
388
  return scored.slice(0, topK);
@@ -440,24 +396,9 @@ function createMemoryVector(opts) {
440
396
  }
441
397
  //#endregion
442
398
  //#region host/providers/stt/assemblyai.ts
443
- /**
444
- * AssemblyAI Universal-Streaming STT opener (host-only).
445
- *
446
- * The user-facing descriptor factory (`assemblyAI(...)`) lives in
447
- * `sdk/providers/stt/assemblyai.ts`. This module is the host-side
448
- * counterpart: it takes the descriptor options + an API key and
449
- * returns an {@link SttOpener} that the pipeline session drives.
450
- *
451
- * Default model: `"u3pro-rt"` (Universal-3 Pro Real-Time). The adapter
452
- * maps that to the SDK's `"u3-rt-pro"` `speechModel` value; any other
453
- * string is forwarded verbatim.
454
- */
455
- /** Translate the descriptor's model alias to the SDK's `speechModel` value. */
456
399
  function resolveSpeechModel(model) {
457
- if (model === "u3pro-rt") return "u3-rt-pro";
458
- return model;
400
+ return model === "u3pro-rt" ? "u3-rt-pro" : model;
459
401
  }
460
- /** Build an {@link SttOpener} from resolved AssemblyAI descriptor options. */
461
402
  function openAssemblyAI(opts = {}) {
462
403
  return {
463
404
  name: "assemblyai",
@@ -476,17 +417,16 @@ function openAssemblyAI(opts = {}) {
476
417
  transcriber.on("turn", (event) => {
477
418
  if (closed) return;
478
419
  const text = event.transcript ?? "";
479
- if (event.end_of_turn) {
480
- if (text.length > 0) emitter.emit("final", text);
481
- } else if (text.length > 0) emitter.emit("partial", text);
420
+ if (text.length === 0) return;
421
+ emitter.emit(event.end_of_turn ? "final" : "partial", text);
482
422
  });
483
423
  transcriber.on("error", (err) => {
484
424
  if (closed) return;
485
425
  emitter.emit("error", makeSttError("stt_stream_error", err?.message ?? String(err)));
486
426
  });
487
427
  transcriber.on("close", (code) => {
488
- if (closed) return;
489
- if (code !== 1e3) emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
428
+ if (closed || code === 1e3) return;
429
+ emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
490
430
  });
491
431
  try {
492
432
  await transcriber.connect();
@@ -505,8 +445,7 @@ function openAssemblyAI(opts = {}) {
505
445
  return {
506
446
  sendAudio(pcm) {
507
447
  if (closed) return;
508
- const copy = new Uint8Array(pcm.byteLength);
509
- copy.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
448
+ const copy = new Uint8Array(pcm.buffer.slice(pcm.byteOffset, pcm.byteOffset + pcm.byteLength));
510
449
  transcriber.sendAudio(copy.buffer);
511
450
  },
512
451
  on(event, fn) {
@@ -523,33 +462,18 @@ function openAssemblyAI(opts = {}) {
523
462
  /**
524
463
  * Deepgram Nova streaming STT opener (host-only).
525
464
  *
526
- * The user-facing descriptor factory (`deepgram(...)`) lives in
527
- * `sdk/providers/stt/deepgram.ts`. This module is the host-side
528
- * counterpart: it takes the descriptor options + an API key and
529
- * returns an {@link SttOpener} that the pipeline session drives.
530
- *
531
- * Default model: `"nova-3"`. Any string is forwarded verbatim to the SDK.
532
- *
533
- * This adapter targets the Deepgram SDK v5 (`@deepgram/sdk@^5`). The v5
534
- * streaming API is:
535
- * `client.listen.v1.connect(args)` → `Promise<V1Socket>`
536
- * followed by:
537
- * `socket.connect()` + `socket.waitForOpen()` to establish the connection.
538
- */
539
- /**
540
- * Handle an incoming Deepgram transcript message, emitting `partial` or
541
- * `final` events on the emitter. Empty transcripts are silently dropped.
465
+ * Targets Deepgram SDK v5: `client.listen.v1.connect(args)` returns a
466
+ * socket; `socket.connect()` + `socket.waitForOpen()` establish it.
542
467
  */
468
+ function errMsg(cause) {
469
+ return cause instanceof Error ? cause.message : String(cause);
470
+ }
543
471
  function handleMessage(data, closed, emitter) {
544
- if (closed) return;
545
- if (data.type !== "Results") return;
546
- const result = data;
547
- const text = result.channel?.alternatives?.[0]?.transcript ?? "";
548
- if (result.is_final) {
549
- if (text.length > 0) emitter.emit("final", text);
550
- } else if (text.length > 0) emitter.emit("partial", text);
551
- }
552
- /** Wire Deepgram socket events onto the nanoevents emitter. */
472
+ if (closed || data.type !== "Results") return;
473
+ const text = data.channel?.alternatives?.[0]?.transcript ?? "";
474
+ if (text.length === 0) return;
475
+ emitter.emit(data.is_final ? "final" : "partial", text);
476
+ }
553
477
  function wireSocketEvents(connection, emitter, getIsClosed) {
554
478
  connection.on("message", (data) => handleMessage(data, getIsClosed(), emitter));
555
479
  connection.on("error", (err) => {
@@ -562,12 +486,13 @@ function wireSocketEvents(connection, emitter, getIsClosed) {
562
486
  if (code !== void 0 && code !== 1e3) emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
563
487
  });
564
488
  }
565
- /** Wire the AbortSignal to the close function. */
566
489
  function wireAbortSignal(signal, close) {
567
- if (signal.aborted) close();
568
- else signal.addEventListener("abort", () => void close(), { once: true });
490
+ if (signal.aborted) {
491
+ close();
492
+ return;
493
+ }
494
+ signal.addEventListener("abort", () => void close(), { once: true });
569
495
  }
570
- /** Build an {@link SttOpener} from resolved Deepgram descriptor options. */
571
496
  function openDeepgram(opts = {}) {
572
497
  return {
573
498
  name: "deepgram",
@@ -590,7 +515,7 @@ function openDeepgram(opts = {}) {
590
515
  Authorization: apiKey
591
516
  });
592
517
  } catch (cause) {
593
- throw makeSttError("stt_connect_failed", `Deepgram STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
518
+ throw makeSttError("stt_connect_failed", `Deepgram STT: connect failed: ${errMsg(cause)}`);
594
519
  }
595
520
  const emitter = createNanoEvents();
596
521
  let closed = false;
@@ -599,7 +524,7 @@ function openDeepgram(opts = {}) {
599
524
  try {
600
525
  await connection.waitForOpen();
601
526
  } catch (cause) {
602
- throw makeSttError("stt_connect_failed", `Deepgram STT: WebSocket open failed: ${cause instanceof Error ? cause.message : String(cause)}`);
527
+ throw makeSttError("stt_connect_failed", `Deepgram STT: WebSocket open failed: ${errMsg(cause)}`);
603
528
  }
604
529
  const close = async () => {
605
530
  if (closed) return;
@@ -671,15 +596,15 @@ function openElevenLabs(opts = {}) {
671
596
  }
672
597
  const emitter = createNanoEvents();
673
598
  let closed = false;
674
- connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (msg) => {
599
+ function emitTranscript(event, text) {
675
600
  if (closed) return;
676
- const text = msg.text ?? "";
677
- if (text.length > 0) emitter.emit("partial", text);
601
+ if (text && text.length > 0) emitter.emit(event, text);
602
+ }
603
+ connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (msg) => {
604
+ emitTranscript("partial", msg.text);
678
605
  });
679
606
  connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (msg) => {
680
- if (closed) return;
681
- const text = msg.text ?? "";
682
- if (text.length > 0) emitter.emit("final", text);
607
+ emitTranscript("final", msg.text);
683
608
  });
684
609
  connection.on(RealtimeEvents.ERROR, (payload) => {
685
610
  if (closed) return;
@@ -690,13 +615,13 @@ function openElevenLabs(opts = {}) {
690
615
  if (closed) return;
691
616
  emitter.emit("error", makeSttError("stt_auth_failed", msg.error));
692
617
  });
693
- const close = async () => {
618
+ async function close() {
694
619
  if (closed) return;
695
620
  closed = true;
696
621
  try {
697
622
  connection.close();
698
623
  } catch {}
699
- };
624
+ }
700
625
  if (openOpts.signal.aborted) close();
701
626
  else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
702
627
  return {
@@ -715,32 +640,7 @@ function openElevenLabs(opts = {}) {
715
640
  }
716
641
  //#endregion
717
642
  //#region host/providers/stt/soniox.ts
718
- /**
719
- * Soniox real-time STT opener (host-only).
720
- *
721
- * The user-facing descriptor factory (`soniox(...)`) lives in
722
- * `sdk/providers/stt/soniox.ts`. This module is the host-side
723
- * counterpart: it takes the descriptor options + an API key and
724
- * returns an {@link SttOpener} that the pipeline session drives.
725
- *
726
- * Soniox's published JS client (`@soniox/speech-to-text-web`) is
727
- * browser-only — it depends on `MediaRecorder` and `getUserMedia`. For
728
- * server-side use we talk to the WebSocket directly:
729
- * `wss://stt-rt.soniox.com/transcribe-websocket`
730
- *
731
- * Wire format:
732
- * - First text frame: JSON config with api_key, model, audio_format,
733
- * sample_rate, num_channels (and optional language hints).
734
- * - Subsequent binary frames: 16-bit signed little-endian PCM audio.
735
- * - Server replies: JSON `{ tokens: [{ text, is_final }] }` messages.
736
- * Final tokens accumulate; non-final tokens are a rolling preview.
737
- * - On error: `{ error_code, error_message }`.
738
- */
739
643
  const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
740
- /**
741
- * Walk a batch of Soniox tokens, sending finals into `appendFinal` and
742
- * returning the concatenated non-finals as a rolling preview string.
743
- */
744
644
  function consumeTokens(tokens, appendFinal) {
745
645
  let nonFinal = "";
746
646
  for (const tok of tokens) {
@@ -751,7 +651,6 @@ function consumeTokens(tokens, appendFinal) {
751
651
  }
752
652
  return nonFinal;
753
653
  }
754
- /** Resolve once the WebSocket opens; reject on the first error. */
755
654
  function waitForOpen$1(ws) {
756
655
  return new Promise((resolve, reject) => {
757
656
  const onOpen = () => {
@@ -766,7 +665,6 @@ function waitForOpen$1(ws) {
766
665
  ws.once("error", onErr);
767
666
  });
768
667
  }
769
- /** Build the initial JSON config frame for a Soniox session. */
770
668
  function buildConfigFrame(apiKey, opts, sampleRate) {
771
669
  const config = {
772
670
  api_key: apiKey,
@@ -778,7 +676,6 @@ function buildConfigFrame(apiKey, opts, sampleRate) {
778
676
  if (opts.languageHints && opts.languageHints.length > 0) config.language_hints = [...opts.languageHints];
779
677
  return config;
780
678
  }
781
- /** Parse a Soniox text frame into a {@link SonioxResponse}; returns null on garbage. */
782
679
  function parseFrame(raw) {
783
680
  try {
784
681
  return JSON.parse(raw.toString());
@@ -786,12 +683,6 @@ function parseFrame(raw) {
786
683
  return null;
787
684
  }
788
685
  }
789
- /**
790
- * Handle one server response. Emits `error`, `final`, and `partial` events
791
- * onto `emitter` based on the token batch and the running `finalBuf`. The
792
- * caller owns `finalBuf` so it survives across messages and can be flushed
793
- * on close.
794
- */
795
686
  function handleResponse(res, emitter, finalBuf) {
796
687
  if (res.error_code !== void 0) {
797
688
  emitter.emit("error", makeSttError("stt_stream_error", `Soniox error ${res.error_code}: ${res.error_message ?? "unknown"}`));
@@ -807,7 +698,6 @@ function handleResponse(res, emitter, finalBuf) {
807
698
  }
808
699
  if (nonFinal.length > 0) emitter.emit("partial", nonFinal);
809
700
  }
810
- /** Build an {@link SttOpener} from resolved Soniox descriptor options. */
811
701
  function openSoniox(opts = {}) {
812
702
  return {
813
703
  name: "soniox",
@@ -918,8 +808,7 @@ function openCartesia(opts) {
918
808
  }
919
809
  const emitter = createNanoEvents();
920
810
  let closed = false;
921
- /** Mint a fresh context bound to the shared TTSWS connection. */
922
- const mintContext = () => ws.context({
811
+ const audioConfig = {
923
812
  model_id: model,
924
813
  voice: {
925
814
  mode: "id",
@@ -929,39 +818,32 @@ function openCartesia(opts) {
929
818
  container: "raw",
930
819
  encoding: "pcm_s16le",
931
820
  sample_rate: sampleRate
932
- },
821
+ }
822
+ };
823
+ const baseRequest = {
824
+ ...audioConfig,
825
+ language
826
+ };
827
+ const mintContext = () => ws.context({
828
+ ...audioConfig,
933
829
  contextId: randomUUID()
934
830
  });
935
831
  let context = mintContext();
936
- /**
937
- * `doneEmitted` guards against emitting `done` more than once per turn.
938
- * Reset whenever a fresh context is minted (i.e. at turn boundaries).
939
- */
940
832
  let doneEmitted = false;
941
- /**
942
- * After `flush()` or `cancel()`, the current context is done accepting
943
- * input. We defer minting a fresh one until the next `sendText()` so
944
- * that late audio chunks + Cartesia's real `done` event (both tagged
945
- * with the flushed context's id) still pass the filter below. Rotating
946
- * eagerly would silently drop all audio still in flight.
947
- */
948
833
  let rotatePending = false;
949
- const rotateContext = () => {
834
+ const rotateIfPending = () => {
835
+ if (!rotatePending) return;
950
836
  context = mintContext();
951
837
  doneEmitted = false;
952
838
  rotatePending = false;
953
839
  };
954
- const rotateIfPending = () => {
955
- if (rotatePending) rotateContext();
956
- };
957
840
  const emitDoneOnce = () => {
958
841
  if (doneEmitted || closed) return;
959
842
  doneEmitted = true;
960
843
  emitter.emit("done");
961
844
  };
962
845
  ws.on("chunk", (event) => {
963
- if (closed) return;
964
- if (event.context_id !== context.contextId) return;
846
+ if (closed || event.context_id !== context.contextId) return;
965
847
  const buf = event.audio;
966
848
  if (!buf || buf.byteLength === 0) return;
967
849
  const evenBytes = buf.byteLength - buf.byteLength % 2;
@@ -970,8 +852,7 @@ function openCartesia(opts) {
970
852
  emitter.emit("audio", pcm);
971
853
  });
972
854
  ws.on("done", (event) => {
973
- if (closed) return;
974
- if (event.context_id !== context.contextId) return;
855
+ if (closed || event.context_id !== context.contextId) return;
975
856
  emitDoneOnce();
976
857
  });
977
858
  ws.on("error", (err) => {
@@ -990,19 +871,6 @@ function openCartesia(opts) {
990
871
  };
991
872
  if (openOpts.signal.aborted) close();
992
873
  else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
993
- const baseRequest = {
994
- model_id: model,
995
- voice: {
996
- mode: "id",
997
- id: voice
998
- },
999
- output_format: {
1000
- container: "raw",
1001
- encoding: "pcm_s16le",
1002
- sample_rate: sampleRate
1003
- },
1004
- language
1005
- };
1006
874
  const ignoreRejection = (_err) => {};
1007
875
  return {
1008
876
  sendText(text) {
@@ -1044,38 +912,18 @@ function openCartesia(opts) {
1044
912
  /**
1045
913
  * Rime TTS opener (host-only).
1046
914
  *
1047
- * The user-facing descriptor factory (`rime(...)`) lives in
1048
- * `sdk/providers/tts/rime.ts`. This module is the host-side
1049
- * counterpart: it takes the descriptor options + an API key and
1050
- * returns a {@link TtsOpener} that the pipeline session drives.
1051
- *
1052
- * **Protocol.** Connects to Rime's `ws2` JSON WebSocket endpoint
1053
- * (`wss://users-ws.rime.ai/ws2`). Client-to-server messages are JSON:
1054
- * - `{ "text": "..." }` — append text to the synthesis buffer
1055
- * - `{ "operation": "clear" }` — drop buffered text (barge-in)
1056
- * - `{ "operation": "eos" }` — drain buffer, close connection (NOT used
1057
- * during a session: it would tear down the WS, forcing reconnect per
1058
- * turn). We force end-of-turn synthesis with a trailing `"."` instead.
1059
- * The server responds with JSON frames:
1060
- * - `{ type: "chunk", data: <base64 PCM16 LE>, contextId: string | null }`
1061
- * - `{ type: "timestamps", ... }` (ignored)
1062
- * - `{ type: "error", message: string }` (surfaced as `tts_stream_error`)
1063
- *
1064
- * **Single long-lived connection per session.** Rime buffers text until it
1065
- * sees terminal punctuation (`.`, `?`, `!`), so we use one WebSocket per
1066
- * `open()` call and reuse it across turns. `clear` resets the buffer
1067
- * between cancellations.
915
+ * Connects to Rime's `ws2` JSON WebSocket endpoint with one long-lived
916
+ * connection per session. Client → server: `{ text }` appends to the
917
+ * synthesis buffer, `{ operation: "clear" }` drops it (barge-in). We never
918
+ * send `eos` since it tears down the WS; `flush()` instead sends a
919
+ * trailing `"."` to force synthesis of any text buffered behind missing
920
+ * terminal punctuation while keeping the connection reusable.
1068
921
  *
1069
- * **Done detection.** After `flush()` sends a trailing `"."` to force the
1070
- * server to synthesize any half-buffered text, we arm a quiescence timer
1071
- * that fires 500 ms after the last received audio chunk. When it fires,
1072
- * `done` is emitted.
1073
- *
1074
- * **Audio format.** The URL requests `audioFormat=pcm` at the negotiated
1075
- * `sampleRate`, which returns raw PCM16 little-endian. We decode the base64
1076
- * payload and construct a zero-copy `Int16Array` view over the decoded bytes.
922
+ * Server → client: `{ type: "chunk", data: <base64 PCM16 LE> }` carries
923
+ * audio; `timestamps` is ignored; `error` surfaces as `tts_stream_error`.
924
+ * The `audioFormat=pcm` query param at the negotiated `sampleRate` returns
925
+ * raw PCM16 LE that we view as a zero-copy `Int16Array`.
1077
926
  */
1078
- /** PCM16 sample rates accepted by the Rime `ws2` endpoint. */
1079
927
  const RIME_PCM16_RATES = [
1080
928
  8e3,
1081
929
  16e3,
@@ -1088,31 +936,14 @@ function assertSupportedSampleRate(rate) {
1088
936
  if (RIME_PCM16_RATES.includes(rate)) return rate;
1089
937
  throw makeTtsError("tts_connect_failed", `Rime TTS: unsupported sample rate ${rate}. Supported: ${RIME_PCM16_RATES.join(", ")}.`);
1090
938
  }
1091
- /**
1092
- * Decode a base64 string from Rime into a zero-copy `Int16Array`.
1093
- *
1094
- * Rime's `ws2` endpoint returns base64-encoded PCM16 LE in each chunk.
1095
- * `Buffer.from(base64, "base64")` gives us a Node.js Buffer (which is a
1096
- * Uint8Array subclass) with `byteOffset === 0`. PCM16 bytes always come in
1097
- * pairs so the length is guaranteed to be even.
1098
- */
1099
939
  function base64ToPcm(data) {
1100
940
  const bytes = Buffer.from(data, "base64");
1101
941
  const evenLen = bytes.byteLength - bytes.byteLength % 2;
1102
942
  if (evenLen === 0) return new Int16Array(0);
1103
943
  return new Int16Array(bytes.buffer, bytes.byteOffset, evenLen / 2);
1104
944
  }
1105
- /** Quiescence timeout in ms — how long to wait after the last audio chunk before emitting `done`. */
1106
945
  const QUIESCENCE_MS = 500;
1107
- /**
1108
- * After `flush()`, how long to wait for the FIRST audio chunk before
1109
- * giving up and emitting `done`. Greeting and short replies hit this
1110
- * path: `flush()` runs immediately after `sendText()`, so audio TTFB
1111
- * exceeds the 500 ms quiescence window. Once the first chunk arrives,
1112
- * we transition to the shorter quiescence timeout.
1113
- */
1114
946
  const FIRST_AUDIO_TIMEOUT_MS = 5e3;
1115
- /** Wait for the WebSocket `open` event; reject on first `error`. */
1116
947
  function waitForOpen(ws) {
1117
948
  return new Promise((resolve, reject) => {
1118
949
  const onOpen = () => {
@@ -1127,12 +958,6 @@ function waitForOpen(ws) {
1127
958
  ws.once("error", onError);
1128
959
  });
1129
960
  }
1130
- /**
1131
- * Handle one incoming WebSocket message frame.
1132
- *
1133
- * Extracted into a top-level function to keep `open()` under the cognitive
1134
- * complexity limit while retaining full access to the session state via refs.
1135
- */
1136
961
  function handleRimeMessage(raw, emitter, armQuiescence, isActiveTimer) {
1137
962
  let msg;
1138
963
  try {
@@ -1150,7 +975,6 @@ function handleRimeMessage(raw, emitter, armQuiescence, isActiveTimer) {
1150
975
  }
1151
976
  if (msg.type === "error") emitter.emit("error", makeTtsError("tts_stream_error", `Rime TTS: ${msg.message ?? "unknown error"}`));
1152
977
  }
1153
- /** Build a {@link TtsOpener} from resolved Rime descriptor options. */
1154
978
  function openRime(opts) {
1155
979
  return {
1156
980
  name: "rime",
@@ -1172,12 +996,6 @@ function openRime(opts) {
1172
996
  const emitter = createNanoEvents();
1173
997
  let closed = false;
1174
998
  let doneEmitted = false;
1175
- /**
1176
- * After `flush()`, we arm a timer that fires `done`. Initial timeout is
1177
- * `FIRST_AUDIO_TIMEOUT_MS` to give Rime headroom on TTFB; the first
1178
- * chunk swaps it for a shorter `QUIESCENCE_MS` window that resets on
1179
- * each subsequent chunk. `cancel()` emits `done` synchronously.
1180
- */
1181
999
  let quiescenceTimer = null;
1182
1000
  const clearQuiescence = () => {
1183
1001
  if (quiescenceTimer !== null) {
@@ -1271,21 +1089,24 @@ function openRime(opts) {
1271
1089
  function resolveApiKey(envVar, env) {
1272
1090
  return env[envVar] ?? process.env[envVar] ?? "";
1273
1091
  }
1092
+ function options(descriptor) {
1093
+ return descriptor.options;
1094
+ }
1274
1095
  /** Resolve an {@link SttProvider} descriptor into a host-side opener. */
1275
1096
  function resolveStt(descriptor) {
1276
1097
  switch (descriptor.kind) {
1277
- case ASSEMBLYAI_KIND: return openAssemblyAI(descriptor.options);
1278
- case DEEPGRAM_KIND: return openDeepgram(descriptor.options);
1279
- case ELEVENLABS_KIND: return openElevenLabs(descriptor.options);
1280
- case SONIOX_KIND: return openSoniox(descriptor.options);
1098
+ case ASSEMBLYAI_KIND: return openAssemblyAI(options(descriptor));
1099
+ case DEEPGRAM_KIND: return openDeepgram(options(descriptor));
1100
+ case ELEVENLABS_KIND: return openElevenLabs(options(descriptor));
1101
+ case SONIOX_KIND: return openSoniox(options(descriptor));
1281
1102
  default: throw new Error(`Unknown STT provider kind: "${descriptor.kind}". Supported: ${ASSEMBLYAI_KIND}, ${DEEPGRAM_KIND}, ${ELEVENLABS_KIND}, ${SONIOX_KIND}.`);
1282
1103
  }
1283
1104
  }
1284
1105
  /** Resolve a {@link TtsProvider} descriptor into a host-side opener. */
1285
1106
  function resolveTts(descriptor) {
1286
1107
  switch (descriptor.kind) {
1287
- case CARTESIA_KIND: return openCartesia(descriptor.options);
1288
- case RIME_KIND: return openRime(descriptor.options);
1108
+ case CARTESIA_KIND: return openCartesia(options(descriptor));
1109
+ case RIME_KIND: return openRime(options(descriptor));
1289
1110
  default: throw new Error(`Unknown TTS provider kind: "${descriptor.kind}". Supported: ${CARTESIA_KIND}, ${RIME_KIND}.`);
1290
1111
  }
1291
1112
  }
@@ -1302,12 +1123,12 @@ function resolveLlm(descriptor, env) {
1302
1123
  case ANTHROPIC_KIND: return createAnthropic({
1303
1124
  apiKey: requireKey(env, "ANTHROPIC_API_KEY", "Anthropic"),
1304
1125
  baseURL: "https://api.anthropic.com/v1"
1305
- })(descriptor.options.model);
1306
- case OPENAI_KIND: return createOpenAI({ apiKey: requireKey(env, "OPENAI_API_KEY", "OpenAI") })(descriptor.options.model);
1307
- case GOOGLE_KIND: return createGoogleGenerativeAI({ apiKey: requireKey(env, "GOOGLE_GENERATIVE_AI_API_KEY", "Google") })(descriptor.options.model);
1308
- case MISTRAL_KIND: return createMistral({ apiKey: requireKey(env, "MISTRAL_API_KEY", "Mistral") })(descriptor.options.model);
1309
- case "xai": return createXai({ apiKey: requireKey(env, "XAI_API_KEY", "xAI") })(descriptor.options.model);
1310
- case GROQ_KIND: return createGroq({ apiKey: requireKey(env, "GROQ_API_KEY", "Groq") })(descriptor.options.model);
1126
+ })(options(descriptor).model);
1127
+ case OPENAI_KIND: return createOpenAI({ apiKey: requireKey(env, "OPENAI_API_KEY", "OpenAI") })(options(descriptor).model);
1128
+ case GOOGLE_KIND: return createGoogleGenerativeAI({ apiKey: requireKey(env, "GOOGLE_GENERATIVE_AI_API_KEY", "Google") })(options(descriptor).model);
1129
+ case MISTRAL_KIND: return createMistral({ apiKey: requireKey(env, "MISTRAL_API_KEY", "Mistral") })(options(descriptor).model);
1130
+ case "xai": return createXai({ apiKey: requireKey(env, "XAI_API_KEY", "xAI") })(options(descriptor).model);
1131
+ case GROQ_KIND: return createGroq({ apiKey: requireKey(env, "GROQ_API_KEY", "Groq") })(options(descriptor).model);
1311
1132
  default: throw new Error(`Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}, ${OPENAI_KIND}, ${GOOGLE_KIND}, ${MISTRAL_KIND}, xai, ${GROQ_KIND}.`);
1312
1133
  }
1313
1134
  }
@@ -1321,8 +1142,9 @@ function loadProviderPackage(name, label) {
1321
1142
  try {
1322
1143
  return requireFromHere(name);
1323
1144
  } catch (err) {
1324
- if (err instanceof Error && (err.code === "MODULE_NOT_FOUND" || err.code === "ERR_MODULE_NOT_FOUND") && err.message.includes(name)) throw new Error(`${label}: package \`${name}\` is not installed. Run \`pnpm add ${name}\`.`, { cause: err });
1325
- throw err;
1145
+ const code = err?.code;
1146
+ if (!(err instanceof Error && (code === "MODULE_NOT_FOUND" || code === "ERR_MODULE_NOT_FOUND") && err.message.includes(name))) throw err;
1147
+ throw new Error(`${label}: package \`${name}\` is not installed. Run \`pnpm add ${name}\`.`, { cause: err });
1326
1148
  }
1327
1149
  }
1328
1150
  function requireKey(env, name, label) {
@@ -1334,67 +1156,42 @@ function requireKey(env, name, label) {
1334
1156
  //#region host/pinecone-vector.ts
1335
1157
  function createPineconeVector(opts) {
1336
1158
  const { Pinecone } = loadProviderPackage("@pinecone-database/pinecone", "Pinecone Vector");
1337
- const client = new Pinecone({ apiKey: opts.apiKey });
1338
- const ns = () => client.index(opts.index).namespace(opts.namespace);
1159
+ const ns = new Pinecone({ apiKey: opts.apiKey }).index(opts.index).namespace(opts.namespace);
1339
1160
  return {
1340
1161
  async upsert(id, text, metadata) {
1341
- const record = {
1162
+ await ns.upsertRecords([{
1342
1163
  _id: id,
1343
1164
  text,
1344
1165
  ...metadata ?? {}
1345
- };
1346
- await ns().upsertRecords([record]);
1166
+ }]);
1347
1167
  },
1348
1168
  async query(text, queryOpts) {
1349
- const topK = queryOpts?.topK ?? 5;
1350
- const req = {
1169
+ const { topK = 5, filter } = queryOpts ?? {};
1170
+ return (await ns.searchRecords({
1351
1171
  query: {
1352
1172
  inputs: { text },
1353
1173
  topK,
1354
- ...queryOpts?.filter !== void 0 ? { filter: queryOpts.filter } : {}
1174
+ ...filter !== void 0 ? { filter } : {}
1355
1175
  },
1356
1176
  fields: ["*"]
1357
- };
1358
- return (await ns().searchRecords(req)).result.hits.map((hit) => {
1177
+ })).result.hits.map((hit) => {
1359
1178
  const { text: hitText, ...rest } = hit.fields;
1360
- const metadata = Object.keys(rest).length > 0 ? rest : void 0;
1361
- return {
1179
+ const match = {
1362
1180
  id: hit._id,
1363
1181
  score: hit._score,
1364
- text: typeof hitText === "string" ? hitText : "",
1365
- ...metadata !== void 0 ? { metadata } : {}
1182
+ text: typeof hitText === "string" ? hitText : ""
1366
1183
  };
1184
+ if (Object.keys(rest).length > 0) match.metadata = rest;
1185
+ return match;
1367
1186
  });
1368
1187
  },
1369
1188
  async delete(ids) {
1370
- const list = Array.isArray(ids) ? ids : [ids];
1371
- await ns().deleteMany(list);
1189
+ await ns.deleteMany(Array.isArray(ids) ? ids : [ids]);
1372
1190
  }
1373
1191
  };
1374
1192
  }
1375
1193
  //#endregion
1376
1194
  //#region host/unstorage-kv.ts
1377
- /**
1378
- * Key-value store backed by unstorage.
1379
- *
1380
- * Works with any unstorage driver (memory, fs, S3/R2, etc.).
1381
- */
1382
- /**
1383
- * Create a KV store backed by any unstorage driver.
1384
- *
1385
- * @param options - See {@link UnstorageKvOptions}.
1386
- * @returns A {@link Kv} instance.
1387
- *
1388
- * @example
1389
- * ```ts
1390
- * import { createStorage } from "unstorage";
1391
- * import { createUnstorageKv } from "@alexkroman1/aai/unstorage-kv";
1392
- *
1393
- * const kv = createUnstorageKv({ storage: createStorage() });
1394
- * await kv.set("greeting", "hello");
1395
- * const value = await kv.get<string>("greeting"); // "hello"
1396
- * ```
1397
- */
1398
1195
  function createUnstorageKv(options) {
1399
1196
  const store = options.prefix ? prefixStorage(options.storage, options.prefix) : options.storage;
1400
1197
  return {
@@ -1403,9 +1200,9 @@ function createUnstorageKv(options) {
1403
1200
  },
1404
1201
  async set(key, value, setOptions) {
1405
1202
  if (JSON.stringify(value).length > 65536) throw new Error(`Value exceeds max size of ${MAX_VALUE_SIZE} bytes`);
1406
- const storable = value;
1407
- if (setOptions?.expireIn && setOptions.expireIn > 0) await store.setItem(key, storable, { ttl: Math.ceil(setOptions.expireIn / 1e3) });
1408
- else await store.setItem(key, storable);
1203
+ const expireIn = setOptions?.expireIn;
1204
+ const ttlOption = expireIn && expireIn > 0 ? { ttl: Math.ceil(expireIn / 1e3) } : void 0;
1205
+ await store.setItem(key, value, ttlOption);
1409
1206
  },
1410
1207
  async delete(keys) {
1411
1208
  const keyArray = Array.isArray(keys) ? keys : [keys];
@@ -1418,36 +1215,15 @@ function createUnstorageKv(options) {
1418
1215
  }
1419
1216
  //#endregion
1420
1217
  //#region host/providers/resolve-kv.ts
1421
- /**
1422
- * Descriptor → concrete `Kv` resolver. Mirror of `resolveLlm` /
1423
- * `resolveVector`. Always wraps the produced unstorage Storage in
1424
- * `createUnstorageKv` with the provided per-tenant prefix so namespace
1425
- * isolation is enforced regardless of backend choice.
1426
- */
1427
- /**
1428
- * Load a CJS unstorage driver factory. The CJS variants use
1429
- * `module.exports = defineDriver(...)` so the require result is the
1430
- * factory itself (not an object with `.default`).
1431
- *
1432
- * Delegates to loadProviderPackage (lazy-load via createRequire so the
1433
- * driver is a true optional peer dep).
1434
- */
1435
1218
  function loadDriver(modulePath, label) {
1436
1219
  return loadProviderPackage(modulePath, `${label} KV: driver`);
1437
1220
  }
1438
- /**
1439
- * Build a lazy unstorage Driver that defers loading the real driver
1440
- * factory until the first I/O operation. This is necessary for drivers
1441
- * whose peer dependencies (e.g. `ioredis`) may not be installed on the
1442
- * host at startup — the missing package will only surface when the agent
1443
- * actually performs KV operations, not at session creation time.
1444
- */
1445
1221
  function makeLazyDriver(modulePath, label, opts) {
1446
1222
  let resolved = null;
1447
- const get = () => {
1223
+ function get() {
1448
1224
  if (!resolved) resolved = loadDriver(modulePath, label)(opts);
1449
1225
  return resolved;
1450
- };
1226
+ }
1451
1227
  return {
1452
1228
  name: label.toLowerCase(),
1453
1229
  hasItem: (key, txOpts) => get().hasItem(key, txOpts),
@@ -1458,10 +1234,9 @@ function makeLazyDriver(modulePath, label, opts) {
1458
1234
  removeItem: (key, txOpts) => get().removeItem?.(key, txOpts),
1459
1235
  getKeys: (base, txOpts) => get().getKeys(base, txOpts),
1460
1236
  clear: (base, txOpts) => get().clear?.(base, txOpts),
1461
- dispose: () => resolved ? resolved.dispose?.() : void 0
1237
+ dispose: () => resolved?.dispose?.()
1462
1238
  };
1463
1239
  }
1464
- /** Resolve a {@link KvProvider} descriptor into a {@link Kv}. */
1465
1240
  function resolveKv(descriptor, env, prefix) {
1466
1241
  switch (descriptor.kind) {
1467
1242
  case MEMORY_KV_KIND: return createUnstorageKv({
@@ -1508,24 +1283,16 @@ function resolveKv(descriptor, env, prefix) {
1508
1283
  }
1509
1284
  //#endregion
1510
1285
  //#region host/providers/resolve-vector.ts
1511
- /**
1512
- * Descriptor → concrete `Vector` resolver. Mirror of `resolveLlm`.
1513
- *
1514
- * Pulls API keys from the agent env so descriptors stay
1515
- * secret-free. Lazy-loads provider SDKs via `createRequire` so
1516
- * unused providers never enter the bundle.
1517
- */
1518
- /** Resolve a {@link VectorProvider} descriptor into a {@link Vector}. */
1519
1286
  function resolveVector(descriptor, env, namespace) {
1520
1287
  switch (descriptor.kind) {
1521
1288
  case IN_MEMORY_VECTOR_KIND: return createMemoryVector({ namespace });
1522
1289
  case PINECONE_VECTOR_KIND: {
1523
1290
  const apiKey = resolveApiKey("PINECONE_API_KEY", env);
1524
1291
  if (!apiKey) throw new Error("Pinecone Vector: missing API key. Set PINECONE_API_KEY in the agent env.");
1525
- const opts = descriptor.options;
1292
+ const { index } = descriptor.options;
1526
1293
  return createPineconeVector({
1527
1294
  apiKey,
1528
- index: opts.index,
1295
+ index,
1529
1296
  namespace
1530
1297
  });
1531
1298
  }
@@ -1534,14 +1301,13 @@ function resolveVector(descriptor, env, namespace) {
1534
1301
  }
1535
1302
  //#endregion
1536
1303
  //#region sdk/system-prompt.ts
1537
- function getFormattedDate() {
1538
- return (/* @__PURE__ */ new Date()).toLocaleDateString("en-US", {
1539
- weekday: "long",
1540
- year: "numeric",
1541
- month: "long",
1542
- day: "numeric"
1543
- });
1544
- }
1304
+ const DATE_FORMAT_OPTIONS = {
1305
+ weekday: "long",
1306
+ year: "numeric",
1307
+ month: "long",
1308
+ day: "numeric"
1309
+ };
1310
+ const TOOL_PREAMBLE = "\n\nWhen you decide to use a tool, ALWAYS say a brief natural phrase BEFORE the tool call (e.g. \"Let me look that up\" or \"One moment while I check\"). This fills silence while the tool executes. Keep preambles to one short sentence.";
1545
1311
  const VOICE_RULES = "\n\nCRITICAL OUTPUT RULES — you MUST follow these for EVERY response:\nYour response will be spoken aloud by a TTS system and displayed as plain text.\n- NEVER use markdown: no **, no *, no _, no #, no `, no [](), no ---\n- NEVER use bullet points (-, *, •) or numbered lists (1., 2.)\n- NEVER use code blocks or inline code\n- NEVER mention tools, search, APIs, or technical failures to the user. If a tool returns no results, just answer naturally without explaining why.\n- Write exactly as you would say it out loud to a friend\n- Use short conversational sentences. To list things, say \"First,\" \"Next,\" \"Finally,\"\n- Keep responses concise — 1 to 3 sentences max";
1546
1312
  /**
1547
1313
  * Build the system prompt sent to the LLM from the agent configuration.
@@ -1557,11 +1323,10 @@ const VOICE_RULES = "\n\nCRITICAL OUTPUT RULES — you MUST follow these for EVE
1557
1323
  * @returns The assembled system prompt string.
1558
1324
  */
1559
1325
  function buildSystemPrompt(config, opts) {
1560
- const { hasTools } = opts;
1561
1326
  const agentInstructions = config.systemPrompt && config.systemPrompt !== DEFAULT_SYSTEM_PROMPT ? `\n\nAgent-Specific Instructions:\n${config.systemPrompt}` : "";
1562
- const toolPreamble = hasTools ? "\n\nWhen you decide to use a tool, ALWAYS say a brief natural phrase BEFORE the tool call (e.g. \"Let me look that up\" or \"One moment while I check\"). This fills silence while the tool executes. Keep preambles to one short sentence." : "";
1327
+ const toolPreamble = opts.hasTools ? TOOL_PREAMBLE : "";
1563
1328
  const guidance = opts.toolGuidance && opts.toolGuidance.length > 0 ? `\n\nBuilt-in Tool Usage:\n${opts.toolGuidance.join("\n")}` : "";
1564
- return DEFAULT_SYSTEM_PROMPT + `\n\nToday's date is ${getFormattedDate()}.` + agentInstructions + toolPreamble + guidance + (opts.voice ? VOICE_RULES : "");
1329
+ return DEFAULT_SYSTEM_PROMPT + `\n\nToday's date is ${(/* @__PURE__ */ new Date()).toLocaleDateString("en-US", DATE_FORMAT_OPTIONS)}.` + agentInstructions + toolPreamble + guidance + (opts.voice ? VOICE_RULES : "");
1565
1330
  }
1566
1331
  //#endregion
1567
1332
  //#region host/runtime-config.ts
@@ -1581,22 +1346,23 @@ const consoleLogger = {
1581
1346
  error: consoleLog(console.error),
1582
1347
  debug: consoleLog(console.debug)
1583
1348
  };
1584
- /**
1585
- * Structured JSON logger for production diagnostics. Each log entry is a
1586
- * single-line JSON object with `timestamp`, `level`, `msg`, and any
1587
- * caller-provided context fields.
1588
- */
1589
1349
  function jsonLog(level) {
1350
+ const out = level === "error" || level === "warn" ? process.stderr : process.stdout;
1590
1351
  return (msg, ctx) => {
1591
1352
  const entry = {
1592
1353
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
1593
1354
  level,
1594
- msg
1355
+ msg,
1356
+ ...ctx
1595
1357
  };
1596
- if (ctx) Object.assign(entry, ctx);
1597
- (level === "error" || level === "warn" ? process.stderr : process.stdout).write(`${JSON.stringify(entry)}\n`);
1358
+ out.write(`${JSON.stringify(entry)}\n`);
1598
1359
  };
1599
1360
  }
1361
+ /**
1362
+ * Structured JSON logger for production diagnostics. Each log entry is a
1363
+ * single-line JSON object with `timestamp`, `level`, `msg`, and any
1364
+ * caller-provided context fields.
1365
+ */
1600
1366
  const jsonLogger = {
1601
1367
  info: jsonLog("info"),
1602
1368
  warn: jsonLog("warn"),
@@ -1615,15 +1381,16 @@ const REPLY_DONE_SLOW_THRESHOLD_MS = 50;
1615
1381
  function createSessionCore(opts) {
1616
1382
  const log = opts.logger ?? consoleLogger;
1617
1383
  const maxHistory = opts.maxHistory ?? 200;
1618
- const idleMs = (() => {
1619
- const raw = opts.agentConfig.idleTimeoutMs ?? 3e5;
1620
- return raw === 0 || !Number.isFinite(raw) ? 0 : raw;
1621
- })();
1622
- let reply = {
1623
- currentReplyId: null,
1624
- pendingTools: [],
1625
- toolCallCount: 0
1626
- };
1384
+ const rawIdleMs = opts.agentConfig.idleTimeoutMs ?? 3e5;
1385
+ const idleMs = rawIdleMs === 0 || !Number.isFinite(rawIdleMs) ? 0 : rawIdleMs;
1386
+ function emptyReply() {
1387
+ return {
1388
+ currentReplyId: null,
1389
+ pendingTools: [],
1390
+ toolCallCount: 0
1391
+ };
1392
+ }
1393
+ let reply = emptyReply();
1627
1394
  let history = [];
1628
1395
  let turnPromise = null;
1629
1396
  let idleTimer = null;
@@ -1645,18 +1412,13 @@ function createSessionCore(opts) {
1645
1412
  }
1646
1413
  function beginReply(replyId) {
1647
1414
  reply = {
1648
- currentReplyId: replyId,
1649
- pendingTools: [],
1650
- toolCallCount: 0
1415
+ ...emptyReply(),
1416
+ currentReplyId: replyId
1651
1417
  };
1652
1418
  turnPromise = null;
1653
1419
  }
1654
1420
  function cancelReply() {
1655
- reply = {
1656
- currentReplyId: null,
1657
- pendingTools: [],
1658
- toolCallCount: 0
1659
- };
1421
+ reply = emptyReply();
1660
1422
  }
1661
1423
  function flushReply(startMs, hadTurnPromise) {
1662
1424
  const stepsUsed = reply.toolCallCount;
@@ -1846,7 +1608,7 @@ function createSessionCore(opts) {
1846
1608
  */
1847
1609
  const yieldTick = () => new Promise((r) => setTimeout(r, 0));
1848
1610
  function buildToolContext(opts) {
1849
- const { env, state, kv, vector, messages, sessionId } = opts;
1611
+ const { env, state, kv, vector, messages, sessionId, send } = opts;
1850
1612
  return {
1851
1613
  env,
1852
1614
  state: state ?? {},
@@ -1861,14 +1623,21 @@ function buildToolContext(opts) {
1861
1623
  messages: messages ?? [],
1862
1624
  sessionId: sessionId ?? "",
1863
1625
  send(event, data) {
1864
- opts.send?.(event, data);
1626
+ send?.(event, data);
1865
1627
  }
1866
1628
  };
1867
1629
  }
1630
+ function formatZodIssues(error) {
1631
+ return (error?.issues ?? []).map((i) => `${i.path.map(String).join(".")}: ${i.message}`).join(", ");
1632
+ }
1633
+ function stringifyResult(result) {
1634
+ if (result == null) return "null";
1635
+ return typeof result === "string" ? result : JSON.stringify(result);
1636
+ }
1868
1637
  async function executeToolCall(name, args, options) {
1869
- const { tool } = options;
1638
+ const { tool, logger } = options;
1870
1639
  const parsed = (tool.parameters ?? EMPTY_PARAMS).safeParse(args);
1871
- if (!parsed.success) return toolError(`Invalid arguments for tool "${name}": ${(parsed.error?.issues ?? []).map((i) => `${i.path.map(String).join(".")}: ${i.message}`).join(", ")}`);
1640
+ if (!parsed.success) return toolError(`Invalid arguments for tool "${name}": ${formatZodIssues(parsed.error)}`);
1872
1641
  try {
1873
1642
  const ctx = buildToolContext(options);
1874
1643
  await yieldTick();
@@ -1877,11 +1646,9 @@ async function executeToolCall(name, args, options) {
1877
1646
  message: `Tool "${name}" timed out after ${TOOL_EXECUTION_TIMEOUT_MS}ms`
1878
1647
  });
1879
1648
  await yieldTick();
1880
- if (result == null) return "null";
1881
- return typeof result === "string" ? result : JSON.stringify(result);
1649
+ return stringifyResult(result);
1882
1650
  } catch (err) {
1883
- const log = options.logger;
1884
- if (log) log.warn("Tool execution failed", {
1651
+ if (logger) logger.warn("Tool execution failed", {
1885
1652
  tool: name,
1886
1653
  error: errorDetail(err)
1887
1654
  });
@@ -1890,29 +1657,267 @@ async function executeToolCall(name, args, options) {
1890
1657
  }
1891
1658
  }
1892
1659
  //#endregion
1660
+ //#region host/_base64.ts
1661
+ function uint8ToBase64(bytes) {
1662
+ return Buffer.from(bytes).toString("base64");
1663
+ }
1664
+ function base64ToUint8(base64) {
1665
+ return new Uint8Array(Buffer.from(base64, "base64"));
1666
+ }
1667
+ //#endregion
1668
+ //#region host/transports/openai-realtime-transport.ts
1669
+ const DEFAULT_MODEL = "gpt-realtime-2";
1670
+ const DEFAULT_VOICE = "alloy";
1671
+ const DEFAULT_URL = "wss://api.openai.com/v1/realtime";
1672
+ const defaultCreateOpenaiRealtimeWebSocket = (url, opts) => new WsWebSocket(url, { headers: opts.headers });
1673
+ function createOpenaiRealtimeTransport(opts) {
1674
+ const log = opts.logger ?? consoleLogger;
1675
+ const createWs = opts.createWebSocket ?? defaultCreateOpenaiRealtimeWebSocket;
1676
+ const model = opts.options.model ?? DEFAULT_MODEL;
1677
+ const voice = opts.options.voice ?? DEFAULT_VOICE;
1678
+ const baseUrl = opts.options.url ?? DEFAULT_URL;
1679
+ let ws = null;
1680
+ let closing = false;
1681
+ const agentTranscriptBuffers = /* @__PURE__ */ new Map();
1682
+ const toolBuffers = /* @__PURE__ */ new Map();
1683
+ let currentResponseId = null;
1684
+ function send(payload) {
1685
+ if (!ws || ws.readyState !== 1) {
1686
+ log.debug("OpenAI Realtime send dropped: socket not open", { type: payload.type });
1687
+ return;
1688
+ }
1689
+ ws.send(JSON.stringify(payload));
1690
+ }
1691
+ function sendSessionUpdate() {
1692
+ send({
1693
+ type: "session.update",
1694
+ session: {
1695
+ modalities: ["audio", "text"],
1696
+ voice,
1697
+ instructions: opts.sessionConfig.systemPrompt,
1698
+ input_audio_format: "pcm16",
1699
+ output_audio_format: "pcm16",
1700
+ input_audio_transcription: { model: "whisper-1" },
1701
+ turn_detection: { type: "server_vad" },
1702
+ tools: opts.toolSchemas,
1703
+ tool_choice: opts.toolChoice
1704
+ }
1705
+ });
1706
+ }
1707
+ async function start() {
1708
+ const url = `${baseUrl}?model=${encodeURIComponent(model)}`;
1709
+ log.info("OpenAI Realtime connecting", { url });
1710
+ return new Promise((resolve, reject) => {
1711
+ const sock = createWs(url, { headers: {
1712
+ Authorization: `Bearer ${opts.apiKey}`,
1713
+ "OpenAI-Beta": "realtime=v1"
1714
+ } });
1715
+ ws = sock;
1716
+ let opened = false;
1717
+ sock.addEventListener("open", () => {
1718
+ opened = true;
1719
+ sendSessionUpdate();
1720
+ resolve();
1721
+ });
1722
+ sock.addEventListener("message", (ev) => handleMessage(ev.data));
1723
+ sock.addEventListener("close", (ev) => handleClose(ev.code ?? 0, ev.reason ?? ""));
1724
+ sock.addEventListener("error", (ev) => {
1725
+ const msg = typeof ev.message === "string" ? ev.message : "WebSocket error";
1726
+ if (!opened) {
1727
+ reject(new Error(msg));
1728
+ return;
1729
+ }
1730
+ if (closing) {
1731
+ log.info("OpenAI Realtime error during close", { error: msg });
1732
+ return;
1733
+ }
1734
+ opts.callbacks.onError("internal", msg);
1735
+ });
1736
+ });
1737
+ }
1738
+ function asString(v) {
1739
+ return typeof v === "string" ? v : "";
1740
+ }
1741
+ function handleAudioDelta(obj) {
1742
+ if (typeof obj.delta === "string") opts.callbacks.onAudioChunk(base64ToUint8(obj.delta));
1743
+ }
1744
+ function handleUserTranscript(obj) {
1745
+ if (typeof obj.transcript === "string") opts.callbacks.onUserTranscript(obj.transcript);
1746
+ }
1747
+ function handleResponseCreated(obj) {
1748
+ const resp = obj.response;
1749
+ const id = asString(resp?.id);
1750
+ currentResponseId = id;
1751
+ opts.callbacks.onReplyStarted(id);
1752
+ }
1753
+ function handleAgentTranscriptDelta(obj) {
1754
+ const id = asString(obj.item_id);
1755
+ const delta = asString(obj.delta);
1756
+ agentTranscriptBuffers.set(id, (agentTranscriptBuffers.get(id) ?? "") + delta);
1757
+ }
1758
+ function handleAgentTranscriptDone(obj) {
1759
+ const id = asString(obj.item_id);
1760
+ const text = agentTranscriptBuffers.get(id) ?? "";
1761
+ agentTranscriptBuffers.delete(id);
1762
+ if (text) opts.callbacks.onAgentTranscript(text, false);
1763
+ }
1764
+ function clearTurnBuffers() {
1765
+ agentTranscriptBuffers.clear();
1766
+ toolBuffers.clear();
1767
+ }
1768
+ function handleResponseDone() {
1769
+ currentResponseId = null;
1770
+ clearTurnBuffers();
1771
+ opts.callbacks.onReplyDone();
1772
+ }
1773
+ function handleErrorEvent(obj) {
1774
+ const err = obj.error;
1775
+ const message = typeof err?.message === "string" ? err.message : "OpenAI Realtime error";
1776
+ clearTurnBuffers();
1777
+ opts.callbacks.onError("internal", message);
1778
+ }
1779
+ function handleOutputItemAdded(obj) {
1780
+ const item = obj.item;
1781
+ if (item?.type !== "function_call" || !item.id) return;
1782
+ toolBuffers.set(item.id, {
1783
+ callId: item.call_id ?? "",
1784
+ name: item.name ?? "",
1785
+ argsBuffer: ""
1786
+ });
1787
+ }
1788
+ function handleFunctionCallArgsDelta(obj) {
1789
+ const id = asString(obj.item_id);
1790
+ const delta = asString(obj.delta);
1791
+ const buf = toolBuffers.get(id);
1792
+ if (buf) buf.argsBuffer += delta;
1793
+ }
1794
+ function parseToolArgs(argsStr, name, callId) {
1795
+ if (!argsStr) return {};
1796
+ try {
1797
+ const parsed = JSON.parse(argsStr);
1798
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) return parsed;
1799
+ } catch {
1800
+ log.warn("OpenAI Realtime: invalid tool args JSON", {
1801
+ name,
1802
+ callId
1803
+ });
1804
+ }
1805
+ return {};
1806
+ }
1807
/**
 * Handle `response.function_call_arguments.done`.
 *
 * Removes the item's buffer and emits the tool call. Call id, name and
 * arguments are taken from the event when non-empty, otherwise from what was
 * buffered for the item, otherwise the empty string.
 *
 * @param obj Parsed event object.
 */
function handleFunctionCallArgsDone(obj) {
  const itemId = asString(obj.item_id);
  const pending = toolBuffers.get(itemId);
  toolBuffers.delete(itemId);
  // Prefer the value carried by the event; fall back to the buffered one.
  const preferEvent = (fromEvent, buffered) => asString(fromEvent) || (buffered ?? "");
  const callId = preferEvent(obj.call_id, pending?.callId);
  const name = preferEvent(obj.name, pending?.name);
  const rawArgs = preferEvent(obj.arguments, pending?.argsBuffer);
  const args = parseToolArgs(rawArgs, name, callId);
  opts.callbacks.onToolCall(callId, name, args);
}
1816
/**
 * Entry point for every frame received from the realtime socket.
 *
 * The frame is stringified and parsed as JSON; unparseable frames are logged
 * and dropped, and non-object payloads are dropped silently. Recognised
 * event types are routed to their handler or callback; all others are
 * ignored.
 *
 * @param data Raw frame payload.
 */
function handleMessage(data) {
  let event;
  try {
    event = JSON.parse(String(data));
  } catch {
    log.warn("OpenAI Realtime: invalid JSON");
    return;
  }
  if (event === null || typeof event !== "object") return;
  switch (event.type) {
    // Audio output
    case "response.audio.delta":
      handleAudioDelta(event);
      break;
    case "response.audio.done":
      opts.callbacks.onAudioDone();
      break;
    // User speech detection and transcription
    case "input_audio_buffer.speech_started":
      opts.callbacks.onSpeechStarted();
      break;
    case "input_audio_buffer.speech_stopped":
      opts.callbacks.onSpeechStopped();
      break;
    case "conversation.item.input_audio_transcription.completed":
      handleUserTranscript(event);
      break;
    // Response lifecycle and agent transcript
    case "response.created":
      handleResponseCreated(event);
      break;
    case "response.audio_transcript.delta":
      handleAgentTranscriptDelta(event);
      break;
    case "response.audio_transcript.done":
      handleAgentTranscriptDone(event);
      break;
    case "response.done":
      handleResponseDone();
      break;
    // Tool calls
    case "response.output_item.added":
      handleOutputItemAdded(event);
      break;
    case "response.function_call_arguments.delta":
      handleFunctionCallArgsDelta(event);
      break;
    case "response.function_call_arguments.done":
      handleFunctionCallArgsDone(event);
      break;
    case "error":
      handleErrorEvent(event);
      break;
    default:
      break;
  }
}
1869
/**
 * Handle the socket closing.
 *
 * A close that happens while `closing` is set is only logged at info level.
 * Any other close is logged as a warning and reported to the caller as a
 * "connection" error.
 *
 * @param code   Close code.
 * @param reason Close reason.
 */
function handleClose(code, reason) {
  const details = {
    code,
    reason
  };
  if (!closing) {
    log.warn("OpenAI Realtime closed unexpectedly", details);
    opts.callbacks.onError("connection", `OpenAI Realtime closed (code=${code})`);
    return;
  }
  log.info("OpenAI Realtime closed", details);
}
1883
/**
 * Shut the transport down: mark the close as deliberate, close the socket
 * if there is one, and drop the reference to it.
 *
 * @returns A promise that resolves once the above has run.
 */
async function stop() {
  // Set first so handleClose treats the resulting close as expected.
  closing = true;
  if (ws != null) ws.close();
  ws = null;
}
1888
+ return {
1889
+ start,
1890
+ stop,
1891
+ sendUserAudio(bytes) {
1892
+ if (!ws || ws.readyState !== 1) return;
1893
+ ws.send(`{"type":"input_audio_buffer.append","audio":"${uint8ToBase64(bytes)}"}`);
1894
+ },
1895
+ sendToolResult(callId, result) {
1896
+ send({
1897
+ type: "conversation.item.create",
1898
+ item: {
1899
+ type: "function_call_output",
1900
+ call_id: callId,
1901
+ output: result
1902
+ }
1903
+ });
1904
+ send({ type: "response.create" });
1905
+ },
1906
+ cancelReply() {
1907
+ if (currentResponseId === null) return;
1908
+ send({ type: "response.cancel" });
1909
+ currentResponseId = null;
1910
+ clearTurnBuffers();
1911
+ opts.callbacks.onCancelled();
1912
+ }
1913
+ };
1914
+ }
1915
+ //#endregion
1893
1916
  //#region host/to-vercel-tools.ts
1894
1917
  /**
1895
- * Converts agent {@link ToolSchema}[] to Vercel AI SDK tools with `execute`
1896
- * delegation to the agent's {@link ExecuteTool} function.
1897
- *
1898
- * The pipeline orchestrator passes the output to `streamText({ tools })`.
1899
- * Each produced tool's `execute` closure calls
1900
- * `ctx.executeTool(name, args, sessionId, messages(), { signal, toolCallId })`,
1901
- * so the existing agent tool infrastructure (argument validation, KV, hooks,
1902
- * timeout) remains the single source of truth for tool behavior.
1903
- *
1904
- * Per-call `options.abortSignal` (forwarded by `streamText` when the
1905
- * outer turn is aborted, e.g. barge-in) takes precedence over the
1906
- * bag-level `ctx.signal` so individual invocations respect streamText
1907
- * aborts.
1908
- */
1909
- /**
1910
- * Convert an array of {@link ToolSchema} to a Vercel AI SDK `ToolSet`
1911
- * (record keyed by tool name).
1912
- *
1913
- * Uses the v6 `tool()` helper with `inputSchema: jsonSchema(...)` wrapping
1914
- * the agent's JSON Schema `parameters`. Execution is delegated to
1915
- * `ctx.executeTool` so validation, KV, timeouts, and hooks keep working.
1918
+ * Converts agent {@link ToolSchema}[] to Vercel AI SDK tools, delegating
1919
+ * `execute` to the agent's {@link ExecuteTool} so validation, KV, hooks,
1920
+ * and timeouts remain the single source of truth for tool behavior.
1916
1921
  */
1917
1922
  function toVercelTools(schemas, ctx) {
1918
1923
  const out = {};
@@ -1925,7 +1930,8 @@ function toVercelTools(schemas, ctx) {
1925
1930
  const opts = {};
1926
1931
  if (signal !== void 0) opts.signal = signal;
1927
1932
  if (options.toolCallId !== void 0) opts.toolCallId = options.toolCallId;
1928
- return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages().slice(), opts);
1933
+ const history = ctx.messages().slice();
1934
+ return ctx.executeTool(schema.name, input, ctx.sessionId, history, opts);
1929
1935
  }
1930
1936
  });
1931
1937
  return out;
@@ -1976,10 +1982,6 @@ function createPipelineTransport(opts) {
1976
1982
  function emitError(code, message) {
1977
1983
  callbacks.onError(code, message);
1978
1984
  }
1979
- /**
1980
- * Tear down after an unrecoverable provider error. Aborts the in-flight
1981
- * turn, cancels TTS, signals providers to close. Idempotent.
1982
- */
1983
1985
  function terminate() {
1984
1986
  if (terminated) return;
1985
1987
  terminated = true;
@@ -2115,16 +2117,10 @@ function createPipelineTransport(opts) {
2115
2117
  }
2116
2118
  };
2117
2119
  }
2118
- /**
2119
- * Flush TTS and wait for drain. Resolves on:
2120
- * - TTS emits `done`
2121
- * - `signal` aborts (barge-in / provider error / session stop)
2122
- * - PIPELINE_FLUSH_TIMEOUT_MS elapses
2123
- * Resolves immediately if no TTS session.
2124
- */
2125
2120
  function flushTtsAndWait(signal) {
2126
2121
  const tts = ttsSession;
2127
2122
  if (!tts) return Promise.resolve();
2123
+ if (signal.aborted) return Promise.resolve();
2128
2124
  return new Promise((resolve) => {
2129
2125
  let off = null;
2130
2126
  let timer = null;
@@ -2144,10 +2140,6 @@ function createPipelineTransport(opts) {
2144
2140
  resolve();
2145
2141
  };
2146
2142
  const onAbort = () => finish();
2147
- if (signal.aborted) {
2148
- resolve();
2149
- return;
2150
- }
2151
2143
  signal.addEventListener("abort", onAbort, { once: true });
2152
2144
  off = tts.on("done", finish);
2153
2145
  timer = setTimeout(() => {
@@ -2301,8 +2293,7 @@ function createPipelineTransport(opts) {
2301
2293
  },
2302
2294
  sendUserAudio(bytes) {
2303
2295
  if (terminated || !audioReady) return;
2304
- const offset = bytes.byteOffset;
2305
- const length = bytes.byteLength;
2296
+ const { byteOffset: offset, byteLength: length } = bytes;
2306
2297
  let pcm;
2307
2298
  if (offset % 2 === 0 && length % 2 === 0) pcm = new Int16Array(bytes.buffer, offset, length / 2);
2308
2299
  else {
@@ -2323,15 +2314,16 @@ function createPipelineTransport(opts) {
2323
2314
  }
2324
2315
  //#endregion
2325
2316
  //#region host/s2s.ts
2326
- const uint8ToBase64 = (bytes) => Buffer.from(bytes).toString("base64");
2327
- const base64ToUint8 = (base64) => new Uint8Array(Buffer.from(base64, "base64"));
2328
2317
  const defaultCreateS2sWebSocket = (url, opts) => new WsWebSocket(url, { headers: opts.headers });
2329
2318
  const S2sMessageSchema = z.discriminatedUnion("type", [
2330
2319
  z.object({
2331
2320
  type: z.literal("session.ready"),
2332
2321
  session_id: z.string()
2333
2322
  }).passthrough(),
2334
- z.object({ type: z.literal("session.updated") }).passthrough(),
2323
+ z.object({
2324
+ type: z.literal("session.updated"),
2325
+ config: z.object({ id: z.string().optional() }).passthrough().optional()
2326
+ }).passthrough(),
2335
2327
  z.object({ type: z.literal("input.speech.started") }),
2336
2328
  z.object({ type: z.literal("input.speech.stopped") }),
2337
2329
  z.object({
@@ -2374,12 +2366,17 @@ function parseS2sMessage(obj) {
2374
2366
  const result = S2sMessageSchema.safeParse(obj);
2375
2367
  return result.success ? result.data : void 0;
2376
2368
  }
2369
/**
 * Build the optional `sid` log field.
 *
 * @param ctx Context object that may carry a `sid`.
 * @returns `{ sid }` when `ctx.sid` is defined, otherwise an empty object
 *   (so spreading it adds no key).
 */
function sidFields(ctx) {
  if (ctx.sid === undefined) return {};
  return { sid: ctx.sid };
}
2377
2372
  function dispatchS2sMessage(callbacks, msg, state, ctx) {
2378
2373
  switch (msg.type) {
2379
2374
  case "session.ready":
2380
2375
  callbacks.onSessionReady(msg.session_id);
2381
2376
  break;
2382
- case "session.updated": break;
2377
+ case "session.updated":
2378
+ if (msg.config?.id !== void 0) callbacks.onSessionReady(msg.config.id);
2379
+ break;
2383
2380
  case "input.speech.started":
2384
2381
  if (!state.speechActive) {
2385
2382
  state.speechActive = true;
@@ -2406,13 +2403,18 @@ function dispatchS2sMessage(callbacks, msg, state, ctx) {
2406
2403
  break;
2407
2404
  case "reply.done":
2408
2405
  ctx.log.info("S2S << reply.done", {
2409
- ...ctx.sid !== void 0 ? { sid: ctx.sid } : {},
2406
+ ...sidFields(ctx),
2410
2407
  status: msg.status ?? "completed"
2411
2408
  });
2412
2409
  if (msg.status === "interrupted") callbacks.onCancelled();
2413
2410
  else callbacks.onReplyDone();
2414
2411
  break;
2415
2412
  case "session.error":
2413
+ ctx.log.warn("S2S << session.error", {
2414
+ ...sidFields(ctx),
2415
+ code: msg.code,
2416
+ message: msg.message
2417
+ });
2416
2418
  if (msg.code === "session_not_found" || msg.code === "session_forbidden") callbacks.onSessionExpired();
2417
2419
  else callbacks.onError(new Error(msg.message));
2418
2420
  break;
@@ -2439,8 +2441,8 @@ function connectS2s(opts) {
2439
2441
  return;
2440
2442
  }
2441
2443
  const json = JSON.stringify(msg);
2442
- if (msg.type !== "input.audio") if (msg.type === "session.update") log.info(`S2S >> ${msg.type}`, { payload: json });
2443
- else log.info(`S2S >> ${msg.type}`);
2444
+ if (msg.type === "session.update") log.info(`S2S >> ${msg.type}`, { payload: json });
2445
+ else if (msg.type !== "input.audio") log.info(`S2S >> ${msg.type}`);
2444
2446
  ws.send(json);
2445
2447
  }
2446
2448
  const handle = {
@@ -2489,35 +2491,28 @@ function connectS2s(opts) {
2489
2491
  log.info("S2S WebSocket open");
2490
2492
  resolve(handle);
2491
2493
  });
2492
- function tryParseJson(data) {
2494
/**
 * Log the type of an incoming S2S message at info level, except for the
 * four types listed below, which are skipped here.
 *
 * @param type The message's `type` field.
 */
function logIncoming(type) {
  switch (type) {
    case "reply.audio":
    case "input.audio":
    case "reply.done":
    case "session.error":
      return;
    default:
      log.info(`S2S << ${type}`);
  }
}
2498
+ ws.addEventListener("message", (ev) => {
2499
+ let raw;
2493
2500
  try {
2494
- return JSON.parse(String(data));
2501
+ raw = JSON.parse(String(ev.data));
2495
2502
  } catch {
2496
- log.warn("S2S << invalid JSON", { data: String(data).slice(0, 200) });
2497
- }
2498
- }
2499
- function handleAudioFastPath(obj) {
2500
- if (obj.type === "reply.audio" && typeof obj.data === "string") {
2501
- callbacks.onAudio(base64ToUint8(obj.data));
2502
- return true;
2503
+ log.warn("S2S << invalid JSON", { data: String(ev.data).slice(0, 200) });
2504
+ return;
2503
2505
  }
2504
- return false;
2505
- }
2506
- function logIncoming(obj) {
2507
- if (obj.type === "reply.audio" || obj.type === "input.audio") return;
2508
- if (obj.type === "reply.done") return;
2509
- log.info(`S2S << ${obj.type}`);
2510
- }
2511
- ws.addEventListener("message", (ev) => {
2512
- const raw = tryParseJson(ev.data);
2513
- if (raw === void 0) return;
2514
2506
  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
2515
2507
  log.warn("S2S << non-object JSON message", { type: typeof raw });
2516
2508
  return;
2517
2509
  }
2518
2510
  const obj = raw;
2519
- logIncoming(obj);
2520
- if (handleAudioFastPath(obj)) return;
2511
+ logIncoming(obj.type);
2512
+ if (obj.type === "reply.audio" && typeof obj.data === "string") {
2513
+ callbacks.onAudio(base64ToUint8(obj.data));
2514
+ return;
2515
+ }
2521
2516
  const parsed = parseS2sMessage(obj);
2522
2517
  if (!parsed) {
2523
2518
  log.warn(`S2S << unrecognised message type: ${obj.type ?? JSON.stringify(raw).slice(0, 200)}`);
@@ -2550,9 +2545,9 @@ function connectS2s(opts) {
2550
2545
  const _internals = { connectS2s };
2551
2546
  /**
2552
2547
  * Close codes worth attempting `session.resume` on. These are network/server
2553
- * blips, not protocol or auth violations. Per AssemblyAI's docs, sessions are
2554
- * preserved for 30 s after disconnect, so resume is bounded by the window in
2555
- * `RESUME_WINDOW_MS` below.
2548
+ * blips, not protocol or auth violations. AssemblyAI keeps the session
2549
+ * available for 30 s after disconnect; reconnect runs immediately on close,
2550
+ * so the resume request reliably lands inside that window.
2556
2551
  */
2557
2552
  const TRANSIENT_CLOSE_CODES = new Set([
2558
2553
  1005,
@@ -2560,48 +2555,29 @@ const TRANSIENT_CLOSE_CODES = new Set([
2560
2555
  1011,
2561
2556
  3005
2562
2557
  ]);
2563
- /**
2564
- * AssemblyAI keeps the session alive for 30 s after disconnect; we leave a
2565
- * little headroom so the resume request still fits inside that window after
2566
- * the new WebSocket finishes opening.
2567
- */
2568
- const RESUME_WINDOW_MS = 25e3;
2569
2558
  function createS2sTransport(opts) {
2570
2559
  const log = opts.logger ?? consoleLogger;
2571
2560
  const createWs = opts.createWebSocket ?? defaultCreateS2sWebSocket;
2572
2561
  let handle = null;
2573
2562
  let currentReplyId = null;
2574
- /** Most recent `session.ready` ID — present once the upstream session is established. */
2575
2563
  let providerSessionId = null;
2576
- /** When the current session became ready; bounds the resume window. */
2577
- let sessionReadyAt = 0;
2578
- /** Set by `stop()` so a deliberate close doesn't trigger a reconnect. */
2579
2564
  let closing = false;
2580
- /**
2581
- * True while a `session.resume` round-trip is in flight (between sending
2582
- * resume and the next `session.ready`). Used to distinguish a resume failure
2583
- * (close before ready) from a normal close.
2584
- */
2585
2565
  let reconnecting = false;
2586
- /**
2587
- * Set when a reconnect attempt is kicked off, cleared once the resumed
2588
- * session's `session.ready` arrives. Prevents back-to-back reconnect loops
2589
- * when the freshly-resumed socket also drops before fully recovering.
2590
- */
2591
- let reconnectInFlight = false;
2592
2566
  function buildCallbacks() {
2593
2567
  return {
2594
2568
  onSessionReady: (id) => {
2569
+ const isFirstReady = providerSessionId === null;
2595
2570
  providerSessionId = id;
2596
- sessionReadyAt = Date.now();
2597
2571
  if (reconnecting) {
2598
2572
  reconnecting = false;
2599
- reconnectInFlight = false;
2600
2573
  log.info("S2S resumed", {
2601
2574
  sid: opts.sid,
2602
2575
  sessionId: id
2603
2576
  });
2604
- }
2577
+ } else if (isFirstReady) log.info("S2S session ready", {
2578
+ sid: opts.sid,
2579
+ sessionId: id
2580
+ });
2605
2581
  opts.callbacks.onSessionReady?.(id);
2606
2582
  },
2607
2583
  onReplyStarted: (replyId) => {
@@ -2625,7 +2601,6 @@ function createS2sTransport(opts) {
2625
2601
  onSessionExpired: () => {
2626
2602
  if (reconnecting) {
2627
2603
  reconnecting = false;
2628
- reconnectInFlight = false;
2629
2604
  log.warn("S2S resume rejected: session expired", { sid: opts.sid });
2630
2605
  opts.callbacks.onError("connection", "S2S resume failed: session expired");
2631
2606
  return;
@@ -2638,15 +2613,11 @@ function createS2sTransport(opts) {
2638
2613
  };
2639
2614
  }
2640
2615
  function canResumeAfter(code) {
2641
- if (!TRANSIENT_CLOSE_CODES.has(code)) return false;
2642
- if (providerSessionId === null) return false;
2643
- if (reconnectInFlight) return false;
2644
- return sessionReadyAt > 0 && Date.now() - sessionReadyAt < RESUME_WINDOW_MS;
2616
+ return TRANSIENT_CLOSE_CODES.has(code) && providerSessionId !== null && !reconnecting;
2645
2617
  }
2646
2618
  function emitFatalClose(code, reason, wasReconnecting) {
2647
2619
  if (wasReconnecting) {
2648
2620
  reconnecting = false;
2649
- reconnectInFlight = false;
2650
2621
  opts.callbacks.onError("connection", `S2S resume failed (code=${code})`);
2651
2622
  return;
2652
2623
  }
@@ -2667,7 +2638,6 @@ function createS2sTransport(opts) {
2667
2638
  });
2668
2639
  }
2669
2640
  function startResume(prevId, code, reason) {
2670
- reconnectInFlight = true;
2671
2641
  reconnecting = true;
2672
2642
  log.warn("S2S unexpected close — attempting resume", {
2673
2643
  sid: opts.sid,
@@ -2682,7 +2652,6 @@ function createS2sTransport(opts) {
2682
2652
  }
2683
2653
  resume(prevId).catch((err) => {
2684
2654
  reconnecting = false;
2685
- reconnectInFlight = false;
2686
2655
  const msg = err instanceof Error ? err.message : String(err);
2687
2656
  log.warn("S2S resume failed", {
2688
2657
  sid: opts.sid,
@@ -2700,12 +2669,11 @@ function createS2sTransport(opts) {
2700
2669
  return;
2701
2670
  }
2702
2671
  const wasReconnecting = reconnecting;
2703
- if (!canResumeAfter(code)) {
2672
+ const prevId = providerSessionId;
2673
+ if (!canResumeAfter(code) || prevId === null) {
2704
2674
  emitFatalClose(code, reason, wasReconnecting);
2705
2675
  return;
2706
2676
  }
2707
- const prevId = providerSessionId;
2708
- if (prevId === null) return;
2709
2677
  startResume(prevId, code, reason);
2710
2678
  }
2711
2679
  async function resume(prevSessionId) {
@@ -2714,7 +2682,7 @@ function createS2sTransport(opts) {
2714
2682
  config: opts.s2sConfig,
2715
2683
  createWebSocket: createWs,
2716
2684
  logger: log,
2717
- ...opts.sid !== void 0 ? { sid: opts.sid } : {},
2685
+ sid: opts.sid,
2718
2686
  callbacks: buildCallbacks()
2719
2687
  });
2720
2688
  if (closing) {
@@ -2799,14 +2767,11 @@ function createClientSink(ws, log) {
2799
2767
  }
2800
2768
  };
2801
2769
  }
2802
- function handleBinaryAudio(data, session) {
2770
+ function dispatchMessage(data, session, log, sid) {
2803
2771
  if (data instanceof Uint8Array) {
2804
2772
  session.onAudio(data);
2805
- return true;
2773
+ return;
2806
2774
  }
2807
- return false;
2808
- }
2809
- function handleTextMessage(data, session, log, sid) {
2810
2775
  if (typeof data !== "string") {
2811
2776
  log.warn("ws: non-string, non-binary frame received; dropping", { sid });
2812
2777
  return;
@@ -2869,10 +2834,7 @@ function wireSessionSocket(ws, opts) {
2869
2834
  if (!(session && messageBuffer)) return;
2870
2835
  const buf = messageBuffer;
2871
2836
  messageBuffer = null;
2872
- for (const event of buf) {
2873
- if (handleBinaryAudio(event.data, session)) continue;
2874
- handleTextMessage(event.data, session, log, sid);
2875
- }
2837
+ for (const event of buf) dispatchMessage(event.data, session, log, sid);
2876
2838
  }
2877
2839
  function onOpen() {
2878
2840
  opts.onOpen?.();
@@ -2921,8 +2883,7 @@ function wireSessionSocket(ws, opts) {
2921
2883
  if (messageBuffer && messageBuffer.length < 100) messageBuffer.push(event);
2922
2884
  return;
2923
2885
  }
2924
- if (handleBinaryAudio(event.data, session)) return;
2925
- handleTextMessage(event.data, session, log, sid);
2886
+ dispatchMessage(event.data, session, log, sid);
2926
2887
  });
2927
2888
  ws.addEventListener("close", () => {
2928
2889
  log.info("Session disconnected", {
@@ -2953,27 +2914,19 @@ function wireSessionSocket(ws, opts) {
2953
2914
  //#endregion
2954
2915
  //#region host/runtime.ts
2955
2916
  /**
2956
- * Resolve the API key env-var for the configured STT provider.
2957
- *
2958
- * Each STT provider uses its own env var (e.g. `ASSEMBLYAI_API_KEY`,
2959
- * `DEEPGRAM_API_KEY`). We read the kind from the descriptor if it is one;
2960
- * pre-resolved openers have no kind field so we fall back to AssemblyAI for
2961
- * backward compatibility (openers supply their own key at open-time anyway).
2917
+ * Read the descriptor `kind` if present. Pre-resolved openers (test escape
2918
+ * hatch) have no `kind` field, so callers fall back to a default env var.
2962
2919
  */
2920
function descriptorKind(value) {
  // Optional chaining tolerates null/undefined inputs.
  const candidate = value?.kind;
  // Only a string `kind` counts; anything else is treated as absent.
  if (typeof candidate !== "string") return undefined;
  return candidate;
}
2963
2924
  function resolveSttApiKey(stt, env) {
2964
- if ((stt != null && "kind" in stt && typeof stt.kind === "string" ? stt.kind : void 0) === "deepgram") return resolveApiKey("DEEPGRAM_API_KEY", env);
2925
+ if (descriptorKind(stt) === "deepgram") return resolveApiKey("DEEPGRAM_API_KEY", env);
2965
2926
  return resolveApiKey("ASSEMBLYAI_API_KEY", env);
2966
2927
  }
2967
- /**
2968
- * Resolve the API key env-var for the configured TTS provider.
2969
- *
2970
- * Each TTS provider uses its own env var (e.g. `CARTESIA_API_KEY`,
2971
- * `RIME_API_KEY`). We read the kind from the descriptor if it is one;
2972
- * pre-resolved openers have no kind field so we fall back to Cartesia for
2973
- * backward compatibility (openers supply their own key at open-time anyway).
2974
- */
2975
2928
  function resolveTtsApiKey(tts, env) {
2976
- if ((tts != null && "kind" in tts && typeof tts.kind === "string" ? tts.kind : void 0) === "rime") return resolveApiKey("RIME_API_KEY", env);
2929
+ if (descriptorKind(tts) === "rime") return resolveApiKey("RIME_API_KEY", env);
2977
2930
  return resolveApiKey("CARTESIA_API_KEY", env);
2978
2931
  }
2979
2932
  /**
@@ -3014,7 +2967,7 @@ function createLocalVector(slug) {
3014
2967
  * @public
3015
2968
  */
3016
2969
  function createRuntime(opts) {
3017
- const { agent, env, kv = createLocalKv(), vector, createWebSocket, logger = consoleLogger, s2sConfig = DEFAULT_S2S_CONFIG, sessionStartTimeoutMs, shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS } = opts;
2970
+ const { agent, env, kv = createLocalKv(), vector, createWebSocket, createOpenaiRealtimeWebSocket, logger = consoleLogger, s2sConfig = DEFAULT_S2S_CONFIG, sessionStartTimeoutMs, shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS } = opts;
3018
2971
  const mode = assertProviderTriple(opts.stt, opts.llm, opts.tts);
3019
2972
  const slug = agent.name ?? "local";
3020
2973
  const resolvedKv = agent.kv ? resolveKv(agent.kv, env, "") : kv;
@@ -3083,49 +3036,20 @@ function createRuntime(opts) {
3083
3036
  });
3084
3037
  };
3085
3038
  }
3086
- const pipelineProviders = mode === "pipeline" ? {
3039
+ let pipelineProviders = null;
3040
+ if (mode === "pipeline" && opts.stt && opts.llm && opts.tts) pipelineProviders = {
3087
3041
  stt: resolveSttIfDescriptor(opts.stt),
3088
3042
  llm: resolveLlmIfDescriptor(opts.llm, env),
3089
3043
  tts: resolveTtsIfDescriptor(opts.tts)
3090
- } : null;
3091
- function createSession(sessionOpts) {
3092
- sinkMap.set(sessionOpts.id, sessionOpts.client);
3093
- const isPipeline = Boolean(pipelineProviders);
3094
- const systemPrompt = buildSystemPrompt(agentConfig, {
3095
- hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
3096
- voice: true,
3097
- toolGuidance
3098
- });
3099
- let core = null;
3100
- function bindCore() {
3101
- if (!core) throw new Error("SessionCore not yet created");
3102
- return core;
3103
- }
3104
- const callbacks = {
3105
- onReplyStarted: (replyId) => bindCore().onReplyStarted(replyId),
3106
- onReplyDone: () => bindCore().onReplyDone(),
3107
- onCancelled: () => bindCore().onCancelled(),
3108
- onAudioChunk: (bytes) => bindCore().onAudioChunk(bytes),
3109
- onAudioDone: () => bindCore().onAudioDone(),
3110
- onUserTranscript: (text) => bindCore().onUserTranscript(text),
3111
- onAgentTranscript: (text, interrupted) => bindCore().onAgentTranscript(text, interrupted),
3112
- onToolCall: isPipeline ? (id, name, args) => sessionOpts.client.event({
3113
- type: "tool_call",
3114
- toolCallId: id,
3115
- toolName: name,
3116
- args
3117
- }) : (id, name, args) => bindCore().onToolCall(id, name, args),
3118
- onError: (code, message) => bindCore().onError(code, message),
3119
- onSpeechStarted: () => bindCore().onSpeechStarted(),
3120
- onSpeechStopped: () => bindCore().onSpeechStopped()
3121
- };
3122
- let transport;
3123
- if (pipelineProviders) transport = createPipelineTransport({
3044
+ };
3045
+ function buildPipelineTransport(args) {
3046
+ const { sessionOpts, systemPrompt, callbacks, providers } = args;
3047
+ return createPipelineTransport({
3124
3048
  sid: sessionOpts.id,
3125
3049
  agent: sessionOpts.agent,
3126
- stt: pipelineProviders.stt,
3127
- llm: pipelineProviders.llm,
3128
- tts: pipelineProviders.tts,
3050
+ stt: providers.stt,
3051
+ llm: providers.llm,
3052
+ tts: providers.tts,
3129
3053
  callbacks,
3130
3054
  sessionConfig: {
3131
3055
  systemPrompt,
@@ -3145,7 +3069,29 @@ function createRuntime(opts) {
3145
3069
  skipGreeting: sessionOpts.skipGreeting ?? false,
3146
3070
  logger
3147
3071
  });
3148
- else transport = createS2sTransport({
3072
+ }
3073
/**
 * Create the OpenAI Realtime transport for one session.
 *
 * The API key is resolved from `OPENAI_API_KEY`; provider options come from
 * `agent.s2s.options` (empty object when absent). The greeting and the custom
 * WebSocket factory are only included when they are set.
 *
 * @param args `{ sessionOpts, systemPrompt, callbacks }` for the session.
 * @returns The transport created by `createOpenaiRealtimeTransport`.
 */
function buildOpenaiRealtimeTransport(args) {
  const { sessionOpts, systemPrompt, callbacks } = args;

  const sessionConfig = { systemPrompt };
  if (agentConfig.greeting !== undefined) sessionConfig.greeting = agentConfig.greeting;
  sessionConfig.tools = toolSchemas;

  const transportOpts = {
    apiKey: resolveApiKey("OPENAI_API_KEY", env),
    options: agent.s2s?.options ?? {},
    sessionConfig,
    toolSchemas,
    toolChoice: agentConfig.toolChoice ?? "auto",
    callbacks,
    sid: sessionOpts.id,
    agent: sessionOpts.agent
  };
  if (createOpenaiRealtimeWebSocket) transportOpts.createWebSocket = createOpenaiRealtimeWebSocket;
  transportOpts.logger = logger;

  return createOpenaiRealtimeTransport(transportOpts);
}
3092
+ function buildAssemblyS2sTransport(args) {
3093
+ const { sessionOpts, systemPrompt, callbacks } = args;
3094
+ return createS2sTransport({
3149
3095
  apiKey: env.ASSEMBLYAI_API_KEY ?? "",
3150
3096
  s2sConfig,
3151
3097
  sessionConfig: {
@@ -3160,6 +3106,54 @@ function createRuntime(opts) {
3160
3106
  ...createWebSocket ? { createWebSocket } : {},
3161
3107
  logger
3162
3108
  });
3109
+ }
3110
/**
 * Pick and build the transport for a session.
 *
 * Order of precedence: resolved pipeline providers, then an explicit
 * `agent.s2s` descriptor (only the "openai-realtime" kind is accepted),
 * then the AssemblyAI S2S transport.
 *
 * @param args `{ sessionOpts, systemPrompt, callbacks }` for the session.
 * @throws {Error} When `agent.s2s` is set but its kind is missing or unknown.
 */
function buildTransport(args) {
  if (pipelineProviders) {
    return buildPipelineTransport({
      ...args,
      providers: pipelineProviders
    });
  }
  if (agent.s2s === undefined) return buildAssemblyS2sTransport(args);
  const kind = descriptorKind(agent.s2s);
  if (kind !== "openai-realtime") throw new Error(`Unknown s2s provider kind: ${kind ?? "<missing>"}`);
  return buildOpenaiRealtimeTransport(args);
}
3122
+ function createSession(sessionOpts) {
3123
+ sinkMap.set(sessionOpts.id, sessionOpts.client);
3124
+ const isPipeline = Boolean(pipelineProviders);
3125
+ const systemPrompt = buildSystemPrompt(agentConfig, {
3126
+ hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
3127
+ voice: true,
3128
+ toolGuidance
3129
+ });
3130
+ let core = null;
3131
+ function bindCore() {
3132
+ if (!core) throw new Error("SessionCore not yet created");
3133
+ return core;
3134
+ }
3135
+ const transport = buildTransport({
3136
+ sessionOpts,
3137
+ systemPrompt,
3138
+ callbacks: {
3139
+ onReplyStarted: (replyId) => bindCore().onReplyStarted(replyId),
3140
+ onReplyDone: () => bindCore().onReplyDone(),
3141
+ onCancelled: () => bindCore().onCancelled(),
3142
+ onAudioChunk: (bytes) => bindCore().onAudioChunk(bytes),
3143
+ onAudioDone: () => bindCore().onAudioDone(),
3144
+ onUserTranscript: (text) => bindCore().onUserTranscript(text),
3145
+ onAgentTranscript: (text, interrupted) => bindCore().onAgentTranscript(text, interrupted),
3146
+ onToolCall: isPipeline ? (id, name, args) => sessionOpts.client.event({
3147
+ type: "tool_call",
3148
+ toolCallId: id,
3149
+ toolName: name,
3150
+ args
3151
+ }) : (id, name, args) => bindCore().onToolCall(id, name, args),
3152
+ onError: (code, message) => bindCore().onError(code, message),
3153
+ onSpeechStarted: () => bindCore().onSpeechStarted(),
3154
+ onSpeechStopped: () => bindCore().onSpeechStopped()
3155
+ }
3156
+ });
3163
3157
  core = createSessionCore({
3164
3158
  id: sessionOpts.id,
3165
3159
  agent: sessionOpts.agent,
@@ -3228,6 +3222,11 @@ function createRuntime(opts) {
3228
3222
  * **Internal module** — used by `aai-cli` dev server. Not a public API.
3229
3223
  * Import via `aai/host`.
3230
3224
  */
3225
/** Response headers shared by every JSON reply. */
const JSON_HEADERS = { "Content-Type": "application/json" };
/**
 * Send `body` as a JSON response with the given status code.
 *
 * The body is serialized before any header is written. If serialization
 * throws (circular structure, BigInt, ...), the response is still untouched,
 * so the caller can answer with an error status instead of hitting a second
 * `writeHead` on a response whose headers were already written.
 *
 * @param res    Response object exposing `writeHead` and `end`.
 * @param status HTTP status code.
 * @param body   Value to serialize with `JSON.stringify`.
 * @throws {TypeError} When `body` cannot be serialized; nothing has been
 *   written to `res` in that case.
 */
function sendJson(res, status, body) {
  const payload = JSON.stringify(body);
  res.writeHead(status, JSON_HEADERS);
  res.end(payload);
}
3231
3230
  async function serveStatic(dir, req, res) {
3232
3231
  const url = req.url?.split("?")[0] ?? "/";
3233
3232
  const filePath = path.join(dir, url === "/" ? "index.html" : url);
@@ -3247,66 +3246,59 @@ async function serveStatic(dir, req, res) {
3247
3246
  return false;
3248
3247
  }
3249
3248
  }
3250
- function handleVectorPost(vector, req, res) {
3249
+ async function readBody(req) {
3251
3250
  let body = "";
3252
- req.on("data", (chunk) => {
3253
- body += chunk;
3254
- });
3255
- req.on("end", async () => {
3256
- try {
3257
- const json = JSON.parse(body);
3258
- const parsed = VectorRequestSchema.safeParse(json);
3259
- if (!parsed.success) {
3260
- res.statusCode = 400;
3261
- res.end(JSON.stringify({ error: parsed.error.message }));
3262
- return;
3263
- }
3264
- const op = parsed.data;
3265
- let result;
3266
- switch (op.op) {
3267
- case "upsert":
3268
- await vector.upsert(op.id, op.text, op.metadata);
3269
- result = "OK";
3270
- break;
3271
- case "query":
3272
- result = await vector.query(op.text, {
3273
- ...op.topK !== void 0 ? { topK: op.topK } : {},
3274
- ...op.filter !== void 0 ? { filter: op.filter } : {}
3275
- });
3276
- break;
3277
- case "delete":
3278
- await vector.delete(op.ids);
3279
- result = "OK";
3280
- break;
3281
- default: break;
3282
- }
3283
- res.statusCode = 200;
3284
- res.end(JSON.stringify({ result }));
3285
- } catch (err) {
3286
- res.statusCode = 500;
3287
- res.end(JSON.stringify({ error: err instanceof Error ? err.message : String(err) }));
3251
+ for await (const chunk of req) body += chunk;
3252
+ return body;
3253
+ }
3254
+ async function handleVectorPost(vector, req, res) {
3255
+ try {
3256
+ const parsed = VectorRequestSchema.safeParse(JSON.parse(await readBody(req)));
3257
+ if (!parsed.success) {
3258
+ sendJson(res, 400, { error: parsed.error.message });
3259
+ return;
3288
3260
  }
3289
- });
3261
+ const op = parsed.data;
3262
+ let result;
3263
+ switch (op.op) {
3264
+ case "upsert":
3265
+ await vector.upsert(op.id, op.text, op.metadata);
3266
+ result = "OK";
3267
+ break;
3268
+ case "query":
3269
+ result = await vector.query(op.text, {
3270
+ ...op.topK !== void 0 ? { topK: op.topK } : {},
3271
+ ...op.filter !== void 0 ? { filter: op.filter } : {}
3272
+ });
3273
+ break;
3274
+ case "delete":
3275
+ await vector.delete(op.ids);
3276
+ result = "OK";
3277
+ break;
3278
+ default: return op;
3279
+ }
3280
+ sendJson(res, 200, { result });
3281
+ } catch (err) {
3282
+ sendJson(res, 500, { error: err instanceof Error ? err.message : String(err) });
3283
+ }
3290
3284
  }
3291
- function handleKvGet(kv, req, res) {
3285
+ async function handleKvGet(kv, req, res) {
3292
3286
  const key = new URL(req.url ?? "/", "http://localhost").searchParams.get("key");
3293
3287
  if (!key) {
3294
- res.writeHead(400, { "Content-Type": "application/json" });
3295
- res.end(JSON.stringify({ error: "Missing key query parameter" }));
3288
+ sendJson(res, 400, { error: "Missing key query parameter" });
3296
3289
  return;
3297
3290
  }
3298
- kv.get(key).then((value) => {
3291
+ try {
3292
+ const value = await kv.get(key);
3299
3293
  if (value === null) {
3300
- res.writeHead(404, { "Content-Type": "application/json" });
3294
+ res.writeHead(404, JSON_HEADERS);
3301
3295
  res.end("null");
3302
- } else {
3303
- res.writeHead(200, { "Content-Type": "application/json" });
3304
- res.end(JSON.stringify(value));
3296
+ return;
3305
3297
  }
3306
- }).catch(() => {
3307
- res.writeHead(500, { "Content-Type": "application/json" });
3308
- res.end(JSON.stringify({ error: "KV error" }));
3309
- });
3298
+ sendJson(res, 200, value);
3299
+ } catch {
3300
+ sendJson(res, 500, { error: "KV error" });
3301
+ }
3310
3302
  }
3311
3303
  /**
3312
3304
  * Create an HTTP + WebSocket server for an agent.
@@ -3317,8 +3309,17 @@ function createServer(options) {
3317
3309
  const { runtime, clientHtml, clientDir, logger = consoleLogger, kv, vector } = options;
3318
3310
  const name = options.name ?? "agent";
3319
3311
  if (clientHtml && clientDir) throw new Error("clientHtml and clientDir are mutually exclusive");
3320
- const escapedName = escapeHtml(name);
3321
- const defaultHtml = clientHtml ?? `<!DOCTYPE html><html><body><h1>${escapedName}</h1><p>Agent server running.</p></body></html>`;
3312
+ const defaultHtml = clientHtml ?? `<!DOCTYPE html><html><body><h1>${escapeHtml(name)}</h1><p>Agent server running.</p></body></html>`;
3313
+ async function handleRequest(req, res, url, method) {
3314
+ if (clientDir && await serveStatic(clientDir, req, res)) return;
3315
+ if (method === "GET" && url === "/") {
3316
+ res.writeHead(200, { "Content-Type": "text/html" });
3317
+ res.end(defaultHtml);
3318
+ return;
3319
+ }
3320
+ logger.error(`${method} ${url} 404`);
3321
+ sendJson(res, 404, { error: "Not found" });
3322
+ }
3322
3323
  const httpServer = http.createServer((req, res) => {
3323
3324
  const url = req.url?.split("?")[0] ?? "/";
3324
3325
  const method = req.method ?? "GET";
@@ -3326,11 +3327,10 @@ function createServer(options) {
3326
3327
  res.setHeader("X-Content-Type-Options", "nosniff");
3327
3328
  res.setHeader("X-Frame-Options", "SAMEORIGIN");
3328
3329
  if (method === "GET" && url === "/health") {
3329
- res.writeHead(200, { "Content-Type": "application/json" });
3330
- res.end(JSON.stringify({
3330
+ sendJson(res, 200, {
3331
3331
  status: "ok",
3332
3332
  name
3333
- }));
3333
+ });
3334
3334
  return;
3335
3335
  }
3336
3336
  if (kv && method === "GET" && url === "/kv") {
@@ -3343,17 +3343,6 @@ function createServer(options) {
3343
3343
  }
3344
3344
  handleRequest(req, res, url, method);
3345
3345
  });
3346
- async function handleRequest(req, res, url, method) {
3347
- if (clientDir && await serveStatic(clientDir, req, res)) return;
3348
- if (method === "GET" && url === "/") {
3349
- res.writeHead(200, { "Content-Type": "text/html" });
3350
- res.end(defaultHtml);
3351
- return;
3352
- }
3353
- logger.error(`${method} ${url} 404`);
3354
- res.writeHead(404, { "Content-Type": "application/json" });
3355
- res.end(JSON.stringify({ error: "Not found" }));
3356
- }
3357
3346
  const wss = new WebSocketServer({
3358
3347
  noServer: true,
3359
3348
  maxPayload: MAX_WS_PAYLOAD_BYTES