@alexkroman1/aai 0.10.3 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/dist/_internal-types.d.ts +8 -1
  2. package/dist/_run-code.d.ts +16 -12
  3. package/dist/_runtime-conformance.d.ts +55 -0
  4. package/dist/_test-utils.d.ts +73 -0
  5. package/dist/_utils.d.ts +0 -19
  6. package/dist/_utils.js +28 -2
  7. package/dist/builtin-tools.d.ts +1 -5
  8. package/dist/constants-CwotjpJR.js +45 -0
  9. package/dist/constants.d.ts +42 -0
  10. package/dist/direct-executor-DAGCZOAN.js +1530 -0
  11. package/dist/direct-executor.d.ts +90 -31
  12. package/dist/hooks.d.ts +44 -0
  13. package/dist/hooks.js +58 -0
  14. package/dist/index.d.ts +1 -2
  15. package/dist/index.js +2 -2
  16. package/dist/internal.d.ts +19 -0
  17. package/dist/internal.js +164 -0
  18. package/dist/kv.d.ts +1 -1
  19. package/dist/kv.js +32 -1
  20. package/dist/matchers.js +1 -1
  21. package/dist/protocol.d.ts +3 -29
  22. package/dist/protocol.js +140 -2
  23. package/dist/server.d.ts +27 -40
  24. package/dist/server.js +117 -145
  25. package/dist/session.d.ts +65 -44
  26. package/dist/{testing-BbitshLb.js → testing-Dmx-dudh.js} +39 -43
  27. package/dist/testing.d.ts +9 -14
  28. package/dist/testing.js +2 -2
  29. package/dist/types.d.ts +24 -226
  30. package/dist/types.js +176 -2
  31. package/dist/types.test-d.d.ts +7 -0
  32. package/dist/vite-plugin.d.ts +15 -0
  33. package/dist/vite-plugin.js +82 -0
  34. package/dist/ws-handler.d.ts +1 -2
  35. package/package.json +34 -95
  36. package/dist/_embeddings.d.ts +0 -31
  37. package/dist/_internal-types-IfPcaJd5.js +0 -61
  38. package/dist/_internal-types.js +0 -2
  39. package/dist/_session-ctx.d.ts +0 -73
  40. package/dist/_session-otel.d.ts +0 -43
  41. package/dist/_session-persist.d.ts +0 -30
  42. package/dist/_ssrf-DCp_27V4.js +0 -123
  43. package/dist/_ssrf.d.ts +0 -30
  44. package/dist/_ssrf.js +0 -2
  45. package/dist/_utils-DgzpOMSV.js +0 -61
  46. package/dist/direct-executor-B-5mq3cu.js +0 -570
  47. package/dist/kv-iXtikQmR.js +0 -32
  48. package/dist/middleware-core-BwyBIPed.js +0 -107
  49. package/dist/middleware-core.d.ts +0 -47
  50. package/dist/middleware-core.js +0 -2
  51. package/dist/middleware.d.ts +0 -37
  52. package/dist/protocol-B-H2Q4ox.js +0 -162
  53. package/dist/runtime-CxcwaK68.js +0 -58
  54. package/dist/runtime.js +0 -2
  55. package/dist/s2s-M7JqtgFw.js +0 -272
  56. package/dist/s2s.js +0 -2
  57. package/dist/session-BYlwcrya.js +0 -683
  58. package/dist/session.js +0 -2
  59. package/dist/telemetry-CJlaDFNc.js +0 -95
  60. package/dist/telemetry.d.ts +0 -49
  61. package/dist/telemetry.js +0 -2
  62. package/dist/types-D8ZBxTL_.js +0 -192
  63. package/dist/unstorage-kv-CDgP-frt.js +0 -64
  64. package/dist/unstorage-kv.js +0 -2
  65. package/dist/unstorage-vector-Cj5llNhg.js +0 -172
  66. package/dist/unstorage-vector.d.ts +0 -47
  67. package/dist/unstorage-vector.js +0 -2
  68. package/dist/vector.d.ts +0 -86
  69. package/dist/vector.js +0 -49
  70. package/dist/worker-entry-2jaiqIj0.js +0 -70
  71. package/dist/worker-entry.d.ts +0 -47
  72. package/dist/worker-entry.js +0 -2
  73. package/dist/ws-handler-C0Q6eSay.js +0 -207
  74. package/dist/ws-handler.js +0 -2
@@ -0,0 +1,1530 @@
1
+ import { BuiltinToolSchema, DEFAULT_INSTRUCTIONS, ToolChoiceSchema, defineTool } from "./types.js";
2
+ import { a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_HTML_BYTES, f as MAX_PAGE_CHARS, g as TOOL_EXECUTION_TIMEOUT_MS, h as RUN_CODE_TIMEOUT_MS, l as HOOK_TIMEOUT_MS, m as MAX_VALUE_SIZE, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_TOOL_RESULT_CHARS, s as DEFAULT_TTS_SAMPLE_RATE } from "./constants-CwotjpJR.js";
3
+ import { errorDetail, errorMessage, toolError } from "./_utils.js";
4
+ import { callResolveTurnConfig, createAgentHooks } from "./hooks.js";
5
+ import { ClientMessageSchema, buildReadyConfig } from "./protocol.js";
6
+ import { matchGlob, sortAndPaginate } from "./kv.js";
7
+ import { z } from "zod";
8
+ import WsWebSocket from "ws";
9
+ import pTimeout from "p-timeout";
10
+ import { createStorage, prefixStorage } from "unstorage";
11
+ import vm from "node:vm";
12
+ import { createNanoEvents } from "nanoevents";
13
+ //#region runtime.ts
14
+ /**
15
+ * Runtime dependencies injected into the session pipeline.
16
+ *
17
+ * Defines the {@link Logger} interface, a default {@link consoleLogger},
18
+ * and the {@link S2SConfig} for Speech-to-Speech endpoint configuration.
19
+ */
20
/**
 * Default logger backed by the global `console`.
 *
 * Each level forwards to the matching console method (`info` uses
 * `console.log`). The context object is passed along only when it is truthy,
 * so plain messages are not followed by a trailing `undefined`.
 */
const consoleLogger = {
	info(msg, ctx) {
		if (ctx) return console.log(msg, ctx);
		return console.log(msg);
	},
	warn(msg, ctx) {
		if (ctx) return console.warn(msg, ctx);
		return console.warn(msg);
	},
	error(msg, ctx) {
		if (ctx) return console.error(msg, ctx);
		return console.error(msg);
	},
	debug(msg, ctx) {
		if (ctx) return console.debug(msg, ctx);
		return console.debug(msg);
	}
};
27
/**
 * Build a structured JSON log function for one level.
 *
 * Every call writes a single-line JSON object containing `timestamp`, `level`,
 * `msg`, and any caller-provided context fields (context fields are merged
 * last, so they override the base fields on a name clash). `error` and `warn`
 * entries go to `process.stderr`; all other levels go to `process.stdout`.
 *
 * A logger must never throw into its caller: when the context cannot be
 * serialized (circular reference, BigInt, throwing `toJSON`), the entry is
 * written without the context and with a `logError` field describing why.
 *
 * @param level - Level name recorded in each entry.
 * @returns A `(msg, ctx?) => void` log function.
 */
function jsonLog(level) {
	return (msg, ctx) => {
		const timestamp = (/* @__PURE__ */ new Date()).toISOString();
		const entry = {
			timestamp,
			level,
			msg
		};
		if (ctx) Object.assign(entry, ctx);
		let line;
		try {
			line = JSON.stringify(entry);
		} catch (err) {
			// Fall back to the base fields only; String(msg) guards against a
			// non-serializable message as well.
			line = JSON.stringify({
				timestamp,
				level,
				msg: String(msg),
				logError: `log context not serializable: ${err instanceof Error ? err.message : String(err)}`
			});
		}
		const stream = level === "error" || level === "warn" ? process.stderr : process.stdout;
		stream.write(`${line}\n`);
	};
}
/** Logger whose four levels each emit single-line JSON via {@link jsonLog}. */
const jsonLogger = {
	info: jsonLog("info"),
	warn: jsonLog("warn"),
	error: jsonLog("error"),
	debug: jsonLog("debug")
};
49
+ /** Default S2S endpoint configuration. */
50
+ const DEFAULT_S2S_CONFIG = {
51
+ wssUrl: "wss://speech-to-speech.us.assemblyai.com/v1/realtime",
52
+ inputSampleRate: DEFAULT_STT_SAMPLE_RATE,
53
+ outputSampleRate: DEFAULT_TTS_SAMPLE_RATE
54
+ };
55
+ //#endregion
56
+ //#region _internal-types.ts
57
+ /**
58
+ * Zod schema for serializable agent configuration sent over the wire.
59
+ *
60
+ * This is the JSON-safe subset of the agent definition that can be
61
+ * transmitted between the worker and the host process via structured clone.
62
+ */
63
+ const AgentConfigSchema = z.object({
64
+ name: z.string().min(1),
65
+ instructions: z.string(),
66
+ greeting: z.string(),
67
+ sttPrompt: z.string().optional(),
68
+ maxSteps: z.number().int().positive().optional(),
69
+ toolChoice: ToolChoiceSchema.optional(),
70
+ builtinTools: z.array(BuiltinToolSchema).readonly().optional(),
71
+ idleTimeoutMs: z.number().nonnegative().optional()
72
+ });
73
/**
 * Copy the serializable agent-config fields out of a richer source object.
 *
 * `name`, `instructions` and `greeting` are always copied. Optional fields are
 * copied only when set; `maxSteps` is additionally skipped when it is a
 * function, and `builtinTools` is shallow-copied into a fresh array.
 */
function toAgentConfig(src) {
	const { name, instructions, greeting, sttPrompt, maxSteps, toolChoice, builtinTools, idleTimeoutMs } = src;
	const config = { name, instructions, greeting };
	if (sttPrompt !== undefined) {
		config.sttPrompt = sttPrompt;
	}
	const hasStaticMaxSteps = maxSteps !== undefined && typeof maxSteps !== "function";
	if (hasStaticMaxSteps) {
		config.maxSteps = maxSteps;
	}
	if (toolChoice !== undefined) {
		config.toolChoice = toolChoice;
	}
	if (builtinTools) {
		config.builtinTools = Array.from(builtinTools);
	}
	if (idleTimeoutMs !== undefined) {
		config.idleTimeoutMs = idleTimeoutMs;
	}
	return config;
}
87
+ /**
88
+ * Zod schema for serialized tool definitions sent over the wire.
89
+ *
90
+ * `parameters` must be a valid JSON Schema object (with `type`, `properties`,
91
+ * etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
92
+ */
93
+ const ToolSchemaSchema = z.object({
94
+ name: z.string().min(1),
95
+ description: z.string().min(1),
96
+ parameters: z.record(z.string(), z.unknown())
97
+ });
98
+ /** Empty Zod object schema used as default when tools have no parameters. */
99
+ const EMPTY_PARAMS = z.object({});
100
/**
 * Turn a record of agent tool definitions into wire-format tool schemas.
 *
 * Each entry becomes `{ name, description, parameters }`, where `parameters`
 * is the result of `z.toJSONSchema` applied to the tool's parameter schema
 * (or to the shared empty object schema when the tool declares none).
 */
function agentToolsToSchemas(tools) {
	const schemas = [];
	for (const [name, def] of Object.entries(tools)) {
		const paramSchema = def.parameters ?? EMPTY_PARAMS;
		schemas.push({
			name,
			description: def.description,
			parameters: z.toJSONSchema(paramSchema)
		});
	}
	return schemas;
}
113
+ //#endregion
114
+ //#region _run-code.ts
115
+ /**
116
+ * run_code built-in tool — executes user JavaScript in a fresh `node:vm`
117
+ * context with no network, filesystem, or process access.
118
+ */
119
+ const runCodeParams = z.object({ code: z.string().describe("JavaScript code to execute. Use console.log() for output.") });
120
/**
 * Create the `run_code` built-in tool definition.
 *
 * The tool takes a `code` string and delegates to `executeInIsolate`, which
 * evaluates it in a fresh `node:vm` context and returns the captured console
 * output (or an `{ error }` object).
 */
function createRunCode() {
	const description = "Execute JavaScript code in a sandbox and return the output. Use this for calculations, data transformations, string manipulation, or any task that benefits from running code. Output is captured from console.log(). No network or filesystem access.";
	const execute = async (args) => {
		const { code } = args;
		return executeInIsolate(code);
	};
	return {
		description,
		parameters: runCodeParams,
		execute
	};
}
142
/**
 * Execute user code in a fresh `node:vm` context and return its console output.
 *
 * The code is wrapped in an async IIFE, so top-level `await` and `return` are
 * usable. Output from `console.log/info/warn/error/debug` is captured (each
 * argument converted with `String`) and joined with newlines.
 *
 * Timer hygiene: the watchdog timer used for the async timeout is always
 * cleared, and every timer the sandboxed code created through `setTimeout` /
 * `setInterval` is cancelled once the call settles. Without this, a stray
 * `setInterval` in user code would keep firing (and keep the process alive)
 * long after the result has been returned.
 *
 * NOTE(security): `node:vm` is documented as not being a security mechanism.
 * The context receives host-realm functions (timers, `URL`, `TextEncoder`, ...),
 * and host functions expose the host `Function` constructor via
 * `.constructor`. Treat this as convenience isolation, not a hard sandbox for
 * hostile code — flagged for review rather than changed here.
 *
 * @param code - JavaScript source to run.
 * @returns The trimmed captured output, a fixed success message when nothing
 *   was printed, or `{ error }` when the code throws or times out.
 */
async function executeInIsolate(code) {
	const output = [];
	const capture = (...args) => {
		output.push(args.map(String).join(" "));
	};
	// Remember every timer handle the sandbox creates so it can be cancelled.
	const sandboxTimers = /* @__PURE__ */ new Set();
	const tracked = (schedule) => (...args) => {
		const handle = schedule(...args);
		sandboxTimers.add(handle);
		return handle;
	};
	const context = vm.createContext({
		console: {
			log: capture,
			info: capture,
			warn: capture,
			error: capture,
			debug: capture
		},
		setTimeout: tracked(setTimeout),
		clearTimeout,
		setInterval: tracked(setInterval),
		clearInterval,
		URL,
		URLSearchParams,
		TextEncoder,
		TextDecoder,
		atob,
		btoa,
		structuredClone,
		queueMicrotask
	});
	let watchdog;
	try {
		const wrapped = `(async () => {\n${code}\n})()`;
		// The vm `timeout` option only bounds the synchronous part of the script.
		const promise = new vm.Script(wrapped, { filename: "run_code.js" }).runInContext(context, { timeout: RUN_CODE_TIMEOUT_MS });
		// The race below bounds the asynchronous continuation.
		const timedOut = new Promise((_, reject) => {
			watchdog = setTimeout(() => reject(/* @__PURE__ */ new Error("Code execution timed out")), RUN_CODE_TIMEOUT_MS);
		});
		await Promise.race([promise, timedOut]);
		return output.join("\n").trim() || "Code ran successfully (no output)";
	} catch (err) {
		return { error: errorMessage(err) };
	} finally {
		clearTimeout(watchdog);
		// In Node, clearTimeout also cancels handles created by setInterval.
		for (const handle of sandboxTimers) clearTimeout(handle);
	}
}
183
+ //#endregion
184
+ //#region memory-tools.ts
185
+ /**
186
+ * KV-backed memory tools for agent persistent state.
187
+ */
188
/**
 * Build the standard KV-backed memory tools.
 *
 * The returned record holds four tool definitions that read and write through
 * `ctx.kv`: `save_memory`, `recall_memory`, `list_memories` and
 * `forget_memory`. Spread it into an agent's `tools` record.
 *
 * @example
 * ```ts
 * import { defineAgent, memoryTools } from "aai";
 *
 * export default defineAgent({
 *   name: "My Agent",
 *   tools: { ...memoryTools() },
 * });
 * ```
 *
 * @returns A record with the four memory tool definitions.
 * @public
 */
function memoryTools() {
	// Parameter schema shared by the tools that take only a `key`.
	const keyOnly = (keyDescription) => z.object({ key: z.string().describe(keyDescription) });

	const save_memory = defineTool({
		description: "Save a piece of information to persistent memory. Use a descriptive key like 'user:name' or 'project:status'.",
		parameters: z.object({
			key: z.string().describe("A descriptive key for this memory (e.g. 'user:name', 'preference:color')"),
			value: z.string().describe("The information to remember")
		}),
		async execute(args, ctx) {
			await ctx.kv.set(args.key, args.value);
			return { saved: args.key };
		}
	});

	const recall_memory = defineTool({
		description: "Retrieve a previously saved memory by its key.",
		parameters: keyOnly("The key to look up"),
		async execute(args, ctx) {
			const { key } = args;
			const value = await ctx.kv.get(key);
			// Only an explicit null counts as "missing".
			return value === null ? { found: false, key } : { found: true, key, value };
		}
	});

	const list_memories = defineTool({
		description: "List all saved memory keys, optionally filtered by a prefix (e.g. 'user:').",
		parameters: z.object({ prefix: z.string().describe("Prefix to filter keys (e.g. 'user:'). Use empty string for all.").optional() }),
		async execute(args, ctx) {
			const entries = await ctx.kv.list(args.prefix ?? "");
			const keys = entries.map((entry) => entry.key);
			return { count: entries.length, keys };
		}
	});

	const forget_memory = defineTool({
		description: "Delete a previously saved memory by its key.",
		parameters: keyOnly("The key to delete"),
		async execute(args, ctx) {
			await ctx.kv.delete(args.key);
			return { deleted: args.key };
		}
	});

	return { save_memory, recall_memory, list_memories, forget_memory };
}
258
+ //#endregion
259
+ //#region builtin-tools.ts
260
+ /**
261
+ * Built-in tool definitions for the AAI agent SDK.
262
+ *
263
+ * In self-hosted mode, these run in-process alongside custom tools.
264
+ * In platform mode, they run on the host process outside the sandbox.
265
+ * Network requests go through the host's fetch proxy (with SSRF protection).
266
+ */
267
+ const fetchSignal = () => AbortSignal.timeout(FETCH_TIMEOUT_MS);
268
/**
 * Reduce an HTML document to plain text.
 *
 * Steps, in order: drop `<script>` and `<style>` elements with their content,
 * replace every remaining tag with a space, decode a small set of common
 * entities, collapse whitespace runs to a single space, and trim.
 *
 * Entities are decoded in ONE pass. Decoding `&amp;` first and the others
 * afterwards would double-decode escaped text: `&amp;lt;` (the literal text
 * `&lt;`) would wrongly end up as `<`.
 *
 * @param html - Raw HTML source.
 * @returns The extracted text.
 */
function htmlToText(html) {
	const entityText = {
		amp: "&",
		lt: "<",
		gt: ">",
		quot: "\"",
		"#39": "'",
		nbsp: " "
	};
	return html
		.replace(/<script[\s\S]*?<\/script>/gi, "")
		.replace(/<style[\s\S]*?<\/style>/gi, "")
		.replace(/<[^>]+>/g, " ")
		.replace(/&(amp|lt|gt|quot|#39|nbsp);/g, (_match, name) => entityText[name])
		.replace(/\s{2,}/g, " ")
		.trim();
}
272
+ const webSearchParams = z.object({
273
+ query: z.string().describe("The search query"),
274
+ max_results: z.number().describe("Maximum number of results to return (default 5)").optional()
275
+ });
276
+ const BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search";
277
+ const BraveSearchResponseSchema = z.object({ web: z.object({ results: z.array(z.object({
278
+ title: z.string(),
279
+ url: z.string(),
280
+ description: z.string()
281
+ })) }).optional() });
282
/**
 * Create the `web_search` built-in tool, backed by the Brave Search API.
 *
 * The API key is read from `ctx.env.BRAVE_API_KEY`; without it the tool
 * returns an `{ error }` object instead of making a request.
 *
 * `max_results` comes from the model and is not trusted: it is truncated to an
 * integer and clamped to 1..20 (the Brave `count` limit), and anything that is
 * not a finite number falls back to the default of 5. Previously a negative
 * value was sent to the API as-is and then used in `slice(0, n)`, which drops
 * results from the END of the list rather than limiting it.
 *
 * @param fetchFn - Fetch implementation to use (defaults to the global one).
 * @returns A tool definition with `description`, `parameters` and `execute`.
 */
function createWebSearch(fetchFn = globalThis.fetch) {
	const DEFAULT_RESULT_COUNT = 5;
	const MAX_RESULT_COUNT = 20;
	const normalizeCount = (requested) => {
		if (!Number.isFinite(requested)) return DEFAULT_RESULT_COUNT;
		return Math.min(MAX_RESULT_COUNT, Math.max(1, Math.trunc(requested)));
	};
	return {
		description: "Search the web for current information, facts, news, or answers to questions. Returns a list of results with title, URL, and description. Use this when the user asks about something you don't know, need up-to-date information, or want to verify facts.",
		parameters: webSearchParams,
		async execute(args, ctx) {
			const { query, max_results: requested = DEFAULT_RESULT_COUNT } = args;
			const maxResults = normalizeCount(requested);
			const apiKey = ctx.env.BRAVE_API_KEY ?? "";
			if (!apiKey) return { error: "BRAVE_API_KEY is not set — web search unavailable" };
			const resp = await fetchFn(`${BRAVE_SEARCH_URL}?${new URLSearchParams({
				q: query,
				count: String(maxResults),
				text_decorations: "false"
			})}`, {
				headers: { "X-Subscription-Token": apiKey },
				signal: fetchSignal()
			});
			if (!resp.ok) return { error: `Search request failed: ${resp.status} ${resp.statusText}` };
			const raw = await resp.json();
			const data = BraveSearchResponseSchema.safeParse(raw);
			if (!data.success) return { error: "Unexpected search response format" };
			// Re-apply the limit locally in case the API returns more than asked.
			return (data.data.web?.results ?? []).slice(0, maxResults).map((r) => ({
				title: r.title,
				url: r.url,
				description: r.description
			}));
		}
	};
}
310
+ const visitWebpageParams = z.object({ url: z.string().describe("The full URL to fetch (e.g., 'https://example.com/page')") });
311
/**
 * Create the `visit_webpage` built-in tool.
 *
 * Fetches the URL, converts the HTML to text with `htmlToText`, and returns
 * `{ url, content }`. When the text exceeds `MAX_PAGE_CHARS` it is cut to that
 * length and `truncated: true` plus the original `totalChars` are added.
 * Non-2xx responses produce `{ error, url }`.
 *
 * The raw HTML is always capped at `MAX_HTML_BYTES` before conversion. The
 * previous code only applied the cap when the body was longer than a
 * hard-coded 200000 characters, so the constant and the threshold could
 * disagree and oversized bodies could slip through unclipped.
 *
 * @param fetchFn - Fetch implementation to use (defaults to the global one).
 * @returns A tool definition with `description`, `parameters` and `execute`.
 */
function createVisitWebpage(fetchFn = globalThis.fetch) {
	return {
		description: "Fetch a webpage and return its content as clean text. Use this to read the full content of a URL found via web_search, or any link the user shares. Good for reading articles, documentation, blog posts, or product pages.",
		parameters: visitWebpageParams,
		async execute(args, _ctx) {
			const { url } = args;
			const resp = await fetchFn(url, {
				headers: {
					"User-Agent": "Mozilla/5.0 (compatible; VoiceAgent/1.0; +https://github.com/AssemblyAI/aai)",
					Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
				},
				signal: fetchSignal()
			});
			if (!resp.ok) return {
				error: `Failed to fetch: ${resp.status} ${resp.statusText}`,
				url
			};
			const htmlContent = await resp.text();
			// slice() is a no-op for shorter input, so no length pre-check is needed.
			const text = htmlToText(htmlContent.slice(0, MAX_HTML_BYTES));
			const truncated = text.length > MAX_PAGE_CHARS;
			return {
				url,
				content: truncated ? text.slice(0, MAX_PAGE_CHARS) : text,
				...truncated ? {
					truncated: true,
					totalChars: text.length
				} : {}
			};
		}
	};
}
342
+ const fetchJsonParams = z.object({
343
+ url: z.string().describe("The URL to fetch JSON from"),
344
+ headers: z.record(z.string(), z.string()).describe("Optional HTTP headers to include in the request (only safe headers like Accept, Content-Type are allowed)").optional()
345
+ });
346
/**
 * Lower-cased names of request headers the model is never allowed to set:
 * credentials, cookies, and headers that influence routing or client-IP
 * attribution.
 */
const BLOCKED_FETCH_HEADERS = new Set([
	"authorization",
	"cookie",
	"set-cookie",
	"host",
	"proxy-authorization",
	"x-forwarded-for",
	"x-forwarded-host",
	"x-forwarded-proto",
	"x-real-ip",
	"cf-connecting-ip",
	"fly-client-ip"
]);
/**
 * Drop blocked headers (matched case-insensitively) from a header record.
 *
 * @param raw - Header record supplied by the model; may be missing.
 * @returns A new record with the allowed headers, or `undefined` when the
 *   input is missing or nothing survives the filter.
 */
function sanitizeHeaders(raw) {
	if (!raw) return undefined;
	const allowed = {};
	let kept = 0;
	for (const [headerName, headerValue] of Object.entries(raw)) {
		if (BLOCKED_FETCH_HEADERS.has(headerName.toLowerCase())) continue;
		allowed[headerName] = headerValue;
	}
	kept = Object.keys(allowed).length;
	return kept > 0 ? allowed : undefined;
}
366
/**
 * Create the `fetch_json` built-in tool.
 *
 * Issues a request to the given URL with the model-supplied headers passed
 * through `sanitizeHeaders` first, then returns the parsed JSON body.
 * A non-2xx status yields `{ error, url }`, as does a body that is not valid
 * JSON.
 *
 * @param fetchFn - Fetch implementation to use (defaults to the global one).
 * @returns A tool definition with `description`, `parameters` and `execute`.
 */
function createFetchJson(fetchFn = globalThis.fetch) {
	async function execute(args, _ctx) {
		const { url, headers } = args;
		const init = {};
		const safeHeaders = sanitizeHeaders(headers);
		if (safeHeaders) {
			init.headers = safeHeaders;
		}
		init.signal = fetchSignal();
		const resp = await fetchFn(url, init);
		if (!resp.ok) {
			return { error: `HTTP ${resp.status} ${resp.statusText}`, url };
		}
		let body;
		try {
			body = await resp.json();
		} catch {
			return { error: "Response was not valid JSON", url };
		}
		return body;
	}
	return {
		description: "Call a REST API endpoint via HTTP GET and return the JSON response. Use this to fetch structured data from APIs — for example, weather data, stock prices, exchange rates, or any public JSON API. Supports custom headers for authenticated APIs.",
		parameters: fetchJsonParams,
		execute
	};
}
392
/**
 * Factories for each builtin name. Every factory returns the list of
 * `[toolName, ToolDef]` pairs that the builtin expands to.
 */
const BUILTIN_TOOL_FACTORIES = new Map([
	["web_search", (opts) => [["web_search", createWebSearch(opts?.fetch)]]],
	["visit_webpage", (opts) => [["visit_webpage", createVisitWebpage(opts?.fetch)]]],
	["fetch_json", (opts) => [["fetch_json", createFetchJson(opts?.fetch)]]],
	["run_code", () => [["run_code", createRunCode()]]],
	["memory", () => Object.entries(memoryTools())]
]);
/**
 * Resolve a builtin name to an array of `[toolName, ToolDef]` pairs.
 *
 * A single builtin may expand to several tools (`"memory"` expands to every
 * tool returned by `memoryTools()`). Unknown names resolve to an empty array.
 *
 * @param name - Builtin identifier.
 * @param opts - Optional settings; `opts.fetch` is handed to the network tools.
 */
function resolveBuiltin(name, opts) {
	const factory = BUILTIN_TOOL_FACTORIES.get(name);
	if (factory === undefined) return [];
	return factory(opts);
}
403
/**
 * Build a `toolName -> ToolDef` record for the requested builtins.
 *
 * Each name is expanded with `resolveBuiltin`; when two builtins produce the
 * same tool name, the later one wins. Intended for runtime use.
 *
 * @param names - Builtin identifiers to expand.
 * @param opts - Forwarded unchanged to `resolveBuiltin`.
 */
function getBuiltinToolDefs(names, opts) {
	const defs = {};
	for (const builtin of names) {
		const pairs = resolveBuiltin(builtin, opts);
		for (const pair of pairs) {
			const [toolName, toolDef] = pair;
			defs[toolName] = toolDef;
		}
	}
	return defs;
}
412
/**
 * Produce wire-format JSON tool schemas for the requested builtins.
 *
 * Every tool a builtin expands to becomes `{ name, description, parameters }`,
 * with `parameters` converted through `z.toJSONSchema` (the shared empty
 * object schema is used when a tool declares no parameters).
 */
function getBuiltinToolSchemas(names) {
	const expandOne = (builtin) => {
		const schemas = [];
		for (const [toolName, def] of resolveBuiltin(builtin)) {
			schemas.push({
				name: toolName,
				description: def.description,
				parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
			});
		}
		return schemas;
	};
	return names.flatMap((builtin) => expandOne(builtin));
}
420
+ //#endregion
421
+ //#region s2s.ts
422
+ const uint8ToBase64 = (bytes) => Buffer.from(bytes).toString("base64");
423
+ const base64ToUint8 = (base64) => new Uint8Array(Buffer.from(base64, "base64"));
424
+ const WS_OPEN = 1;
425
+ const defaultCreateS2sWebSocket = (url, opts) => new WsWebSocket(url, { headers: opts.headers });
426
/** Return `true` when every listed key of `obj` holds a string value. */
function hasStringFields(obj, ...keys) {
	return keys.every((key) => typeof obj[key] === "string");
}
430
/**
 * Validate and normalise a `transcript.agent` message.
 *
 * `text` must be a string, otherwise `undefined` is returned. `reply_id` and
 * `item_id` fall back to `""` when they are not strings, and `interrupted` is
 * `true` only for the literal boolean `true`.
 */
function parseAgentTranscript(obj) {
	const { text, reply_id: replyId, item_id: itemId, interrupted } = obj;
	if (typeof text !== "string") return undefined;
	const stringOrEmpty = (value) => (typeof value === "string" ? value : "");
	return {
		type: "transcript.agent",
		text,
		reply_id: stringOrEmpty(replyId),
		item_id: stringOrEmpty(itemId),
		interrupted: interrupted === true
	};
}
440
/**
 * Validate and normalise a `tool.call` message.
 *
 * Returns `undefined` unless both `call_id` and `name` are strings. `args` is
 * kept only when it is a non-null, non-array object; anything else is
 * replaced with an empty object.
 */
function parseToolCall(obj) {
	const { call_id: callId, name, args: rawArgs } = obj;
	if (typeof callId !== "string" || typeof name !== "string") return undefined;
	const isPlainRecord = rawArgs != null && typeof rawArgs === "object" && !Array.isArray(rawArgs);
	return {
		type: "tool.call",
		call_id: callId,
		name,
		args: isPlainRecord ? rawArgs : {}
	};
}
450
/** Validator that accepts any message and returns it unchanged. */
function passthrough(obj) {
	return obj;
}
/**
 * Build a validator that accepts a message only when every listed key holds
 * a string; the validator returns the message itself or `undefined`.
 */
function requireFields(...keys) {
	return function validate(obj) {
		if (!hasStringFields(obj, ...keys)) return undefined;
		return obj;
	};
}
456
/**
 * Validator for each recognised S2S message type. A validator returns the
 * (possibly normalised) message, or `undefined` when the payload is invalid.
 */
const MESSAGE_VALIDATORS = new Map(Object.entries({
	"session.ready": requireFields("session_id"),
	"session.updated": passthrough,
	"input.speech.started": passthrough,
	"input.speech.stopped": passthrough,
	"reply.content_part.started": passthrough,
	"reply.content_part.done": passthrough,
	"transcript.user.delta": requireFields("text"),
	"transcript.user": requireFields("item_id", "text"),
	"reply.started": requireFields("reply_id"),
	"transcript.agent.delta": requireFields("delta"),
	"transcript.agent": parseAgentTranscript,
	"tool.call": parseToolCall,
	// Only `status` is carried over, and only when it is a string.
	"reply.done": (obj) => {
		const done = { type: "reply.done" };
		if (typeof obj.status === "string") done.status = obj.status;
		return done;
	},
	"session.error": requireFields("code", "message"),
	"error": requireFields("message")
}));
/**
 * Validate a decoded S2S message object.
 *
 * @returns The validated message, or `undefined` when `type` is not a string,
 *   is not a recognised type, or the type's validator rejects the payload.
 */
function parseS2sMessage(obj) {
	const { type } = obj;
	if (typeof type !== "string") return undefined;
	const validate = MESSAGE_VALIDATORS.get(type);
	return validate === undefined ? undefined : validate(obj);
}
481
/**
 * Per-type translators from a validated S2S wire message (snake_case fields)
 * to the corresponding emitter event (camelCase payload). Types without an
 * entry — including `reply.content_part.*` — produce no event.
 */
const S2S_EVENT_DISPATCHERS = new Map([
	["session.ready", (emitter, msg) => emitter.emit("ready", { sessionId: msg.session_id })],
	["session.updated", (emitter, msg) => emitter.emit("sessionUpdated", msg)],
	["input.speech.started", (emitter) => emitter.emit("speechStarted")],
	["input.speech.stopped", (emitter) => emitter.emit("speechStopped")],
	["transcript.user.delta", (emitter, msg) => emitter.emit("userTranscriptDelta", { text: msg.text })],
	["transcript.user", (emitter, msg) => emitter.emit("userTranscript", {
		itemId: msg.item_id,
		text: msg.text
	})],
	["reply.started", (emitter, msg) => emitter.emit("replyStarted", { replyId: msg.reply_id })],
	["transcript.agent.delta", (emitter, msg) => emitter.emit("agentTranscriptDelta", { text: msg.delta })],
	["transcript.agent", (emitter, msg) => emitter.emit("agentTranscript", {
		text: msg.text,
		replyId: msg.reply_id,
		itemId: msg.item_id,
		interrupted: msg.interrupted
	})],
	["tool.call", (emitter, msg) => emitter.emit("toolCall", {
		callId: msg.call_id,
		name: msg.name,
		args: msg.args
	})],
	["reply.done", (emitter, msg) => emitter.emit("replyDone", msg.status ? { status: msg.status } : {})],
	["session.error", (emitter, msg) => {
		// Missing or forbidden sessions are reported as expiry, not as errors.
		const expired = msg.code === "session_not_found" || msg.code === "session_forbidden";
		emitter.emit(expired ? "sessionExpired" : "error", {
			code: msg.code,
			message: msg.message
		});
	}],
	["error", (emitter, msg) => emitter.emit("error", {
		code: "connection",
		message: msg.message
	})]
]);
/**
 * Emit the event that corresponds to a validated S2S message.
 *
 * @param emitter - Event emitter exposing `emit(event, payload?)`.
 * @param msg - Message previously accepted by the S2S message validators.
 */
function dispatchS2sMessage(emitter, msg) {
	const dispatch = S2S_EVENT_DISPATCHERS.get(msg.type);
	if (dispatch !== undefined) dispatch(emitter, msg);
}
549
/**
 * Open a Speech-to-Speech WebSocket and resolve with a handle once it is open.
 *
 * The socket is created through `opts.createWebSocket` with a bearer
 * `Authorization` header. The returned promise resolves on the `open` event
 * and rejects when an `error` or `close` event arrives first. After opening,
 * socket errors are surfaced as `error` events on the handle instead.
 *
 * Incoming text frames are JSON-decoded; `reply.audio` frames are decoded
 * from base64 and emitted directly as `audio` events, everything else is
 * validated with `parseS2sMessage` and routed through `dispatchS2sMessage`.
 * Outgoing sends are silently dropped (with a debug log) while the socket is
 * not open.
 *
 * @param opts.apiKey - Key placed in the `Authorization: Bearer` header.
 * @param opts.config - S2S config; `config.wssUrl` is the endpoint to connect to.
 * @param opts.createWebSocket - Factory `(url, { headers })` returning the socket.
 * @param opts.logger - Logger to use; defaults to `consoleLogger`.
 * @returns Promise of a handle with `on`, `sendAudio`, `sendToolResult`,
 *   `updateSession`, `resumeSession` and `close`.
 */
function connectS2s(opts) {
	const { apiKey, config, createWebSocket, logger: log = consoleLogger } = opts;
	return new Promise((resolve, reject) => {
		log.info("S2S connecting", { url: config.wssUrl });
		const socket = createWebSocket(config.wssUrl, { headers: { Authorization: `Bearer ${apiKey}` } });
		const events = createNanoEvents();
		// Decides whether early failures reject the promise or become events.
		let hasOpened = false;
		const socketIsOpen = () => socket.readyState === WS_OPEN;

		const sendJson = (msg) => {
			if (!socketIsOpen()) {
				log.debug("S2S send dropped: socket not open", { type: msg.type });
				return;
			}
			const json = JSON.stringify(msg);
			if (msg.type !== "input.audio") {
				// Only session updates have their full payload logged.
				const detail = msg.type === "session.update" ? { payload: json } : void 0;
				log.info(`S2S >> ${msg.type}`, detail);
			}
			socket.send(json);
		};

		const handle = {
			on: events.on.bind(events),
			sendAudio(audio) {
				if (!socketIsOpen()) {
					log.debug("S2S sendAudio dropped: socket not open");
					return;
				}
				// Hand-built JSON: base64 needs no escaping, so JSON.stringify is skipped.
				socket.send(`{"type":"input.audio","audio":"${uint8ToBase64(audio)}"}`);
			},
			sendToolResult(callId, result) {
				log.info("S2S >> tool.result", {
					call_id: callId,
					resultLength: result.length
				});
				sendJson({
					type: "tool.result",
					call_id: callId,
					result
				});
			},
			updateSession(sessionConfig) {
				const { systemPrompt, ...rest } = sessionConfig;
				sendJson({
					type: "session.update",
					session: {
						system_prompt: systemPrompt,
						...rest
					}
				});
			},
			resumeSession(sessionId) {
				sendJson({
					type: "session.resume",
					session_id: sessionId
				});
			},
			close() {
				log.info("S2S closing");
				socket.close();
			}
		};

		const onOpen = () => {
			hasOpened = true;
			log.info("S2S WebSocket open");
			resolve(handle);
		};

		const onMessage = (ev) => {
			let raw;
			try {
				raw = JSON.parse(String(ev.data));
			} catch {
				log.warn("S2S << invalid JSON", { data: String(ev.data).slice(0, 200) });
				return;
			}
			if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
				log.warn("S2S << non-object JSON message", { type: typeof raw });
				return;
			}
			const { type } = raw;
			// Audio frames are too frequent to log.
			const isAudioFrame = type === "reply.audio" || type === "input.audio";
			if (!isAudioFrame) {
				log.info(`S2S << ${type}`, type === "transcript.agent.delta" ? { delta: raw.delta } : void 0);
			}
			// Fast path: audio bypasses validation and dispatch.
			if (type === "reply.audio" && typeof raw.data === "string") {
				events.emit("audio", { audio: base64ToUint8(raw.data) });
				return;
			}
			const parsed = parseS2sMessage(raw);
			if (!parsed) {
				log.warn(`S2S << unrecognised message type: ${type ?? JSON.stringify(raw).slice(0, 200)}`);
				return;
			}
			dispatchS2sMessage(events, parsed);
		};

		const onClose = (ev) => {
			const code = ev.code ?? 0;
			log.info("S2S WebSocket closed", {
				code,
				reason: ev.reason ?? ""
			});
			if (!hasOpened) reject(/* @__PURE__ */ new Error(`WebSocket closed before open (code: ${code})`));
			events.emit("close");
		};

		const onError = (ev) => {
			const errObj = new Error(typeof ev.message === "string" ? ev.message : "WebSocket error");
			log.error("S2S WebSocket error", { error: errObj.message });
			if (!hasOpened) {
				reject(errObj);
				return;
			}
			events.emit("error", {
				code: "ws_error",
				message: errObj.message
			});
		};

		socket.addEventListener("open", onOpen);
		socket.addEventListener("message", onMessage);
		socket.addEventListener("close", onClose);
		socket.addEventListener("error", onError);
	});
}
669
+ //#endregion
670
+ //#region system-prompt.ts
671
/**
 * Format the current date in US English with the full weekday and month name,
 * numeric day and numeric year, e.g. "Monday, January 1, 2024".
 */
function getFormattedDate() {
	const longDate = new Intl.DateTimeFormat("en-US", {
		weekday: "long",
		year: "numeric",
		month: "long",
		day: "numeric"
	});
	return longDate.format(/* @__PURE__ */ new Date());
}
679
+ const VOICE_RULES = "\n\nCRITICAL OUTPUT RULES — you MUST follow these for EVERY response:\nYour response will be spoken aloud by a TTS system and displayed as plain text.\n- NEVER use markdown: no **, no *, no _, no #, no `, no [](), no ---\n- NEVER use bullet points (-, *, •) or numbered lists (1., 2.)\n- NEVER use code blocks or inline code\n- NEVER mention tools, search, APIs, or technical failures to the user. If a tool returns no results, just answer naturally without explaining why.\n- Write exactly as you would say it out loud to a friend\n- Use short conversational sentences. To list things, say \"First,\" \"Next,\" \"Finally,\"\n- Keep responses concise — 1 to 3 sentences max";
680
/**
 * Build the system prompt sent to the LLM from the agent configuration.
 *
 * The prompt is assembled in this order: the default instructions, today's
 * date, the agent-specific instructions (skipped when empty or identical to
 * the defaults), the tool-usage preamble, and the voice output rules.
 *
 * `opts` may be omitted: both flags are optional, so a missing options object
 * is treated as "no tools, no voice rules" instead of throwing a TypeError.
 *
 * @param config - The serializable agent configuration; only `instructions` is read.
 * @param opts.hasTools - When truthy, appends a preamble instructing the LLM to
 *   speak a brief phrase before each tool call to fill silence.
 * @param opts.voice - When truthy, appends the strict voice-specific output
 *   rules (no markdown, no bullet points, conversational tone, concise replies).
 * @returns The assembled system prompt string.
 */
function buildSystemPrompt(config, opts) {
	const { hasTools, voice } = opts ?? {};
	const agentInstructions = config.instructions && config.instructions !== DEFAULT_INSTRUCTIONS ? `\n\nAgent-Specific Instructions:\n${config.instructions}` : "";
	const toolPreamble = hasTools ? "\n\nWhen you decide to use a tool, ALWAYS say a brief natural phrase BEFORE the tool call (e.g. \"Let me look that up\" or \"One moment while I check\"). This fills silence while the tool executes. Keep preambles to one short sentence." : "";
	const voiceRules = voice ? VOICE_RULES : "";
	return `${DEFAULT_INSTRUCTIONS}\n\nToday's date is ${getFormattedDate()}.${agentInstructions}${toolPreamble}${voiceRules}`;
}
699
+ //#endregion
700
+ //#region session.ts
701
/**
 * Build the mutable per-session context shared by the S2S event handlers.
 *
 * The returned object carries every property of `opts` plus the connection
 * handle (`s2s`), per-reply bookkeeping (`reply`), the chain of in-flight tool
 * calls (`turnPromise`), a bounded conversation history and helper methods.
 *
 * @param opts - Session options. `id`, `agentConfig`, `hooks` and `log` are
 *   used by the helpers; `maxHistory` defaults to 200 when nullish.
 * @returns The session context object.
 */
function buildCtx(opts) {
  const { id, agentConfig, hooks, log } = opts;
  const maxHistory = opts.maxHistory ?? 200;
  /** Track in-flight hook promises so they can be awaited during shutdown. */
  const pendingHooks = /* @__PURE__ */ new Set();

  /** Fresh bookkeeping for a reply; a `null` id means no reply is active. */
  const newReplyState = (currentReplyId) => ({
    pendingTools: [],
    toolCallCount: 0,
    currentReplyId
  });

  /** Log a failed hook and forward the failure to the `error` hook. */
  const reportHookFailure = (name, err) => {
    log.warn(`${name} hook failed`, { err: errorMessage(err) });
    // A failing error hook must not re-trigger itself.
    if (name === "error") return;
    try {
      const forwarded = hooks.callHook("error", id, { message: errorMessage(err) });
      if (forwarded == null) return;
      // Promise.resolve accepts promises, thenables and plain values alike.
      Promise.resolve(forwarded).catch((e) => {
        log.warn("error hook failed", { err: errorMessage(e) });
      });
    } catch (e) {
      // A synchronous throw from the error hook is only logged so that it
      // cannot escape into the caller of fireHook or reject a tracked promise.
      log.warn("error hook failed", { err: errorMessage(e) });
    }
  };

  const ctx = {
    ...opts,
    s2s: null,
    reply: newReplyState(null),
    turnPromise: null,
    conversationMessages: [],
    maxHistory,
    /** Ask the hooks for the per-turn configuration of this session. */
    resolveTurnConfig() {
      return callResolveTurnConfig(hooks, id, HOOK_TIMEOUT_MS);
    },
    /**
     * Account for one tool call of the active reply.
     * @returns `null` when the call may proceed, otherwise a tool-error string
     *   (stale reply, or the step budget is exhausted).
     */
    consumeToolCallStep(turnConfig, _name, replyId) {
      if (replyId === null || replyId !== ctx.reply.currentReplyId) {
        return toolError("Reply was interrupted. Discarding stale tool call.");
      }
      const maxSteps = turnConfig?.maxSteps ?? agentConfig.maxSteps;
      ctx.reply.toolCallCount++;
      if (maxSteps !== void 0 && ctx.reply.toolCallCount > maxSteps) {
        log.info("maxSteps exceeded, refusing tool call", {
          toolCallCount: ctx.reply.toolCallCount,
          maxSteps
        });
        return toolError("Maximum tool steps reached. Please respond to the user now.");
      }
      return null;
    },
    /**
     * Invoke a lifecycle hook without awaiting it. Failures are logged and
     * forwarded to the `error` hook; pending results are tracked for
     * `drainHooks`.
     */
    fireHook(name, ...args) {
      if (!hooks) return;
      try {
        const result = hooks.callHook(name, ...args);
        if (result == null) return;
        // Normalise so that a hook returning a plain value is not mistaken
        // for a failure just because it has no `.catch` method.
        const tracked = Promise.resolve(result)
          .catch((err) => reportHookFailure(name, err))
          .finally(() => pendingHooks.delete(tracked));
        pendingHooks.add(tracked);
      } catch (err) {
        reportHookFailure(name, err);
      }
    },
    /** Wait for the hooks that are in flight at the time of the call. */
    async drainHooks() {
      if (pendingHooks.size > 0) await Promise.all([...pendingHooks]);
    },
    /** Append messages, keeping only the newest `maxHistory` when positive. */
    pushMessages(...msgs) {
      ctx.conversationMessages.push(...msgs);
      if (maxHistory > 0 && ctx.conversationMessages.length > maxHistory) {
        ctx.conversationMessages = ctx.conversationMessages.slice(-maxHistory);
      }
    },
    /** Start bookkeeping for a new reply and drop the previous tool chain. */
    beginReply(replyId) {
      ctx.reply = newReplyState(replyId);
      ctx.turnPromise = null;
    },
    /** Reset reply bookkeeping so later tool results are treated as stale. */
    cancelReply() {
      ctx.reply = newReplyState(null);
    },
    /** Queue `p` behind the tool calls already chained for this turn. */
    chainTurn(p) {
      ctx.turnPromise = (ctx.turnPromise ?? Promise.resolve()).then(() => p);
    }
  };
  return ctx;
}
781
/**
 * @internal Not part of the public API. Exposed for testing only.
 * The session code looks up `connectS2s` through this object at call time.
 */
const _internals = {
  connectS2s: connectS2s
};
783
/**
 * Create a resettable idle timer for an S2S session.
 *
 * When the timer fires it logs the timeout, sends an `idle_timeout` event to
 * the client and closes the current S2S handle, if any.
 *
 * @param opts.timeoutMs - Idle period in milliseconds; a value of zero or less
 *   disables the timer and returns no-op controls.
 * @param opts.agent - Agent name included in the log entry.
 * @param opts.log - Logger with an `info` method.
 * @param opts.client - Client sink that receives the `idle_timeout` event.
 * @param opts.ctx - Session context whose `s2s` handle is closed on timeout.
 * @returns An object with `reset()` (re-arm) and `clear()` (disarm).
 */
function createIdleTimer(opts) {
  if (opts.timeoutMs <= 0) {
    return {
      reset() {},
      clear() {}
    };
  }

  let pending = null;

  const onIdle = () => {
    opts.log.info("S2S idle timeout", {
      timeoutMs: opts.timeoutMs,
      agent: opts.agent
    });
    opts.client.event({ type: "idle_timeout" });
    opts.ctx.s2s?.close();
  };

  function reset() {
    if (pending !== null) clearTimeout(pending);
    pending = setTimeout(onIdle, opts.timeoutMs);
  }

  function clear() {
    if (pending === null) return;
    clearTimeout(pending);
    pending = null;
  }

  return { reset, clear };
}
809
/**
 * Finish a tool call: report it to the client and, if the reply that started
 * it is still the active one, queue the result for delivery to S2S.
 *
 * The client event carries a truncated copy of long results, whereas the
 * entry queued in `ctx.reply.pendingTools` keeps the full result string.
 *
 * @param ctx - Session context.
 * @param callId - Identifier of the tool call.
 * @param result - Tool result as a string.
 * @param replyId - Reply that issued the call; `null` is always stale.
 */
function finishToolCall(ctx, callId, result, replyId) {
  let clientResult = result;
  if (result.length > 4e3) clientResult = result.slice(0, MAX_TOOL_RESULT_CHARS);

  ctx.client.event({
    type: "tool_call_done",
    toolCallId: callId,
    result: clientResult
  });

  const stale = replyId === null || replyId !== ctx.reply.currentReplyId;
  if (stale) return;

  const queue = ctx.reply.pendingTools;
  queue.push({
    callId,
    result
  });
  // Keep the queue bounded by dropping the oldest entry.
  if (ctx.maxHistory > 0 && queue.length > ctx.maxHistory) queue.shift();
}
829
/**
 * Handle one tool call requested by the S2S model.
 *
 * Emits `tool_call_start`, resolves the per-turn configuration, checks the
 * step budget, runs the tool and finally records the outcome through
 * `finishToolCall`. Every failure is converted into a tool-error result.
 *
 * @param ctx - Session context.
 * @param detail - Tool call details: `callId`, `name` and parsed `args`.
 */
async function handleToolCall(ctx, detail) {
  const { callId, name, args: parsedArgs } = detail;
  // Captured up front so that an outcome produced after the reply changed is
  // recognised as stale.
  const replyId = ctx.reply.currentReplyId;
  const finish = (outcome) => finishToolCall(ctx, callId, outcome, replyId);

  ctx.client.event({
    type: "tool_call_start",
    toolCallId: callId,
    toolName: name,
    args: parsedArgs
  });

  let turnConfig;
  try {
    turnConfig = await ctx.resolveTurnConfig();
  } catch (err) {
    const msg = `resolveTurnConfig hook error: ${errorMessage(err)}`;
    ctx.log.error(msg);
    finish(toolError(msg));
    return;
  }

  const refusal = ctx.consumeToolCallStep(turnConfig, name, replyId);
  if (refusal !== null) {
    finish(refusal);
    return;
  }

  ctx.log.info("S2S tool call", {
    tool: name,
    callId,
    args: parsedArgs,
    agent: ctx.agent
  });

  let result;
  try {
    result = await ctx.executeTool(name, parsedArgs, ctx.id, ctx.conversationMessages);
  } catch (err) {
    const msg = errorMessage(err);
    ctx.log.error("Tool execution failed", {
      tool: name,
      error: errorDetail(err)
    });
    result = toolError(msg);
  }

  ctx.log.info("S2S tool result", {
    tool: name,
    callId,
    resultLength: result.length
  });
  finish(result);
}
876
/**
 * Handle a final user transcript: forward it to the client as a final
 * `transcript` and a `turn` event, store it in the conversation history and
 * fire the `turn` hook.
 *
 * @param ctx - Session context.
 * @param text - The transcribed user utterance.
 */
function handleUserTranscript(ctx, text) {
  ctx.log.info("S2S user transcript", { text });

  const finalTranscript = {
    type: "transcript",
    text,
    isFinal: true
  };
  const turnEvent = {
    type: "turn",
    text
  };
  ctx.client.event(finalTranscript);
  ctx.client.event(turnEvent);

  const userMessage = {
    role: "user",
    content: text
  };
  ctx.pushMessages(userMessage);
  ctx.fireHook("turn", ctx.id, text, HOOK_TIMEOUT_MS);
}
893
/**
 * Handle a complete agent transcript: always send it to the client as a
 * `chat` event, and record it in the history unless it was interrupted.
 *
 * @param ctx - Session context.
 * @param text - What the agent said.
 * @param interrupted - Truthy when the reply was cut short.
 */
function handleAgentTranscript(ctx, text, interrupted) {
  ctx.client.event({
    type: "chat",
    text
  });
  if (interrupted) return;
  ctx.pushMessages({
    role: "assistant",
    content: text
  });
}
903
/**
 * Handle the end of an S2S reply.
 *
 * An interrupted reply is cancelled and reported to the client. Otherwise,
 * once any chained tool calls have settled, the queued tool results are sent
 * back to S2S; if there are none, the turn is finished by signalling the end
 * of audio and emitting `tts_done`.
 *
 * @param ctx - Session context.
 * @param status - Reply status; `"interrupted"` marks a barge-in.
 */
function handleReplyDone(ctx, status) {
  if (status === "interrupted") {
    ctx.log.info("S2S reply interrupted (barge-in)");
    ctx.cancelReply();
    ctx.client.event({ type: "cancelled" });
    return;
  }

  const doneReplyId = ctx.reply.currentReplyId;

  const sendPending = () => {
    // The reply was superseded while tools were still running. `ctx.reply`
    // now belongs to another reply, so it must be left untouched: clearing its
    // queue here would throw away that reply's tool results.
    if (ctx.reply.currentReplyId !== doneReplyId) return;

    const queued = ctx.reply.pendingTools;
    if (queued.length > 0) {
      for (const tool of queued) ctx.s2s?.sendToolResult(tool.callId, tool.result);
      ctx.reply.pendingTools = [];
      return;
    }

    const stepsUsed = ctx.reply.toolCallCount;
    if (stepsUsed > 0) {
      ctx.log.info("Turn complete", {
        steps: stepsUsed,
        agent: ctx.agent
      });
    }
    ctx.client.playAudioDone();
    ctx.client.event({ type: "tts_done" });
  };

  if (ctx.turnPromise === null) {
    sendPending();
    return;
  }
  // Deferred path: log instead of leaving an unhandled rejection if sending
  // the results throws.
  ctx.turnPromise.then(sendPending).catch((err) => {
    ctx.log.error("Failed to complete reply", { err: errorMessage(err) });
  });
}
932
/**
 * Subscribe the session to the events of an S2S connection handle.
 *
 * Speech, transcript and audio events are relayed to the client; tool calls
 * are chained onto the current turn; errors and session expiry close the
 * handle.
 *
 * @param ctx - Session context.
 * @param handle - S2S connection handle exposing `on(event, listener)` and
 *   `close()`.
 */
function setupListeners(ctx, handle) {
  const relay = (event) => ctx.client.event(event);

  handle.on("ready", ({ sessionId }) => ctx.log.info("S2S session ready", { sessionId }));
  handle.on("sessionExpired", () => {
    ctx.log.info("S2S session expired");
    handle.close();
  });
  handle.on("speechStarted", () => relay({ type: "speech_started" }));
  handle.on("speechStopped", () => relay({ type: "speech_stopped" }));
  handle.on("userTranscriptDelta", ({ text }) => relay({
    type: "transcript",
    text,
    isFinal: false
  }));
  handle.on("userTranscript", ({ text }) => handleUserTranscript(ctx, text));
  handle.on("replyStarted", ({ replyId }) => {
    ctx.beginReply(replyId);
  });
  handle.on("audio", ({ audio }) => ctx.client.playAudioChunk(audio));
  handle.on("agentTranscriptDelta", ({ text }) => relay({
    type: "chat_delta",
    text
  }));
  handle.on("agentTranscript", ({ text, interrupted }) => handleAgentTranscript(ctx, text, interrupted));
  handle.on("toolCall", (detail) => {
    // The handler promise never rejects, so the turn chain stays usable.
    const settled = handleToolCall(ctx, detail).catch((err) => {
      ctx.log.error("Tool call handler failed", { err: errorMessage(err) });
    });
    ctx.chainTurn(settled);
  });
  handle.on("replyDone", ({ status }) => handleReplyDone(ctx, status));
  handle.on("error", ({ code, message }) => {
    ctx.log.error("S2S error", {
      code,
      message
    });
    relay({
      type: "error",
      code: "internal",
      message
    });
    handle.close();
  });
  handle.on("close", () => {
    ctx.log.info("S2S closed");
    // After a reset the previous handle may report its close only once a new
    // connection is already installed. Only the handle that is currently
    // active is allowed to clear the session state.
    if (ctx.s2s !== handle) return;
    ctx.s2s = null;
    ctx.cancelReply();
  });
}
980
/**
 * Create a voice session backed by an S2S connection.
 *
 * Builds the system prompt and tool list, creates the session context and the
 * idle timer, and returns the controller used by the transport layer.
 *
 * @param opts - Session options: `id`, `agent`, `client`, `agentConfig`,
 *   `toolSchemas`, `apiKey`, `s2sConfig`, `executeTool`, and optionally
 *   `createWebSocket`, `hooks`, `logger`, `skipGreeting`, `maxHistory`.
 * @returns An object with `start`, `stop`, `onAudio`, `onAudioReady`,
 *   `onCancel`, `onReset`, `onHistory` and `waitForTurn`.
 */
function createS2sSession(opts) {
  const {
    id,
    agent,
    client,
    toolSchemas,
    apiKey,
    s2sConfig,
    executeTool,
    createWebSocket = defaultCreateS2sWebSocket,
    hooks,
    logger: log = consoleLogger
  } = opts;

  // A session that skips the greeting runs with an empty greeting.
  let agentConfig = opts.agentConfig;
  if (opts.skipGreeting) {
    agentConfig = {
      ...opts.agentConfig,
      greeting: ""
    };
  }

  const systemPrompt = buildSystemPrompt(agentConfig, {
    hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
    voice: true
  });
  const s2sTools = toolSchemas.map(({ name, description, parameters }) => ({
    type: "function",
    name,
    description,
    parameters
  }));

  const sessionAbort = new AbortController();
  const ctx = buildCtx({
    id,
    agent,
    client,
    agentConfig,
    executeTool,
    hooks,
    log,
    maxHistory: opts.maxHistory
  });

  // Zero and non-finite values disable the idle timer.
  const rawTimeout = agentConfig.idleTimeoutMs ?? 3e5;
  const timeoutMs = rawTimeout !== 0 && Number.isFinite(rawTimeout) ? rawTimeout : 0;
  const idle = createIdleTimer({
    timeoutMs,
    agent,
    log,
    client,
    ctx
  });

  // Incremented per connection attempt so a superseded attempt can be dropped.
  let connectGeneration = 0;

  const sessionUpdatePayload = {
    systemPrompt,
    tools: s2sTools
  };
  if (agentConfig.greeting) sessionUpdatePayload.greeting = agentConfig.greeting;

  /** Open an S2S connection and install it as the session's active handle. */
  async function connectAndSetup() {
    const generation = ++connectGeneration;
    try {
      const handle = await _internals.connectS2s({
        apiKey,
        config: s2sConfig,
        createWebSocket,
        logger: log
      });
      const superseded = sessionAbort.signal.aborted || generation !== connectGeneration;
      if (superseded) {
        handle.close();
        return;
      }
      setupListeners(ctx, handle);
      handle.updateSession(sessionUpdatePayload);
      ctx.s2s = handle;
      idle.reset();
    } catch (err) {
      const msg = errorMessage(err);
      log.error("S2S connect failed", { error: errorDetail(err) });
      client.event({
        type: "error",
        code: "internal",
        message: msg
      });
    }
  }

  /** Fire the `connect` hook and open the S2S connection. */
  async function start() {
    ctx.fireHook("connect", id, HOOK_TIMEOUT_MS);
    await connectAndSetup();
  }

  /** Stop the session once: wait for tools and hooks, close, then disconnect. */
  async function stop() {
    if (sessionAbort.signal.aborted) return;
    sessionAbort.abort();
    idle.clear();
    if (ctx.turnPromise !== null) await ctx.turnPromise;
    await ctx.drainHooks();
    ctx.s2s?.close();
    ctx.fireHook("disconnect", id, HOOK_TIMEOUT_MS);
    await ctx.drainHooks();
  }

  /** Forward client audio to S2S; incoming audio counts as activity. */
  function onAudio(data) {
    idle.reset();
    ctx.s2s?.sendAudio(data);
  }

  function onAudioReady() {}

  function onCancel() {
    client.event({ type: "cancelled" });
  }

  /** Drop all conversation state and reconnect to S2S. */
  function onReset() {
    ctx.cancelReply();
    ctx.conversationMessages = [];
    ctx.reply.toolCallCount = 0;
    ctx.turnPromise = null;
    idle.clear();
    ctx.s2s?.close();
    client.event({ type: "reset" });
    connectAndSetup().catch((err) => log.error("S2S reset reconnect failed", { error: errorMessage(err) }));
  }

  /** Seed the history with `role`/`content` pairs supplied by the client. */
  function onHistory(incoming) {
    const restored = incoming.map(({ role, content }) => ({
      role,
      content
    }));
    ctx.pushMessages(...restored);
  }

  /** Promise that settles when the tool calls chained so far are done. */
  function waitForTurn() {
    return ctx.turnPromise ?? Promise.resolve();
  }

  return {
    start,
    stop,
    onAudio,
    onAudioReady,
    onCancel,
    onReset,
    onHistory,
    waitForTurn
  };
}
1092
+ //#endregion
1093
+ //#region unstorage-kv.ts
1094
+ /**
1095
+ * Key-value store backed by unstorage.
1096
+ *
1097
+ * Works with any unstorage driver (memory, fs, S3/R2, etc.).
1098
+ */
1099
/**
 * Create a key-value store on top of an unstorage instance.
 *
 * @param options.storage - The unstorage instance to use.
 * @param options.prefix - Optional key prefix; when set, all operations go
 *   through a prefixed view of the storage.
 * @returns A KV object with `get`, `set`, `delete`, `list`, `keys` and
 *   `close`.
 *
 * @example
 * ```ts
 * import { createStorage } from "unstorage";
 * import { createUnstorageKv } from "@alexkroman1/aai/unstorage-kv";
 *
 * const kv = createUnstorageKv({ storage: createStorage() });
 * await kv.set("greeting", "hello");
 * const value = await kv.get("greeting"); // "hello"
 * ```
 */
function createUnstorageKv(options) {
  let store = options.storage;
  if (options.prefix) store = prefixStorage(options.storage, options.prefix);

  const byLocale = (a, b) => a.localeCompare(b);

  return {
    /** Read a value; resolves to `null` when the key has no value. */
    async get(key) {
      const found = await store.getItem(key);
      return found ?? null;
    },
    /**
     * Store a value. Rejects when the JSON form is longer than the size limit.
     * A positive `expireIn` (milliseconds) is passed on as a TTL in whole
     * seconds, rounded up.
     */
    async set(key, value, setOptions) {
      const serializedLength = JSON.stringify(value).length;
      if (serializedLength > 65536) throw new Error(`Value exceeds max size of ${MAX_VALUE_SIZE} bytes`);
      const storable = value;
      const expiresInMs = setOptions?.expireIn;
      if (expiresInMs && expiresInMs > 0) {
        await store.setItem(key, storable, { ttl: Math.ceil(expiresInMs / 1e3) });
        return;
      }
      await store.setItem(key, storable);
    },
    /** Remove a single key or an array of keys. */
    async delete(keys) {
      const targets = Array.isArray(keys) ? keys : [keys];
      await Promise.all(targets.map((k) => store.removeItem(k)));
    },
    /** List key/value entries under a prefix, skipping keys with no value. */
    async list(listPrefix, listOptions) {
      const matchingKeys = await store.getKeys(listPrefix);
      const entries = [];
      for (const key of matchingKeys) {
        const value = await store.getItem(key);
        if (value == null) continue;
        entries.push({
          key,
          value
        });
      }
      return sortAndPaginate(entries, listOptions);
    },
    /** Return all keys, optionally filtered by a glob pattern, sorted. */
    async keys(pattern) {
      const everyKey = await store.getKeys();
      const selected = pattern ? everyKey.filter((key) => matchGlob(key, pattern)) : everyKey;
      return selected.sort(byLocale);
    },
    /** Dispose the underlying storage. */
    close() {
      store.dispose();
    }
  };
}
1153
+ //#endregion
1154
+ //#region ws-handler.ts
1155
+ /**
1156
+ * WebSocket session lifecycle handler.
1157
+ *
1158
+ * Audio validation is handled at the host transport layer (see server.ts).
1159
+ */
1160
/**
 * Wrap a WebSocket in the sink used to talk to the client.
 *
 * Events are serialised to JSON and sent as text; audio chunks are handed to
 * the socket unchanged. Nothing is sent unless the socket's `readyState` is 1.
 *
 * @param ws - The WebSocket to write to.
 * @param log - Logger; its optional `debug` method is used for send failures.
 * @returns A sink with an `open` getter and `event`, `playAudioChunk` and
 *   `playAudioDone` methods.
 */
function createClientSink(ws, log) {
  const isOpen = () => ws.readyState === 1;

  /** Send a frame, dropping it when the socket is not open or send throws. */
  const safeSend = (frame) => {
    try {
      if (isOpen()) ws.send(frame);
    } catch (err) {
      log.debug?.("safeSend: socket closed between readyState check and send", { error: errorMessage(err) });
    }
  };

  const sendJson = (value) => safeSend(JSON.stringify(value));

  return {
    get open() {
      return isOpen();
    },
    event(e) {
      sendJson(e);
    },
    playAudioChunk(chunk) {
      safeSend(chunk);
    },
    playAudioDone() {
      sendJson({ type: "audio_done" });
    }
  };
}
1191
/**
 * Pass a binary WebSocket payload to the session as audio.
 *
 * @param data - Message payload. A `Uint8Array` is forwarded as is and an
 *   `ArrayBuffer` is wrapped in a `Uint8Array` view.
 * @param session - Session whose `onAudio` receives the bytes.
 * @returns `true` when the payload was binary and forwarded, else `false`.
 */
function handleBinaryAudio(data, session) {
  let bytes = null;
  if (data instanceof Uint8Array) bytes = data;
  else if (data instanceof ArrayBuffer) bytes = new Uint8Array(data);

  if (bytes === null) return false;
  session.onAudio(bytes);
  return true;
}
1202
/**
 * Parse a text frame from the client and dispatch it to the session.
 *
 * Non-string payloads are ignored. Frames that are not valid JSON or do not
 * match the client message schema are logged as warnings and dropped.
 *
 * @param data - Raw message payload.
 * @param session - Session receiving the decoded message.
 * @param log - Logger used for warnings.
 * @param ctx - Extra fields merged into every log entry.
 * @param sid - Short session id added to every log entry.
 */
function handleTextMessage(data, session, log, ctx, sid) {
  if (typeof data !== "string") return;

  let json;
  try {
    json = JSON.parse(data);
  } catch {
    log.warn("Invalid JSON from client", {
      ...ctx,
      sid
    });
    return;
  }

  const parsed = ClientMessageSchema.safeParse(json);
  if (!parsed.success) {
    log.warn("Invalid client message", {
      ...ctx,
      sid,
      error: parsed.error.message
    });
    return;
  }

  const msg = parsed.data;
  const kind = msg.type;
  if (kind === "audio_ready") session.onAudioReady();
  else if (kind === "cancel") session.onCancel();
  else if (kind === "reset") session.onReset();
  else if (kind === "history") session.onHistory(msg.messages);
  // Any other message type is ignored.
}
1240
/**
 * Attach the session lifecycle to a WebSocket that carries JSON text frames
 * and binary audio frames.
 *
 * Connection flow:
 * 1. WebSocket opens → server sends `{ type: "config", ...readyConfig, sessionId }`
 * 2. The session is created, registered in `opts.sessions` and started
 * 3. Messages received before the start completes are buffered and replayed
 *    in order once the session is ready
 * 4. When the socket closes, the session is stopped and unregistered
 *
 * If starting the session fails or times out, the session is unregistered and
 * stopped, so that anything it already acquired is released.
 *
 * @param ws - The WebSocket to wire up.
 * @param opts - `sessions` (registry map), `createSession(sessionId, client)`,
 *   `readyConfig`, and optionally `logger`, `logContext`, `resumeFrom`,
 *   `sessionStartTimeoutMs` (default 10000), `onOpen` and `onClose`.
 */
function wireSessionSocket(ws, opts) {
  const { sessions, logger: log = consoleLogger } = opts;
  const sessionId = opts.resumeFrom ?? crypto.randomUUID();
  const sid = sessionId.slice(0, 8);
  const ctx = opts.logContext ?? {};
  let session = null;
  /** Set to true once session.start() resolves. Messages arriving before
   * this flag is set are buffered and replayed once the session is ready,
   * preventing audio/text from being dispatched to a half-initialized session. */
  let sessionReady = false;
  let messageBuffer = [];
  /** True once the close handler has asked the session to stop. */
  let stopRequested = false;

  /** Route one message event to the audio or the text handler. */
  function dispatch(event, target) {
    const { data } = event;
    if (handleBinaryAudio(data, target)) return;
    handleTextMessage(data, target, log, ctx, sid);
  }

  /** Replay the buffered messages, at most once. */
  function drainBuffer() {
    if (!(session && messageBuffer)) return;
    const buffered = messageBuffer;
    messageBuffer = null;
    for (const event of buffered) dispatch(event, session);
  }

  /** Log a failure of `session.stop()`. */
  function logStopFailure(err) {
    log.error("Session stop failed", {
      ...ctx,
      sid,
      error: errorDetail(err)
    });
  }

  function onOpen() {
    opts.onOpen?.();
    log.info("Session connected", {
      ...ctx,
      sid
    });
    const client = createClientSink(ws, log);
    const created = opts.createSession(sessionId, client);
    session = created;
    sessions.set(sessionId, created);
    ws.send(JSON.stringify({
      type: "config",
      ...opts.readyConfig,
      sessionId
    }));
    const timeoutMs = opts.sessionStartTimeoutMs ?? 1e4;
    pTimeout(created.start(), {
      milliseconds: timeoutMs,
      message: `session.start() timed out after ${timeoutMs}ms`
    }).then(() => {
      log.info("Session ready", {
        ...ctx,
        sid
      });
      sessionReady = true;
      drainBuffer();
    }).catch((err) => {
      log.error("Session start failed", {
        ...ctx,
        sid,
        error: errorDetail(err)
      });
      sessions.delete(sessionId);
      session = null;
      messageBuffer = null;
      // The close handler cannot stop this session any more because `session`
      // is now null, so stop it here. Without this, a start that merely timed
      // out could still complete later and keep its connection alive.
      if (!stopRequested) {
        stopRequested = true;
        Promise.resolve().then(() => created.stop()).catch(logStopFailure);
      }
    });
  }

  if (ws.readyState === 1) onOpen();
  else ws.addEventListener("open", onOpen);

  ws.addEventListener("message", (event) => {
    if (!session) return;
    if (!sessionReady) {
      messageBuffer?.push(event);
      return;
    }
    dispatch(event, session);
  });

  ws.addEventListener("close", () => {
    log.info("Session disconnected", {
      ...ctx,
      sid
    });
    if (session) {
      stopRequested = true;
      session.stop().catch(logStopFailure).finally(() => {
        sessions.delete(sessionId);
      });
    }
    opts.onClose?.();
  });

  ws.addEventListener("error", (ev) => {
    const msg = typeof ev.message === "string" ? ev.message : "WebSocket error";
    log.error("WebSocket error", {
      ...ctx,
      sid,
      error: msg
    });
  });
}
1343
+ //#endregion
1344
+ //#region direct-executor.ts
1345
+ /**
1346
+ * Agent runtime — the execution engine for voice agents.
1347
+ *
1348
+ * {@link createRuntime} builds the single execution engine used by both
1349
+ * self-hosted servers and the platform sandbox. It wires up tool execution,
1350
+ * lifecycle hooks, and session management.
1351
+ */
1352
/** Return a promise that resolves from a zero-delay timer callback. */
const yieldTick = () => {
  return new Promise((resolve) => {
    setTimeout(resolve, 0);
  });
};
1353
/**
 * Build the context object passed to a tool's `execute` function.
 *
 * @param opts - Source values: `env`, `state`, `kv`, `messages`, `fetch` and
 *   `sessionId`.
 * @returns A context with a shallow copy of `env`, `state` (default `{}`),
 *   `messages` (default `[]`), `fetch` (default `globalThis.fetch`),
 *   `sessionId` (default `""`) and a `kv` getter that throws when no KV store
 *   was provided.
 */
function buildToolContext(opts) {
  const store = opts.kv;
  return {
    env: Object.assign({}, opts.env),
    state: opts.state ?? {},
    get kv() {
      if (!store) throw new Error("KV not available");
      return store;
    },
    messages: opts.messages ?? [],
    fetch: opts.fetch ?? globalThis.fetch,
    sessionId: opts.sessionId ?? ""
  };
}
1367
/**
 * Validate the arguments of a tool call, run the tool with a timeout and
 * return its result as a string.
 *
 * @param name - Tool name, used in error messages and logs.
 * @param args - Raw arguments; validated with `tool.parameters`, or with the
 *   empty-parameters schema when the tool declares none.
 * @param options - `tool` (with `execute` and optional `parameters`), the
 *   values used to build the tool context, and an optional `logger`.
 * @returns Always a string: a string result unchanged, `"null"` for a nullish
 *   result or one that has no JSON representation, the JSON encoding of any
 *   other result, or a tool-error string when validation or execution fails.
 */
async function executeToolCall(name, args, options) {
  const { tool } = options;
  const parsed = (tool.parameters ?? EMPTY_PARAMS).safeParse(args);
  if (!parsed.success) {
    const issues = (parsed.error?.issues ?? [])
      .map((i) => `${i.path.map(String).join(".")}: ${i.message}`)
      .join(", ");
    return toolError(`Invalid arguments for tool "${name}": ${issues}`);
  }
  try {
    const ctx = buildToolContext(options);
    await yieldTick();
    const result = await pTimeout(Promise.resolve(tool.execute(parsed.data, ctx)), {
      milliseconds: TOOL_EXECUTION_TIMEOUT_MS,
      message: `Tool "${name}" timed out after ${TOOL_EXECUTION_TIMEOUT_MS}ms`
    });
    await yieldTick();
    if (result == null) return "null";
    if (typeof result === "string") return result;
    // JSON.stringify returns undefined for values such as functions and
    // symbols. Callers read `.length` of the result, so fall back to "null"
    // rather than handing them a non-string.
    return JSON.stringify(result) ?? "null";
  } catch (err) {
    const log = options.logger;
    if (log) log.warn("Tool execution failed", {
      tool: name,
      error: errorDetail(err)
    });
    else console.warn(`[tool-executor] Tool execution failed: ${name}`, err);
    return toolError(errorMessage(err));
  }
}
1391
/**
 * Create the default KV store used when the runtime is given none: an
 * unstorage-backed KV over a storage instance created with default settings.
 */
function createLocalKv() {
  const storage = createStorage();
  return createUnstorageKv({ storage });
}
1395
/**
 * Create an agent runtime — the execution engine for a voice agent.
 *
 * Unless `executeTool`, `hooks` and `toolSchemas` are all supplied in `opts`,
 * the runtime merges built-in and custom tool definitions, derives the tool
 * schemas, keeps per-session state and creates the agent's lifecycle hooks.
 *
 * @param opts - Runtime configuration: `agent`, `env`, and optionally `kv`,
 *   `createWebSocket`, `logger`, `s2sConfig`, `sessionStartTimeoutMs`,
 *   `shutdownTimeoutMs`, `executeTool`, `hooks` and `toolSchemas`.
 * @returns The runtime: `executeTool`, `hooks`, `toolSchemas`,
 *   `createSession`, `startSession`, `shutdown` and `readyConfig`.
 *
 * @public
 */
function createRuntime(opts) {
  const {
    agent,
    env,
    kv = createLocalKv(),
    createWebSocket,
    logger = consoleLogger,
    s2sConfig = DEFAULT_S2S_CONFIG,
    sessionStartTimeoutMs,
    shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS
  } = opts;
  const agentConfig = toAgentConfig(agent);
  const sessions = /* @__PURE__ */ new Map();
  const readyConfig = buildReadyConfig(s2sConfig);

  /** Build the tool executor, hooks and schemas from the agent definition. */
  function buildDefaultExecution() {
    const allTools = {
      ...getBuiltinToolDefs(agent.builtinTools ?? []),
      ...agent.tools
    };
    const customSchemas = agentToolsToSchemas(agent.tools ?? {});
    const builtinSchemas = getBuiltinToolSchemas(agent.builtinTools ?? []);
    const schemas = [...customSchemas, ...builtinSchemas];

    // Session state is created on first use when the agent defines a factory.
    const stateMap = /* @__PURE__ */ new Map();
    const getState = (sid) => {
      if (!stateMap.has(sid) && agent.state) stateMap.set(sid, agent.state());
      return stateMap.get(sid) ?? {};
    };
    const frozenEnv = Object.freeze({ ...env });

    function makeHookContext(sessionId) {
      return {
        env: frozenEnv,
        state: getState(sessionId),
        sessionId,
        get kv() {
          return kv;
        },
        fetch: globalThis.fetch
      };
    }

    const runTool = async (name, args, sessionId, messages) => {
      const tool = allTools[name];
      if (!tool) return toolError(`Unknown tool: ${name}`);
      return executeToolCall(name, args, {
        tool,
        env: frozenEnv,
        state: getState(sessionId ?? ""),
        sessionId: sessionId ?? "",
        kv,
        messages,
        logger,
        fetch: globalThis.fetch
      });
    };

    const agentHooks = createAgentHooks({
      agent,
      makeCtx: makeHookContext
    });
    // Release the session state once the session has disconnected.
    agentHooks.hook("disconnect", async (sessionId) => {
      stateMap.delete(sessionId);
    });

    return {
      executeTool: runTool,
      hooks: agentHooks,
      toolSchemas: schemas
    };
  }

  const hasOverrides = opts.executeTool && opts.hooks && opts.toolSchemas;
  const { executeTool, hooks, toolSchemas } = hasOverrides
    ? {
        executeTool: opts.executeTool,
        hooks: opts.hooks,
        toolSchemas: opts.toolSchemas
      }
    : buildDefaultExecution();

  /** Create a session for one client using the runtime's configuration. */
  function createSession(sessionOpts) {
    const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
    const sessionConfig = {
      id: sessionOpts.id,
      agent: sessionOpts.agent,
      client: sessionOpts.client,
      agentConfig,
      toolSchemas,
      apiKey,
      s2sConfig,
      executeTool
    };
    if (createWebSocket) sessionConfig.createWebSocket = createWebSocket;
    sessionConfig.hooks = hooks;
    sessionConfig.skipGreeting = sessionOpts.skipGreeting ?? false;
    sessionConfig.logger = logger;
    if (sessionOpts.resumeFrom) sessionConfig.resumeFrom = sessionOpts.resumeFrom;
    return createS2sSession(sessionConfig);
  }

  /** Wire a WebSocket to a new session managed by this runtime. */
  function startSession(ws, startOpts) {
    const resumeFrom = startOpts?.resumeFrom;
    const socketOpts = {
      sessions,
      createSession: (sid, client) => createSession({
        id: sid,
        agent: agent.name,
        client,
        skipGreeting: startOpts?.skipGreeting ?? false,
        ...resumeFrom ? { resumeFrom } : {}
      }),
      readyConfig,
      logger
    };
    if (startOpts?.logContext) socketOpts.logContext = startOpts.logContext;
    if (startOpts?.onOpen) socketOpts.onOpen = startOpts.onOpen;
    if (startOpts?.onClose) socketOpts.onClose = startOpts.onClose;
    if (sessionStartTimeoutMs !== void 0) socketOpts.sessionStartTimeoutMs = sessionStartTimeoutMs;
    if (resumeFrom) socketOpts.resumeFrom = resumeFrom;
    wireSessionSocket(ws, socketOpts);
  }

  /** Stop every session, waiting at most `shutdownTimeoutMs`. */
  async function shutdown() {
    if (sessions.size === 0) return;
    let timer;
    const timeout = new Promise((resolve) => {
      timer = setTimeout(resolve, shutdownTimeoutMs, "timeout");
    });
    const stopping = [...sessions.values()].map((s) => s.stop());
    const graceful = Promise.allSettled(stopping).then((results) => {
      for (const r of results) {
        if (r.status !== "rejected") continue;
        logger.warn(`Session stop failed during shutdown: ${r.reason}`);
      }
      return "done";
    });
    const outcome = await Promise.race([graceful, timeout]);
    if (timer) clearTimeout(timer);
    if (outcome === "timeout") {
      logger.warn(`Shutdown timeout (${shutdownTimeoutMs}ms) exceeded — force-closing ${sessions.size} remaining session(s)`);
    }
    sessions.clear();
  }

  return {
    executeTool,
    hooks,
    toolSchemas,
    createSession,
    startSession,
    shutdown,
    readyConfig
  };
}
1529
+ //#endregion
1530
+ export { consoleLogger as _, _internals as a, buildSystemPrompt as c, AgentConfigSchema as d, EMPTY_PARAMS as f, DEFAULT_S2S_CONFIG as g, toAgentConfig as h, createUnstorageKv as i, connectS2s as l, agentToolsToSchemas as m, executeToolCall as n, buildCtx as o, ToolSchemaSchema as p, wireSessionSocket as r, createS2sSession as s, createRuntime as t, defaultCreateS2sWebSocket as u, jsonLogger as v };