@alexkroman1/aai 0.10.2 → 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/_internal-types.d.ts +8 -1
  2. package/dist/_runtime-conformance.d.ts +64 -0
  3. package/dist/_test-utils.d.ts +70 -0
  4. package/dist/_utils.d.ts +1 -8
  5. package/dist/_utils.js +1 -13
  6. package/dist/builtin-tools.d.ts +1 -5
  7. package/dist/constants-BbAOvKl_.js +47 -0
  8. package/dist/constants.d.ts +44 -0
  9. package/dist/direct-executor-BfHrDdPL.js +1589 -0
  10. package/dist/direct-executor.d.ts +90 -31
  11. package/dist/hooks.d.ts +44 -0
  12. package/dist/hooks.js +58 -0
  13. package/dist/index.d.ts +1 -2
  14. package/dist/index.js +2 -2
  15. package/dist/internal.d.ts +19 -0
  16. package/dist/internal.js +209 -0
  17. package/dist/kv.d.ts +1 -1
  18. package/dist/kv.js +5 -4
  19. package/dist/matchers.js +1 -1
  20. package/dist/protocol.d.ts +3 -29
  21. package/dist/protocol.js +2 -24
  22. package/dist/server.d.ts +25 -38
  23. package/dist/server.js +114 -138
  24. package/dist/session.d.ts +65 -44
  25. package/dist/{testing-MRl3SXsI.js → testing-BonJtfHJ.js} +26 -46
  26. package/dist/testing.d.ts +9 -14
  27. package/dist/testing.js +2 -2
  28. package/dist/types.d.ts +24 -226
  29. package/dist/types.js +6 -22
  30. package/dist/types.test-d.d.ts +7 -0
  31. package/dist/unstorage-kv.d.ts +33 -0
  32. package/dist/vite-plugin.d.ts +15 -0
  33. package/dist/vite-plugin.js +82 -0
  34. package/dist/ws-handler.d.ts +1 -2
  35. package/package.json +29 -84
  36. package/dist/_internal-types.js +0 -61
  37. package/dist/_session-ctx.d.ts +0 -73
  38. package/dist/_session-otel.d.ts +0 -43
  39. package/dist/_session-persist.d.ts +0 -30
  40. package/dist/_ssrf.d.ts +0 -30
  41. package/dist/_ssrf.js +0 -123
  42. package/dist/direct-executor-Ca0wt5H0.js +0 -572
  43. package/dist/middleware-core.d.ts +0 -47
  44. package/dist/middleware-core.js +0 -107
  45. package/dist/middleware.d.ts +0 -37
  46. package/dist/runtime.js +0 -53
  47. package/dist/s2s.js +0 -272
  48. package/dist/session-BkN9u0ni.js +0 -683
  49. package/dist/session.js +0 -2
  50. package/dist/sqlite-kv.d.ts +0 -34
  51. package/dist/sqlite-kv.js +0 -133
  52. package/dist/sqlite-vector.d.ts +0 -58
  53. package/dist/sqlite-vector.js +0 -149
  54. package/dist/telemetry.d.ts +0 -49
  55. package/dist/telemetry.js +0 -95
  56. package/dist/vector.d.ts +0 -85
  57. package/dist/vector.js +0 -49
  58. package/dist/worker-entry.d.ts +0 -47
  59. package/dist/worker-entry.js +0 -70
  60. package/dist/ws-handler.js +0 -207
@@ -0,0 +1,1589 @@
1
+ import { BuiltinToolSchema, DEFAULT_INSTRUCTIONS, ToolChoiceSchema, defineTool } from "./types.js";
2
+ import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_HTML_BYTES, f as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, l as HOOK_TIMEOUT_MS, m as MAX_VALUE_SIZE, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_TOOL_RESULT_CHARS, s as DEFAULT_TTS_SAMPLE_RATE } from "./constants-BbAOvKl_.js";
3
+ import { errorDetail, errorMessage, isReadOnlyFsOp, toolError } from "./_utils.js";
4
+ import { callResolveTurnConfig, createAgentHooks } from "./hooks.js";
5
+ import { ClientMessageSchema, buildReadyConfig } from "./protocol.js";
6
+ import { matchGlob, sortAndPaginate } from "./kv.js";
7
+ import { z } from "zod";
8
+ import WsWebSocket from "ws";
9
+ import pTimeout from "p-timeout";
10
+ import { createStorage, prefixStorage } from "unstorage";
11
+ import { createNanoEvents } from "nanoevents";
12
+ //#region runtime.ts
13
+ /**
14
+ * Runtime dependencies injected into the session pipeline.
15
+ *
16
+ * Defines the {@link Logger} interface, a default {@link consoleLogger},
17
+ * and the {@link S2SConfig} for Speech-to-Speech endpoint configuration.
18
+ */
19
/**
 * Logger that forwards to the global `console`.
 *
 * Each method takes a message and an optional context value; the context is
 * passed to the console only when it is truthy.
 */
const consoleLogger = {
  info(msg, ctx) {
    if (ctx) console.log(msg, ctx);
    else console.log(msg);
  },
  warn(msg, ctx) {
    if (ctx) console.warn(msg, ctx);
    else console.warn(msg);
  },
  error(msg, ctx) {
    if (ctx) console.error(msg, ctx);
    else console.error(msg);
  },
  debug(msg, ctx) {
    if (ctx) console.debug(msg, ctx);
    else console.debug(msg);
  },
};
26
/**
 * Create a log function for one severity level that emits single-line JSON.
 *
 * Every entry starts with `timestamp` (ISO-8601), `level` and `msg`; fields of
 * the optional context object are then merged on top of those via
 * `Object.assign`. "error" and "warn" entries are written to stderr, all other
 * levels to stdout.
 *
 * @param level - Severity label recorded in each entry.
 * @returns A `(msg, ctx?)` function that writes one JSON line per call.
 */
function jsonLog(level) {
  const usesStderr = level === "error" || level === "warn";
  return (msg, ctx) => {
    const entry = {
      timestamp: new Date().toISOString(),
      level,
      msg,
    };
    if (ctx) {
      Object.assign(entry, ctx);
    }
    // Resolve the stream at call time, not when the logger is created.
    const stream = usesStderr ? process.stderr : process.stdout;
    stream.write(`${JSON.stringify(entry)}\n`);
  };
}

/** Structured JSON logger exposing the four standard levels. */
const jsonLogger = Object.fromEntries(
  ["info", "warn", "error", "debug"].map((level) => [level, jsonLog(level)]),
);
48
/**
 * Fallback Speech-to-Speech connection settings: the realtime WebSocket URL
 * plus the input and output sample rates taken from the shared
 * STT/TTS sample-rate constants.
 */
const DEFAULT_S2S_CONFIG = {
  wssUrl: "wss://speech-to-speech.us.assemblyai.com/v1/realtime",
  inputSampleRate: DEFAULT_STT_SAMPLE_RATE,
  outputSampleRate: DEFAULT_TTS_SAMPLE_RATE,
};
54
+ //#endregion
55
+ //#region _internal-types.ts
56
/**
 * Zod schema for the serializable (JSON-safe) part of an agent definition,
 * i.e. the fields that can be passed between the worker and the host process.
 */
const AgentConfigSchema = z.object({
  // Required fields; `name` must be non-empty.
  name: z.string().min(1),
  instructions: z.string(),
  greeting: z.string(),

  // Optional fields.
  sttPrompt: z.string().optional(),
  maxSteps: z.number().int().positive().optional(),
  toolChoice: ToolChoiceSchema.optional(),
  builtinTools: z.array(BuiltinToolSchema).readonly().optional(),
  idleTimeoutMs: z.number().nonnegative().optional(),
});
72
/**
 * Pick the serializable {@link AgentConfig} fields out of a source object.
 *
 * `name`, `instructions` and `greeting` are always copied. Optional fields are
 * copied only when defined; `maxSteps` is additionally skipped when it is a
 * function, and `builtinTools` is shallow-copied into a new array when truthy.
 *
 * @param src - Object carrying at least the agent config fields.
 * @returns A new plain config object.
 */
function toAgentConfig(src) {
  const {
    name,
    instructions,
    greeting,
    sttPrompt,
    maxSteps,
    toolChoice,
    builtinTools,
    idleTimeoutMs,
  } = src;
  return {
    name,
    instructions,
    greeting,
    ...(sttPrompt !== undefined && { sttPrompt }),
    ...(maxSteps !== undefined && typeof maxSteps !== "function" && { maxSteps }),
    ...(toolChoice !== undefined && { toolChoice }),
    ...(builtinTools && { builtinTools: [...builtinTools] }),
    ...(idleTimeoutMs !== undefined && { idleTimeoutMs }),
  };
}
86
/**
 * Zod schema for a tool definition in its serialized (wire) form.
 *
 * `name` and `description` must be non-empty strings; `parameters` is a
 * string-keyed record intended to hold a JSON Schema object.
 */
const ToolSchemaSchema = z.object({
  name: z.string().min(1),
  description: z.string().min(1),
  parameters: z.record(z.string(), z.unknown()),
});
97
/** Parameter schema used for tools that declare no parameters. */
const EMPTY_PARAMS = z.object({});

/**
 * Serialize a record of tool definitions into wire-format tool schemas.
 *
 * For each `[name, def]` entry the Zod `parameters` schema (or
 * {@link EMPTY_PARAMS} when absent) is converted with `z.toJSONSchema`.
 *
 * @param tools - Record mapping tool name to tool definition.
 * @returns Array of `{ name, description, parameters }` objects, in entry order.
 */
function agentToolsToSchemas(tools) {
  const schemas = [];
  for (const [name, def] of Object.entries(tools)) {
    const { description, parameters } = def;
    schemas.push({
      name,
      description,
      parameters: z.toJSONSchema(parameters ?? EMPTY_PARAMS),
    });
  }
  return schemas;
}
112
+ //#endregion
113
+ //#region _run-code.ts
114
+ /**
115
+ * run_code built-in tool — executes user JavaScript in a fresh secure-exec
116
+ * V8 isolate with no network, filesystem writes, or env access.
117
+ */
118
/** Parameter schema for `run_code`: a single `code` string. */
const runCodeParams = z.object({
  code: z.string().describe("JavaScript code to execute. Use console.log() for output."),
});

/**
 * Build the `run_code` tool definition.
 *
 * The tool hands the submitted `code` string to `executeInIsolate`, which runs
 * it in a freshly created secure-exec runtime configured with a read-only
 * filesystem and with network, child-process and env access denied. The
 * runtime is disposed after each call.
 *
 * @returns Tool definition with `description`, `parameters` and `execute`.
 */
function createRunCode() {
  const description = "Execute JavaScript code in a secure sandbox and return the output. Use this for calculations, data transformations, string manipulation, or any task that benefits from running code. Output is captured from console.log(). No network or filesystem access.";
  return {
    description,
    parameters: runCodeParams,
    async execute({ code }) {
      return executeInIsolate(code);
    },
  };
}
142
/** Cached promise for the dynamically imported `secure-exec` module. */
let _secureExecPromise;

/**
 * Import `secure-exec` on first use and reuse the same promise afterwards,
 * so the module is not loaded as a top-level side effect.
 *
 * @returns Promise for the `secure-exec` module namespace.
 */
function getSecureExec() {
  if (_secureExecPromise === undefined || _secureExecPromise === null) {
    _secureExecPromise = import("secure-exec");
  }
  return _secureExecPromise;
}
148
/**
 * Harness module executed inside the sandbox.
 *
 * It reads the user code from /app/user-code.js, runs it as the body of an
 * async function whose `console` parameter captures all output, and writes a
 * single JSON object to stdout: `{ ok: true, result }` or `{ ok: false, error }`.
 */
const RUN_CODE_HARNESS = `
import { readFileSync } from "node:fs";

const __output = [];
const __capture = (...args) => __output.push(args.map(String).join(" "));
const __console = {
log: __capture, info: __capture, warn: __capture,
error: __capture, debug: __capture,
};
try {
const __userCode = readFileSync("/app/user-code.js", "utf8");
const __AsyncFn = Object.getPrototypeOf(async function(){}).constructor;
const __fn = new __AsyncFn("console", __userCode);
await __fn(__console);
const result = __output.join("\\n").trim();
process.stdout.write(JSON.stringify({ ok: true, result: result || "Code ran successfully (no output)" }));
} catch (err) {
process.stdout.write(JSON.stringify({ ok: false, error: String(err?.message ?? err) }));
}
`;

/** Shape of the JSON object the harness writes to stdout. */
const IsolateOutputSchema = z.object({
  ok: z.boolean(),
  result: z.string().optional(),
  error: z.string().optional(),
});

/**
 * Turn the collected stdout/stderr of a harness run into a tool result.
 *
 * @param stdout - Everything the run wrote to stdout.
 * @param stderr - Everything the run wrote to stderr.
 * @returns The result string on success, or an `{ error }` object.
 */
function parseIsolateOutput(stdout, stderr) {
  // No stdout: report stderr if there is any, otherwise treat it as a timeout.
  if (!stdout) {
    return { error: stderr ? stderr.trim() : "Code execution timed out" };
  }
  let envelope;
  try {
    envelope = IsolateOutputSchema.parse(JSON.parse(stdout));
  } catch {
    // stdout was not the harness envelope; pass the raw text through.
    return stdout.trim() || "Code ran successfully (no output)";
  }
  if (!envelope.ok) {
    return { error: envelope.error ?? "Unknown error" };
  }
  return envelope.result ?? "Code ran successfully (no output)";
}
187
/**
 * Exported for testing — execute user code in a fresh secure-exec runtime.
 *
 * The harness and the user code are written to an in-memory filesystem, the
 * harness is executed with a read-only filesystem and with network,
 * child-process and env access denied, and whatever it prints is parsed by
 * `parseIsolateOutput`.
 *
 * Waiting happens in two stages: first until any stdio arrives or
 * `RUN_CODE_TIMEOUT_MS` elapses, then up to 200 ms more for the exec promise
 * to settle so trailing output is collected.
 *
 * @param code - JavaScript source to run.
 * @returns The result string, or `{ error }` on failure or timeout.
 */
async function executeInIsolate(code) {
  const { createInMemoryFileSystem, createNodeDriver, createNodeRuntimeDriverFactory, NodeRuntime } = await getSecureExec();
  const fs = createInMemoryFileSystem();
  await fs.writeFile("/app/harness.js", RUN_CODE_HARNESS);
  await fs.writeFile("/app/user-code.js", code);
  const stdoutChunks = [];
  const stderrChunks = [];
  let resolveOutput = null;
  const outputReady = new Promise((r) => {
    resolveOutput = r;
  });
  // Track every timer so none outlives this call (a fast run must not leave a
  // RUN_CODE_TIMEOUT_MS timer pending on the event loop).
  const pendingTimers = [];
  const delay = (ms) => new Promise((resolve) => {
    pendingTimers.push(setTimeout(resolve, ms));
  });
  const runtime = new NodeRuntime({
    systemDriver: createNodeDriver({
      filesystem: fs,
      permissions: {
        fs: (req) => isReadOnlyFsOp(req.op) ? { allow: true } : {
          allow: false,
          reason: "Filesystem is read-only"
        },
        network: () => ({
          allow: false,
          reason: "Network access is disabled in run_code"
        }),
        childProcess: () => ({
          allow: false,
          reason: "Subprocess spawning is disabled"
        }),
        env: () => ({
          allow: false,
          reason: "Env access is disabled in run_code"
        })
      }
    }),
    runtimeDriverFactory: createNodeRuntimeDriverFactory(),
    // NOTE(review): unit is presumably megabytes — confirm against secure-exec docs.
    memoryLimit: 32,
    onStdio(event) {
      if (event.channel === "stdout") stdoutChunks.push(event.message);
      if (event.channel === "stderr") stderrChunks.push(event.message);
      resolveOutput?.();
    }
  });
  try {
    // Start inside the try so a synchronous throw still reaches dispose(), and
    // attach the rejection handler immediately so a rejection during the first
    // wait is never reported as unhandled.
    const execSettled = runtime.exec("import \"/app/harness.js\";", { cwd: "/app" }).catch(() => {});
    await Promise.race([outputReady, delay(RUN_CODE_TIMEOUT_MS)]);
    await Promise.race([execSettled, delay(200)]);
    return parseIsolateOutput(stdoutChunks.join(""), stderrChunks.join(""));
  } catch (err) {
    return { error: errorMessage(err) };
  } finally {
    for (const timer of pendingTimers) clearTimeout(timer);
    runtime.dispose();
  }
}
242
+ //#endregion
243
+ //#region memory-tools.ts
244
+ /**
245
+ * KV-backed memory tools for agent persistent state.
246
+ */
247
/**
 * Build the four KV-backed memory tools: `save_memory`, `recall_memory`,
 * `list_memories` and `forget_memory`. Each tool operates on `ctx.kv`.
 *
 * Spread the result into an agent's `tools` record.
 *
 * @example
 * ```ts
 * import { defineAgent, memoryTools } from "aai";
 *
 * export default defineAgent({
 *   name: "My Agent",
 *   tools: { ...memoryTools() },
 * });
 * ```
 *
 * @returns A record holding the four tool definitions.
 * @public
 */
function memoryTools() {
  const saveMemory = defineTool({
    description: "Save a piece of information to persistent memory. Use a descriptive key like 'user:name' or 'project:status'.",
    parameters: z.object({
      key: z.string().describe("A descriptive key for this memory (e.g. 'user:name', 'preference:color')"),
      value: z.string().describe("The information to remember"),
    }),
    async execute({ key, value }, ctx) {
      await ctx.kv.set(key, value);
      return { saved: key };
    },
  });

  const recallMemory = defineTool({
    description: "Retrieve a previously saved memory by its key.",
    parameters: z.object({ key: z.string().describe("The key to look up") }),
    async execute({ key }, ctx) {
      const value = await ctx.kv.get(key);
      // A null value is reported as "not found".
      return value === null ? { found: false, key } : { found: true, key, value };
    },
  });

  const listMemories = defineTool({
    description: "List all saved memory keys, optionally filtered by a prefix (e.g. 'user:').",
    parameters: z.object({
      prefix: z.string().describe("Prefix to filter keys (e.g. 'user:'). Use empty string for all.").optional(),
    }),
    async execute({ prefix }, ctx) {
      const entries = await ctx.kv.list(prefix ?? "");
      const keys = entries.map((entry) => entry.key);
      return { count: entries.length, keys };
    },
  });

  const forgetMemory = defineTool({
    description: "Delete a previously saved memory by its key.",
    parameters: z.object({ key: z.string().describe("The key to delete") }),
    async execute({ key }, ctx) {
      await ctx.kv.delete(key);
      return { deleted: key };
    },
  });

  return {
    save_memory: saveMemory,
    recall_memory: recallMemory,
    list_memories: listMemories,
    forget_memory: forgetMemory,
  };
}
317
+ //#endregion
318
+ //#region builtin-tools.ts
319
+ /**
320
+ * Built-in tool definitions for the AAI agent SDK.
321
+ *
322
+ * In self-hosted mode, these run in-process alongside custom tools.
323
+ * In platform mode, they run on the host process outside the sandbox.
324
+ * Network requests go through the host's fetch proxy (with SSRF protection).
325
+ */
326
/** Create a fresh abort signal that fires after `FETCH_TIMEOUT_MS`. */
const fetchSignal = () => {
  return AbortSignal.timeout(FETCH_TIMEOUT_MS);
};
327
/**
 * Convert an HTML string to plain text.
 *
 * Removes `<script>` and `<style>` elements with their content, replaces every
 * remaining tag with a space, decodes a small set of common entities
 * (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&nbsp;`), collapses runs of
 * whitespace and trims the result.
 *
 * Entities are decoded in a single pass so already-escaped text is not decoded
 * twice: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * @param html - Raw HTML markup.
 * @returns The extracted text.
 */
function htmlToText(html) {
  const entityText = {
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
    "&quot;": "\"",
    "&#39;": "'",
    "&nbsp;": " ",
  };
  return html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]+>/g, " ")
    .replace(/&(?:amp|lt|gt|quot|#39|nbsp);/g, (entity) => entityText[entity])
    .replace(/\s{2,}/g, " ")
    .trim();
}
331
/** Parameter schema for `web_search`. */
const webSearchParams = z.object({
  query: z.string().describe("The search query"),
  max_results: z.number().describe("Maximum number of results to return (default 5)").optional()
});
const BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search";
/** Subset of the Brave web-search response that this tool reads. */
const BraveSearchResponseSchema = z.object({ web: z.object({ results: z.array(z.object({
  title: z.string(),
  url: z.string(),
  description: z.string()
})) }).optional() });

/**
 * Build the `web_search` tool, backed by the Brave Search API.
 *
 * The API key is read from `ctx.env.BRAVE_API_KEY`. `max_results` is
 * normalized to an integer between 1 and 20 before use (non-finite values fall
 * back to the default of 5), so a negative, fractional or oversized value from
 * the model can neither produce an invalid request nor make `slice` drop
 * results from the end of the list.
 *
 * @param fetchFn - Fetch implementation to use; defaults to `globalThis.fetch`.
 * @returns Tool definition whose `execute` resolves to an array of
 *   `{ title, url, description }` or an `{ error }` object.
 */
function createWebSearch(fetchFn = globalThis.fetch) {
  const DEFAULT_RESULTS = 5;
  // Upper bound of the Brave `count` query parameter.
  const MAX_RESULTS = 20;
  return {
    description: "Search the web for current information, facts, news, or answers to questions. Returns a list of results with title, URL, and description. Use this when the user asks about something you don't know, need up-to-date information, or want to verify facts.",
    parameters: webSearchParams,
    async execute(args, ctx) {
      const { query, max_results: requested = DEFAULT_RESULTS } = args;
      const maxResults = Number.isFinite(requested)
        ? Math.min(Math.max(Math.trunc(requested), 1), MAX_RESULTS)
        : DEFAULT_RESULTS;
      const apiKey = ctx.env.BRAVE_API_KEY ?? "";
      if (!apiKey) return { error: "BRAVE_API_KEY is not set — web search unavailable" };
      const resp = await fetchFn(`${BRAVE_SEARCH_URL}?${new URLSearchParams({
        q: query,
        count: String(maxResults),
        text_decorations: "false"
      })}`, {
        headers: { "X-Subscription-Token": apiKey },
        signal: fetchSignal()
      });
      if (!resp.ok) return { error: `Search request failed: ${resp.status} ${resp.statusText}` };
      let raw;
      try {
        raw = await resp.json();
      } catch {
        // A body that is not JSON is reported like any other malformed response.
        return { error: "Unexpected search response format" };
      }
      const data = BraveSearchResponseSchema.safeParse(raw);
      if (!data.success) return { error: "Unexpected search response format" };
      return (data.data.web?.results ?? []).slice(0, maxResults).map((r) => ({
        title: r.title,
        url: r.url,
        description: r.description
      }));
    }
  };
}
369
/** Parameter schema for `visit_webpage`. */
const visitWebpageParams = z.object({ url: z.string().describe("The full URL to fetch (e.g., 'https://example.com/page')") });

/**
 * Build the `visit_webpage` tool: fetch a URL and return its text content.
 *
 * The response body is cut to `MAX_HTML_BYTES` before tag stripping, and the
 * extracted text is cut to `MAX_PAGE_CHARS`. When the text is cut, the result
 * also carries `truncated: true` and the untruncated `totalChars`.
 *
 * @param fetchFn - Fetch implementation to use; defaults to `globalThis.fetch`.
 * @returns Tool definition whose `execute` resolves to `{ url, content, ... }`
 *   or `{ error, url }` for a non-OK response.
 */
function createVisitWebpage(fetchFn = globalThis.fetch) {
  return {
    description: "Fetch a webpage and return its content as clean text. Use this to read the full content of a URL found via web_search, or any link the user shares. Good for reading articles, documentation, blog posts, or product pages.",
    parameters: visitWebpageParams,
    async execute(args, _ctx) {
      const { url } = args;
      const resp = await fetchFn(url, {
        headers: {
          "User-Agent": "Mozilla/5.0 (compatible; VoiceAgent/1.0; +https://github.com/AssemblyAI/aai)",
          Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        },
        signal: fetchSignal()
      });
      if (!resp.ok) return {
        error: `Failed to fetch: ${resp.status} ${resp.statusText}`,
        url
      };
      const htmlContent = await resp.text();
      // Use the shared limit for both the threshold and the cut; `slice` is a
      // no-op for shorter input. The limit is applied to string length
      // (UTF-16 code units), as before.
      const text = htmlToText(htmlContent.slice(0, MAX_HTML_BYTES));
      const truncated = text.length > MAX_PAGE_CHARS;
      return {
        url,
        content: truncated ? text.slice(0, MAX_PAGE_CHARS) : text,
        ...truncated ? {
          truncated: true,
          totalChars: text.length
        } : {}
      };
    }
  };
}
401
/** Parameter schema for `fetch_json`: a URL plus optional string-valued headers. */
const fetchJsonParams = z.object({
  url: z.string().describe("The URL to fetch JSON from"),
  headers: z
    .record(z.string(), z.string())
    .describe("Optional HTTP headers to include in the request (only safe headers like Accept, Content-Type are allowed)")
    .optional(),
});
405
/**
 * Lower-cased names of request headers that model-supplied input may not set,
 * covering credentials (authorization, cookies) and routing/client-IP headers.
 */
const BLOCKED_FETCH_HEADERS = new Set([
  "authorization",
  "cookie",
  "set-cookie",
  "host",
  "proxy-authorization",
  "x-forwarded-for",
  "x-forwarded-host",
  "x-forwarded-proto",
  "x-real-ip",
  "cf-connecting-ip",
  "fly-client-ip",
]);

/**
 * Drop blocked headers (matched case-insensitively) from a header record.
 *
 * @param raw - Header record, or a falsy value when no headers were given.
 * @returns A new record with the allowed headers, or `undefined` when the
 *   input is falsy or nothing is left after filtering.
 */
function sanitizeHeaders(raw) {
  if (!raw) return undefined;
  const allowed = {};
  for (const name of Object.keys(raw)) {
    if (BLOCKED_FETCH_HEADERS.has(name.toLowerCase())) continue;
    allowed[name] = raw[name];
  }
  return Object.keys(allowed).length === 0 ? undefined : allowed;
}
425
/**
 * Build the `fetch_json` tool: call a URL and return the parsed JSON body.
 *
 * Caller-supplied headers are passed through `sanitizeHeaders` first, and the
 * request carries a timeout signal from `fetchSignal`.
 *
 * @param fetchFn - Fetch implementation to use; defaults to `globalThis.fetch`.
 * @returns Tool definition whose `execute` resolves to the parsed JSON, or to
 *   `{ error, url }` for a non-OK response or a non-JSON body.
 */
function createFetchJson(fetchFn = globalThis.fetch) {
  return {
    description: "Call a REST API endpoint via HTTP GET and return the JSON response. Use this to fetch structured data from APIs — for example, weather data, stock prices, exchange rates, or any public JSON API. Supports custom headers for authenticated APIs.",
    parameters: fetchJsonParams,
    async execute(args, _ctx) {
      const { url } = args;
      const safeHeaders = sanitizeHeaders(args.headers);
      const init = safeHeaders
        ? { headers: safeHeaders, signal: fetchSignal() }
        : { signal: fetchSignal() };
      const resp = await fetchFn(url, init);
      if (!resp.ok) {
        return { error: `HTTP ${resp.status} ${resp.statusText}`, url };
      }
      try {
        return await resp.json();
      } catch {
        return { error: "Response was not valid JSON", url };
      }
    },
  };
}
451
/**
 * Map a builtin name to the `[toolName, toolDef]` pairs it provides.
 *
 * The fetch-based builtins receive `opts?.fetch`; "memory" expands to every
 * entry of `memoryTools()`; an unknown name yields an empty array.
 */
function resolveBuiltin(name, opts) {
  if (name === "web_search") {
    return [["web_search", createWebSearch(opts?.fetch)]];
  }
  if (name === "visit_webpage") {
    return [["visit_webpage", createVisitWebpage(opts?.fetch)]];
  }
  if (name === "fetch_json") {
    return [["fetch_json", createFetchJson(opts?.fetch)]];
  }
  if (name === "run_code") {
    return [["run_code", createRunCode()]];
  }
  if (name === "memory") {
    return Object.entries(memoryTools());
  }
  return [];
}
462
/**
 * Create the executable definitions for the requested builtin tools.
 *
 * @param names - Builtin names to resolve; unknown names contribute nothing.
 * @param opts - Options forwarded to `resolveBuiltin` (e.g. a custom `fetch`).
 * @returns Record mapping tool name to tool definition; a later name
 *   overwrites an earlier one that resolves to the same tool name.
 */
function getBuiltinToolDefs(names, opts) {
  const defs = {};
  for (const builtin of names) {
    const pairs = resolveBuiltin(builtin, opts);
    for (const pair of pairs) {
      defs[pair[0]] = pair[1];
    }
  }
  return defs;
}
471
/**
 * Produce wire-format schemas for the requested builtin tools.
 *
 * @param names - Array of builtin names.
 * @returns Flat array of `{ name, description, parameters }` objects, with
 *   `parameters` converted to JSON Schema via `z.toJSONSchema`.
 */
function getBuiltinToolSchemas(names) {
  const toSchema = ([toolName, def]) => {
    const parameters = z.toJSONSchema(def.parameters ?? EMPTY_PARAMS);
    return { name: toolName, description: def.description, parameters };
  };
  return names.flatMap((name) => resolveBuiltin(name).map(toSchema));
}
479
+ //#endregion
480
+ //#region s2s.ts
481
/** Encode raw bytes as a base64 string. */
const uint8ToBase64 = (bytes) => {
  const buffer = Buffer.from(bytes);
  return buffer.toString("base64");
};
/** Decode a base64 string into a plain `Uint8Array` (a copy, not a `Buffer`). */
const base64ToUint8 = (base64) => {
  const decoded = Buffer.from(base64, "base64");
  return Uint8Array.from(decoded);
};
483
/** `readyState` value of an open WebSocket. */
const WS_OPEN = 1;
/** Default socket factory: a `ws` client that sends the given request headers. */
const defaultCreateS2sWebSocket = (url, opts) => {
  const { headers } = opts;
  return new WsWebSocket(url, { headers });
};
485
/**
 * Check that every listed key of `obj` holds a string.
 *
 * @returns `true` when all keys are strings (also when no keys are given).
 */
function hasStringFields(obj, ...keys) {
  return keys.every((key) => typeof obj[key] === "string");
}
489
/**
 * Validate and normalize a `transcript.agent` message.
 *
 * @returns `undefined` when `text` is not a string; otherwise a message where
 *   non-string `reply_id`/`item_id` become "" and `interrupted` is `true` only
 *   for the boolean `true`.
 */
function parseAgentTranscript(obj) {
  const { text, reply_id: replyId, item_id: itemId, interrupted } = obj;
  if (typeof text !== "string") return undefined;
  const stringOrEmpty = (value) => (typeof value === "string" ? value : "");
  return {
    type: "transcript.agent",
    text,
    reply_id: stringOrEmpty(replyId),
    item_id: stringOrEmpty(itemId),
    interrupted: interrupted === true,
  };
}
499
/**
 * Validate and normalize a `tool.call` message.
 *
 * @returns `undefined` unless `call_id` and `name` are both strings; otherwise
 *   a message whose `args` is the original value when it is a non-null,
 *   non-array object, and `{}` in every other case.
 */
function parseToolCall(obj) {
  const { call_id: callId, name, args: rawArgs } = obj;
  if (typeof callId !== "string" || typeof name !== "string") return undefined;
  const isRecord = rawArgs != null && typeof rawArgs === "object" && !Array.isArray(rawArgs);
  return {
    type: "tool.call",
    call_id: callId,
    name,
    args: isRecord ? rawArgs : {},
  };
}
509
/** Validator that accepts any message object unchanged. */
function passthrough(obj) {
  return obj;
}
/**
 * Build a validator that accepts a message only when all the listed keys hold
 * strings, and returns `undefined` otherwise.
 */
function requireFields(...keys) {
  return (obj) => {
    if (!hasStringFields(obj, ...keys)) return undefined;
    return obj;
  };
}
515
/**
 * Validator per incoming S2S message type. Each validator returns the message
 * to dispatch, or `undefined` when required fields are missing.
 */
const MESSAGE_VALIDATORS = new Map([
  ["session.ready", requireFields("session_id")],
  ["session.updated", passthrough],
  ["input.speech.started", passthrough],
  ["input.speech.stopped", passthrough],
  ["reply.content_part.started", passthrough],
  ["reply.content_part.done", passthrough],
  ["transcript.user.delta", requireFields("text")],
  ["transcript.user", requireFields("item_id", "text")],
  ["reply.started", requireFields("reply_id")],
  ["transcript.agent.delta", requireFields("delta")],
  ["transcript.agent", parseAgentTranscript],
  ["tool.call", parseToolCall],
  [
    "reply.done",
    (obj) => {
      // `status` is kept only when it is a string.
      if (typeof obj.status === "string") {
        return { type: "reply.done", status: obj.status };
      }
      return { type: "reply.done" };
    },
  ],
  ["session.error", requireFields("code", "message")],
  ["error", requireFields("message")],
]);
535
/**
 * Validate an incoming S2S message object by its `type`.
 *
 * @returns The validated message, or `undefined` when `type` is not a string,
 *   no validator is registered for it, or validation fails.
 */
function parseS2sMessage(obj) {
  const { type } = obj;
  if (typeof type !== "string") return undefined;
  const validate = MESSAGE_VALIDATORS.get(type);
  if (validate === undefined || validate === null) return undefined;
  return validate(obj);
}
540
/**
 * Translate a validated S2S wire message into an emitter event.
 *
 * Wire field names (snake_case) are mapped to camelCase payload fields.
 * `session.error` with code "session_not_found" or "session_forbidden" is
 * emitted as "sessionExpired"; other session errors and plain `error`
 * messages are emitted as "error". Content-part and unknown message types are
 * ignored.
 *
 * @param emitter - Object with an `emit(event, payload?)` method.
 * @param msg - Message as returned by `parseS2sMessage`.
 */
function dispatchS2sMessage(emitter, msg) {
  const { type } = msg;
  if (type === "session.ready") {
    emitter.emit("ready", { sessionId: msg.session_id });
    return;
  }
  if (type === "session.updated") {
    emitter.emit("sessionUpdated", msg);
    return;
  }
  if (type === "input.speech.started") {
    emitter.emit("speechStarted");
    return;
  }
  if (type === "input.speech.stopped") {
    emitter.emit("speechStopped");
    return;
  }
  if (type === "transcript.user.delta") {
    emitter.emit("userTranscriptDelta", { text: msg.text });
    return;
  }
  if (type === "transcript.user") {
    emitter.emit("userTranscript", { itemId: msg.item_id, text: msg.text });
    return;
  }
  if (type === "reply.started") {
    emitter.emit("replyStarted", { replyId: msg.reply_id });
    return;
  }
  if (type === "transcript.agent.delta") {
    emitter.emit("agentTranscriptDelta", { text: msg.delta });
    return;
  }
  if (type === "transcript.agent") {
    emitter.emit("agentTranscript", {
      text: msg.text,
      replyId: msg.reply_id,
      itemId: msg.item_id,
      interrupted: msg.interrupted,
    });
    return;
  }
  if (type === "tool.call") {
    emitter.emit("toolCall", { callId: msg.call_id, name: msg.name, args: msg.args });
    return;
  }
  if (type === "reply.done") {
    emitter.emit("replyDone", msg.status ? { status: msg.status } : {});
    return;
  }
  if (type === "session.error") {
    const expired = msg.code === "session_not_found" || msg.code === "session_forbidden";
    emitter.emit(expired ? "sessionExpired" : "error", { code: msg.code, message: msg.message });
    return;
  }
  if (type === "error") {
    emitter.emit("error", { code: "connection", message: msg.message });
  }
  // "reply.content_part.*" and any other type: nothing to emit.
}
608
/**
 * Open a Speech-to-Speech WebSocket and resolve with a session handle once the
 * socket reports "open".
 *
 * The promise rejects when the socket errors or closes before opening. After
 * opening, socket errors are emitted as "error" events with code "ws_error",
 * and every close emits "close".
 *
 * @param opts.apiKey - Sent as a Bearer token in the `Authorization` header.
 * @param opts.config - S2S configuration; `wssUrl` is the endpoint to connect to.
 * @param opts.createWebSocket - Factory `(url, { headers })` returning the socket.
 * @param opts.logger - Logger to use; defaults to `consoleLogger`.
 * @returns Promise for a handle with `on`, `sendAudio`, `sendToolResult`,
 *   `updateSession`, `resumeSession` and `close`.
 */
function connectS2s(opts) {
  const { apiKey, config, createWebSocket, logger: log = consoleLogger } = opts;
  return new Promise((resolve, reject) => {
    log.info("S2S connecting", { url: config.wssUrl });
    const socket = createWebSocket(config.wssUrl, { headers: { Authorization: `Bearer ${apiKey}` } });
    const events = createNanoEvents();
    let hasOpened = false;

    /** Serialize and send a control message; dropped when the socket is not open. */
    const sendJson = (msg) => {
      if (socket.readyState !== WS_OPEN) {
        log.debug("S2S send dropped: socket not open", { type: msg.type });
        return;
      }
      const payload = JSON.stringify(msg);
      if (msg.type !== "input.audio") {
        // Only session updates include their full payload in the log entry.
        log.info(`S2S >> ${msg.type}`, msg.type === "session.update" ? { payload } : undefined);
      }
      socket.send(payload);
    };

    const handle = {
      on: events.on.bind(events),
      sendAudio(audio) {
        if (socket.readyState !== WS_OPEN) {
          log.debug("S2S sendAudio dropped: socket not open");
          return;
        }
        // Audio frames are built by hand and bypass sendJson.
        socket.send(`{"type":"input.audio","audio":"${uint8ToBase64(audio)}"}`);
      },
      sendToolResult(callId, result) {
        log.info("S2S >> tool.result", {
          call_id: callId,
          resultLength: result.length,
        });
        sendJson({
          type: "tool.result",
          call_id: callId,
          result,
        });
      },
      updateSession(sessionConfig) {
        const { systemPrompt, ...others } = sessionConfig;
        sendJson({
          type: "session.update",
          session: {
            system_prompt: systemPrompt,
            ...others,
          },
        });
      },
      resumeSession(sessionId) {
        sendJson({
          type: "session.resume",
          session_id: sessionId,
        });
      },
      close() {
        log.info("S2S closing");
        socket.close();
      },
    };

    /** Parse a raw frame; logs a warning and yields `undefined` on invalid JSON. */
    const parseFrame = (data) => {
      try {
        return JSON.parse(String(data));
      } catch {
        log.warn("S2S << invalid JSON", { data: String(data).slice(0, 200) });
        return undefined;
      }
    };

    const onMessage = (ev) => {
      const raw = parseFrame(ev.data);
      if (raw === undefined) return;
      if (raw === null || typeof raw !== "object" || Array.isArray(raw)) {
        log.warn("S2S << non-object JSON message", { type: typeof raw });
        return;
      }
      // Audio frames are not logged.
      const isAudioType = raw.type === "reply.audio" || raw.type === "input.audio";
      if (!isAudioType) {
        log.info(`S2S << ${raw.type}`, raw.type === "transcript.agent.delta" ? { delta: raw.delta } : undefined);
      }
      // Fast path: reply audio is emitted directly, skipping validation.
      if (raw.type === "reply.audio" && typeof raw.data === "string") {
        events.emit("audio", { audio: base64ToUint8(raw.data) });
        return;
      }
      const parsed = parseS2sMessage(raw);
      if (!parsed) {
        log.warn(`S2S << unrecognised message type: ${raw.type ?? JSON.stringify(raw).slice(0, 200)}`);
        return;
      }
      dispatchS2sMessage(events, parsed);
    };

    socket.addEventListener("open", () => {
      hasOpened = true;
      log.info("S2S WebSocket open");
      resolve(handle);
    });
    socket.addEventListener("message", onMessage);
    socket.addEventListener("close", (ev) => {
      log.info("S2S WebSocket closed", {
        code: ev.code ?? 0,
        reason: ev.reason ?? "",
      });
      if (!hasOpened) {
        reject(new Error(`WebSocket closed before open (code: ${ev.code ?? 0})`));
      }
      events.emit("close");
    });
    socket.addEventListener("error", (ev) => {
      const failure = new Error(typeof ev.message === "string" ? ev.message : "WebSocket error");
      log.error("S2S WebSocket error", { error: failure.message });
      if (!hasOpened) {
        reject(failure);
        return;
      }
      events.emit("error", {
        code: "ws_error",
        message: failure.message,
      });
    });
  });
}
728
+ //#endregion
729
+ //#region system-prompt.ts
730
/**
 * Format the current date in the en-US locale with long weekday, numeric year,
 * long month and numeric day.
 */
function getFormattedDate() {
  const formatter = new Intl.DateTimeFormat("en-US", {
    weekday: "long",
    year: "numeric",
    month: "long",
    day: "numeric",
  });
  return formatter.format(new Date());
}
738
/** Output rules appended to the system prompt when voice mode is requested. */
const VOICE_RULES = "\n\nCRITICAL OUTPUT RULES — you MUST follow these for EVERY response:\nYour response will be spoken aloud by a TTS system and displayed as plain text.\n- NEVER use markdown: no **, no *, no _, no #, no `, no [](), no ---\n- NEVER use bullet points (-, *, •) or numbered lists (1., 2.)\n- NEVER use code blocks or inline code\n- NEVER mention tools, search, APIs, or technical failures to the user. If a tool returns no results, just answer naturally without explaining why.\n- Write exactly as you would say it out loud to a friend\n- Use short conversational sentences. To list things, say \"First,\" \"Next,\" \"Finally,\"\n- Keep responses concise — 1 to 3 sentences max";

/**
 * Assemble the system prompt for an agent.
 *
 * Sections are concatenated in this order: the default instructions, today's
 * date, the agent-specific instructions (only when set and different from the
 * defaults), the tool-call preamble (when `opts.hasTools` is truthy) and the
 * voice output rules (when `opts.voice` is truthy).
 *
 * @param config - Agent configuration; only `instructions` is read here.
 * @param opts.hasTools - Whether to add the "speak before a tool call" preamble.
 * @param opts.voice - Whether to add the voice-specific output rules.
 * @returns The assembled system prompt string.
 */
function buildSystemPrompt(config, opts) {
  const { hasTools } = opts;
  const { instructions } = config;
  const sections = [DEFAULT_INSTRUCTIONS, `\n\nToday's date is ${getFormattedDate()}.`];
  if (instructions && instructions !== DEFAULT_INSTRUCTIONS) {
    sections.push(`\n\nAgent-Specific Instructions:\n${instructions}`);
  }
  if (hasTools) {
    sections.push("\n\nWhen you decide to use a tool, ALWAYS say a brief natural phrase BEFORE the tool call (e.g. \"Let me look that up\" or \"One moment while I check\"). This fills silence while the tool executes. Keep preambles to one short sentence.");
  }
  if (opts.voice) {
    sections.push(VOICE_RULES);
  }
  return sections.join("");
}
758
+ //#endregion
759
+ //#region session.ts
760
/**
 * Build the mutable per-session context shared by the S2S event handlers.
 *
 * The returned object carries every field of `opts` plus the session state
 * (`s2s`, `reply`, `turnPromise`, `conversationMessages`, `maxHistory`) and
 * the helper methods that mutate it.
 *
 * @param opts - Must provide `id`, `agentConfig`, `hooks` and `log`;
 *   `opts.maxHistory` defaults to 200 when null or undefined.
 * @returns The session context object.
 */
function buildCtx(opts) {
  const { id, agentConfig, hooks, log } = opts;
  const maxHistory = opts.maxHistory ?? 200;

  /** In-flight hook promises, kept so shutdown can wait for them. */
  const pendingHooks = new Set();

  /** Fresh per-reply bookkeeping for the given reply id (or `null` for "no reply"). */
  const freshReply = (currentReplyId) => ({
    pendingTools: [],
    toolCallCount: 0,
    currentReplyId,
  });

  /** Log a hook failure and forward it to the "error" hook (never recursively). */
  const reportHookFailure = (hookName, failure) => {
    log.warn(`${hookName} hook failed`, { err: errorMessage(failure) });
    if (hookName === "error") return;
    const errorHookResult = hooks.callHook("error", id, { message: errorMessage(failure) });
    if (errorHookResult && typeof errorHookResult.catch === "function") {
      errorHookResult.catch((nested) => {
        log.warn("error hook failed", { err: errorMessage(nested) });
      });
    }
  };

  const ctx = {
    ...opts,
    s2s: null,
    reply: freshReply(null),
    turnPromise: null,
    conversationMessages: [],
    maxHistory,

    resolveTurnConfig() {
      return callResolveTurnConfig(hooks, id, HOOK_TIMEOUT_MS);
    },

    /**
     * Account for one tool call against the step budget.
     * Returns `null` when the call may proceed, otherwise a tool-error payload.
     */
    consumeToolCallStep(turnConfig, _name, replyId) {
      const belongsToCurrentReply = replyId !== null && replyId === ctx.reply.currentReplyId;
      if (!belongsToCurrentReply) {
        return toolError("Reply was interrupted. Discarding stale tool call.");
      }
      const maxSteps = turnConfig?.maxSteps ?? agentConfig.maxSteps;
      ctx.reply.toolCallCount += 1;
      const used = ctx.reply.toolCallCount;
      if (maxSteps === undefined || !(used > maxSteps)) return null;
      log.info("maxSteps exceeded, refusing tool call", {
        toolCallCount: used,
        maxSteps,
      });
      return toolError("Maximum tool steps reached. Please respond to the user now.");
    },

    /** Invoke a hook without awaiting it; failures are logged and routed to the "error" hook. */
    fireHook(name, ...args) {
      if (!hooks) return;
      const onFailure = (failure) => reportHookFailure(name, failure);
      try {
        const outcome = hooks.callHook(name, ...args);
        if (outcome == null) return;
        const tracked = outcome.catch(onFailure).finally(() => {
          pendingHooks.delete(tracked);
        });
        pendingHooks.add(tracked);
      } catch (failure) {
        onFailure(failure);
      }
    },

    /** Wait for every hook promise that is currently tracked. */
    async drainHooks() {
      if (pendingHooks.size === 0) return;
      await Promise.all(Array.from(pendingHooks));
    },

    /** Append messages, keeping only the newest `maxHistory` when that limit is positive. */
    pushMessages(...msgs) {
      const history = ctx.conversationMessages;
      history.push(...msgs);
      const overflowing = maxHistory > 0 && history.length > maxHistory;
      if (overflowing) ctx.conversationMessages = history.slice(-maxHistory);
    },

    beginReply(replyId) {
      ctx.reply = freshReply(replyId);
      ctx.turnPromise = null;
    },

    cancelReply() {
      ctx.reply = freshReply(null);
    },

    /** Sequence `p` after whatever turn work is already chained. */
    chainTurn(p) {
      const previous = ctx.turnPromise ?? Promise.resolve();
      ctx.turnPromise = previous.then(() => p);
    },
  };

  return ctx;
}
840
+ /** @internal Not part of the public API. Exposed for testing only; createS2sSession reaches connectS2s through this object rather than calling it directly. */
841
+ const _internals = { connectS2s };
842
/**
 * Create a restartable idle timer for an S2S session.
 *
 * When the timer fires it logs the timeout, sends an `idle_timeout` event to
 * the client and closes the current S2S handle if one is set.
 *
 * @param opts.timeoutMs - Idle period in milliseconds; a value `<= 0` yields a
 *   timer whose `reset` and `clear` do nothing.
 * @param opts.log - Logger used to report the timeout.
 * @param opts.client - Client sink that receives the `idle_timeout` event.
 * @param opts.ctx - Session context whose `s2s` handle is closed on timeout.
 * @param opts.agent - Agent name included in the log entry.
 * @returns An object with `reset()` (restart the countdown) and `clear()` (stop it).
 */
function createIdleTimer(opts) {
  if (opts.timeoutMs <= 0) {
    const disabled = {
      reset() {},
      clear() {},
    };
    return disabled;
  }

  let pending = null;

  const cancelPending = () => {
    if (pending === null) return;
    clearTimeout(pending);
    pending = null;
  };

  const onExpire = () => {
    const { timeoutMs, agent } = opts;
    opts.log.info("S2S idle timeout", { timeoutMs, agent });
    opts.client.event({ type: "idle_timeout" });
    opts.ctx.s2s?.close();
  };

  return {
    reset() {
      cancelPending();
      pending = setTimeout(onExpire, opts.timeoutMs);
    },
    clear() {
      cancelPending();
    },
  };
}
868
/**
 * Complete a tool call: emit a `tool_call_done` event carrying a size-capped
 * copy of the result, and queue the result on `ctx.reply.pendingTools` — but
 * only if the reply that initiated this call is still the active one.
 *
 * @param ctx - Session context (uses `client`, `reply` and `maxHistory`).
 * @param callId - Identifier of the tool call being completed.
 * @param result - Tool output as a string.
 * @param replyId - Reply id captured when the call started; a `null` or stale
 *   id means the result is reported to the client but not queued.
 */
function finishToolCall(ctx, callId, result, replyId) {
  // Use the same named limit for the check and the cut so they cannot drift apart.
  const truncatedResult = result.length > MAX_TOOL_RESULT_CHARS
    ? result.slice(0, MAX_TOOL_RESULT_CHARS)
    : result;
  ctx.client.event({
    type: "tool_call_done",
    toolCallId: callId,
    result: truncatedResult,
  });

  const replyStillActive = replyId !== null && replyId === ctx.reply.currentReplyId;
  if (!replyStillActive) return;

  // Only the client-facing copy is capped; the queued entry keeps the full result.
  ctx.reply.pendingTools.push({ callId, result });
  // Bound the queue by dropping the oldest entry once it exceeds maxHistory.
  if (ctx.maxHistory > 0 && ctx.reply.pendingTools.length > ctx.maxHistory) {
    ctx.reply.pendingTools.shift();
  }
}
888
/**
 * Run one tool call requested by the S2S service and report its outcome.
 *
 * Emits `tool_call_start`, resolves the per-turn config, checks the step
 * budget, executes the tool, and hands the final string to `finishToolCall`.
 * Every failure path is converted to a tool-error result instead of throwing.
 *
 * @param ctx - Session context.
 * @param detail - Tool call details: `callId`, `name` and parsed `args`.
 */
async function handleToolCall(ctx, detail) {
  const { callId, name, args: parsedArgs } = detail;
  // Captured up front so a result arriving after a barge-in is recognised as stale.
  const replyId = ctx.reply.currentReplyId;
  const finish = (outcome) => finishToolCall(ctx, callId, outcome, replyId);

  ctx.client.event({
    type: "tool_call_start",
    toolCallId: callId,
    toolName: name,
    args: parsedArgs,
  });

  let turnConfig;
  try {
    turnConfig = await ctx.resolveTurnConfig();
  } catch (failure) {
    const msg = `resolveTurnConfig hook error: ${errorMessage(failure)}`;
    ctx.log.error(msg);
    finish(toolError(msg));
    return;
  }

  const refusal = ctx.consumeToolCallStep(turnConfig, name, replyId);
  if (refusal !== null) {
    finish(refusal);
    return;
  }

  ctx.log.info("S2S tool call", {
    tool: name,
    callId,
    args: parsedArgs,
    agent: ctx.agent,
  });

  let output;
  try {
    output = await ctx.executeTool(name, parsedArgs, ctx.id, ctx.conversationMessages);
  } catch (failure) {
    const msg = errorMessage(failure);
    ctx.log.error("Tool execution failed", {
      tool: name,
      error: errorDetail(failure),
    });
    output = toolError(msg);
  }

  ctx.log.info("S2S tool result", {
    tool: name,
    callId,
    resultLength: output.length,
  });
  finish(output);
}
935
/**
 * Handle a final user transcript: forward it to the client as a final
 * `transcript` event and a `turn` event, record it in the conversation
 * history, and fire the "turn" hook.
 *
 * @param ctx - Session context.
 * @param text - Transcribed user utterance.
 */
function handleUserTranscript(ctx, text) {
  ctx.log.info("S2S user transcript", { text });

  const outbound = [
    { type: "transcript", text, isFinal: true },
    { type: "turn", text },
  ];
  for (const clientEvent of outbound) {
    ctx.client.event(clientEvent);
  }

  ctx.pushMessages({ role: "user", content: text });
  ctx.fireHook("turn", ctx.id, text, HOOK_TIMEOUT_MS);
}
952
/**
 * Handle a completed agent transcript: always forward it to the client as a
 * `chat` event, and record it in the conversation history unless the reply
 * was interrupted.
 *
 * @param ctx - Session context.
 * @param text - The agent's transcript text.
 * @param interrupted - Truthy when the reply was cut off.
 */
function handleAgentTranscript(ctx, text, interrupted) {
  ctx.client.event({ type: "chat", text });
  if (interrupted) return;
  ctx.pushMessages({ role: "assistant", content: text });
}
962
/**
 * React to the end of an agent reply.
 *
 * An interrupted reply is cancelled and reported to the client. Otherwise,
 * once any chained tool work has settled, queued tool results are sent back
 * to the S2S service; if there are none the turn is finished and the client
 * is told audio/TTS is done.
 *
 * @param ctx - Session context.
 * @param status - Reply status; `"interrupted"` marks a barge-in.
 */
function handleReplyDone(ctx, status) {
  if (status === "interrupted") {
    ctx.log.info("S2S reply interrupted (barge-in)");
    ctx.cancelReply();
    ctx.client.event({ type: "cancelled" });
    return;
  }

  const doneReplyId = ctx.reply.currentReplyId;

  const flushPending = () => {
    // A different reply became current while we were waiting: drop what is queued.
    if (ctx.reply.currentReplyId !== doneReplyId) {
      ctx.reply.pendingTools = [];
      return;
    }

    if (ctx.reply.pendingTools.length > 0) {
      for (const { callId, result } of ctx.reply.pendingTools) {
        ctx.s2s?.sendToolResult(callId, result);
      }
      ctx.reply.pendingTools = [];
      return;
    }

    const stepsUsed = ctx.reply.toolCallCount;
    if (stepsUsed > 0) {
      ctx.log.info("Turn complete", {
        steps: stepsUsed,
        agent: ctx.agent,
      });
    }
    ctx.client.playAudioDone();
    ctx.client.event({ type: "tts_done" });
  };

  if (ctx.turnPromise === null) {
    flushPending();
    return;
  }
  ctx.turnPromise.then(flushPending);
}
991
/**
 * Subscribe the session context to every event emitted by an S2S handle.
 *
 * Events are translated into client events, conversation-history updates,
 * tool executions and reply bookkeeping on `ctx`.
 *
 * @param ctx - Session context.
 * @param handle - S2S connection handle exposing `on(event, listener)` and `close()`.
 */
function setupListeners(ctx, handle) {
  handle.on("ready", ({ sessionId }) => ctx.log.info("S2S session ready", { sessionId }));

  handle.on("sessionExpired", () => {
    ctx.log.info("S2S session expired");
    handle.close();
  });

  handle.on("speechStarted", () => ctx.client.event({ type: "speech_started" }));
  handle.on("speechStopped", () => ctx.client.event({ type: "speech_stopped" }));

  handle.on("userTranscriptDelta", ({ text }) => ctx.client.event({
    type: "transcript",
    text,
    isFinal: false,
  }));
  handle.on("userTranscript", ({ text }) => handleUserTranscript(ctx, text));

  handle.on("replyStarted", ({ replyId }) => {
    ctx.beginReply(replyId);
  });

  handle.on("audio", ({ audio }) => ctx.client.playAudioChunk(audio));

  handle.on("agentTranscriptDelta", ({ text }) => ctx.client.event({
    type: "chat_delta",
    text,
  }));
  handle.on("agentTranscript", ({ text, interrupted }) => handleAgentTranscript(ctx, text, interrupted));

  handle.on("toolCall", (detail) => {
    // Swallow handler failures here so the chained turn promise never rejects.
    const p = handleToolCall(ctx, detail).catch((err) => {
      ctx.log.error("Tool call handler failed", { err: errorMessage(err) });
    });
    ctx.chainTurn(p);
  });

  handle.on("replyDone", ({ status }) => handleReplyDone(ctx, status));

  handle.on("error", ({ code, message }) => {
    ctx.log.error("S2S error", { code, message });
    ctx.client.event({
      type: "error",
      code: "internal",
      message,
    });
    handle.close();
  });

  handle.on("close", () => {
    ctx.log.info("S2S closed");
    // A replacement connection may already be installed on ctx when an older
    // handle finishes closing; its late close event must not tear down the
    // new connection or cancel the new connection's reply.
    if (ctx.s2s != null && ctx.s2s !== handle) return;
    ctx.s2s = null;
    ctx.cancelReply();
  });
}
1039
/**
 * Create a speech-to-speech session for one client connection.
 *
 * Builds the system prompt and tool list, the session context and the idle
 * timer, and returns the session object that the WebSocket layer drives.
 *
 * @param opts - Session options. `createWebSocket` defaults to
 *   `defaultCreateS2sWebSocket`, `logger` to `consoleLogger`; when
 *   `skipGreeting` is truthy the agent greeting is blanked.
 * @returns An object with `start`, `stop`, `onAudio`, `onAudioReady`,
 *   `onCancel`, `onReset`, `onHistory` and `waitForTurn`.
 */
function createS2sSession(opts) {
  const {
    id,
    agent,
    client,
    toolSchemas,
    apiKey,
    s2sConfig,
    executeTool,
    createWebSocket = defaultCreateS2sWebSocket,
    hooks,
    logger: log = consoleLogger,
  } = opts;

  let agentConfig = opts.agentConfig;
  if (opts.skipGreeting) {
    agentConfig = { ...agentConfig, greeting: "" };
  }

  const hasTools = toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0;
  const systemPrompt = buildSystemPrompt(agentConfig, { hasTools, voice: true });

  const s2sTools = toolSchemas.map(({ name, description, parameters }) => ({
    type: "function",
    name,
    description,
    parameters,
  }));

  const sessionAbort = new AbortController();
  const ctx = buildCtx({
    id,
    agent,
    client,
    agentConfig,
    executeTool,
    hooks,
    log,
    maxHistory: opts.maxHistory,
  });

  // Default idle timeout is 300000 ms; a non-finite value disables the timer.
  let idleTimeoutMs = agentConfig.idleTimeoutMs ?? 3e5;
  if (!Number.isFinite(idleTimeoutMs)) idleTimeoutMs = 0;
  const idle = createIdleTimer({
    timeoutMs: idleTimeoutMs,
    agent,
    log,
    client,
    ctx,
  });

  // Bumped on every connect attempt so a superseded attempt can detect itself.
  let connectGeneration = 0;

  const sessionUpdatePayload = { systemPrompt, tools: s2sTools };
  if (agentConfig.greeting) {
    sessionUpdatePayload.greeting = agentConfig.greeting;
  }

  /** Open an S2S connection and attach it to the context; failures are reported to the client. */
  async function connectAndSetup() {
    connectGeneration += 1;
    const myGeneration = connectGeneration;
    try {
      const handle = await _internals.connectS2s({
        apiKey,
        config: s2sConfig,
        createWebSocket,
        logger: log,
      });
      const superseded = sessionAbort.signal.aborted || myGeneration !== connectGeneration;
      if (superseded) {
        handle.close();
        return;
      }
      setupListeners(ctx, handle);
      handle.updateSession(sessionUpdatePayload);
      ctx.s2s = handle;
      idle.reset();
    } catch (failure) {
      const msg = errorMessage(failure);
      log.error("S2S connect failed", { error: errorDetail(failure) });
      client.event({
        type: "error",
        code: "internal",
        message: msg,
      });
    }
  }

  const session = {
    async start() {
      ctx.fireHook("connect", id, HOOK_TIMEOUT_MS);
      await connectAndSetup();
    },

    async stop() {
      if (sessionAbort.signal.aborted) return;
      sessionAbort.abort();
      idle.clear();
      if (ctx.turnPromise !== null) {
        await ctx.turnPromise;
      }
      await ctx.drainHooks();
      ctx.s2s?.close();
      ctx.fireHook("disconnect", id, HOOK_TIMEOUT_MS);
      await ctx.drainHooks();
    },

    onAudio(data) {
      idle.reset();
      ctx.s2s?.sendAudio(data);
    },

    onAudioReady() {},

    onCancel() {
      client.event({ type: "cancelled" });
    },

    onReset() {
      ctx.cancelReply();
      ctx.conversationMessages = [];
      ctx.reply.toolCallCount = 0;
      ctx.turnPromise = null;
      idle.clear();
      ctx.s2s?.close();
      client.event({ type: "reset" });
      connectAndSetup().catch((failure) => {
        return log.error("S2S reset reconnect failed", { error: errorMessage(failure) });
      });
    },

    onHistory(incoming) {
      const restored = incoming.map(({ role, content }) => ({ role, content }));
      ctx.pushMessages(...restored);
    },

    waitForTurn() {
      return ctx.turnPromise ?? Promise.resolve();
    },
  };

  return session;
}
1151
+ //#endregion
1152
+ //#region unstorage-kv.ts
1153
+ /**
1154
+ * Key-value store backed by unstorage.
1155
+ *
1156
+ * Works with any unstorage driver (memory, fs, S3/R2, etc.).
1157
+ */
1158
/**
 * Create a KV store backed by any unstorage driver.
 *
 * @param options - See {@link UnstorageKvOptions}. When `options.prefix` is
 *   set, all operations go through `prefixStorage(options.storage, prefix)`.
 * @returns A {@link Kv} instance.
 *
 * @example
 * ```ts
 * import { createStorage } from "unstorage";
 * import { createUnstorageKv } from "@alexkroman1/aai/unstorage-kv";
 *
 * const kv = createUnstorageKv({ storage: createStorage() });
 * await kv.set("greeting", "hello");
 * const value = await kv.get<string>("greeting"); // "hello"
 * ```
 */
function createUnstorageKv(options) {
  const store = options.prefix ? prefixStorage(options.storage, options.prefix) : options.storage;
  return {
    /** Read a value; resolves to `null` when the store returns null/undefined. */
    async get(key) {
      return (await store.getItem(key)) ?? null;
    },

    /**
     * Write a value, optionally with a time-to-live.
     *
     * @throws {Error} When the JSON-serialized value is longer than MAX_VALUE_SIZE.
     */
    async set(key, value, setOptions) {
      // JSON.stringify yields undefined (not a string) for undefined, functions
      // and symbols; measure those as empty instead of crashing on `.length`.
      const serialized = JSON.stringify(value) ?? "";
      if (serialized.length > MAX_VALUE_SIZE) {
        throw new Error(`Value exceeds max size of ${MAX_VALUE_SIZE} bytes`);
      }
      const storable = value;
      if (setOptions?.expireIn && setOptions.expireIn > 0) {
        // expireIn is divided by 1000 and rounded up to produce the `ttl` option.
        await store.setItem(key, storable, { ttl: Math.ceil(setOptions.expireIn / 1e3) });
      } else {
        await store.setItem(key, storable);
      }
    },

    /** Remove one key or an array of keys. */
    async delete(keys) {
      const keyArray = Array.isArray(keys) ? keys : [keys];
      await Promise.all(keyArray.map((k) => store.removeItem(k)));
    },

    /** List `{ key, value }` entries under a prefix, skipping keys whose value is null/undefined. */
    async list(listPrefix, listOptions) {
      const allKeys = await store.getKeys(listPrefix);
      const entries = [];
      for (const key of allKeys) {
        const value = await store.getItem(key);
        if (value != null) entries.push({ key, value });
      }
      return sortAndPaginate(entries, listOptions);
    },

    /** Return all keys sorted with `localeCompare`, optionally filtered by a glob pattern. */
    async keys(pattern) {
      const allKeys = await store.getKeys();
      if (!pattern) return allKeys.sort((a, b) => a.localeCompare(b));
      return allKeys.filter((key) => matchGlob(key, pattern)).sort((a, b) => a.localeCompare(b));
    },

    /** Dispose the underlying store. */
    close() {
      store.dispose();
    },
  };
}
1212
+ //#endregion
1213
+ //#region ws-handler.ts
1214
+ /**
1215
+ * WebSocket session lifecycle handler.
1216
+ *
1217
+ * Audio validation is handled at the host transport layer (see server.ts).
1218
+ */
1219
/**
 * Creates a {@link ClientSink} backed by a plain WebSocket.
 *
 * Events are serialized with `JSON.stringify` and sent as text; audio chunks
 * are passed to `ws.send` unchanged. Nothing is sent unless
 * `ws.readyState === 1`, and errors thrown by `ws.send` are only reported
 * through the optional `log.debug`.
 *
 * @param ws - The WebSocket to write to.
 * @param log - Logger; `debug` is optional.
 * @returns A sink with an `open` getter and `event`, `playAudioChunk`,
 *   `playAudioDone` methods.
 */
function createClientSink(ws, log) {
  const OPEN_STATE = 1;
  const isOpen = () => ws.readyState === OPEN_STATE;

  /** Send data over ws, silently dropping it if the socket is not open. */
  const safeSend = (data) => {
    try {
      if (isOpen()) ws.send(data);
    } catch (err) {
      log.debug?.("safeSend: socket closed between readyState check and send", { error: errorMessage(err) });
    }
  };

  const sendJson = (payload) => safeSend(JSON.stringify(payload));

  return {
    get open() {
      return isOpen();
    },
    event(e) {
      sendJson(e);
    },
    playAudioChunk(chunk) {
      safeSend(chunk);
    },
    playAudioDone() {
      sendJson({ type: "audio_done" });
    },
  };
}
1250
/**
 * Forward a binary frame to the session as audio.
 *
 * @param data - Incoming message payload.
 * @param session - Session whose `onAudio` receives a `Uint8Array`.
 * @returns `true` when `data` was a `Uint8Array` or `ArrayBuffer` and was
 *   forwarded, `false` otherwise.
 */
function handleBinaryAudio(data, session) {
  let bytes;
  if (data instanceof Uint8Array) {
    bytes = data;
  } else if (data instanceof ArrayBuffer) {
    bytes = new Uint8Array(data);
  } else {
    return false;
  }
  session.onAudio(bytes);
  return true;
}
1261
/**
 * Parse a text frame from the client and dispatch it to the session.
 *
 * Non-string payloads are ignored. Payloads that are not valid JSON or that
 * fail `ClientMessageSchema` are logged as warnings and dropped.
 *
 * @param data - Incoming message payload.
 * @param session - Session receiving `onAudioReady`, `onCancel`, `onReset` or `onHistory`.
 * @param log - Logger used for warnings.
 * @param ctx - Extra fields merged into every log entry.
 * @param sid - Short session id added to every log entry.
 */
function handleTextMessage(data, session, log, ctx, sid) {
  if (typeof data !== "string") return;

  let json;
  try {
    json = JSON.parse(data);
  } catch {
    log.warn("Invalid JSON from client", { ...ctx, sid });
    return;
  }

  const validation = ClientMessageSchema.safeParse(json);
  if (!validation.success) {
    log.warn("Invalid client message", {
      ...ctx,
      sid,
      error: validation.error.message,
    });
    return;
  }

  const msg = validation.data;
  const kind = msg.type;
  if (kind === "audio_ready") {
    session.onAudioReady();
  } else if (kind === "cancel") {
    session.onCancel();
  } else if (kind === "reset") {
    session.onReset();
  } else if (kind === "history") {
    session.onHistory(msg.messages);
  }
  // Any other validated type is intentionally ignored.
}
1299
/**
 * Attaches session lifecycle handlers to a native WebSocket using
 * plain JSON text frames and binary audio frames.
 *
 * Connection flow:
 * 1. WebSocket opens → server sends `{ type: "config", ...ReadyConfig }`
 * 2. Client sets up audio → sends `{ type: "audio_ready" }`
 * 3. If reconnecting → client sends `{ type: "history", messages: [...] }`
 *
 * @param ws - The client WebSocket. If it is already open (`readyState === 1`)
 *   the session is created immediately, otherwise on its "open" event.
 * @param opts - `sessions` registry, `createSession(sessionId, client)` factory
 *   and `readyConfig`, plus optional `logger`, `logContext`, `resumeFrom`,
 *   `sessionStartTimeoutMs` (default 10000), `onOpen` and `onClose`.
 */
function wireSessionSocket(ws, opts) {
  const { sessions, logger: log = consoleLogger } = opts;
  const sessionId = opts.resumeFrom ?? crypto.randomUUID();
  const sid = sessionId.slice(0, 8);
  const ctx = opts.logContext ?? {};
  let session = null;
  /** Set to true once session.start() resolves. Messages arriving before
   * this flag is set are buffered and replayed once the session is ready,
   * preventing audio/text from being dispatched to a half-initialized session. */
  let sessionReady = false;
  let messageBuffer = [];

  /**
   * Remove this socket's entry from the registry, but only while the registry
   * still points at the session this socket created. With `resumeFrom`, a newer
   * socket can register a session under the same id; the older socket must not
   * evict it, or shutdown would never stop the newer session.
   */
  function unregister(owned) {
    if (sessions.get(sessionId) === owned) sessions.delete(sessionId);
  }

  /** Route one frame: binary goes to audio, everything else to the text handler. */
  function dispatch(data, target) {
    if (handleBinaryAudio(data, target)) return;
    handleTextMessage(data, target, log, ctx, sid);
  }

  /** Replay frames that arrived before the session became ready, exactly once. */
  function drainBuffer() {
    if (!(session && messageBuffer)) return;
    const buf = messageBuffer;
    messageBuffer = null;
    for (const event of buf) dispatch(event.data, session);
  }

  function onOpen() {
    opts.onOpen?.();
    log.info("Session connected", { ...ctx, sid });
    const client = createClientSink(ws, log);
    const created = opts.createSession(sessionId, client);
    session = created;
    sessions.set(sessionId, created);
    ws.send(JSON.stringify({
      type: "config",
      ...opts.readyConfig,
      sessionId,
    }));
    const timeoutMs = opts.sessionStartTimeoutMs ?? 1e4;
    pTimeout(created.start(), {
      milliseconds: timeoutMs,
      message: `session.start() timed out after ${timeoutMs}ms`,
    }).then(() => {
      log.info("Session ready", { ...ctx, sid });
      sessionReady = true;
      drainBuffer();
    }).catch((err) => {
      log.error("Session start failed", {
        ...ctx,
        sid,
        error: errorDetail(err),
      });
      unregister(created);
      session = null;
      messageBuffer = null;
    });
  }

  if (ws.readyState === 1) onOpen();
  else ws.addEventListener("open", onOpen);

  ws.addEventListener("message", (event) => {
    if (!session) return;
    if (!sessionReady) {
      messageBuffer?.push(event);
      return;
    }
    dispatch(event.data, session);
  });

  ws.addEventListener("close", () => {
    log.info("Session disconnected", { ...ctx, sid });
    const closing = session;
    if (closing) {
      closing.stop().catch((err) => {
        log.error("Session stop failed", {
          ...ctx,
          sid,
          error: errorDetail(err),
        });
      }).finally(() => {
        unregister(closing);
      });
    }
    opts.onClose?.();
  });

  ws.addEventListener("error", (ev) => {
    const msg = typeof ev.message === "string" ? ev.message : "WebSocket error";
    log.error("WebSocket error", {
      ...ctx,
      sid,
      error: msg,
    });
  });
}
1402
+ //#endregion
1403
+ //#region direct-executor.ts
1404
+ /**
1405
+ * Agent runtime — the execution engine for voice agents.
1406
+ *
1407
+ * {@link createRuntime} builds the single execution engine used by both
1408
+ * self-hosted servers and the platform sandbox. It wires up tool execution,
1409
+ * lifecycle hooks, and session management.
1410
+ */
1411
/** Resolve after a zero-delay `setTimeout`, letting other queued work run first. */
const yieldTick = () =>
  new Promise((resume) => {
    setTimeout(resume, 0);
  });
1412
/**
 * Build the context object handed to a tool's `execute` function.
 *
 * @param opts - Source values: `env`, `state`, `kv`, `messages`, `fetch`, `sessionId`.
 * @returns A context with a shallow copy of `env`, `state` (default `{}`),
 *   `messages` (default `[]`), `fetch` (default `globalThis.fetch`),
 *   `sessionId` (default `""`), and a `kv` getter that throws
 *   `"KV not available"` when no store was supplied.
 */
function buildToolContext(opts) {
  const { env, state, kv: store, messages, fetch: fetchFn, sessionId } = opts;

  const context = {
    env: { ...env },
    state: state ?? {},
    get kv() {
      if (store) return store;
      throw new Error("KV not available");
    },
    messages: messages ?? [],
    fetch: fetchFn ?? globalThis.fetch,
    sessionId: sessionId ?? "",
  };
  return context;
}
1426
/**
 * Validate arguments for a tool, run it with a timeout, and return its
 * result as a string.
 *
 * @param name - Tool name, used in error and log messages.
 * @param args - Raw arguments, validated with `tool.parameters` (or `EMPTY_PARAMS`).
 * @param options - Must include `tool`; the whole object is also passed to
 *   `buildToolContext`, and `options.logger` is used for failure logging.
 * @returns The tool's string result, `"null"` for a null/undefined result,
 *   the JSON serialization of any other result, or a tool-error payload when
 *   validation or execution fails.
 */
async function executeToolCall(name, args, options) {
  const { tool } = options;
  const schema = tool.parameters ?? EMPTY_PARAMS;
  const validation = schema.safeParse(args);

  if (!validation.success) {
    const issues = validation.error?.issues ?? [];
    const summary = issues
      .map((issue) => `${issue.path.map(String).join(".")}: ${issue.message}`)
      .join(", ");
    return toolError(`Invalid arguments for tool "${name}": ${summary}`);
  }

  try {
    const ctx = buildToolContext(options);
    await yieldTick();
    const running = Promise.resolve(tool.execute(validation.data, ctx));
    const output = await pTimeout(running, {
      milliseconds: TOOL_EXECUTION_TIMEOUT_MS,
      message: `Tool "${name}" timed out after ${TOOL_EXECUTION_TIMEOUT_MS}ms`,
    });
    await yieldTick();
    if (output == null) return "null";
    if (typeof output === "string") return output;
    return JSON.stringify(output);
  } catch (err) {
    const log = options.logger;
    if (log) {
      log.warn("Tool execution failed", {
        tool: name,
        error: errorDetail(err),
      });
    } else {
      console.warn(`[tool-executor] Tool execution failed: ${name}`, err);
    }
    return toolError(errorMessage(err));
  }
}
1450
/**
 * Create the default KV store used when the caller supplies none: an
 * unstorage-backed KV over a storage created with `createStorage()` defaults.
 */
function createLocalKv() {
  const storage = createStorage();
  return createUnstorageKv({ storage });
}
1454
/**
 * Create an agent runtime — the execution engine for a voice agent.
 *
 * Unless the caller supplies `executeTool`, `hooks` and `toolSchemas` all
 * together, this merges built-in and custom tool definitions, builds the tool
 * schemas for the S2S API, and wires up lifecycle hooks with per-session state.
 *
 * @param opts - Runtime configuration. See {@link RuntimeOptions}.
 * @returns A {@link Runtime} with tool execution, hook invocation,
 *   schemas, and session management.
 *
 * @public
 */
function createRuntime(opts) {
  const {
    agent,
    env,
    kv = createLocalKv(),
    createWebSocket,
    logger = consoleLogger,
    s2sConfig = DEFAULT_S2S_CONFIG,
    sessionStartTimeoutMs,
    shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS,
  } = opts;
  const agentConfig = toAgentConfig(agent);
  const sessions = new Map();
  const readyConfig = buildReadyConfig(s2sConfig);

  /** Build tool execution, hooks and schemas from the agent definition itself. */
  function buildDefaultWiring() {
    const builtinNames = agent.builtinTools ?? [];
    const allTools = {
      ...getBuiltinToolDefs(builtinNames),
      ...agent.tools,
    };
    const customSchemas = agentToolsToSchemas(agent.tools ?? {});
    const builtinSchemas = getBuiltinToolSchemas(agent.builtinTools ?? []);
    const schemas = [...customSchemas, ...builtinSchemas];

    // Per-session state, created lazily from agent.state() on first use.
    const stateMap = new Map();
    const stateFor = (sid) => {
      const needsInit = !stateMap.has(sid) && agent.state;
      if (needsInit) stateMap.set(sid, agent.state());
      return stateMap.get(sid) ?? {};
    };

    const frozenEnv = Object.freeze({ ...env });

    const makeHookContext = (sessionId) => ({
      env: frozenEnv,
      state: stateFor(sessionId),
      sessionId,
      get kv() {
        return kv;
      },
      fetch: globalThis.fetch,
    });

    const runTool = async (name, args, sessionId, messages) => {
      const tool = allTools[name];
      if (!tool) return toolError(`Unknown tool: ${name}`);
      const owner = sessionId ?? "";
      return executeToolCall(name, args, {
        tool,
        env: frozenEnv,
        state: stateFor(owner),
        sessionId: owner,
        kv,
        messages,
        logger,
        fetch: globalThis.fetch,
      });
    };

    const agentHooks = createAgentHooks({
      agent,
      makeCtx: makeHookContext,
    });
    // Drop the session's state once it disconnects.
    agentHooks.hook("disconnect", async (sessionId) => {
      stateMap.delete(sessionId);
    });

    return { executeTool: runTool, hooks: agentHooks, toolSchemas: schemas };
  }

  const callerSuppliedWiring = opts.executeTool && opts.hooks && opts.toolSchemas;
  const { executeTool, hooks, toolSchemas } = callerSuppliedWiring
    ? { executeTool: opts.executeTool, hooks: opts.hooks, toolSchemas: opts.toolSchemas }
    : buildDefaultWiring();

  function createSession(sessionOpts) {
    const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
    const webSocketOverride = createWebSocket ? { createWebSocket } : {};
    const resume = sessionOpts.resumeFrom ? { resumeFrom: sessionOpts.resumeFrom } : {};
    return createS2sSession({
      id: sessionOpts.id,
      agent: sessionOpts.agent,
      client: sessionOpts.client,
      agentConfig,
      toolSchemas,
      apiKey,
      s2sConfig,
      executeTool,
      ...webSocketOverride,
      hooks,
      skipGreeting: sessionOpts.skipGreeting ?? false,
      logger,
      ...resume,
    });
  }

  function startSession(ws, startOpts) {
    const resumeFrom = startOpts?.resumeFrom;
    const resume = resumeFrom ? { resumeFrom } : {};
    const logContext = startOpts?.logContext ? { logContext: startOpts.logContext } : {};
    const onOpen = startOpts?.onOpen ? { onOpen: startOpts.onOpen } : {};
    const onClose = startOpts?.onClose ? { onClose: startOpts.onClose } : {};
    const startTimeout = sessionStartTimeoutMs !== undefined ? { sessionStartTimeoutMs } : {};

    wireSessionSocket(ws, {
      sessions,
      createSession: (sid, client) => createSession({
        id: sid,
        agent: agent.name,
        client,
        skipGreeting: startOpts?.skipGreeting ?? false,
        ...resume,
      }),
      readyConfig,
      logger,
      ...logContext,
      ...onOpen,
      ...onClose,
      ...startTimeout,
      ...resume,
    });
  }

  /** Stop every registered session, giving up after `shutdownTimeoutMs`. */
  async function shutdown() {
    if (sessions.size === 0) return;

    let timer;
    const timeout = new Promise((resolve) => {
      timer = setTimeout(() => resolve("timeout"), shutdownTimeoutMs);
    });

    const stops = Array.from(sessions.values(), (s) => s.stop());
    const graceful = Promise.allSettled(stops).then((results) => {
      results
        .filter((r) => r.status === "rejected")
        .forEach((r) => logger.warn(`Session stop failed during shutdown: ${r.reason}`));
      return "done";
    });

    const outcome = await Promise.race([graceful, timeout]);
    if (timer) clearTimeout(timer);
    if (outcome === "timeout") {
      logger.warn(`Shutdown timeout (${shutdownTimeoutMs}ms) exceeded — force-closing ${sessions.size} remaining session(s)`);
    }
    sessions.clear();
  }

  return {
    executeTool,
    hooks,
    toolSchemas,
    createSession,
    startSession,
    shutdown,
    readyConfig,
  };
}
1588
+ //#endregion
1589
+ export { consoleLogger as _, _internals as a, buildSystemPrompt as c, AgentConfigSchema as d, EMPTY_PARAMS as f, DEFAULT_S2S_CONFIG as g, toAgentConfig as h, createUnstorageKv as i, connectS2s as l, agentToolsToSchemas as m, executeToolCall as n, buildCtx as o, ToolSchemaSchema as p, wireSessionSocket as r, createS2sSession as s, createRuntime as t, defaultCreateS2sWebSocket as u, jsonLogger as v };