@alexkroman1/aai 0.10.3 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_internal-types.d.ts +8 -1
- package/dist/_run-code.d.ts +16 -12
- package/dist/_runtime-conformance.d.ts +55 -0
- package/dist/_test-utils.d.ts +73 -0
- package/dist/_utils.d.ts +0 -19
- package/dist/_utils.js +28 -2
- package/dist/builtin-tools.d.ts +1 -5
- package/dist/constants-CwotjpJR.js +45 -0
- package/dist/constants.d.ts +42 -0
- package/dist/direct-executor-DAGCZOAN.js +1530 -0
- package/dist/direct-executor.d.ts +90 -31
- package/dist/hooks.d.ts +44 -0
- package/dist/hooks.js +58 -0
- package/dist/index.d.ts +1 -2
- package/dist/index.js +2 -2
- package/dist/internal.d.ts +19 -0
- package/dist/internal.js +164 -0
- package/dist/kv.d.ts +1 -1
- package/dist/kv.js +32 -1
- package/dist/matchers.js +1 -1
- package/dist/protocol.d.ts +3 -29
- package/dist/protocol.js +140 -2
- package/dist/server.d.ts +27 -40
- package/dist/server.js +117 -145
- package/dist/session.d.ts +65 -44
- package/dist/{testing-BbitshLb.js → testing-Dmx-dudh.js} +39 -43
- package/dist/testing.d.ts +9 -14
- package/dist/testing.js +2 -2
- package/dist/types.d.ts +24 -226
- package/dist/types.js +176 -2
- package/dist/types.test-d.d.ts +7 -0
- package/dist/vite-plugin.d.ts +15 -0
- package/dist/vite-plugin.js +82 -0
- package/dist/ws-handler.d.ts +1 -2
- package/package.json +34 -95
- package/dist/_embeddings.d.ts +0 -31
- package/dist/_internal-types-IfPcaJd5.js +0 -61
- package/dist/_internal-types.js +0 -2
- package/dist/_session-ctx.d.ts +0 -73
- package/dist/_session-otel.d.ts +0 -43
- package/dist/_session-persist.d.ts +0 -30
- package/dist/_ssrf-DCp_27V4.js +0 -123
- package/dist/_ssrf.d.ts +0 -30
- package/dist/_ssrf.js +0 -2
- package/dist/_utils-DgzpOMSV.js +0 -61
- package/dist/direct-executor-B-5mq3cu.js +0 -570
- package/dist/kv-iXtikQmR.js +0 -32
- package/dist/middleware-core-BwyBIPed.js +0 -107
- package/dist/middleware-core.d.ts +0 -47
- package/dist/middleware-core.js +0 -2
- package/dist/middleware.d.ts +0 -37
- package/dist/protocol-B-H2Q4ox.js +0 -162
- package/dist/runtime-CxcwaK68.js +0 -58
- package/dist/runtime.js +0 -2
- package/dist/s2s-M7JqtgFw.js +0 -272
- package/dist/s2s.js +0 -2
- package/dist/session-BYlwcrya.js +0 -683
- package/dist/session.js +0 -2
- package/dist/telemetry-CJlaDFNc.js +0 -95
- package/dist/telemetry.d.ts +0 -49
- package/dist/telemetry.js +0 -2
- package/dist/types-D8ZBxTL_.js +0 -192
- package/dist/unstorage-kv-CDgP-frt.js +0 -64
- package/dist/unstorage-kv.js +0 -2
- package/dist/unstorage-vector-Cj5llNhg.js +0 -172
- package/dist/unstorage-vector.d.ts +0 -47
- package/dist/unstorage-vector.js +0 -2
- package/dist/vector.d.ts +0 -86
- package/dist/vector.js +0 -49
- package/dist/worker-entry-2jaiqIj0.js +0 -70
- package/dist/worker-entry.d.ts +0 -47
- package/dist/worker-entry.js +0 -2
- package/dist/ws-handler-C0Q6eSay.js +0 -207
- package/dist/ws-handler.js +0 -2
|
@@ -0,0 +1,1530 @@
|
|
|
1
|
+
import { BuiltinToolSchema, DEFAULT_INSTRUCTIONS, ToolChoiceSchema, defineTool } from "./types.js";
|
|
2
|
+
import { a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_HTML_BYTES, f as MAX_PAGE_CHARS, g as TOOL_EXECUTION_TIMEOUT_MS, h as RUN_CODE_TIMEOUT_MS, l as HOOK_TIMEOUT_MS, m as MAX_VALUE_SIZE, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_TOOL_RESULT_CHARS, s as DEFAULT_TTS_SAMPLE_RATE } from "./constants-CwotjpJR.js";
|
|
3
|
+
import { errorDetail, errorMessage, toolError } from "./_utils.js";
|
|
4
|
+
import { callResolveTurnConfig, createAgentHooks } from "./hooks.js";
|
|
5
|
+
import { ClientMessageSchema, buildReadyConfig } from "./protocol.js";
|
|
6
|
+
import { matchGlob, sortAndPaginate } from "./kv.js";
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import WsWebSocket from "ws";
|
|
9
|
+
import pTimeout from "p-timeout";
|
|
10
|
+
import { createStorage, prefixStorage } from "unstorage";
|
|
11
|
+
import vm from "node:vm";
|
|
12
|
+
import { createNanoEvents } from "nanoevents";
|
|
13
|
+
//#region runtime.ts
|
|
14
|
+
/**
|
|
15
|
+
* Runtime dependencies injected into the session pipeline.
|
|
16
|
+
*
|
|
17
|
+
* Defines the {@link Logger} interface, a default {@link consoleLogger},
|
|
18
|
+
* and the {@link S2SConfig} for Speech-to-Speech endpoint configuration.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Default logger that forwards every entry to the global `console`.
 *
 * `info` maps to `console.log`; `warn`, `error` and `debug` map to the
 * console method of the same name. The context argument is only passed
 * along when it is truthy, so plain messages are not followed by `undefined`.
 */
const consoleLogger = (() => {
	// The console method is looked up on every call (not captured once), so
	// replacing `console.log` etc. after module load is still honoured.
	const forwardTo = (method) => (msg, ctx) => ctx ? console[method](msg, ctx) : console[method](msg);
	return {
		info: forwardTo("log"),
		warn: forwardTo("warn"),
		error: forwardTo("error"),
		debug: forwardTo("debug")
	};
})();
|
|
27
|
+
/**
 * Build a structured JSON log function for one level.
 *
 * Each call writes a single-line JSON object followed by a newline. The
 * object carries `timestamp` (ISO-8601), `level`, `msg`, and the caller's
 * context fields. Context fields are merged with `Object.assign`, so a
 * context key named `timestamp`, `level` or `msg` overrides the built-in one.
 *
 * `"error"` and `"warn"` entries go to `process.stderr`; every other level
 * goes to `process.stdout`.
 *
 * If the merged entry cannot be serialized (for example a cyclic context or a
 * BigInt value), the logger does not throw: it writes a reduced entry without
 * the context and with a `logError` field describing the failure.
 *
 * @param level - Level name stored in each entry and used to pick the stream.
 * @returns A `(msg, ctx?) => void` log function.
 */
function jsonLog(level) {
	const useStderr = level === "error" || level === "warn";
	return (msg, ctx) => {
		const timestamp = (/* @__PURE__ */ new Date()).toISOString();
		const entry = {
			timestamp,
			level,
			msg
		};
		if (ctx) Object.assign(entry, ctx);
		let line;
		try {
			line = JSON.stringify(entry);
		} catch (err) {
			// A logger must never take down its caller: drop the context that
			// could not be serialized and record why.
			line = JSON.stringify({
				timestamp,
				level,
				msg: String(msg),
				logError: `log context not serializable: ${err instanceof Error ? err.message : String(err)}`
			});
		}
		(useStderr ? process.stderr : process.stdout).write(`${line}\n`);
	};
}
|
|
43
|
+
/**
 * Logger whose four levels each write one JSON line via {@link jsonLog}.
 * Keys are created in the order `info`, `warn`, `error`, `debug`.
 */
const jsonLogger = Object.fromEntries(["info", "warn", "error", "debug"].map((level) => [level, jsonLog(level)]));
/**
 * Default S2S endpoint configuration: the realtime WebSocket URL plus the
 * input/output sample rates taken from the shared STT/TTS defaults.
 */
const DEFAULT_S2S_CONFIG = {
	wssUrl: "wss://speech-to-speech.us.assemblyai.com/v1/realtime",
	inputSampleRate: DEFAULT_STT_SAMPLE_RATE,
	outputSampleRate: DEFAULT_TTS_SAMPLE_RATE
};
|
|
55
|
+
//#endregion
|
|
56
|
+
//#region _internal-types.ts
|
|
57
|
+
/**
 * Zod schema for serializable agent configuration sent over the wire.
 *
 * This is the JSON-safe subset of the agent definition that can be
 * transmitted between the worker and the host process via structured clone.
 *
 * Required fields: `name` (non-empty), `instructions`, `greeting`.
 * Every other field is optional.
 */
const AgentConfigSchema = z.object({
	// Non-empty agent name.
	name: z.string().min(1),
	// Any string is accepted here, including the empty string.
	instructions: z.string(),
	greeting: z.string(),
	sttPrompt: z.string().optional(),
	// Positive integer when present; see toAgentConfig for how function-valued maxSteps is handled.
	maxSteps: z.number().int().positive().optional(),
	toolChoice: ToolChoiceSchema.optional(),
	// Read-only array of built-in tool names validated by BuiltinToolSchema.
	builtinTools: z.array(BuiltinToolSchema).readonly().optional(),
	// Zero is allowed (nonnegative), fractional values are not rejected.
	idleTimeoutMs: z.number().nonnegative().optional()
});
|
|
73
|
+
/**
 * Copy the serializable {@link AgentConfig} fields out of a source object.
 *
 * `name`, `instructions` and `greeting` are always copied. Optional fields
 * are only present on the result when the source defines them:
 * - `sttPrompt`, `toolChoice`, `idleTimeoutMs`: copied unless `undefined`.
 * - `maxSteps`: copied unless `undefined` or a function (a function cannot be
 *   sent over the wire).
 * - `builtinTools`: shallow-copied into a new array when truthy.
 *
 * @param src - Object carrying at least the agent config fields.
 * @returns A new plain object holding only the fields listed above.
 */
function toAgentConfig(src) {
	const { sttPrompt, maxSteps, toolChoice, builtinTools, idleTimeoutMs } = src;
	const keepMaxSteps = maxSteps !== undefined && typeof maxSteps !== "function";
	return {
		name: src.name,
		instructions: src.instructions,
		greeting: src.greeting,
		...(sttPrompt === undefined ? {} : { sttPrompt }),
		...(keepMaxSteps ? { maxSteps } : {}),
		...(toolChoice === undefined ? {} : { toolChoice }),
		...(builtinTools ? { builtinTools: [...builtinTools] } : {}),
		...(idleTimeoutMs === undefined ? {} : { idleTimeoutMs })
	};
}
|
|
87
|
+
/**
 * Zod schema for serialized tool definitions sent over the wire.
 *
 * `parameters` must be a valid JSON Schema object (with `type`, `properties`,
 * etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
 *
 * Note that this schema itself only checks that `parameters` is a record with
 * string keys; it does not validate that the record is well-formed JSON Schema.
 */
const ToolSchemaSchema = z.object({
	// Both name and description must be non-empty strings.
	name: z.string().min(1),
	description: z.string().min(1),
	parameters: z.record(z.string(), z.unknown())
});
/** Empty Zod object schema used as default when tools have no parameters. */
const EMPTY_PARAMS = z.object({});
|
|
100
|
+
/**
 * Turn a record of agent tool definitions into wire-format tool schemas.
 *
 * For every `[name, def]` entry the result holds `{ name, description,
 * parameters }`, where `parameters` is the JSON Schema produced by
 * `z.toJSONSchema` from the tool's Zod `parameters` (or from the empty object
 * schema when the tool declares none). Entry order follows `Object.entries`.
 *
 * @param tools - Record mapping tool name to tool definition.
 * @returns Array of plain, JSON-serializable tool schemas.
 */
function agentToolsToSchemas(tools) {
	const schemas = [];
	for (const [name, def] of Object.entries(tools)) {
		schemas.push({
			name,
			description: def.description,
			parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
		});
	}
	return schemas;
}
|
|
113
|
+
//#endregion
|
|
114
|
+
//#region _run-code.ts
|
|
115
|
+
/**
|
|
116
|
+
* run_code built-in tool — executes user JavaScript in a fresh `node:vm`
|
|
117
|
+
* context with no network, filesystem, or process access.
|
|
118
|
+
*/
|
|
119
|
+
const runCodeParams = z.object({ code: z.string().describe("JavaScript code to execute. Use console.log() for output.") });
|
|
120
|
+
/**
 * Build the `run_code` tool definition.
 *
 * The returned tool exposes a description, the `runCodeParams` schema, and an
 * async `execute` that hands `args.code` to `executeInIsolate` and returns
 * whatever that call resolves to. All sandboxing and timeout behaviour lives
 * in `executeInIsolate`; this factory adds none of its own.
 *
 * @returns Tool definition with `description`, `parameters` and `execute`.
 */
function createRunCode() {
	const description = "Execute JavaScript code in a sandbox and return the output. Use this for calculations, data transformations, string manipulation, or any task that benefits from running code. Output is captured from console.log(). No network or filesystem access.";
	return {
		description,
		parameters: runCodeParams,
		execute: async (args) => executeInIsolate(args.code)
	};
}
|
|
142
|
+
/**
 * Execute user code in a fresh `node:vm` context.
 *
 * The code is wrapped in an async IIFE, so top-level `await` and `return`
 * work. Everything passed to the sandbox `console` methods is stringified,
 * joined with spaces, and collected line by line.
 *
 * Two timeouts apply, both `RUN_CODE_TIMEOUT_MS`: the `vm` timeout bounds the
 * synchronous part of the script, and a timer raced against the returned
 * promise bounds asynchronous work. The timer is cleared as soon as the race
 * settles so it does not linger after the call returns.
 *
 * @remarks
 * The context exposes standard ECMAScript globals, the captured console, and
 * the host's timer, URL, text-encoding, base64, `structuredClone` and
 * `queueMicrotask` functions. `process`, `require` and `import()` are not
 * provided.
 *
 * SECURITY NOTE(review): the injected functions are host-realm objects, so
 * sandboxed code can reach the host `Function` constructor through them
 * (e.g. `setTimeout.constructor`). The Node.js documentation states that
 * `node:vm` is not a security mechanism — confirm this is acceptable for the
 * threat model, or run the code in a separate process/isolate.
 *
 * @param code - JavaScript source to run.
 * @returns The trimmed captured output, a fixed success message when nothing
 * was printed, or `{ error }` when the code throws or times out.
 */
async function executeInIsolate(code) {
	const output = [];
	const capture = (...args) => output.push(args.map(String).join(" "));
	const context = vm.createContext({
		console: {
			log: capture,
			info: capture,
			warn: capture,
			error: capture,
			debug: capture
		},
		setTimeout,
		clearTimeout,
		setInterval,
		clearInterval,
		URL,
		URLSearchParams,
		TextEncoder,
		TextDecoder,
		atob,
		btoa,
		structuredClone,
		queueMicrotask
	});
	let timer;
	try {
		const wrapped = `(async () => {\n${code}\n})()`;
		const promise = new vm.Script(wrapped, { filename: "run_code.js" }).runInContext(context, { timeout: RUN_CODE_TIMEOUT_MS });
		const timedOut = new Promise((_, reject) => {
			timer = setTimeout(() => reject(/* @__PURE__ */ new Error("Code execution timed out")), RUN_CODE_TIMEOUT_MS);
		});
		await Promise.race([promise, timedOut]);
		return output.join("\n").trim() || "Code ran successfully (no output)";
	} catch (err) {
		return { error: errorMessage(err) };
	} finally {
		// Without this the timer stays armed for the full timeout after every
		// call, keeping the event loop busy for no reason.
		clearTimeout(timer);
	}
}
|
|
183
|
+
//#endregion
|
|
184
|
+
//#region memory-tools.ts
|
|
185
|
+
/**
|
|
186
|
+
* KV-backed memory tools for agent persistent state.
|
|
187
|
+
*/
|
|
188
|
+
/**
 * Build the standard set of KV-backed memory tools: `save_memory`,
 * `recall_memory`, `list_memories` and `forget_memory`.
 *
 * Each tool works through `ctx.kv` of the tool context it is executed with:
 * - `save_memory` calls `kv.set(key, value)` and returns `{ saved: key }`.
 * - `recall_memory` calls `kv.get(key)`; a `null` result is reported as
 *   `{ found: false, key }`, anything else as `{ found: true, key, value }`.
 * - `list_memories` calls `kv.list(prefix ?? "")` and returns the number of
 *   entries plus their keys.
 * - `forget_memory` calls `kv.delete(key)` and returns `{ deleted: key }`.
 *
 * Spread the result into your agent's `tools` record.
 *
 * @example
 * ```ts
 * import { defineAgent, memoryTools } from "aai";
 *
 * export default defineAgent({
 *   name: "My Agent",
 *   tools: { ...memoryTools() },
 * });
 * ```
 *
 * @returns A record with the four tool definitions named above.
 * @public
 */
function memoryTools() {
	const saveMemory = async (args, ctx) => {
		await ctx.kv.set(args.key, args.value);
		return { saved: args.key };
	};
	const recallMemory = async (args, ctx) => {
		const { key } = args;
		const value = await ctx.kv.get(key);
		return value === null ? {
			found: false,
			key
		} : {
			found: true,
			key,
			value
		};
	};
	const listMemories = async (args, ctx) => {
		const entries = await ctx.kv.list(args.prefix ?? "");
		return {
			count: entries.length,
			keys: entries.map((entry) => entry.key)
		};
	};
	const forgetMemory = async (args, ctx) => {
		await ctx.kv.delete(args.key);
		return { deleted: args.key };
	};
	return {
		save_memory: defineTool({
			description: "Save a piece of information to persistent memory. Use a descriptive key like 'user:name' or 'project:status'.",
			parameters: z.object({
				key: z.string().describe("A descriptive key for this memory (e.g. 'user:name', 'preference:color')"),
				value: z.string().describe("The information to remember")
			}),
			execute: saveMemory
		}),
		recall_memory: defineTool({
			description: "Retrieve a previously saved memory by its key.",
			parameters: z.object({ key: z.string().describe("The key to look up") }),
			execute: recallMemory
		}),
		list_memories: defineTool({
			description: "List all saved memory keys, optionally filtered by a prefix (e.g. 'user:').",
			parameters: z.object({ prefix: z.string().describe("Prefix to filter keys (e.g. 'user:'). Use empty string for all.").optional() }),
			execute: listMemories
		}),
		forget_memory: defineTool({
			description: "Delete a previously saved memory by its key.",
			parameters: z.object({ key: z.string().describe("The key to delete") }),
			execute: forgetMemory
		})
	};
}
|
|
258
|
+
//#endregion
|
|
259
|
+
//#region builtin-tools.ts
|
|
260
|
+
/**
|
|
261
|
+
* Built-in tool definitions for the AAI agent SDK.
|
|
262
|
+
*
|
|
263
|
+
* In self-hosted mode, these run in-process alongside custom tools.
|
|
264
|
+
* In platform mode, they run on the host process outside the sandbox.
|
|
265
|
+
* Network requests go through the host's fetch proxy (with SSRF protection).
|
|
266
|
+
*/
|
|
267
|
+
/** Creates a new AbortSignal that aborts after FETCH_TIMEOUT_MS; called once per request so each fetch gets its own timer. */
const fetchSignal = () => AbortSignal.timeout(FETCH_TIMEOUT_MS);
|
|
268
|
+
/**
 * Reduce an HTML document to plain text.
 *
 * Steps, in order:
 * 1. drop `script` and `style` elements together with their contents,
 * 2. replace every remaining tag with a space,
 * 3. decode the common character references (the named ones for ampersand,
 *    less-than, greater-than, double quote and non-breaking space, plus the
 *    numeric reference 39 for the apostrophe),
 * 4. collapse runs of whitespace to a single space and trim.
 *
 * Decoding happens in a single pass, so text that was escaped twice in the
 * source is only unescaped once. Any other character reference is left as is.
 *
 * @param html - Raw HTML markup.
 * @returns The visible text on a single line.
 */
function htmlToText(html) {
	const decoded = {
		amp: "&",
		lt: "<",
		gt: ">",
		quot: "\"",
		"#39": "'",
		nbsp: " "
	};
	return html
		.replace(/<script[\s\S]*?<\/script>/gi, "")
		.replace(/<style[\s\S]*?<\/style>/gi, "")
		.replace(/<[^>]+>/g, " ")
		.replace(/&(amp|lt|gt|quot|#39|nbsp);/g, (_, name) => decoded[name])
		.replace(/\s{2,}/g, " ")
		.trim();
}
|
|
272
|
+
/** Parameter schema for the web_search tool: required `query`, optional numeric `max_results`. */
const webSearchParams = z.object({
	query: z.string().describe("The search query"),
	// Any number passes this schema; no integer or range constraint is declared here.
	max_results: z.number().describe("Maximum number of results to return (default 5)").optional()
});
/** Brave Search web-search endpoint queried by the web_search tool. */
const BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search";
/**
 * Shape of the part of the Brave response that web_search reads: an optional
 * `web` object holding `results`, each with string `title`, `url` and
 * `description`.
 */
const BraveSearchResponseSchema = z.object({ web: z.object({ results: z.array(z.object({
	title: z.string(),
	url: z.string(),
	description: z.string()
})) }).optional() });
|
|
282
|
+
/**
 * Build the `web_search` tool, backed by the Brave Search API.
 *
 * `execute` reads the API key from `ctx.env.BRAVE_API_KEY`, queries
 * `BRAVE_SEARCH_URL`, validates the response with
 * `BraveSearchResponseSchema`, and returns an array of
 * `{ title, url, description }`. Failures that the tool detects itself
 * (missing key, non-2xx status, unexpected response shape) are returned as
 * `{ error }` rather than thrown; errors raised by `fetchFn` or by JSON
 * parsing propagate to the caller.
 *
 * `max_results` defaults to 5 and is normalized to a whole number between 1
 * and 20 before use, so negative, fractional or oversized values supplied by
 * the model cannot produce an invalid `count` query parameter or an
 * unexpected slice. 20 is the largest `count` the Brave web search endpoint
 * documents.
 *
 * @param fetchFn - Fetch implementation to use; defaults to the global `fetch`.
 * @returns Tool definition with `description`, `parameters` and `execute`.
 */
function createWebSearch(fetchFn = globalThis.fetch) {
	const DEFAULT_RESULTS = 5;
	const MAX_RESULTS = 20;
	return {
		description: "Search the web for current information, facts, news, or answers to questions. Returns a list of results with title, URL, and description. Use this when the user asks about something you don't know, need up-to-date information, or want to verify facts.",
		parameters: webSearchParams,
		async execute(args, ctx) {
			const { query, max_results: requested = DEFAULT_RESULTS } = args;
			const maxResults = Number.isFinite(requested) ? Math.min(Math.max(Math.trunc(requested), 1), MAX_RESULTS) : DEFAULT_RESULTS;
			const apiKey = ctx.env.BRAVE_API_KEY ?? "";
			if (!apiKey) return { error: "BRAVE_API_KEY is not set — web search unavailable" };
			const resp = await fetchFn(`${BRAVE_SEARCH_URL}?${new URLSearchParams({
				q: query,
				count: String(maxResults),
				text_decorations: "false"
			})}`, {
				headers: { "X-Subscription-Token": apiKey },
				signal: fetchSignal()
			});
			if (!resp.ok) return { error: `Search request failed: ${resp.status} ${resp.statusText}` };
			const raw = await resp.json();
			const data = BraveSearchResponseSchema.safeParse(raw);
			if (!data.success) return { error: "Unexpected search response format" };
			// Copy only the three documented fields so extra response data never reaches the model.
			return (data.data.web?.results ?? []).slice(0, maxResults).map((r) => ({
				title: r.title,
				url: r.url,
				description: r.description
			}));
		}
	};
}
|
|
310
|
+
const visitWebpageParams = z.object({ url: z.string().describe("The full URL to fetch (e.g., 'https://example.com/page')") });
|
|
311
|
+
/**
 * Build the `visit_webpage` tool.
 *
 * `execute` fetches `args.url` with a browser-like `User-Agent`/`Accept`
 * pair and a timeout signal, converts the body to plain text with
 * `htmlToText`, and returns `{ url, content }`. When the text exceeds
 * `MAX_PAGE_CHARS` the content is cut to that length and the result also
 * carries `truncated: true` and `totalChars` (the length before cutting).
 * A non-2xx response is returned as `{ error, url }`; errors raised by
 * `fetchFn` propagate to the caller.
 *
 * Before conversion the markup is capped at `MAX_HTML_BYTES`. The cap is
 * applied with `String.prototype.slice`, i.e. it counts UTF-16 code units of
 * the decoded text rather than bytes on the wire.
 *
 * @param fetchFn - Fetch implementation to use; defaults to the global `fetch`.
 * @returns Tool definition with `description`, `parameters` and `execute`.
 */
function createVisitWebpage(fetchFn = globalThis.fetch) {
	return {
		description: "Fetch a webpage and return its content as clean text. Use this to read the full content of a URL found via web_search, or any link the user shares. Good for reading articles, documentation, blog posts, or product pages.",
		parameters: visitWebpageParams,
		async execute(args, _ctx) {
			const { url } = args;
			const resp = await fetchFn(url, {
				headers: {
					"User-Agent": "Mozilla/5.0 (compatible; VoiceAgent/1.0; +https://github.com/AssemblyAI/aai)",
					Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
				},
				signal: fetchSignal()
			});
			if (!resp.ok) return {
				error: `Failed to fetch: ${resp.status} ${resp.statusText}`,
				url
			};
			const htmlContent = await resp.text();
			// `slice` returns shorter strings unchanged, so the cap needs no
			// separate length check and always follows MAX_HTML_BYTES.
			const text = htmlToText(htmlContent.slice(0, MAX_HTML_BYTES));
			const truncated = text.length > MAX_PAGE_CHARS;
			return {
				url,
				content: truncated ? text.slice(0, MAX_PAGE_CHARS) : text,
				...truncated ? {
					truncated: true,
					totalChars: text.length
				} : {}
			};
		}
	};
}
|
|
342
|
+
/** Parameter schema for the fetch_json tool: required `url`, optional string-to-string `headers` record. */
const fetchJsonParams = z.object({
	url: z.string().describe("The URL to fetch JSON from"),
	headers: z.record(z.string(), z.string()).describe("Optional HTTP headers to include in the request (only safe headers like Accept, Content-Type are allowed)").optional()
});
/**
 * Headers the LLM must never control — could exfiltrate credentials or manipulate routing.
 *
 * Names are stored in lower case; sanitizeHeaders lower-cases each incoming
 * name before checking membership, which makes the match case-insensitive.
 */
const BLOCKED_FETCH_HEADERS = new Set([
	// Credentials and session state.
	"authorization",
	"cookie",
	"set-cookie",
	// Request routing.
	"host",
	"proxy-authorization",
	// Client-address and forwarding headers.
	"x-forwarded-for",
	"x-forwarded-host",
	"x-forwarded-proto",
	"x-real-ip",
	"cf-connecting-ip",
	"fly-client-ip"
]);
|
|
360
|
+
/**
 * Remove blocked headers from a caller-supplied header record.
 *
 * A header is dropped when its lower-cased name is in
 * `BLOCKED_FETCH_HEADERS`; all others are copied with their original casing.
 *
 * @param raw - Header record, or a falsy value when no headers were given.
 * @returns A new record with the allowed headers, or `undefined` when `raw`
 * is falsy or nothing is left after filtering.
 */
function sanitizeHeaders(raw) {
	if (!raw) return undefined;
	const allowed = {};
	for (const name of Object.keys(raw)) {
		if (BLOCKED_FETCH_HEADERS.has(name.toLowerCase())) continue;
		allowed[name] = raw[name];
	}
	if (Object.keys(allowed).length === 0) return undefined;
	return allowed;
}
|
|
366
|
+
/**
 * Build the `fetch_json` tool.
 *
 * `execute` passes the caller's headers through `sanitizeHeaders`, performs
 * the request with a timeout signal, and returns the parsed JSON body. The
 * `headers` option is only sent when at least one header survives
 * sanitizing. A non-2xx response yields `{ error, url }`; a body that does
 * not parse as JSON yields `{ error: "Response was not valid JSON", url }`.
 * Errors raised by `fetchFn` itself propagate to the caller.
 *
 * @param fetchFn - Fetch implementation to use; defaults to the global `fetch`.
 * @returns Tool definition with `description`, `parameters` and `execute`.
 */
function createFetchJson(fetchFn = globalThis.fetch) {
	return {
		description: "Call a REST API endpoint via HTTP GET and return the JSON response. Use this to fetch structured data from APIs — for example, weather data, stock prices, exchange rates, or any public JSON API. Supports custom headers for authenticated APIs.",
		parameters: fetchJsonParams,
		async execute(args, _ctx) {
			const target = args.url;
			const allowedHeaders = sanitizeHeaders(args.headers);
			const init = allowedHeaders ? {
				headers: allowedHeaders,
				signal: fetchSignal()
			} : { signal: fetchSignal() };
			const resp = await fetchFn(target, init);
			if (!resp.ok) return {
				error: `HTTP ${resp.status} ${resp.statusText}`,
				url: target
			};
			let payload;
			try {
				payload = await resp.json();
			} catch {
				return {
					error: "Response was not valid JSON",
					url: target
				};
			}
			return payload;
		}
	};
}
|
|
392
|
+
/**
 * Resolve a builtin name to an array of `[toolName, ToolDef]` pairs.
 *
 * `web_search`, `visit_webpage` and `fetch_json` each yield one tool built
 * with `opts?.fetch`; `run_code` yields one tool; `memory` expands to every
 * entry of `memoryTools()`. Any other name yields an empty array.
 *
 * @param name - Builtin tool name.
 * @param opts - Optional settings; only `opts.fetch` is read here.
 * @returns Zero or more `[toolName, ToolDef]` pairs.
 */
function resolveBuiltin(name, opts) {
	const fetchImpl = opts?.fetch;
	// A Map (rather than an object literal) keeps names such as "constructor"
	// from matching inherited properties.
	const builders = new Map([
		["web_search", () => [["web_search", createWebSearch(fetchImpl)]]],
		["visit_webpage", () => [["visit_webpage", createVisitWebpage(fetchImpl)]]],
		["fetch_json", () => [["fetch_json", createFetchJson(fetchImpl)]]],
		["run_code", () => [["run_code", createRunCode()]]],
		["memory", () => Object.entries(memoryTools())]
	]);
	const build = builders.get(name);
	return build ? build() : [];
}
|
|
403
|
+
/**
 * Create built-in tool definitions for the given builtin names, for runtime
 * use.
 *
 * Every pair returned by `resolveBuiltin` is stored under its tool name; when
 * two builtins produce the same tool name, the later one wins. Unknown names
 * contribute nothing.
 *
 * @param names - Iterable of builtin names.
 * @param opts - Options forwarded unchanged to `resolveBuiltin`.
 * @returns Record mapping tool name to tool definition.
 */
function getBuiltinToolDefs(names, opts) {
	const registry = {};
	for (const builtin of names) {
		const pairs = resolveBuiltin(builtin, opts);
		for (const [toolName, toolDef] of pairs) {
			registry[toolName] = toolDef;
		}
	}
	return registry;
}
|
|
412
|
+
/**
 * Returns JSON tool schemas for the specified builtin tools.
 *
 * Each builtin name is resolved (without options) and every resulting tool
 * becomes `{ name, description, parameters }`, with `parameters` converted to
 * JSON Schema via `z.toJSONSchema`. Tools without a parameter schema use the
 * empty object schema. Unknown names contribute nothing.
 *
 * @param names - Array of builtin names.
 * @returns Flat array of tool schemas in input order.
 */
function getBuiltinToolSchemas(names) {
	return names.reduce((schemas, builtin) => {
		for (const [toolName, def] of resolveBuiltin(builtin)) {
			schemas.push({
				name: toolName,
				description: def.description,
				parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
			});
		}
		return schemas;
	}, []);
}
|
|
420
|
+
//#endregion
|
|
421
|
+
//#region s2s.ts
|
|
422
|
+
/** Encode raw bytes as a base64 string. */
const uint8ToBase64 = (bytes) => {
	const buffer = Buffer.from(bytes);
	return buffer.toString("base64");
};
/** Decode a base64 string into a new `Uint8Array` (a copy, not a `Buffer`). */
const base64ToUint8 = (base64) => {
	const decoded = Buffer.from(base64, "base64");
	return new Uint8Array(decoded);
};
/** `readyState` value that the send paths treat as "socket is open". */
const WS_OPEN = 1;
/** Default socket factory: opens a `ws` WebSocket to `url`, passing `opts.headers` as request headers. */
const defaultCreateS2sWebSocket = (url, opts) => {
	const { headers } = opts;
	return new WsWebSocket(url, { headers });
};
|
|
426
|
+
/**
 * Check that every listed property of `obj` holds a string.
 *
 * @param obj - Object to inspect.
 * @param keys - Property names that must be strings.
 * @returns `true` when all listed properties are strings (also when no keys
 * are given), `false` as soon as one is missing or of another type.
 */
function hasStringFields(obj, ...keys) {
	return keys.every((key) => typeof obj[key] === "string");
}
|
|
430
|
+
/**
 * Validate and normalize a `transcript.agent` message.
 *
 * `text` must be a string, otherwise the message is rejected. `reply_id` and
 * `item_id` fall back to the empty string when they are not strings, and
 * `interrupted` is `true` only when the incoming value is exactly `true`.
 * Properties other than these are not carried over.
 *
 * @param obj - Decoded message object.
 * @returns The normalized message, or `undefined` when `text` is not a string.
 */
function parseAgentTranscript(obj) {
	const { text, reply_id: replyId, item_id: itemId, interrupted } = obj;
	if (typeof text !== "string") return undefined;
	const stringOrEmpty = (value) => typeof value === "string" ? value : "";
	return {
		type: "transcript.agent",
		text,
		reply_id: stringOrEmpty(replyId),
		item_id: stringOrEmpty(itemId),
		interrupted: interrupted === true
	};
}
|
|
440
|
+
/**
 * Validate and normalize a `tool.call` message.
 *
 * `call_id` and `name` must both be strings. `args` is kept only when it is a
 * non-null, non-array object; any other value is replaced by an empty object.
 *
 * @param obj - Decoded message object.
 * @returns The normalized message, or `undefined` when `call_id` or `name`
 * is not a string.
 */
function parseToolCall(obj) {
	const { call_id: callId, name, args: rawArgs } = obj;
	const hasIdentity = typeof callId === "string" && typeof name === "string";
	if (!hasIdentity) return undefined;
	const isRecord = rawArgs != null && typeof rawArgs === "object" && !Array.isArray(rawArgs);
	return {
		type: "tool.call",
		call_id: callId,
		name,
		args: isRecord ? rawArgs : {}
	};
}
|
|
450
|
+
/**
 * Validator for message types that need no field checks: hands the message
 * back exactly as received (same object, no copy).
 */
function passthrough(obj) {
	return obj;
}
|
|
453
|
+
/**
 * Create a validator that accepts a message only when every listed property
 * is a string.
 *
 * @param keys - Property names that must hold strings.
 * @returns A function returning the message itself when valid, otherwise
 * `undefined`.
 */
function requireFields(...keys) {
	return (obj) => {
		if (!hasStringFields(obj, ...keys)) return undefined;
		return obj;
	};
}
|
|
456
|
+
/**
 * Validator per known S2S message type.
 *
 * Each validator takes the decoded message and returns the message to
 * dispatch (sometimes normalized), or `undefined` to reject it. Types absent
 * from this map have no validator.
 */
const MESSAGE_VALIDATORS = new Map()
	.set("session.ready", requireFields("session_id"))
	.set("session.updated", passthrough)
	.set("input.speech.started", passthrough)
	.set("input.speech.stopped", passthrough)
	.set("reply.content_part.started", passthrough)
	.set("reply.content_part.done", passthrough)
	.set("transcript.user.delta", requireFields("text"))
	.set("transcript.user", requireFields("item_id", "text"))
	.set("reply.started", requireFields("reply_id"))
	.set("transcript.agent.delta", requireFields("delta"))
	.set("transcript.agent", parseAgentTranscript)
	.set("tool.call", parseToolCall)
	.set("reply.done", (obj) => {
		// Never rejected; `status` is carried over only when it is a string.
		const done = { type: "reply.done" };
		if (typeof obj.status === "string") done.status = obj.status;
		return done;
	})
	.set("session.error", requireFields("code", "message"))
	.set("error", requireFields("message"));
|
|
476
|
+
/**
 * Validate a decoded S2S message using the validator registered for its type.
 *
 * @param obj - Decoded message object.
 * @returns The validator's result, or `undefined` when `type` is not a
 * string, no validator is registered for it, or the validator rejects the
 * message.
 */
function parseS2sMessage(obj) {
	const { type } = obj;
	if (typeof type !== "string") return undefined;
	const validate = MESSAGE_VALIDATORS.get(type);
	if (validate == null) return undefined;
	return validate(obj);
}
|
|
481
|
+
/**
 * Translate a validated S2S wire message into an emitter event.
 *
 * Wire field names (snake_case) are mapped to camelCase payloads. Message
 * types with nothing to report (`reply.content_part.*` and any unknown type)
 * emit nothing. A `session.error` with code `session_not_found` or
 * `session_forbidden` is emitted as `sessionExpired`; every other
 * `session.error` is emitted as `error`. A bare `error` message is emitted as
 * `error` with the fixed code `"connection"`.
 *
 * @param emitter - Event emitter exposing `emit(event, payload?)`.
 * @param msg - Message previously accepted by `parseS2sMessage`.
 */
function dispatchS2sMessage(emitter, msg) {
	// Work out the event name and payload first, then emit from one place.
	// Events without a payload are one-element tuples so `emit` receives a
	// single argument.
	const toEvent = () => {
		switch (msg.type) {
			case "session.ready": return ["ready", { sessionId: msg.session_id }];
			case "session.updated": return ["sessionUpdated", msg];
			case "input.speech.started": return ["speechStarted"];
			case "input.speech.stopped": return ["speechStopped"];
			case "transcript.user.delta": return ["userTranscriptDelta", { text: msg.text }];
			case "transcript.user": return ["userTranscript", {
				itemId: msg.item_id,
				text: msg.text
			}];
			case "reply.started": return ["replyStarted", { replyId: msg.reply_id }];
			case "transcript.agent.delta": return ["agentTranscriptDelta", { text: msg.delta }];
			case "transcript.agent": return ["agentTranscript", {
				text: msg.text,
				replyId: msg.reply_id,
				itemId: msg.item_id,
				interrupted: msg.interrupted
			}];
			case "tool.call": return ["toolCall", {
				callId: msg.call_id,
				name: msg.name,
				args: msg.args
			}];
			case "reply.done": return ["replyDone", msg.status ? { status: msg.status } : {}];
			case "session.error": {
				const expired = msg.code === "session_not_found" || msg.code === "session_forbidden";
				return [expired ? "sessionExpired" : "error", {
					code: msg.code,
					message: msg.message
				}];
			}
			case "error": return ["error", {
				code: "connection",
				message: msg.message
			}];
			default: return null;
		}
	};
	const event = toEvent();
	if (event !== null) emitter.emit(...event);
}
|
|
549
|
+
/**
 * Open a Speech-to-Speech WebSocket and resolve with a handle once it is open.
 *
 * The socket is created through `opts.createWebSocket` with a bearer
 * `Authorization` header built from `opts.apiKey`. Incoming frames are parsed
 * as JSON; `reply.audio` frames are decoded from base64 and emitted as
 * `audio` directly, all other frames go through `parseS2sMessage` and
 * `dispatchS2sMessage`. Invalid JSON, non-object JSON and unrecognised
 * messages are logged as warnings and dropped.
 *
 * The returned promise rejects when the socket reports an error or closes
 * before it has opened. After opening, socket errors are emitted as `error`
 * events with code `"ws_error"`, and a `close` event is emitted whenever the
 * socket closes.
 *
 * Handle methods: `on` (subscribe to events), `sendAudio`, `sendToolResult`,
 * `updateSession`, `resumeSession` and `close`. Sends are dropped (with a
 * debug log) while the socket is not open.
 *
 * @param opts - `{ apiKey, config, createWebSocket, logger? }`; `logger`
 * defaults to `consoleLogger`, `config.wssUrl` is the endpoint to connect to.
 * @returns Promise resolving to the connection handle.
 */
function connectS2s(opts) {
	const { apiKey, config, createWebSocket, logger: log = consoleLogger } = opts;
	return new Promise((resolve, reject) => {
		log.info("S2S connecting", { url: config.wssUrl });
		const socket = createWebSocket(config.wssUrl, { headers: { Authorization: `Bearer ${apiKey}` } });
		const events = createNanoEvents();
		// Distinguishes "failed to connect" (reject) from "failed later" (event).
		let hasOpened = false;
		const isOpen = () => socket.readyState === WS_OPEN;

		/** Serialize and send a control message; audio messages are not logged. */
		const send = (msg) => {
			if (!isOpen()) {
				log.debug("S2S send dropped: socket not open", { type: msg.type });
				return;
			}
			const json = JSON.stringify(msg);
			if (msg.type !== "input.audio") {
				const detail = msg.type === "session.update" ? { payload: json } : void 0;
				log.info(`S2S >> ${msg.type}`, detail);
			}
			socket.send(json);
		};

		const handle = {
			on: events.on.bind(events),
			sendAudio(audio) {
				if (!isOpen()) {
					log.debug("S2S sendAudio dropped: socket not open");
					return;
				}
				// Built by hand instead of JSON.stringify; base64 text needs no escaping.
				socket.send(`{"type":"input.audio","audio":"${uint8ToBase64(audio)}"}`);
			},
			sendToolResult(callId, result) {
				log.info("S2S >> tool.result", {
					call_id: callId,
					resultLength: result.length
				});
				send({
					type: "tool.result",
					call_id: callId,
					result
				});
			},
			updateSession(sessionConfig) {
				const { systemPrompt, ...others } = sessionConfig;
				const session = {
					system_prompt: systemPrompt,
					...others
				};
				send({
					type: "session.update",
					session
				});
			},
			resumeSession(sessionId) {
				send({
					type: "session.resume",
					session_id: sessionId
				});
			},
			close() {
				log.info("S2S closing");
				socket.close();
			}
		};

		socket.addEventListener("open", () => {
			hasOpened = true;
			log.info("S2S WebSocket open");
			resolve(handle);
		});

		/** Parse a frame as JSON; logs and returns `undefined` on failure. */
		const parseFrame = (data) => {
			try {
				return JSON.parse(String(data));
			} catch {
				log.warn("S2S << invalid JSON", { data: String(data).slice(0, 200) });
			}
		};
		/** Emit `audio` for a `reply.audio` frame; reports whether it did. */
		const emitAudioIfPresent = (obj) => {
			const isAudio = obj.type === "reply.audio" && typeof obj.data === "string";
			if (isAudio) events.emit("audio", { audio: base64ToUint8(obj.data) });
			return isAudio;
		};
		/** Log every incoming message except audio frames. */
		const logIncoming = (obj) => {
			if (obj.type === "reply.audio" || obj.type === "input.audio") return;
			log.info(`S2S << ${obj.type}`, obj.type === "transcript.agent.delta" ? { delta: obj.delta } : void 0);
		};
		const onMessage = (ev) => {
			const raw = parseFrame(ev.data);
			if (raw === void 0) return;
			const isRecord = typeof raw === "object" && raw !== null && !Array.isArray(raw);
			if (!isRecord) {
				log.warn("S2S << non-object JSON message", { type: typeof raw });
				return;
			}
			logIncoming(raw);
			if (emitAudioIfPresent(raw)) return;
			const parsed = parseS2sMessage(raw);
			if (parsed) {
				dispatchS2sMessage(events, parsed);
				return;
			}
			log.warn(`S2S << unrecognised message type: ${raw.type ?? JSON.stringify(raw).slice(0, 200)}`);
		};
		socket.addEventListener("message", onMessage);

		socket.addEventListener("close", (ev) => {
			const code = ev.code ?? 0;
			log.info("S2S WebSocket closed", {
				code,
				reason: ev.reason ?? ""
			});
			if (!hasOpened) reject(/* @__PURE__ */ new Error(`WebSocket closed before open (code: ${code})`));
			events.emit("close");
		});

		socket.addEventListener("error", (ev) => {
			const failure = new Error(typeof ev.message === "string" ? ev.message : "WebSocket error");
			log.error("S2S WebSocket error", { error: failure.message });
			if (hasOpened) events.emit("error", {
				code: "ws_error",
				message: failure.message
			});
			else reject(failure);
		});
	});
}
|
|
669
|
+
//#endregion
|
|
670
|
+
//#region system-prompt.ts
|
|
671
|
+
/**
 * Format the current date for inclusion in the system prompt.
 *
 * @returns Today's date in the `en-US` locale with the weekday, month name,
 * day of month and year (long weekday/month, numeric day/year).
 */
function getFormattedDate() {
  const formatter = new Intl.DateTimeFormat("en-US", {
    weekday: "long",
    year: "numeric",
    month: "long",
    day: "numeric",
  });
  return formatter.format(new Date());
}
|
|
679
|
+
/**
 * Output rules appended to the system prompt when the reply will be spoken.
 * Written one prompt line per array entry; the two leading empty entries
 * produce the blank-line separator in front of the section.
 */
const VOICE_RULES = [
  "",
  "",
  "CRITICAL OUTPUT RULES — you MUST follow these for EVERY response:",
  "Your response will be spoken aloud by a TTS system and displayed as plain text.",
  "- NEVER use markdown: no **, no *, no _, no #, no `, no [](), no ---",
  "- NEVER use bullet points (-, *, •) or numbered lists (1., 2.)",
  "- NEVER use code blocks or inline code",
  "- NEVER mention tools, search, APIs, or technical failures to the user. If a tool returns no results, just answer naturally without explaining why.",
  "- Write exactly as you would say it out loud to a friend",
  "- Use short conversational sentences. To list things, say \"First,\" \"Next,\" \"Finally,\"",
  "- Keep responses concise — 1 to 3 sentences max",
].join("\n");
|
|
680
|
+
/**
 * Assemble the system prompt that is sent to the LLM for an agent.
 *
 * The prompt is the concatenation, in this order, of: the default
 * instructions, a sentence stating today's date, the agent's own instructions
 * (skipped when empty or identical to the defaults), an optional tool-call
 * preamble, and the optional voice output rules.
 *
 * @param config - Agent configuration; only `config.instructions` is read here.
 * @param opts.hasTools - When truthy, adds the paragraph telling the LLM to say
 * a short phrase before each tool call.
 * @param opts.voice - When truthy, appends `VOICE_RULES`.
 * @returns The assembled system prompt string.
 */
function buildSystemPrompt(config, opts) {
  const { hasTools } = opts;
  const custom = config.instructions;

  let prompt = DEFAULT_INSTRUCTIONS + `\n\nToday's date is ${getFormattedDate()}.`;
  if (custom && custom !== DEFAULT_INSTRUCTIONS) {
    prompt += `\n\nAgent-Specific Instructions:\n${custom}`;
  }
  if (hasTools) {
    prompt += "\n\nWhen you decide to use a tool, ALWAYS say a brief natural phrase BEFORE the tool call (e.g. \"Let me look that up\" or \"One moment while I check\"). This fills silence while the tool executes. Keep preambles to one short sentence.";
  }
  if (opts.voice) {
    prompt += VOICE_RULES;
  }
  return prompt;
}
|
|
699
|
+
//#endregion
|
|
700
|
+
//#region session.ts
|
|
701
|
+
/**
 * Build the mutable per-session context shared by the S2S event handlers.
 *
 * The returned object carries every field of `opts` plus the session state
 * (`s2s`, `reply`, `turnPromise`, `conversationMessages`, `maxHistory`) and the
 * helper methods defined below.
 *
 * @param opts - Session options. `id`, `agentConfig`, `hooks` and `log` are
 * used by the helpers; `maxHistory` defaults to 200 when nullish.
 * @returns The session context object.
 */
function buildCtx(opts) {
  const { id, agentConfig, hooks, log } = opts;
  const maxHistory = opts.maxHistory ?? 200;
  /** Hook promises still in flight, so shutdown can wait for them. */
  const pendingHooks = /* @__PURE__ */ new Set();

  /** Fresh per-reply bookkeeping for `replyId` (`null` means no active reply). */
  const newReplyState = (replyId) => ({
    pendingTools: [],
    toolCallCount: 0,
    currentReplyId: replyId,
  });

  /** Log a failed hook and forward it to the "error" hook, unless it was the "error" hook that failed. */
  function reportHookFailure(name, err) {
    log.warn(`${name} hook failed`, { err: errorMessage(err) });
    if (name === "error") return;
    const forwarded = hooks.callHook("error", id, { message: errorMessage(err) });
    if (forwarded && typeof forwarded.catch === "function") {
      forwarded.catch((e) => {
        log.warn("error hook failed", { err: errorMessage(e) });
      });
    }
  }

  const ctx = {
    ...opts,
    s2s: null,
    reply: newReplyState(null),
    turnPromise: null,
    conversationMessages: [],
    maxHistory,

    /** Ask the hooks for the per-turn configuration. */
    resolveTurnConfig() {
      return callResolveTurnConfig(hooks, id, HOOK_TIMEOUT_MS);
    },

    /**
     * Count one tool call against the active reply.
     * Returns `null` when the call may proceed, otherwise a tool-error string
     * (stale reply, or the step limit has been exceeded).
     */
    consumeToolCallStep(turnConfig, _name, replyId) {
      const isStale = replyId === null || replyId !== ctx.reply.currentReplyId;
      if (isStale) return toolError("Reply was interrupted. Discarding stale tool call.");

      const maxSteps = turnConfig?.maxSteps ?? agentConfig.maxSteps;
      ctx.reply.toolCallCount += 1;
      if (maxSteps !== undefined && ctx.reply.toolCallCount > maxSteps) {
        log.info("maxSteps exceeded, refusing tool call", {
          toolCallCount: ctx.reply.toolCallCount,
          maxSteps,
        });
        return toolError("Maximum tool steps reached. Please respond to the user now.");
      }
      return null;
    },

    /** Invoke a hook without awaiting it; failures are logged, never thrown. */
    fireHook(name, ...args) {
      if (!hooks) return;
      const onFailure = (err) => reportHookFailure(name, err);
      try {
        const outcome = hooks.callHook(name, ...args);
        if (outcome == null) return;
        // Track the promise until it settles so drainHooks() can await it.
        const tracked = outcome.catch(onFailure).finally(() => pendingHooks.delete(tracked));
        pendingHooks.add(tracked);
      } catch (err) {
        onFailure(err);
      }
    },

    /** Wait for every hook promise that is in flight at the time of the call. */
    async drainHooks() {
      if (pendingHooks.size === 0) return;
      await Promise.all(Array.from(pendingHooks));
    },

    /** Append messages, keeping only the newest `maxHistory` when that limit is positive. */
    pushMessages(...msgs) {
      ctx.conversationMessages.push(...msgs);
      const overLimit = maxHistory > 0 && ctx.conversationMessages.length > maxHistory;
      if (overLimit) ctx.conversationMessages = ctx.conversationMessages.slice(-maxHistory);
    },

    /** Start tracking a new reply and forget the previous turn chain. */
    beginReply(replyId) {
      ctx.reply = newReplyState(replyId);
      ctx.turnPromise = null;
    },

    /** Drop the active reply's bookkeeping. */
    cancelReply() {
      ctx.reply = newReplyState(null);
    },

    /** Extend the turn chain so it also waits for `p`. */
    chainTurn(p) {
      const previous = ctx.turnPromise ?? Promise.resolve();
      ctx.turnPromise = previous.then(() => p);
    },
  };
  return ctx;
}
|
|
781
|
+
/**
 * @internal Not part of the public API. Exposed for testing only.
 * Session code reaches `connectS2s` through this object rather than calling it directly.
 */
const _internals = { connectS2s: connectS2s };
|
|
783
|
+
/**
 * Create the inactivity timer for a session.
 *
 * When the timer fires it logs the timeout, sends an `idle_timeout` event to
 * the client and closes the current S2S handle (if any).
 *
 * @param opts.timeoutMs - Idle period in milliseconds; a value `<= 0` yields a timer whose methods do nothing.
 * @param opts.log - Logger used for the timeout message.
 * @param opts.agent - Agent identifier included in the log entry.
 * @param opts.client - Client sink that receives the `idle_timeout` event.
 * @param opts.ctx - Session context whose `s2s` handle is closed on timeout.
 * @returns An object with `reset()` (restart the countdown) and `clear()` (cancel it).
 */
function createIdleTimer(opts) {
  const disabled = opts.timeoutMs <= 0;
  if (disabled) {
    return {
      reset() {},
      clear() {},
    };
  }

  let timer = null;

  /** Runs once the idle period elapses without a reset. */
  function onIdle() {
    opts.log.info("S2S idle timeout", {
      timeoutMs: opts.timeoutMs,
      agent: opts.agent,
    });
    opts.client.event({ type: "idle_timeout" });
    opts.ctx.s2s?.close();
  }

  return {
    reset() {
      if (timer !== null) clearTimeout(timer);
      timer = setTimeout(onIdle, opts.timeoutMs);
    },
    clear() {
      if (timer === null) return;
      clearTimeout(timer);
      timer = null;
    },
  };
}
|
|
809
|
+
/**
 * Complete a tool call: notify the client and, if the reply that started the
 * call is still the active one, queue the result for delivery to S2S.
 *
 * The `tool_call_done` event carries the result cut to at most
 * `MAX_TOOL_RESULT_CHARS` characters; the entry pushed onto
 * `ctx.reply.pendingTools` keeps the full, untruncated result.
 *
 * @param ctx - Session context (`client`, `reply`, `maxHistory` are used).
 * @param callId - Identifier of the tool call being completed.
 * @param result - Tool output as a string.
 * @param replyId - Reply that was active when the call started, or `null`.
 */
function finishToolCall(ctx, callId, result, replyId) {
  // Threshold and slice length come from the same constant so they cannot drift apart.
  const truncatedResult = result.length > MAX_TOOL_RESULT_CHARS
    ? result.slice(0, MAX_TOOL_RESULT_CHARS)
    : result;
  ctx.client.event({
    type: "tool_call_done",
    toolCallId: callId,
    result: truncatedResult,
  });

  // Results of a call whose reply was interrupted or replaced are not queued.
  const isActiveReply = replyId !== null && replyId === ctx.reply.currentReplyId;
  if (!isActiveReply) return;

  ctx.reply.pendingTools.push({ callId, result });
  // Bound the queue with the same limit as the conversation history.
  if (ctx.maxHistory > 0 && ctx.reply.pendingTools.length > ctx.maxHistory) {
    ctx.reply.pendingTools.shift();
  }
}
|
|
829
|
+
/**
 * Run one tool call requested by S2S and record its outcome.
 *
 * Emits `tool_call_start`, resolves the per-turn config, checks the step
 * budget, executes the tool and passes the result (or an error string) to
 * `finishToolCall`. Every exit path completes the call exactly once.
 *
 * @param ctx - Session context.
 * @param detail - Tool call details: `callId`, `name` and parsed `args`.
 * @returns A promise that settles once the call has been finished.
 */
async function handleToolCall(ctx, detail) {
  const { callId, name, args: parsedArgs } = detail;
  // Captured up front so a later barge-in can be detected as a reply-id mismatch.
  const replyId = ctx.reply.currentReplyId;
  const complete = (output) => finishToolCall(ctx, callId, output, replyId);

  ctx.client.event({
    type: "tool_call_start",
    toolCallId: callId,
    toolName: name,
    args: parsedArgs,
  });

  let turnConfig;
  try {
    turnConfig = await ctx.resolveTurnConfig();
  } catch (err) {
    const failure = `resolveTurnConfig hook error: ${errorMessage(err)}`;
    ctx.log.error(failure);
    complete(toolError(failure));
    return;
  }

  const refusal = ctx.consumeToolCallStep(turnConfig, name, replyId);
  if (refusal !== null) {
    complete(refusal);
    return;
  }

  ctx.log.info("S2S tool call", {
    tool: name,
    callId,
    args: parsedArgs,
    agent: ctx.agent,
  });

  let output;
  try {
    output = await ctx.executeTool(name, parsedArgs, ctx.id, ctx.conversationMessages);
  } catch (err) {
    const reason = errorMessage(err);
    ctx.log.error("Tool execution failed", {
      tool: name,
      error: errorDetail(err),
    });
    output = toolError(reason);
  }

  ctx.log.info("S2S tool result", {
    tool: name,
    callId,
    resultLength: output.length,
  });
  complete(output);
}
|
|
876
|
+
/**
 * Handle a final user transcript: log it, forward it to the client as both a
 * final `transcript` and a `turn` event, store it in the conversation history
 * and fire the `turn` hook.
 *
 * @param ctx - Session context.
 * @param text - Transcribed user utterance.
 */
function handleUserTranscript(ctx, text) {
  ctx.log.info("S2S user transcript", { text });

  const outbound = [
    { type: "transcript", text, isFinal: true },
    { type: "turn", text },
  ];
  for (const message of outbound) {
    ctx.client.event(message);
  }

  ctx.pushMessages({ role: "user", content: text });
  ctx.fireHook("turn", ctx.id, text, HOOK_TIMEOUT_MS);
}
|
|
893
|
+
/**
 * Handle a complete agent transcript: always forward it to the client as a
 * `chat` event, and store it in the conversation history unless the reply was
 * interrupted.
 *
 * @param ctx - Session context.
 * @param text - Text of the agent's reply.
 * @param interrupted - Truthy when the reply was cut short.
 */
function handleAgentTranscript(ctx, text, interrupted) {
  ctx.client.event({ type: "chat", text });
  if (interrupted) return;
  ctx.pushMessages({ role: "assistant", content: text });
}
|
|
903
|
+
/**
 * React to the end of an S2S reply.
 *
 * An interrupted reply is cancelled and reported to the client as `cancelled`.
 * Otherwise, once any chained tool calls have settled, the queued tool results
 * are sent back to S2S; if none are queued, the client is told that audio
 * playback is finished (`playAudioDone` followed by a `tts_done` event).
 *
 * @param ctx - Session context.
 * @param status - Reply status; `"interrupted"` marks a barge-in.
 */
function handleReplyDone(ctx, status) {
  if (status === "interrupted") {
    ctx.log.info("S2S reply interrupted (barge-in)");
    ctx.cancelReply();
    ctx.client.event({ type: "cancelled" });
    return;
  }

  // Remember which reply finished; a different reply may be active by flush time.
  const doneReplyId = ctx.reply.currentReplyId;

  const flush = () => {
    if (ctx.reply.currentReplyId !== doneReplyId) {
      ctx.reply.pendingTools = [];
      return;
    }

    const hasQueuedResults = ctx.reply.pendingTools.length > 0;
    if (hasQueuedResults) {
      for (const queued of ctx.reply.pendingTools) {
        ctx.s2s?.sendToolResult(queued.callId, queued.result);
      }
      ctx.reply.pendingTools = [];
      return;
    }

    const stepsUsed = ctx.reply.toolCallCount;
    if (stepsUsed > 0) {
      ctx.log.info("Turn complete", {
        steps: stepsUsed,
        agent: ctx.agent,
      });
    }
    ctx.client.playAudioDone();
    ctx.client.event({ type: "tts_done" });
  };

  const inFlight = ctx.turnPromise;
  if (inFlight === null) {
    flush();
    return;
  }
  inFlight.then(flush);
}
|
|
932
|
+
/**
 * Subscribe the session context to every event emitted by an S2S handle.
 *
 * Speech, transcript and audio events are forwarded to the client; tool calls
 * are run and chained onto the turn promise; errors and session expiry close
 * the handle; a close clears the context's handle and cancels the reply.
 *
 * @param ctx - Session context that receives the events.
 * @param handle - S2S connection handle exposing `on(event, listener)` and `close()`.
 */
function setupListeners(ctx, handle) {
  handle.on("ready", ({ sessionId }) => ctx.log.info("S2S session ready", { sessionId }));
  handle.on("sessionExpired", () => {
    ctx.log.info("S2S session expired");
    handle.close();
  });
  handle.on("speechStarted", () => ctx.client.event({ type: "speech_started" }));
  handle.on("speechStopped", () => ctx.client.event({ type: "speech_stopped" }));
  handle.on("userTranscriptDelta", ({ text }) => ctx.client.event({
    type: "transcript",
    text,
    isFinal: false,
  }));
  handle.on("userTranscript", ({ text }) => handleUserTranscript(ctx, text));
  handle.on("replyStarted", ({ replyId }) => {
    ctx.beginReply(replyId);
  });
  handle.on("audio", ({ audio }) => ctx.client.playAudioChunk(audio));
  handle.on("agentTranscriptDelta", ({ text }) => ctx.client.event({
    type: "chat_delta",
    text,
  }));
  handle.on("agentTranscript", ({ text, interrupted }) => handleAgentTranscript(ctx, text, interrupted));
  handle.on("toolCall", (detail) => {
    // The catch keeps the turn chain from rejecting if the handler itself fails.
    const p = handleToolCall(ctx, detail).catch((err) => {
      ctx.log.error("Tool call handler failed", { err: errorMessage(err) });
    });
    ctx.chainTurn(p);
  });
  handle.on("replyDone", ({ status }) => handleReplyDone(ctx, status));
  handle.on("error", ({ code, message }) => {
    ctx.log.error("S2S error", {
      code,
      message,
    });
    ctx.client.event({
      type: "error",
      code: "internal",
      message,
    });
    handle.close();
  });
  handle.on("close", () => {
    ctx.log.info("S2S closed");
    // A replacement connection may already be installed when the old socket's
    // close event arrives (for example after a reset). In that case the context
    // belongs to the newer handle and must be left untouched.
    const replaced = ctx.s2s !== null && ctx.s2s !== handle;
    if (replaced) return;
    ctx.s2s = null;
    ctx.cancelReply();
  });
}
|
|
980
|
+
/**
 * Create a voice session backed by an S2S connection.
 *
 * Builds the system prompt and tool list, creates the session context and idle
 * timer, and returns the handlers that the WebSocket layer invokes.
 *
 * @param opts - Session options: identifiers (`id`, `agent`), the `client`
 * sink, `agentConfig`, `toolSchemas`, `apiKey`, `s2sConfig`, `executeTool`,
 * and optional `createWebSocket`, `hooks`, `logger`, `skipGreeting`, `maxHistory`.
 * @returns The session object (`start`, `stop`, `onAudio`, `onAudioReady`,
 * `onCancel`, `onReset`, `onHistory`, `waitForTurn`).
 */
function createS2sSession(opts) {
  const {
    id,
    agent,
    client,
    toolSchemas,
    apiKey,
    s2sConfig,
    executeTool,
    createWebSocket = defaultCreateS2sWebSocket,
    hooks,
    logger: log = consoleLogger,
  } = opts;

  // With skipGreeting set, the greeting is blanked so it is left out of the session update.
  const agentConfig = opts.skipGreeting
    ? { ...opts.agentConfig, greeting: "" }
    : opts.agentConfig;

  const hasTools = toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0;
  const systemPrompt = buildSystemPrompt(agentConfig, { hasTools, voice: true });
  const s2sTools = toolSchemas.map((schema) => ({
    type: "function",
    name: schema.name,
    description: schema.description,
    parameters: schema.parameters,
  }));

  const sessionAbort = new AbortController();
  const ctx = buildCtx({
    id,
    agent,
    client,
    agentConfig,
    executeTool,
    hooks,
    log,
    maxHistory: opts.maxHistory,
  });

  const rawTimeout = agentConfig.idleTimeoutMs ?? 3e5;
  const idleDisabled = rawTimeout === 0 || !Number.isFinite(rawTimeout);
  const idle = createIdleTimer({
    timeoutMs: idleDisabled ? 0 : rawTimeout,
    agent,
    log,
    client,
    ctx,
  });

  const sessionUpdatePayload = { systemPrompt, tools: s2sTools };
  if (agentConfig.greeting) {
    sessionUpdatePayload.greeting = agentConfig.greeting;
  }

  // Incremented on every connect attempt so a superseded attempt can detect it is stale.
  let connectGeneration = 0;

  /** Open an S2S connection and attach it to the context; failures are reported to the client. */
  async function connectAndSetup() {
    connectGeneration += 1;
    const generation = connectGeneration;
    try {
      const handle = await _internals.connectS2s({
        apiKey,
        config: s2sConfig,
        createWebSocket,
        logger: log,
      });
      const superseded = sessionAbort.signal.aborted || generation !== connectGeneration;
      if (superseded) {
        handle.close();
        return;
      }
      setupListeners(ctx, handle);
      handle.updateSession(sessionUpdatePayload);
      ctx.s2s = handle;
      idle.reset();
    } catch (err) {
      const msg = errorMessage(err);
      log.error("S2S connect failed", { error: errorDetail(err) });
      client.event({
        type: "error",
        code: "internal",
        message: msg,
      });
    }
  }

  return {
    /** Fire the `connect` hook and open the S2S connection. */
    async start() {
      ctx.fireHook("connect", id, HOOK_TIMEOUT_MS);
      await connectAndSetup();
    },

    /** Shut the session down once; later calls return immediately. */
    async stop() {
      if (sessionAbort.signal.aborted) return;
      sessionAbort.abort();
      idle.clear();
      if (ctx.turnPromise !== null) await ctx.turnPromise;
      await ctx.drainHooks();
      ctx.s2s?.close();
      ctx.fireHook("disconnect", id, HOOK_TIMEOUT_MS);
      await ctx.drainHooks();
    },

    /** Forward microphone audio to S2S and restart the idle countdown. */
    onAudio(data) {
      idle.reset();
      ctx.s2s?.sendAudio(data);
    },

    onAudioReady() {},

    onCancel() {
      client.event({ type: "cancelled" });
    },

    /** Clear the conversation state, close the connection and open a new one. */
    onReset() {
      ctx.cancelReply();
      ctx.conversationMessages = [];
      ctx.reply.toolCallCount = 0;
      ctx.turnPromise = null;
      idle.clear();
      ctx.s2s?.close();
      client.event({ type: "reset" });
      connectAndSetup().catch((err) => log.error("S2S reset reconnect failed", { error: errorMessage(err) }));
    },

    /** Append client-supplied history, keeping only `role` and `content`. */
    onHistory(incoming) {
      const messages = incoming.map((m) => ({
        role: m.role,
        content: m.content,
      }));
      ctx.pushMessages(...messages);
    },

    waitForTurn() {
      return ctx.turnPromise ?? Promise.resolve();
    },
  };
}
|
|
1092
|
+
//#endregion
|
|
1093
|
+
//#region unstorage-kv.ts
|
|
1094
|
+
/**
|
|
1095
|
+
* Key-value store backed by unstorage.
|
|
1096
|
+
*
|
|
1097
|
+
* Works with any unstorage driver (memory, fs, S3/R2, etc.).
|
|
1098
|
+
*/
|
|
1099
|
+
/**
 * Create a KV store backed by any unstorage driver.
 *
 * @param options - See {@link UnstorageKvOptions}. `options.storage` is the
 * backing store; a truthy `options.prefix` scopes all keys under that prefix.
 * @returns A {@link Kv} instance.
 *
 * @example
 * ```ts
 * import { createStorage } from "unstorage";
 * import { createUnstorageKv } from "@alexkroman1/aai/unstorage-kv";
 *
 * const kv = createUnstorageKv({ storage: createStorage() });
 * await kv.set("greeting", "hello");
 * const value = await kv.get<string>("greeting"); // "hello"
 * ```
 */
function createUnstorageKv(options) {
  const store = options.prefix ? prefixStorage(options.storage, options.prefix) : options.storage;
  const byLocale = (a, b) => a.localeCompare(b);

  return {
    /** Read a value; resolves to `null` when the store has nothing for the key. */
    async get(key) {
      return (await store.getItem(key)) ?? null;
    },

    /**
     * Store a value, optionally with an expiry.
     *
     * @param setOptions.expireIn - Lifetime in milliseconds; passed to the
     * driver as a `ttl` in whole seconds (rounded up).
     * @throws Error when the JSON form of `value` is longer than `MAX_VALUE_SIZE`.
     */
    async set(key, value, setOptions) {
      // JSON.stringify returns undefined for undefined, functions and symbols.
      // Such a value has no serialized size, so skip the check instead of
      // crashing on `.length`; it is handed to the driver unchanged.
      // NOTE(review): the limit is compared against string length (UTF-16 code
      // units) while the message says "bytes" — confirm the intended unit.
      const serialized = JSON.stringify(value);
      if (serialized !== undefined && serialized.length > MAX_VALUE_SIZE) {
        throw new Error(`Value exceeds max size of ${MAX_VALUE_SIZE} bytes`);
      }
      const expireIn = setOptions?.expireIn;
      if (expireIn && expireIn > 0) {
        await store.setItem(key, value, { ttl: Math.ceil(expireIn / 1e3) });
      } else {
        await store.setItem(key, value);
      }
    },

    /** Remove one key or an array of keys. */
    async delete(keys) {
      const keyArray = Array.isArray(keys) ? keys : [keys];
      await Promise.all(keyArray.map((k) => store.removeItem(k)));
    },

    /** List key/value entries under a prefix; keys whose value is nullish are skipped. */
    async list(listPrefix, listOptions) {
      const allKeys = await store.getKeys(listPrefix);
      const entries = [];
      for (const key of allKeys) {
        const value = await store.getItem(key);
        if (value != null) entries.push({ key, value });
      }
      return sortAndPaginate(entries, listOptions);
    },

    /** Return all keys, optionally filtered by a glob pattern, sorted with `localeCompare`. */
    async keys(pattern) {
      const allKeys = await store.getKeys();
      if (!pattern) return allKeys.sort(byLocale);
      return allKeys.filter((key) => matchGlob(key, pattern)).sort(byLocale);
    },

    /** Dispose of the underlying storage. */
    close() {
      store.dispose();
    },
  };
}
|
|
1153
|
+
//#endregion
|
|
1154
|
+
//#region ws-handler.ts
|
|
1155
|
+
/**
|
|
1156
|
+
* WebSocket session lifecycle handler.
|
|
1157
|
+
*
|
|
1158
|
+
* Audio validation is handled at the host transport layer (see server.ts).
|
|
1159
|
+
*/
|
|
1160
|
+
/**
 * Creates a {@link ClientSink} backed by a plain WebSocket.
 *
 * Events are serialized with `JSON.stringify` and sent as text; audio chunks
 * are handed to `ws.send` unchanged. Nothing is sent unless the socket's
 * `readyState` is 1, and send failures are only reported at debug level.
 *
 * @param ws - WebSocket to write to.
 * @param log - Logger; its optional `debug` method receives send failures.
 * @returns The sink (`open`, `event`, `playAudioChunk`, `playAudioDone`).
 */
function createClientSink(ws, log) {
  const isOpen = () => ws.readyState === 1;

  /** Send data over ws, silently dropping it if the socket is not open. */
  function safeSend(data) {
    try {
      if (!isOpen()) return;
      ws.send(data);
    } catch (err) {
      log.debug?.("safeSend: socket closed between readyState check and send", { error: errorMessage(err) });
    }
  }

  const sendJson = (payload) => safeSend(JSON.stringify(payload));

  return {
    get open() {
      return isOpen();
    },
    event(e) {
      sendJson(e);
    },
    playAudioChunk(chunk) {
      safeSend(chunk);
    },
    playAudioDone() {
      sendJson({ type: "audio_done" });
    },
  };
}
|
|
1191
|
+
/**
 * Forward a binary WebSocket payload to the session as audio.
 *
 * @param data - Incoming message payload.
 * @param session - Session whose `onAudio` receives the bytes as a `Uint8Array`.
 * @returns `true` when `data` was a `Uint8Array` or `ArrayBuffer` and was
 * forwarded; `false` for anything else (nothing is forwarded).
 */
function handleBinaryAudio(data, session) {
  let bytes;
  if (data instanceof Uint8Array) {
    bytes = data;
  } else if (data instanceof ArrayBuffer) {
    bytes = new Uint8Array(data);
  } else {
    return false;
  }
  session.onAudio(bytes);
  return true;
}
|
|
1202
|
+
/**
 * Parse a text frame from the client and dispatch it to the session.
 *
 * Non-string payloads are ignored. Invalid JSON and messages rejected by
 * `ClientMessageSchema` are logged as warnings and dropped.
 *
 * @param data - Incoming message payload.
 * @param session - Session that receives the dispatched call.
 * @param log - Logger for warnings.
 * @param ctx - Extra fields merged into every log entry.
 * @param sid - Short session id added to every log entry.
 */
function handleTextMessage(data, session, log, ctx, sid) {
  if (typeof data !== "string") return;

  let json;
  try {
    json = JSON.parse(data);
  } catch {
    log.warn("Invalid JSON from client", { ...ctx, sid });
    return;
  }

  const parsed = ClientMessageSchema.safeParse(json);
  if (!parsed.success) {
    log.warn("Invalid client message", {
      ...ctx,
      sid,
      error: parsed.error.message,
    });
    return;
  }

  const msg = parsed.data;
  const kind = msg.type;
  if (kind === "audio_ready") {
    session.onAudioReady();
  } else if (kind === "cancel") {
    session.onCancel();
  } else if (kind === "reset") {
    session.onReset();
  } else if (kind === "history") {
    session.onHistory(msg.messages);
  }
  // Any other message type is ignored.
}
|
|
1240
|
+
/**
 * Attaches session lifecycle handlers to a native WebSocket using
 * plain JSON text frames and binary audio frames.
 *
 * Connection flow:
 * 1. WebSocket opens → server sends `{ type: "config", ...ReadyConfig }`
 * 2. Client sets up audio → sends `{ type: "audio_ready" }`
 * 3. If reconnecting → client sends `{ type: "history", messages: [...] }`
 *
 * Messages that arrive before `session.start()` resolves are buffered and
 * replayed afterwards. If the start fails or times out, the session is removed
 * from the registry, its buffer is dropped and the session is stopped.
 *
 * @param ws - The client WebSocket.
 * @param opts - Wiring options: the `sessions` registry, `createSession`,
 * `readyConfig`, and optional `logger`, `logContext`, `resumeFrom`,
 * `sessionStartTimeoutMs` (default 10000), `onOpen`, `onClose`.
 */
function wireSessionSocket(ws, opts) {
  const { sessions, logger: log = consoleLogger } = opts;
  const sessionId = opts.resumeFrom ?? crypto.randomUUID();
  const sid = sessionId.slice(0, 8);
  const ctx = opts.logContext ?? {};
  let session = null;
  /** Set to true once session.start() resolves. Messages arriving before
   * this flag is set are buffered and replayed once the session is ready,
   * preventing audio/text from being dispatched to a half-initialized session. */
  let sessionReady = false;
  let messageBuffer = [];

  /** Route one WebSocket message to the audio or text handler. */
  function dispatch(event, target) {
    const { data } = event;
    if (handleBinaryAudio(data, target)) return;
    handleTextMessage(data, target, log, ctx, sid);
  }

  /** Replay the messages buffered during startup, then stop buffering. */
  function drainBuffer() {
    if (!(session && messageBuffer)) return;
    const buf = messageBuffer;
    messageBuffer = null;
    for (const event of buf) {
      dispatch(event, session);
    }
  }

  function onOpen() {
    opts.onOpen?.();
    log.info("Session connected", { ...ctx, sid });
    const client = createClientSink(ws, log);
    session = opts.createSession(sessionId, client);
    sessions.set(sessionId, session);
    ws.send(JSON.stringify({
      type: "config",
      ...opts.readyConfig,
      sessionId,
    }));
    const timeoutMs = opts.sessionStartTimeoutMs ?? 1e4;
    pTimeout(session.start(), {
      milliseconds: timeoutMs,
      message: `session.start() timed out after ${timeoutMs}ms`,
    }).then(() => {
      log.info("Session ready", { ...ctx, sid });
      sessionReady = true;
      drainBuffer();
    }).catch((err) => {
      log.error("Session start failed", {
        ...ctx,
        sid,
        error: errorDetail(err),
      });
      const failed = session;
      sessions.delete(sessionId);
      session = null;
      messageBuffer = null;
      // The socket-close handler only stops a non-null `session`, so a session
      // dropped here would never be stopped. Stop it now so that a start which
      // completes after the timeout cannot leave an upstream connection open.
      if (failed) {
        Promise.resolve()
          .then(() => failed.stop())
          .catch((stopErr) => {
            log.error("Session stop failed", {
              ...ctx,
              sid,
              error: errorDetail(stopErr),
            });
          });
      }
    });
  }

  if (ws.readyState === 1) onOpen();
  else ws.addEventListener("open", onOpen);

  ws.addEventListener("message", (event) => {
    if (!session) return;
    if (!sessionReady) {
      messageBuffer?.push(event);
      return;
    }
    dispatch(event, session);
  });

  ws.addEventListener("close", () => {
    log.info("Session disconnected", { ...ctx, sid });
    if (session) session.stop().catch((err) => {
      log.error("Session stop failed", {
        ...ctx,
        sid,
        error: errorDetail(err),
      });
    }).finally(() => {
      sessions.delete(sessionId);
    });
    opts.onClose?.();
  });

  ws.addEventListener("error", (ev) => {
    const msg = typeof ev.message === "string" ? ev.message : "WebSocket error";
    log.error("WebSocket error", {
      ...ctx,
      sid,
      error: msg,
    });
  });
}
|
|
1343
|
+
//#endregion
|
|
1344
|
+
//#region direct-executor.ts
|
|
1345
|
+
/**
|
|
1346
|
+
* Agent runtime — the execution engine for voice agents.
|
|
1347
|
+
*
|
|
1348
|
+
* {@link createRuntime} builds the single execution engine used by both
|
|
1349
|
+
* self-hosted servers and the platform sandbox. It wires up tool execution,
|
|
1350
|
+
* lifecycle hooks, and session management.
|
|
1351
|
+
*/
|
|
1352
|
+
/** Returns a promise that resolves when a zero-delay timer fires. */
const yieldTick = () =>
  new Promise((resolve) => {
    setTimeout(resolve, 0);
  });
|
|
1353
|
+
/**
 * Build the context object passed to a tool's `execute` function.
 *
 * @param opts - Source values: `env`, `state`, `kv`, `messages`, `fetch`, `sessionId`.
 * @returns A context with a shallow copy of `env`, and defaults for anything
 * nullish: `state` → `{}`, `messages` → `[]`, `fetch` → `globalThis.fetch`,
 * `sessionId` → `""`. Reading `kv` throws when no KV store was supplied.
 */
function buildToolContext(opts) {
  const store = opts.kv;
  return {
    env: { ...opts.env },
    state: opts.state ?? {},
    // Kept as a getter so the missing-KV error surfaces only if a tool reads `kv`.
    get kv() {
      if (!store) throw new Error("KV not available");
      return store;
    },
    messages: opts.messages ?? [],
    fetch: opts.fetch ?? globalThis.fetch,
    sessionId: opts.sessionId ?? "",
  };
}
|
|
1367
|
+
/**
 * Validate a tool's arguments, run the tool with a timeout and return its
 * result as a string.
 *
 * @param name - Tool name, used in error messages and logs.
 * @param args - Raw arguments; validated with `tool.parameters` (or `EMPTY_PARAMS` when absent).
 * @param options - `tool` (with `execute` and optional `parameters`), an
 * optional `logger`, plus the values used to build the tool context.
 * @returns Always a string: the tool's string result as-is, `"null"` for a
 * nullish result or one that JSON cannot represent, the JSON form of any other
 * result, or a tool-error string when validation or execution fails.
 */
async function executeToolCall(name, args, options) {
  const { tool } = options;
  const schema = tool.parameters ?? EMPTY_PARAMS;
  const parsed = schema.safeParse(args);
  if (!parsed.success) {
    const issues = (parsed.error?.issues ?? [])
      .map((i) => `${i.path.map(String).join(".")}: ${i.message}`)
      .join(", ");
    return toolError(`Invalid arguments for tool "${name}": ${issues}`);
  }

  try {
    const ctx = buildToolContext(options);
    await yieldTick();
    const result = await pTimeout(Promise.resolve(tool.execute(parsed.data, ctx)), {
      milliseconds: TOOL_EXECUTION_TIMEOUT_MS,
      message: `Tool "${name}" timed out after ${TOOL_EXECUTION_TIMEOUT_MS}ms`,
    });
    await yieldTick();
    if (result == null) return "null";
    if (typeof result === "string") return result;
    // JSON.stringify returns undefined for functions and symbols; fall back to
    // "null" so the return value is always a string.
    return JSON.stringify(result) ?? "null";
  } catch (err) {
    const log = options.logger;
    if (log) {
      log.warn("Tool execution failed", {
        tool: name,
        error: errorDetail(err),
      });
    } else {
      console.warn(`[tool-executor] Tool execution failed: ${name}`, err);
    }
    return toolError(errorMessage(err));
  }
}
|
|
1391
|
+
/**
 * Create the default KV store for self-hosted use: an unstorage-backed KV over
 * a storage created with no options.
 */
function createLocalKv() {
  const storage = createStorage();
  return createUnstorageKv({ storage });
}
|
|
1395
|
+
/**
 * Build the runtime that drives a voice agent.
 *
 * Unless the caller supplies `executeTool`, `hooks`, and `toolSchemas`
 * together, they are derived from the agent definition: built-in and custom
 * tool definitions are merged into one lookup table, tool schemas are built
 * for the S2S API, and lifecycle hooks are wired up with a per-session
 * state cache that is dropped on the `disconnect` hook.
 *
 * @param opts - Runtime configuration. See {@link RuntimeOptions}.
 * @returns A {@link Runtime} exposing tool execution, hooks, tool schemas,
 * session creation/start, shutdown, and the ready config.
 *
 * @public
 */
function createRuntime(opts) {
  const {
    agent,
    env,
    kv = createLocalKv(),
    createWebSocket,
    logger = consoleLogger,
    s2sConfig = DEFAULT_S2S_CONFIG,
    sessionStartTimeoutMs,
    shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS
  } = opts;
  const agentConfig = toAgentConfig(agent);
  const sessions = /* @__PURE__ */ new Map();
  const readyConfig = buildReadyConfig(s2sConfig);

  /** Derive the tool executor, hooks, and tool schemas from the agent definition. */
  function deriveFromAgent() {
    // Custom tools are spread last, so they win over a built-in of the same name.
    const toolRegistry = {
      ...getBuiltinToolDefs(agent.builtinTools ?? []),
      ...agent.tools
    };
    const toolSchemas = [
      ...agentToolsToSchemas(agent.tools ?? {}),
      ...getBuiltinToolSchemas(agent.builtinTools ?? [])
    ];

    const sessionStates = /* @__PURE__ */ new Map();
    // State is only cached when the agent provides a `state` factory;
    // otherwise every lookup yields a fresh empty object.
    const stateFor = (sid) => {
      const isMissing = !sessionStates.has(sid);
      if (isMissing && agent.state) sessionStates.set(sid, agent.state());
      return sessionStates.get(sid) ?? {};
    };

    const readonlyEnv = Object.freeze({ ...env });
    const hookContextFor = (sessionId) => ({
      env: readonlyEnv,
      state: stateFor(sessionId),
      sessionId,
      get kv() {
        return kv;
      },
      fetch: globalThis.fetch
    });

    const executeTool = async (name, args, sessionId, messages) => {
      const tool = toolRegistry[name];
      if (!tool) return toolError(`Unknown tool: ${name}`);
      // A missing session id is normalised to the empty string.
      const sid = sessionId ?? "";
      return executeToolCall(name, args, {
        tool,
        env: readonlyEnv,
        state: stateFor(sid),
        sessionId: sid,
        kv,
        messages,
        logger,
        fetch: globalThis.fetch
      });
    };

    const hooks = createAgentHooks({ agent, makeCtx: hookContextFor });
    // Release the cached per-session state once the session disconnects.
    hooks.hook("disconnect", async (sessionId) => {
      sessionStates.delete(sessionId);
    });

    return { executeTool, hooks, toolSchemas };
  }

  // Caller-supplied implementations are used only when all three are present.
  const hasOverrides = Boolean(opts.executeTool && opts.hooks && opts.toolSchemas);
  const { executeTool, hooks, toolSchemas } = hasOverrides ? opts : deriveFromAgent();

  /** Create an S2S session bound to this runtime's tools, hooks, and config. */
  function createSession(sessionOpts) {
    const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
    const webSocketPatch = createWebSocket ? { createWebSocket } : {};
    const resumePatch = sessionOpts.resumeFrom ? { resumeFrom: sessionOpts.resumeFrom } : {};
    return createS2sSession({
      id: sessionOpts.id,
      agent: sessionOpts.agent,
      client: sessionOpts.client,
      agentConfig,
      toolSchemas,
      apiKey,
      s2sConfig,
      executeTool,
      ...webSocketPatch,
      hooks,
      skipGreeting: sessionOpts.skipGreeting ?? false,
      logger,
      ...resumePatch
    });
  }

  /** Wire a client socket to a newly created session tracked in `sessions`. */
  function startSession(ws, startOpts) {
    const { resumeFrom, logContext, onOpen, onClose } = startOpts ?? {};
    const resumePatch = resumeFrom ? { resumeFrom } : {};
    const spawnSession = (sid, client) => createSession({
      id: sid,
      agent: agent.name,
      client,
      skipGreeting: startOpts?.skipGreeting ?? false,
      ...resumePatch
    });
    wireSessionSocket(ws, {
      sessions,
      createSession: spawnSession,
      readyConfig,
      logger,
      ...(logContext ? { logContext } : {}),
      ...(onOpen ? { onOpen } : {}),
      ...(onClose ? { onClose } : {}),
      ...(sessionStartTimeoutMs === undefined ? {} : { sessionStartTimeoutMs }),
      ...resumePatch
    });
  }

  /**
   * Stop every tracked session, waiting at most `shutdownTimeoutMs`, then
   * clear the session table regardless of the outcome.
   */
  async function shutdown() {
    if (sessions.size === 0) return;

    let timer;
    const deadline = new Promise((resolve) => {
      timer = setTimeout(() => resolve("timeout"), shutdownTimeoutMs);
    });

    // Snapshot the sessions before stopping them.
    const active = [...sessions.values()];
    const stopping = active.map((session) => session.stop());
    const graceful = (async () => {
      const results = await Promise.allSettled(stopping);
      for (const result of results) {
        if (result.status === "rejected") logger.warn(`Session stop failed during shutdown: ${result.reason}`);
      }
      return "done";
    })();

    const outcome = await Promise.race([graceful, deadline]);
    if (timer) clearTimeout(timer);
    if (outcome === "timeout") {
      logger.warn(`Shutdown timeout (${shutdownTimeoutMs}ms) exceeded — force-closing ${sessions.size} remaining session(s)`);
    }
    sessions.clear();
  }

  return {
    executeTool,
    hooks,
    toolSchemas,
    createSession,
    startSession,
    shutdown,
    readyConfig
  };
}
|
|
1529
|
+
//#endregion
|
|
1530
|
+
export { consoleLogger as _, _internals as a, buildSystemPrompt as c, AgentConfigSchema as d, EMPTY_PARAMS as f, DEFAULT_S2S_CONFIG as g, toAgentConfig as h, createUnstorageKv as i, connectS2s as l, agentToolsToSchemas as m, executeToolCall as n, buildCtx as o, ToolSchemaSchema as p, wireSessionSocket as r, createS2sSession as s, createRuntime as t, defaultCreateS2sWebSocket as u, jsonLogger as v };
|