@alexkroman1/aai 1.7.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +11 -9
- package/CHANGELOG.md +10 -0
- package/dist/{_internal-types-CrnTi9Ew.js → _internal-types-CfOAbK6V.js} +22 -35
- package/dist/constants-y68COEGj.js +29 -0
- package/dist/host/_base64.d.ts +2 -0
- package/dist/host/_mock-ws.d.ts +0 -61
- package/dist/host/_pipeline-test-fakes.d.ts +7 -4
- package/dist/host/_run-code.d.ts +0 -25
- package/dist/host/_runtime-conformance.d.ts +3 -34
- package/dist/host/memory-vector.d.ts +0 -11
- package/dist/host/providers/resolve-kv.d.ts +0 -7
- package/dist/host/providers/resolve-vector.d.ts +0 -8
- package/dist/host/providers/stt/assemblyai.d.ts +0 -14
- package/dist/host/providers/stt/deepgram.d.ts +2 -14
- package/dist/host/providers/stt/soniox.d.ts +0 -22
- package/dist/host/providers/tts/rime.d.ts +10 -31
- package/dist/host/runtime-barrel.js +619 -630
- package/dist/host/runtime-config.d.ts +9 -6
- package/dist/host/runtime.d.ts +3 -0
- package/dist/host/to-vercel-tools.d.ts +3 -33
- package/dist/host/transports/openai-realtime-transport.d.ts +43 -0
- package/dist/host/unstorage-kv.d.ts +0 -26
- package/dist/index.js +3 -3
- package/dist/openai-realtime-cjPAHMMx.js +10 -0
- package/dist/sdk/_internal-types.d.ts +6 -55
- package/dist/sdk/allowed-hosts.d.ts +4 -3
- package/dist/sdk/constants.d.ts +4 -29
- package/dist/sdk/define.d.ts +7 -4
- package/dist/sdk/kv.d.ts +13 -37
- package/dist/sdk/manifest-barrel.js +1 -1
- package/dist/sdk/manifest.d.ts +8 -2
- package/dist/sdk/protocol.js +1 -1
- package/dist/sdk/providers/s2s/openai-realtime.d.ts +17 -0
- package/dist/sdk/providers/s2s-barrel.d.ts +9 -0
- package/dist/sdk/providers/s2s-barrel.js +2 -0
- package/dist/sdk/providers/tts/rime.d.ts +1 -1
- package/dist/sdk/providers.d.ts +6 -2
- package/dist/sdk/types.d.ts +7 -1
- package/dist/{types-KUgezM6u.js → types-DOWVZhb9.js} +1 -7
- package/dist/{ws-upgrade-BeOQ7fXL.js → ws-upgrade-CG8-by1n.js} +2 -3
- package/host/_base64.ts +9 -0
- package/host/_mock-ws.ts +0 -65
- package/host/_pipeline-test-fakes.ts +19 -31
- package/host/_run-code.ts +10 -53
- package/host/_runtime-conformance.ts +3 -44
- package/host/_test-utils.ts +20 -42
- package/host/builtin-tools.test.ts +127 -222
- package/host/builtin-tools.ts +6 -10
- package/host/cleanup.test.ts +30 -73
- package/host/integration/pipeline-reference.integration.test.ts +12 -17
- package/host/integration.test.ts +0 -7
- package/host/memory-vector.test.ts +3 -1
- package/host/memory-vector.ts +16 -21
- package/host/pinecone-vector.test.ts +14 -17
- package/host/pinecone-vector.ts +10 -19
- package/host/providers/providers.test-d.ts +5 -3
- package/host/providers/resolve-kv.ts +23 -41
- package/host/providers/resolve-vector.ts +3 -12
- package/host/providers/resolve.test.ts +15 -28
- package/host/providers/resolve.ts +24 -24
- package/host/providers/stt/assemblyai.test.ts +2 -14
- package/host/providers/stt/assemblyai.ts +12 -35
- package/host/providers/stt/deepgram.test.ts +23 -83
- package/host/providers/stt/deepgram.ts +15 -40
- package/host/providers/stt/elevenlabs.test.ts +26 -38
- package/host/providers/stt/elevenlabs.ts +10 -9
- package/host/providers/stt/soniox.test.ts +35 -85
- package/host/providers/stt/soniox.ts +8 -53
- package/host/providers/tts/cartesia.test.ts +19 -58
- package/host/providers/tts/cartesia.ts +36 -66
- package/host/providers/tts/rime.test.ts +12 -38
- package/host/providers/tts/rime.ts +23 -86
- package/host/runtime-config.test.ts +9 -9
- package/host/runtime-config.ts +16 -22
- package/host/runtime.test.ts +111 -73
- package/host/runtime.ts +138 -86
- package/host/s2s.test.ts +92 -191
- package/host/s2s.ts +55 -49
- package/host/server-shutdown.test.ts +9 -30
- package/host/server.test.ts +2 -13
- package/host/server.ts +85 -100
- package/host/session-core.test.ts +15 -30
- package/host/session-core.ts +10 -13
- package/host/session-prompt.test.ts +1 -5
- package/host/to-vercel-tools.test.ts +53 -72
- package/host/to-vercel-tools.ts +9 -39
- package/host/tool-executor.test.ts +25 -51
- package/host/tool-executor.ts +18 -12
- package/host/transports/openai-realtime-transport.test.ts +371 -0
- package/host/transports/openai-realtime-transport.ts +319 -0
- package/host/transports/pipeline-transport.test.ts +125 -298
- package/host/transports/pipeline-transport.ts +20 -68
- package/host/transports/s2s-transport-fixtures.test.ts +31 -92
- package/host/transports/s2s-transport.test.ts +65 -134
- package/host/transports/s2s-transport.ts +15 -43
- package/host/transports/types.test.ts +4 -8
- package/host/unstorage-kv.test.ts +3 -2
- package/host/unstorage-kv.ts +5 -35
- package/host/ws-handler.test.ts +72 -176
- package/host/ws-handler.ts +6 -12
- package/package.json +6 -1
- package/sdk/__snapshots__/exports.test.ts.snap +7 -0
- package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
- package/sdk/_internal-types.test.ts +6 -9
- package/sdk/_internal-types.ts +16 -57
- package/sdk/_test-matchers.ts +25 -15
- package/sdk/allowed-hosts.test.ts +50 -114
- package/sdk/allowed-hosts.ts +8 -14
- package/sdk/constants.ts +5 -52
- package/sdk/define.test.ts +7 -6
- package/sdk/define.ts +7 -3
- package/sdk/exports.test.ts +6 -1
- package/sdk/kv.ts +13 -37
- package/sdk/manifest.test-d.ts +5 -0
- package/sdk/manifest.test.ts +61 -9
- package/sdk/manifest.ts +11 -11
- package/sdk/protocol-compat.test.ts +66 -98
- package/sdk/protocol-snapshot.test.ts +2 -16
- package/sdk/protocol.test.ts +13 -22
- package/sdk/providers/s2s/openai-realtime.ts +36 -0
- package/sdk/providers/s2s-barrel.ts +12 -0
- package/sdk/providers/tts/rime.ts +1 -1
- package/sdk/providers.ts +24 -5
- package/sdk/schema-alignment.test.ts +25 -73
- package/sdk/schema-shapes.test.ts +1 -29
- package/sdk/system-prompt.test.ts +0 -1
- package/sdk/system-prompt.ts +17 -19
- package/sdk/types-inference.test.ts +10 -36
- package/sdk/types.ts +7 -0
- package/sdk/ws-upgrade.test.ts +24 -23
- package/sdk/ws-upgrade.ts +2 -3
- package/tsdown.config.ts +8 -11
- package/dist/constants-C2nirZUI.js +0 -54
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
import { r as DEFAULT_SYSTEM_PROMPT } from "../types-KUgezM6u.js";
|
|
2
|
-
import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-C2nirZUI.js";
|
|
3
|
-
import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-BeOQ7fXL.js";
|
|
1
|
+
import { r as DEFAULT_SYSTEM_PROMPT } from "../types-DOWVZhb9.js";
|
|
2
|
+
import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-y68COEGj.js";
|
|
3
|
+
import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-CG8-by1n.js";
|
|
4
4
|
import { ClientMessageSchema, VectorRequestSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
|
|
5
|
-
import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-CrnTi9Ew.js";
|
|
5
|
+
import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-CfOAbK6V.js";
|
|
6
6
|
import { a as MISTRAL_KIND, d as ANTHROPIC_KIND, l as GOOGLE_KIND, r as OPENAI_KIND, s as GROQ_KIND } from "../xai-BDI61Y2M.js";
|
|
7
7
|
import { a as DEEPGRAM_KIND, r as ELEVENLABS_KIND, s as ASSEMBLYAI_KIND, t as SONIOX_KIND } from "../soniox-BQdL0mB5.js";
|
|
8
8
|
import { a as CARTESIA_KIND, n as RIME_KIND } from "../rime-58p9mDR8.js";
|
|
9
9
|
import { a as MEMORY_KV_KIND, r as REDIS_KV_KIND } from "../s3-BtCMvCod.js";
|
|
10
10
|
import { r as IN_MEMORY_VECTOR_KIND, t as PINECONE_VECTOR_KIND } from "../pinecone-CeJ69aRs.js";
|
|
11
|
+
import "../openai-realtime-cjPAHMMx.js";
|
|
11
12
|
import { createRequire } from "node:module";
|
|
12
13
|
import { z } from "zod";
|
|
13
14
|
import { convert } from "html-to-text";
|
|
@@ -35,20 +36,12 @@ import path from "node:path";
|
|
|
35
36
|
import escapeHtml from "escape-html";
|
|
36
37
|
import { lookup } from "mime-types";
|
|
37
38
|
//#region host/_run-code.ts
|
|
38
|
-
/**
|
|
39
|
-
* run_code built-in tool — executes user JavaScript in a fresh `node:vm`
|
|
40
|
-
* context with no network, filesystem, or process access.
|
|
41
|
-
*/
|
|
42
39
|
const SKIPPED_CLASS_KEYS = new Set([
|
|
43
40
|
"constructor",
|
|
44
41
|
"prototype",
|
|
45
42
|
"length",
|
|
46
43
|
"name"
|
|
47
44
|
]);
|
|
48
|
-
/**
|
|
49
|
-
* Copy static members from a class constructor to a wrapper function,
|
|
50
|
-
* skipping built-in keys that must not be forwarded.
|
|
51
|
-
*/
|
|
52
45
|
function copyStaticMembers(src, dst) {
|
|
53
46
|
for (const key of Object.getOwnPropertyNames(src)) {
|
|
54
47
|
if (SKIPPED_CLASS_KEYS.has(key)) continue;
|
|
@@ -59,16 +52,10 @@ function copyStaticMembers(src, dst) {
|
|
|
59
52
|
}
|
|
60
53
|
}
|
|
61
54
|
/**
|
|
62
|
-
*
|
|
63
|
-
*
|
|
64
|
-
*
|
|
65
|
-
*
|
|
66
|
-
*
|
|
67
|
-
* For class constructors: additionally copies static methods and neutralizes
|
|
68
|
-
* `prototype.constructor` so instances created via `new` also cannot escape.
|
|
69
|
-
*
|
|
70
|
-
* This prevents sandbox code from reaching the host `Function` constructor
|
|
71
|
-
* via patterns like `fn.constructor.constructor('return process')()`.
|
|
55
|
+
* Prevents sandbox code from reaching the host `Function` constructor via
|
|
56
|
+
* `fn.constructor.constructor('return process')()`. For class constructors
|
|
57
|
+
* we also copy static members and neuter `prototype.constructor` so
|
|
58
|
+
* instances created via `new` cannot escape either.
|
|
72
59
|
*/
|
|
73
60
|
function neutralizeConstructor(fn) {
|
|
74
61
|
const hasPrototype = typeof fn.prototype === "object" && fn.prototype !== null;
|
|
@@ -92,19 +79,6 @@ function neutralizeConstructor(fn) {
|
|
|
92
79
|
return Wrapper;
|
|
93
80
|
}
|
|
94
81
|
const runCodeParams = z.object({ code: z.string().describe("JavaScript code to execute. Use console.log() for output.") });
|
|
95
|
-
/**
|
|
96
|
-
* Execute JavaScript code inside a fresh `node:vm` context.
|
|
97
|
-
*
|
|
98
|
-
* Each invocation creates a disposable VM context with:
|
|
99
|
-
* - No filesystem access (`node:fs` and other built-ins unavailable)
|
|
100
|
-
* - No network access (`fetch`, `http` unavailable)
|
|
101
|
-
* - No child process spawning
|
|
102
|
-
* - No environment variable access (`process` unavailable)
|
|
103
|
-
* - Execution timeout (default 5 s)
|
|
104
|
-
*
|
|
105
|
-
* The context is discarded after execution, so no state leaks between
|
|
106
|
-
* invocations or across sessions.
|
|
107
|
-
*/
|
|
108
82
|
function createRunCode() {
|
|
109
83
|
return {
|
|
110
84
|
guidance: "You MUST use the run_code tool for ANY question involving math, counting, calculations, data processing, or code. NEVER do mental math or recite code verbally. run_code executes JavaScript (not Python). Always write JavaScript.",
|
|
@@ -115,14 +89,6 @@ function createRunCode() {
|
|
|
115
89
|
}
|
|
116
90
|
};
|
|
117
91
|
}
|
|
118
|
-
/**
|
|
119
|
-
* Execute user code in a fresh `node:vm` context.
|
|
120
|
-
*
|
|
121
|
-
* @remarks
|
|
122
|
-
* The VM context only exposes standard ECMAScript globals and a console
|
|
123
|
-
* object that captures output. Node.js APIs (`process`, `require`,
|
|
124
|
-
* `import()`) are not available inside the sandbox.
|
|
125
|
-
*/
|
|
126
92
|
async function executeInIsolate(code) {
|
|
127
93
|
const output = [];
|
|
128
94
|
const capture = (...args) => output.push(args.map(String).join(" "));
|
|
@@ -258,12 +224,11 @@ function createVisitWebpage(fetchFn = globalThis.fetch) {
|
|
|
258
224
|
error: `Failed to fetch: ${resp.status} ${resp.statusText}`,
|
|
259
225
|
url
|
|
260
226
|
};
|
|
261
|
-
const htmlContent = await resp.text();
|
|
262
|
-
const text = htmlToText(htmlContent.length > 2e5 ? htmlContent.slice(0, MAX_HTML_BYTES) : htmlContent);
|
|
227
|
+
const text = htmlToText((await resp.text()).slice(0, MAX_HTML_BYTES));
|
|
263
228
|
const truncated = text.length > MAX_PAGE_CHARS;
|
|
264
229
|
return {
|
|
265
230
|
url,
|
|
266
|
-
content:
|
|
231
|
+
content: text.slice(0, MAX_PAGE_CHARS),
|
|
267
232
|
...truncated ? {
|
|
268
233
|
truncated: true,
|
|
269
234
|
totalChars: text.length
|
|
@@ -323,7 +288,6 @@ function createFetchJson(fetchFn = globalThis.fetch) {
|
|
|
323
288
|
}
|
|
324
289
|
};
|
|
325
290
|
}
|
|
326
|
-
/** Resolve a builtin name to an array of [toolName, ToolDef] pairs. */
|
|
327
291
|
function resolveBuiltin(name, opts) {
|
|
328
292
|
switch (name) {
|
|
329
293
|
case "web_search": return [["web_search", createWebSearch(opts?.fetch)]];
|
|
@@ -349,8 +313,7 @@ function resolveAllBuiltins(names, opts) {
|
|
|
349
313
|
description: def.description,
|
|
350
314
|
parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
|
|
351
315
|
});
|
|
352
|
-
|
|
353
|
-
if (g) guidance.push(g);
|
|
316
|
+
if (def.guidance) guidance.push(def.guidance);
|
|
354
317
|
}
|
|
355
318
|
return {
|
|
356
319
|
defs,
|
|
@@ -360,16 +323,7 @@ function resolveAllBuiltins(names, opts) {
|
|
|
360
323
|
}
|
|
361
324
|
//#endregion
|
|
362
325
|
//#region host/memory-vector.ts
|
|
363
|
-
|
|
364
|
-
* In-memory Vector implementation.
|
|
365
|
-
*
|
|
366
|
-
* INTENTIONALLY BAD QUALITY. Pseudo-embedding hashes the text into a
|
|
367
|
-
* 64-dim Float32Array of values in [-1, ~0.99], then L2-normalizes
|
|
368
|
-
* the result. Because both stored and probe vectors are unit-length,
|
|
369
|
-
* cosine similarity reduces to a plain dot product — that's what
|
|
370
|
-
* `cosine()` computes. Used only for `aai dev` and tests — the goal
|
|
371
|
-
* is proving tool wiring, not retrieval ranking.
|
|
372
|
-
*/
|
|
326
|
+
const DIM = 64;
|
|
373
327
|
const stores = /* @__PURE__ */ new Map();
|
|
374
328
|
function getStore(ns) {
|
|
375
329
|
let store = stores.get(ns);
|
|
@@ -379,13 +333,14 @@ function getStore(ns) {
|
|
|
379
333
|
}
|
|
380
334
|
return store;
|
|
381
335
|
}
|
|
382
|
-
const DIM = 64;
|
|
383
336
|
function pseudoEmbed(text) {
|
|
384
337
|
const out = new Float32Array(DIM);
|
|
385
338
|
const h1 = createHash("sha256").update(text).digest();
|
|
386
339
|
const h2 = createHash("sha256").update(h1).digest();
|
|
387
|
-
for (let i = 0; i < 32; i++)
|
|
388
|
-
|
|
340
|
+
for (let i = 0; i < 32; i++) {
|
|
341
|
+
out[i] = (h1[i] - 128) / 128;
|
|
342
|
+
out[i + 32] = (h2[i] - 128) / 128;
|
|
343
|
+
}
|
|
389
344
|
let norm = 0;
|
|
390
345
|
for (let i = 0; i < DIM; i++) norm += out[i] * out[i];
|
|
391
346
|
norm = Math.sqrt(norm) || 1;
|
|
@@ -421,12 +376,13 @@ function createMemoryVector(opts) {
|
|
|
421
376
|
const scored = [];
|
|
422
377
|
for (const [id, rec] of getStore(ns)) {
|
|
423
378
|
if (filter && !matches(rec.metadata, filter)) continue;
|
|
424
|
-
|
|
379
|
+
const match = {
|
|
425
380
|
id,
|
|
426
381
|
score: cosine(probe, rec.vec),
|
|
427
|
-
text: rec.text
|
|
428
|
-
|
|
429
|
-
|
|
382
|
+
text: rec.text
|
|
383
|
+
};
|
|
384
|
+
if (rec.metadata !== void 0) match.metadata = rec.metadata;
|
|
385
|
+
scored.push(match);
|
|
430
386
|
}
|
|
431
387
|
scored.sort((a, b) => b.score - a.score);
|
|
432
388
|
return scored.slice(0, topK);
|
|
@@ -440,24 +396,9 @@ function createMemoryVector(opts) {
|
|
|
440
396
|
}
|
|
441
397
|
//#endregion
|
|
442
398
|
//#region host/providers/stt/assemblyai.ts
|
|
443
|
-
/**
|
|
444
|
-
* AssemblyAI Universal-Streaming STT opener (host-only).
|
|
445
|
-
*
|
|
446
|
-
* The user-facing descriptor factory (`assemblyAI(...)`) lives in
|
|
447
|
-
* `sdk/providers/stt/assemblyai.ts`. This module is the host-side
|
|
448
|
-
* counterpart: it takes the descriptor options + an API key and
|
|
449
|
-
* returns an {@link SttOpener} that the pipeline session drives.
|
|
450
|
-
*
|
|
451
|
-
* Default model: `"u3pro-rt"` (Universal-3 Pro Real-Time). The adapter
|
|
452
|
-
* maps that to the SDK's `"u3-rt-pro"` `speechModel` value; any other
|
|
453
|
-
* string is forwarded verbatim.
|
|
454
|
-
*/
|
|
455
|
-
/** Translate the descriptor's model alias to the SDK's `speechModel` value. */
|
|
456
399
|
function resolveSpeechModel(model) {
|
|
457
|
-
|
|
458
|
-
return model;
|
|
400
|
+
return model === "u3pro-rt" ? "u3-rt-pro" : model;
|
|
459
401
|
}
|
|
460
|
-
/** Build an {@link SttOpener} from resolved AssemblyAI descriptor options. */
|
|
461
402
|
function openAssemblyAI(opts = {}) {
|
|
462
403
|
return {
|
|
463
404
|
name: "assemblyai",
|
|
@@ -476,17 +417,16 @@ function openAssemblyAI(opts = {}) {
|
|
|
476
417
|
transcriber.on("turn", (event) => {
|
|
477
418
|
if (closed) return;
|
|
478
419
|
const text = event.transcript ?? "";
|
|
479
|
-
if (
|
|
480
|
-
|
|
481
|
-
} else if (text.length > 0) emitter.emit("partial", text);
|
|
420
|
+
if (text.length === 0) return;
|
|
421
|
+
emitter.emit(event.end_of_turn ? "final" : "partial", text);
|
|
482
422
|
});
|
|
483
423
|
transcriber.on("error", (err) => {
|
|
484
424
|
if (closed) return;
|
|
485
425
|
emitter.emit("error", makeSttError("stt_stream_error", err?.message ?? String(err)));
|
|
486
426
|
});
|
|
487
427
|
transcriber.on("close", (code) => {
|
|
488
|
-
if (closed) return;
|
|
489
|
-
|
|
428
|
+
if (closed || code === 1e3) return;
|
|
429
|
+
emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
|
|
490
430
|
});
|
|
491
431
|
try {
|
|
492
432
|
await transcriber.connect();
|
|
@@ -505,8 +445,7 @@ function openAssemblyAI(opts = {}) {
|
|
|
505
445
|
return {
|
|
506
446
|
sendAudio(pcm) {
|
|
507
447
|
if (closed) return;
|
|
508
|
-
const copy = new Uint8Array(pcm.byteLength);
|
|
509
|
-
copy.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
|
|
448
|
+
const copy = new Uint8Array(pcm.buffer.slice(pcm.byteOffset, pcm.byteOffset + pcm.byteLength));
|
|
510
449
|
transcriber.sendAudio(copy.buffer);
|
|
511
450
|
},
|
|
512
451
|
on(event, fn) {
|
|
@@ -523,33 +462,18 @@ function openAssemblyAI(opts = {}) {
|
|
|
523
462
|
/**
|
|
524
463
|
* Deepgram Nova streaming STT opener (host-only).
|
|
525
464
|
*
|
|
526
|
-
*
|
|
527
|
-
* `
|
|
528
|
-
* counterpart: it takes the descriptor options + an API key and
|
|
529
|
-
* returns an {@link SttOpener} that the pipeline session drives.
|
|
530
|
-
*
|
|
531
|
-
* Default model: `"nova-3"`. Any string is forwarded verbatim to the SDK.
|
|
532
|
-
*
|
|
533
|
-
* This adapter targets the Deepgram SDK v5 (`@deepgram/sdk@^5`). The v5
|
|
534
|
-
* streaming API is:
|
|
535
|
-
* `client.listen.v1.connect(args)` → `Promise<V1Socket>`
|
|
536
|
-
* followed by:
|
|
537
|
-
* `socket.connect()` + `socket.waitForOpen()` to establish the connection.
|
|
538
|
-
*/
|
|
539
|
-
/**
|
|
540
|
-
* Handle an incoming Deepgram transcript message, emitting `partial` or
|
|
541
|
-
* `final` events on the emitter. Empty transcripts are silently dropped.
|
|
465
|
+
* Targets Deepgram SDK v5: `client.listen.v1.connect(args)` returns a
|
|
466
|
+
* socket; `socket.connect()` + `socket.waitForOpen()` establish it.
|
|
542
467
|
*/
|
|
468
|
+
function errMsg(cause) {
|
|
469
|
+
return cause instanceof Error ? cause.message : String(cause);
|
|
470
|
+
}
|
|
543
471
|
function handleMessage(data, closed, emitter) {
|
|
544
|
-
if (closed) return;
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
if (text.length > 0) emitter.emit("final", text);
|
|
550
|
-
} else if (text.length > 0) emitter.emit("partial", text);
|
|
551
|
-
}
|
|
552
|
-
/** Wire Deepgram socket events onto the nanoevents emitter. */
|
|
472
|
+
if (closed || data.type !== "Results") return;
|
|
473
|
+
const text = data.channel?.alternatives?.[0]?.transcript ?? "";
|
|
474
|
+
if (text.length === 0) return;
|
|
475
|
+
emitter.emit(data.is_final ? "final" : "partial", text);
|
|
476
|
+
}
|
|
553
477
|
function wireSocketEvents(connection, emitter, getIsClosed) {
|
|
554
478
|
connection.on("message", (data) => handleMessage(data, getIsClosed(), emitter));
|
|
555
479
|
connection.on("error", (err) => {
|
|
@@ -562,12 +486,13 @@ function wireSocketEvents(connection, emitter, getIsClosed) {
|
|
|
562
486
|
if (code !== void 0 && code !== 1e3) emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
|
|
563
487
|
});
|
|
564
488
|
}
|
|
565
|
-
/** Wire the AbortSignal to the close function. */
|
|
566
489
|
function wireAbortSignal(signal, close) {
|
|
567
|
-
if (signal.aborted)
|
|
568
|
-
|
|
490
|
+
if (signal.aborted) {
|
|
491
|
+
close();
|
|
492
|
+
return;
|
|
493
|
+
}
|
|
494
|
+
signal.addEventListener("abort", () => void close(), { once: true });
|
|
569
495
|
}
|
|
570
|
-
/** Build an {@link SttOpener} from resolved Deepgram descriptor options. */
|
|
571
496
|
function openDeepgram(opts = {}) {
|
|
572
497
|
return {
|
|
573
498
|
name: "deepgram",
|
|
@@ -590,7 +515,7 @@ function openDeepgram(opts = {}) {
|
|
|
590
515
|
Authorization: apiKey
|
|
591
516
|
});
|
|
592
517
|
} catch (cause) {
|
|
593
|
-
throw makeSttError("stt_connect_failed", `Deepgram STT: connect failed: ${
|
|
518
|
+
throw makeSttError("stt_connect_failed", `Deepgram STT: connect failed: ${errMsg(cause)}`);
|
|
594
519
|
}
|
|
595
520
|
const emitter = createNanoEvents();
|
|
596
521
|
let closed = false;
|
|
@@ -599,7 +524,7 @@ function openDeepgram(opts = {}) {
|
|
|
599
524
|
try {
|
|
600
525
|
await connection.waitForOpen();
|
|
601
526
|
} catch (cause) {
|
|
602
|
-
throw makeSttError("stt_connect_failed", `Deepgram STT: WebSocket open failed: ${
|
|
527
|
+
throw makeSttError("stt_connect_failed", `Deepgram STT: WebSocket open failed: ${errMsg(cause)}`);
|
|
603
528
|
}
|
|
604
529
|
const close = async () => {
|
|
605
530
|
if (closed) return;
|
|
@@ -671,15 +596,15 @@ function openElevenLabs(opts = {}) {
|
|
|
671
596
|
}
|
|
672
597
|
const emitter = createNanoEvents();
|
|
673
598
|
let closed = false;
|
|
674
|
-
|
|
599
|
+
function emitTranscript(event, text) {
|
|
675
600
|
if (closed) return;
|
|
676
|
-
|
|
677
|
-
|
|
601
|
+
if (text && text.length > 0) emitter.emit(event, text);
|
|
602
|
+
}
|
|
603
|
+
connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (msg) => {
|
|
604
|
+
emitTranscript("partial", msg.text);
|
|
678
605
|
});
|
|
679
606
|
connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (msg) => {
|
|
680
|
-
|
|
681
|
-
const text = msg.text ?? "";
|
|
682
|
-
if (text.length > 0) emitter.emit("final", text);
|
|
607
|
+
emitTranscript("final", msg.text);
|
|
683
608
|
});
|
|
684
609
|
connection.on(RealtimeEvents.ERROR, (payload) => {
|
|
685
610
|
if (closed) return;
|
|
@@ -690,13 +615,13 @@ function openElevenLabs(opts = {}) {
|
|
|
690
615
|
if (closed) return;
|
|
691
616
|
emitter.emit("error", makeSttError("stt_auth_failed", msg.error));
|
|
692
617
|
});
|
|
693
|
-
|
|
618
|
+
async function close() {
|
|
694
619
|
if (closed) return;
|
|
695
620
|
closed = true;
|
|
696
621
|
try {
|
|
697
622
|
connection.close();
|
|
698
623
|
} catch {}
|
|
699
|
-
}
|
|
624
|
+
}
|
|
700
625
|
if (openOpts.signal.aborted) close();
|
|
701
626
|
else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
|
|
702
627
|
return {
|
|
@@ -715,32 +640,7 @@ function openElevenLabs(opts = {}) {
|
|
|
715
640
|
}
|
|
716
641
|
//#endregion
|
|
717
642
|
//#region host/providers/stt/soniox.ts
|
|
718
|
-
/**
|
|
719
|
-
* Soniox real-time STT opener (host-only).
|
|
720
|
-
*
|
|
721
|
-
* The user-facing descriptor factory (`soniox(...)`) lives in
|
|
722
|
-
* `sdk/providers/stt/soniox.ts`. This module is the host-side
|
|
723
|
-
* counterpart: it takes the descriptor options + an API key and
|
|
724
|
-
* returns an {@link SttOpener} that the pipeline session drives.
|
|
725
|
-
*
|
|
726
|
-
* Soniox's published JS client (`@soniox/speech-to-text-web`) is
|
|
727
|
-
* browser-only — it depends on `MediaRecorder` and `getUserMedia`. For
|
|
728
|
-
* server-side use we talk to the WebSocket directly:
|
|
729
|
-
* `wss://stt-rt.soniox.com/transcribe-websocket`
|
|
730
|
-
*
|
|
731
|
-
* Wire format:
|
|
732
|
-
* - First text frame: JSON config with api_key, model, audio_format,
|
|
733
|
-
* sample_rate, num_channels (and optional language hints).
|
|
734
|
-
* - Subsequent binary frames: 16-bit signed little-endian PCM audio.
|
|
735
|
-
* - Server replies: JSON `{ tokens: [{ text, is_final }] }` messages.
|
|
736
|
-
* Final tokens accumulate; non-final tokens are a rolling preview.
|
|
737
|
-
* - On error: `{ error_code, error_message }`.
|
|
738
|
-
*/
|
|
739
643
|
const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
740
|
-
/**
|
|
741
|
-
* Walk a batch of Soniox tokens, sending finals into `appendFinal` and
|
|
742
|
-
* returning the concatenated non-finals as a rolling preview string.
|
|
743
|
-
*/
|
|
744
644
|
function consumeTokens(tokens, appendFinal) {
|
|
745
645
|
let nonFinal = "";
|
|
746
646
|
for (const tok of tokens) {
|
|
@@ -751,7 +651,6 @@ function consumeTokens(tokens, appendFinal) {
|
|
|
751
651
|
}
|
|
752
652
|
return nonFinal;
|
|
753
653
|
}
|
|
754
|
-
/** Resolve once the WebSocket opens; reject on the first error. */
|
|
755
654
|
function waitForOpen$1(ws) {
|
|
756
655
|
return new Promise((resolve, reject) => {
|
|
757
656
|
const onOpen = () => {
|
|
@@ -766,7 +665,6 @@ function waitForOpen$1(ws) {
|
|
|
766
665
|
ws.once("error", onErr);
|
|
767
666
|
});
|
|
768
667
|
}
|
|
769
|
-
/** Build the initial JSON config frame for a Soniox session. */
|
|
770
668
|
function buildConfigFrame(apiKey, opts, sampleRate) {
|
|
771
669
|
const config = {
|
|
772
670
|
api_key: apiKey,
|
|
@@ -778,7 +676,6 @@ function buildConfigFrame(apiKey, opts, sampleRate) {
|
|
|
778
676
|
if (opts.languageHints && opts.languageHints.length > 0) config.language_hints = [...opts.languageHints];
|
|
779
677
|
return config;
|
|
780
678
|
}
|
|
781
|
-
/** Parse a Soniox text frame into a {@link SonioxResponse}; returns null on garbage. */
|
|
782
679
|
function parseFrame(raw) {
|
|
783
680
|
try {
|
|
784
681
|
return JSON.parse(raw.toString());
|
|
@@ -786,12 +683,6 @@ function parseFrame(raw) {
|
|
|
786
683
|
return null;
|
|
787
684
|
}
|
|
788
685
|
}
|
|
789
|
-
/**
|
|
790
|
-
* Handle one server response. Emits `error`, `final`, and `partial` events
|
|
791
|
-
* onto `emitter` based on the token batch and the running `finalBuf`. The
|
|
792
|
-
* caller owns `finalBuf` so it survives across messages and can be flushed
|
|
793
|
-
* on close.
|
|
794
|
-
*/
|
|
795
686
|
function handleResponse(res, emitter, finalBuf) {
|
|
796
687
|
if (res.error_code !== void 0) {
|
|
797
688
|
emitter.emit("error", makeSttError("stt_stream_error", `Soniox error ${res.error_code}: ${res.error_message ?? "unknown"}`));
|
|
@@ -807,7 +698,6 @@ function handleResponse(res, emitter, finalBuf) {
|
|
|
807
698
|
}
|
|
808
699
|
if (nonFinal.length > 0) emitter.emit("partial", nonFinal);
|
|
809
700
|
}
|
|
810
|
-
/** Build an {@link SttOpener} from resolved Soniox descriptor options. */
|
|
811
701
|
function openSoniox(opts = {}) {
|
|
812
702
|
return {
|
|
813
703
|
name: "soniox",
|
|
@@ -918,8 +808,7 @@ function openCartesia(opts) {
|
|
|
918
808
|
}
|
|
919
809
|
const emitter = createNanoEvents();
|
|
920
810
|
let closed = false;
|
|
921
|
-
|
|
922
|
-
const mintContext = () => ws.context({
|
|
811
|
+
const audioConfig = {
|
|
923
812
|
model_id: model,
|
|
924
813
|
voice: {
|
|
925
814
|
mode: "id",
|
|
@@ -929,39 +818,32 @@ function openCartesia(opts) {
|
|
|
929
818
|
container: "raw",
|
|
930
819
|
encoding: "pcm_s16le",
|
|
931
820
|
sample_rate: sampleRate
|
|
932
|
-
}
|
|
821
|
+
}
|
|
822
|
+
};
|
|
823
|
+
const baseRequest = {
|
|
824
|
+
...audioConfig,
|
|
825
|
+
language
|
|
826
|
+
};
|
|
827
|
+
const mintContext = () => ws.context({
|
|
828
|
+
...audioConfig,
|
|
933
829
|
contextId: randomUUID()
|
|
934
830
|
});
|
|
935
831
|
let context = mintContext();
|
|
936
|
-
/**
|
|
937
|
-
* `doneEmitted` guards against emitting `done` more than once per turn.
|
|
938
|
-
* Reset whenever a fresh context is minted (i.e. at turn boundaries).
|
|
939
|
-
*/
|
|
940
832
|
let doneEmitted = false;
|
|
941
|
-
/**
|
|
942
|
-
* After `flush()` or `cancel()`, the current context is done accepting
|
|
943
|
-
* input. We defer minting a fresh one until the next `sendText()` so
|
|
944
|
-
* that late audio chunks + Cartesia's real `done` event (both tagged
|
|
945
|
-
* with the flushed context's id) still pass the filter below. Rotating
|
|
946
|
-
* eagerly would silently drop all audio still in flight.
|
|
947
|
-
*/
|
|
948
833
|
let rotatePending = false;
|
|
949
|
-
const
|
|
834
|
+
const rotateIfPending = () => {
|
|
835
|
+
if (!rotatePending) return;
|
|
950
836
|
context = mintContext();
|
|
951
837
|
doneEmitted = false;
|
|
952
838
|
rotatePending = false;
|
|
953
839
|
};
|
|
954
|
-
const rotateIfPending = () => {
|
|
955
|
-
if (rotatePending) rotateContext();
|
|
956
|
-
};
|
|
957
840
|
const emitDoneOnce = () => {
|
|
958
841
|
if (doneEmitted || closed) return;
|
|
959
842
|
doneEmitted = true;
|
|
960
843
|
emitter.emit("done");
|
|
961
844
|
};
|
|
962
845
|
ws.on("chunk", (event) => {
|
|
963
|
-
if (closed) return;
|
|
964
|
-
if (event.context_id !== context.contextId) return;
|
|
846
|
+
if (closed || event.context_id !== context.contextId) return;
|
|
965
847
|
const buf = event.audio;
|
|
966
848
|
if (!buf || buf.byteLength === 0) return;
|
|
967
849
|
const evenBytes = buf.byteLength - buf.byteLength % 2;
|
|
@@ -970,8 +852,7 @@ function openCartesia(opts) {
|
|
|
970
852
|
emitter.emit("audio", pcm);
|
|
971
853
|
});
|
|
972
854
|
ws.on("done", (event) => {
|
|
973
|
-
if (closed) return;
|
|
974
|
-
if (event.context_id !== context.contextId) return;
|
|
855
|
+
if (closed || event.context_id !== context.contextId) return;
|
|
975
856
|
emitDoneOnce();
|
|
976
857
|
});
|
|
977
858
|
ws.on("error", (err) => {
|
|
@@ -990,19 +871,6 @@ function openCartesia(opts) {
|
|
|
990
871
|
};
|
|
991
872
|
if (openOpts.signal.aborted) close();
|
|
992
873
|
else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
|
|
993
|
-
const baseRequest = {
|
|
994
|
-
model_id: model,
|
|
995
|
-
voice: {
|
|
996
|
-
mode: "id",
|
|
997
|
-
id: voice
|
|
998
|
-
},
|
|
999
|
-
output_format: {
|
|
1000
|
-
container: "raw",
|
|
1001
|
-
encoding: "pcm_s16le",
|
|
1002
|
-
sample_rate: sampleRate
|
|
1003
|
-
},
|
|
1004
|
-
language
|
|
1005
|
-
};
|
|
1006
874
|
const ignoreRejection = (_err) => {};
|
|
1007
875
|
return {
|
|
1008
876
|
sendText(text) {
|
|
@@ -1044,38 +912,18 @@ function openCartesia(opts) {
|
|
|
1044
912
|
/**
|
|
1045
913
|
* Rime TTS opener (host-only).
|
|
1046
914
|
*
|
|
1047
|
-
*
|
|
1048
|
-
*
|
|
1049
|
-
*
|
|
1050
|
-
*
|
|
1051
|
-
*
|
|
1052
|
-
*
|
|
1053
|
-
* (`wss://users-ws.rime.ai/ws2`). Client-to-server messages are JSON:
|
|
1054
|
-
* - `{ "text": "..." }` — append text to the synthesis buffer
|
|
1055
|
-
* - `{ "operation": "clear" }` — drop buffered text (barge-in)
|
|
1056
|
-
* - `{ "operation": "eos" }` — drain buffer, close connection (NOT used
|
|
1057
|
-
* during a session: it would tear down the WS, forcing reconnect per
|
|
1058
|
-
* turn). We force end-of-turn synthesis with a trailing `"."` instead.
|
|
1059
|
-
* The server responds with JSON frames:
|
|
1060
|
-
* - `{ type: "chunk", data: <base64 PCM16 LE>, contextId: string | null }`
|
|
1061
|
-
* - `{ type: "timestamps", ... }` (ignored)
|
|
1062
|
-
* - `{ type: "error", message: string }` (surfaced as `tts_stream_error`)
|
|
1063
|
-
*
|
|
1064
|
-
* **Single long-lived connection per session.** Rime buffers text until it
|
|
1065
|
-
* sees terminal punctuation (`.`, `?`, `!`), so we use one WebSocket per
|
|
1066
|
-
* `open()` call and reuse it across turns. `clear` resets the buffer
|
|
1067
|
-
* between cancellations.
|
|
915
|
+
* Connects to Rime's `ws2` JSON WebSocket endpoint with one long-lived
|
|
916
|
+
* connection per session. Client → server: `{ text }` appends to the
|
|
917
|
+
* synthesis buffer, `{ operation: "clear" }` drops it (barge-in). We never
|
|
918
|
+
* send `eos` since it tears down the WS — `flush()` instead sends a
|
|
919
|
+
* trailing `"."` to force synthesis of any text buffered behind missing
|
|
920
|
+
* terminal punctuation while keeping the connection reusable.
|
|
1068
921
|
*
|
|
1069
|
-
*
|
|
1070
|
-
*
|
|
1071
|
-
*
|
|
1072
|
-
*
|
|
1073
|
-
*
|
|
1074
|
-
* **Audio format.** The URL requests `audioFormat=pcm` at the negotiated
|
|
1075
|
-
* `sampleRate`, which returns raw PCM16 little-endian. We decode the base64
|
|
1076
|
-
* payload and construct a zero-copy `Int16Array` view over the decoded bytes.
|
|
922
|
+
* Server → client: `{ type: "chunk", data: <base64 PCM16 LE> }` carries
|
|
923
|
+
* audio; `timestamps` is ignored; `error` surfaces as `tts_stream_error`.
|
|
924
|
+
* The `audioFormat=pcm` query param at the negotiated `sampleRate` returns
|
|
925
|
+
* raw PCM16 LE that we view as a zero-copy `Int16Array`.
|
|
1077
926
|
*/
|
|
1078
|
-
/** PCM16 sample rates accepted by the Rime `ws2` endpoint. */
|
|
1079
927
|
const RIME_PCM16_RATES = [
|
|
1080
928
|
8e3,
|
|
1081
929
|
16e3,
|
|
@@ -1088,31 +936,14 @@ function assertSupportedSampleRate(rate) {
|
|
|
1088
936
|
if (RIME_PCM16_RATES.includes(rate)) return rate;
|
|
1089
937
|
throw makeTtsError("tts_connect_failed", `Rime TTS: unsupported sample rate ${rate}. Supported: ${RIME_PCM16_RATES.join(", ")}.`);
|
|
1090
938
|
}
|
|
1091
|
-
/**
|
|
1092
|
-
* Decode a base64 string from Rime into a zero-copy `Int16Array`.
|
|
1093
|
-
*
|
|
1094
|
-
* Rime's `ws2` endpoint returns base64-encoded PCM16 LE in each chunk.
|
|
1095
|
-
* `Buffer.from(base64, "base64")` gives us a Node.js Buffer (which is a
|
|
1096
|
-
* Uint8Array subclass) with `byteOffset === 0`. PCM16 bytes always come in
|
|
1097
|
-
* pairs so the length is guaranteed to be even.
|
|
1098
|
-
*/
|
|
1099
939
|
/**
 * Decode a base64 string into 16-bit PCM samples.
 *
 * The returned `Int16Array` is a view over the decoded bytes (no copy is
 * made). A trailing odd byte, if any, is left out of the view.
 *
 * @param {string} data - Base64-encoded audio bytes.
 * @returns {Int16Array} Samples backed by the decoded buffer; empty when
 *   fewer than two bytes were decoded.
 */
function base64ToPcm(data) {
  const decoded = Buffer.from(data, "base64");
  // Two bytes per sample: round down so a dangling byte is ignored.
  const sampleCount = Math.floor(decoded.byteLength / 2);
  return sampleCount > 0
    ? new Int16Array(decoded.buffer, decoded.byteOffset, sampleCount)
    : new Int16Array(0);
}
|
|
1105
|
-
/** Quiescence timeout in ms — how long to wait after the last audio chunk before emitting `done`. */
|
|
1106
945
|
const QUIESCENCE_MS = 500;
|
|
1107
|
-
/**
|
|
1108
|
-
* After `flush()`, how long to wait for the FIRST audio chunk before
|
|
1109
|
-
* giving up and emitting `done`. Greeting and short replies hit this
|
|
1110
|
-
* path: `flush()` runs immediately after `sendText()`, so audio TTFB
|
|
1111
|
-
* exceeds the 500 ms quiescence window. Once the first chunk arrives,
|
|
1112
|
-
* we transition to the shorter quiescence timeout.
|
|
1113
|
-
*/
|
|
1114
946
|
const FIRST_AUDIO_TIMEOUT_MS = 5e3;
|
|
1115
|
-
/** Wait for the WebSocket `open` event; reject on first `error`. */
|
|
1116
947
|
function waitForOpen(ws) {
|
|
1117
948
|
return new Promise((resolve, reject) => {
|
|
1118
949
|
const onOpen = () => {
|
|
@@ -1127,12 +958,6 @@ function waitForOpen(ws) {
|
|
|
1127
958
|
ws.once("error", onError);
|
|
1128
959
|
});
|
|
1129
960
|
}
|
|
1130
|
-
/**
|
|
1131
|
-
* Handle one incoming WebSocket message frame.
|
|
1132
|
-
*
|
|
1133
|
-
* Extracted into a top-level function to keep `open()` under the cognitive
|
|
1134
|
-
* complexity limit while retaining full access to the session state via refs.
|
|
1135
|
-
*/
|
|
1136
961
|
function handleRimeMessage(raw, emitter, armQuiescence, isActiveTimer) {
|
|
1137
962
|
let msg;
|
|
1138
963
|
try {
|
|
@@ -1150,7 +975,6 @@ function handleRimeMessage(raw, emitter, armQuiescence, isActiveTimer) {
|
|
|
1150
975
|
}
|
|
1151
976
|
if (msg.type === "error") emitter.emit("error", makeTtsError("tts_stream_error", `Rime TTS: ${msg.message ?? "unknown error"}`));
|
|
1152
977
|
}
|
|
1153
|
-
/** Build a {@link TtsOpener} from resolved Rime descriptor options. */
|
|
1154
978
|
function openRime(opts) {
|
|
1155
979
|
return {
|
|
1156
980
|
name: "rime",
|
|
@@ -1172,12 +996,6 @@ function openRime(opts) {
|
|
|
1172
996
|
const emitter = createNanoEvents();
|
|
1173
997
|
let closed = false;
|
|
1174
998
|
let doneEmitted = false;
|
|
1175
|
-
/**
|
|
1176
|
-
* After `flush()`, we arm a timer that fires `done`. Initial timeout is
|
|
1177
|
-
* `FIRST_AUDIO_TIMEOUT_MS` to give Rime headroom on TTFB; the first
|
|
1178
|
-
* chunk swaps it for a shorter `QUIESCENCE_MS` window that resets on
|
|
1179
|
-
* each subsequent chunk. `cancel()` emits `done` synchronously.
|
|
1180
|
-
*/
|
|
1181
999
|
let quiescenceTimer = null;
|
|
1182
1000
|
const clearQuiescence = () => {
|
|
1183
1001
|
if (quiescenceTimer !== null) {
|
|
@@ -1271,21 +1089,24 @@ function openRime(opts) {
|
|
|
1271
1089
|
/**
 * Look up an API key by environment-variable name.
 *
 * The agent-supplied `env` map is consulted first, then `process.env`.
 * Only `null`/`undefined` trigger the fallback, so an empty string in
 * `env` is returned as-is.
 *
 * @param {string} envVar - Name of the variable to read.
 * @param {Record<string, string | undefined>} env - Agent environment map.
 * @returns {string} The resolved value, or `""` when neither source has it.
 */
function resolveApiKey(envVar, env) {
  const fromAgentEnv = env[envVar];
  if (fromAgentEnv != null) return fromAgentEnv;
  const fromProcessEnv = process.env[envVar];
  return fromProcessEnv != null ? fromProcessEnv : "";
}
|
|
1092
|
+
/**
 * Return the `options` payload carried by a provider descriptor.
 *
 * @param {{ options?: unknown }} descriptor - Provider descriptor.
 * @returns {unknown} `descriptor.options`, unchanged.
 */
function options(descriptor) {
  const { options: providerOptions } = descriptor;
  return providerOptions;
}
|
|
1274
1095
|
/**
 * Resolve an {@link SttProvider} descriptor into a host-side opener.
 *
 * @param descriptor - Provider descriptor; `kind` selects the backend and
 *   `options` is passed to that backend's `open*` factory.
 * @returns The value produced by the matching `open*` factory.
 * @throws {Error} When `descriptor.kind` is not a supported STT kind.
 */
function resolveStt(descriptor) {
  const { kind } = descriptor;
  if (kind === ASSEMBLYAI_KIND) return openAssemblyAI(options(descriptor));
  if (kind === DEEPGRAM_KIND) return openDeepgram(options(descriptor));
  if (kind === ELEVENLABS_KIND) return openElevenLabs(options(descriptor));
  if (kind === SONIOX_KIND) return openSoniox(options(descriptor));
  throw new Error(`Unknown STT provider kind: "${kind}". Supported: ${ASSEMBLYAI_KIND}, ${DEEPGRAM_KIND}, ${ELEVENLABS_KIND}, ${SONIOX_KIND}.`);
}
|
|
1284
1105
|
/**
 * Resolve a {@link TtsProvider} descriptor into a host-side opener.
 *
 * @param descriptor - Provider descriptor; `kind` selects the backend and
 *   `options` is passed to that backend's `open*` factory.
 * @returns The value produced by the matching `open*` factory.
 * @throws {Error} When `descriptor.kind` is not a supported TTS kind.
 */
function resolveTts(descriptor) {
  const { kind } = descriptor;
  if (kind === CARTESIA_KIND) return openCartesia(options(descriptor));
  if (kind === RIME_KIND) return openRime(options(descriptor));
  throw new Error(`Unknown TTS provider kind: "${kind}". Supported: ${CARTESIA_KIND}, ${RIME_KIND}.`);
}
|
|
@@ -1302,12 +1123,12 @@ function resolveLlm(descriptor, env) {
|
|
|
1302
1123
|
case ANTHROPIC_KIND: return createAnthropic({
|
|
1303
1124
|
apiKey: requireKey(env, "ANTHROPIC_API_KEY", "Anthropic"),
|
|
1304
1125
|
baseURL: "https://api.anthropic.com/v1"
|
|
1305
|
-
})(descriptor.
|
|
1306
|
-
case OPENAI_KIND: return createOpenAI({ apiKey: requireKey(env, "OPENAI_API_KEY", "OpenAI") })(descriptor.
|
|
1307
|
-
case GOOGLE_KIND: return createGoogleGenerativeAI({ apiKey: requireKey(env, "GOOGLE_GENERATIVE_AI_API_KEY", "Google") })(descriptor.
|
|
1308
|
-
case MISTRAL_KIND: return createMistral({ apiKey: requireKey(env, "MISTRAL_API_KEY", "Mistral") })(descriptor.
|
|
1309
|
-
case "xai": return createXai({ apiKey: requireKey(env, "XAI_API_KEY", "xAI") })(descriptor.
|
|
1310
|
-
case GROQ_KIND: return createGroq({ apiKey: requireKey(env, "GROQ_API_KEY", "Groq") })(descriptor.
|
|
1126
|
+
})(options(descriptor).model);
|
|
1127
|
+
case OPENAI_KIND: return createOpenAI({ apiKey: requireKey(env, "OPENAI_API_KEY", "OpenAI") })(options(descriptor).model);
|
|
1128
|
+
case GOOGLE_KIND: return createGoogleGenerativeAI({ apiKey: requireKey(env, "GOOGLE_GENERATIVE_AI_API_KEY", "Google") })(options(descriptor).model);
|
|
1129
|
+
case MISTRAL_KIND: return createMistral({ apiKey: requireKey(env, "MISTRAL_API_KEY", "Mistral") })(options(descriptor).model);
|
|
1130
|
+
case "xai": return createXai({ apiKey: requireKey(env, "XAI_API_KEY", "xAI") })(options(descriptor).model);
|
|
1131
|
+
case GROQ_KIND: return createGroq({ apiKey: requireKey(env, "GROQ_API_KEY", "Groq") })(options(descriptor).model);
|
|
1311
1132
|
default: throw new Error(`Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}, ${OPENAI_KIND}, ${GOOGLE_KIND}, ${MISTRAL_KIND}, xai, ${GROQ_KIND}.`);
|
|
1312
1133
|
}
|
|
1313
1134
|
}
|
|
@@ -1321,8 +1142,9 @@ function loadProviderPackage(name, label) {
|
|
|
1321
1142
|
try {
|
|
1322
1143
|
return requireFromHere(name);
|
|
1323
1144
|
} catch (err) {
|
|
1324
|
-
|
|
1325
|
-
throw err;
|
|
1145
|
+
const code = err?.code;
|
|
1146
|
+
if (!(err instanceof Error && (code === "MODULE_NOT_FOUND" || code === "ERR_MODULE_NOT_FOUND") && err.message.includes(name))) throw err;
|
|
1147
|
+
throw new Error(`${label}: package \`${name}\` is not installed. Run \`pnpm add ${name}\`.`, { cause: err });
|
|
1326
1148
|
}
|
|
1327
1149
|
}
|
|
1328
1150
|
function requireKey(env, name, label) {
|
|
@@ -1334,67 +1156,42 @@ function requireKey(env, name, label) {
|
|
|
1334
1156
|
//#region host/pinecone-vector.ts
|
|
1335
1157
|
/**
 * Create a vector store backed by one Pinecone index namespace.
 *
 * The Pinecone SDK is loaded through `loadProviderPackage`, and a single
 * namespace handle is built once and shared by all three operations.
 *
 * @param opts - `{ apiKey, index, namespace }` identifying the namespace.
 * @returns An object with async `upsert`, `query`, and `delete` methods.
 */
function createPineconeVector(opts) {
  const { Pinecone } = loadProviderPackage("@pinecone-database/pinecone", "Pinecone Vector");
  const ns = new Pinecone({ apiKey: opts.apiKey }).index(opts.index).namespace(opts.namespace);
  return {
    /**
     * Store one text record under `id`, with optional flat metadata fields.
     */
    async upsert(id, text, metadata) {
      await ns.upsertRecords([{
        // Metadata goes first so a caller-supplied `_id` or `text` key can
        // never replace the record's real id or text.
        ...metadata ?? {},
        _id: id,
        text
      }]);
    },
    /**
     * Search the namespace by text.
     *
     * @param text - Query text.
     * @param queryOpts - Optional `{ topK = 5, filter }`.
     * @returns Matches as `{ id, score, text, metadata? }`; `metadata` is
     *   present only when the hit has fields other than `text`.
     */
    async query(text, queryOpts) {
      const { topK = 5, filter } = queryOpts ?? {};
      const response = await ns.searchRecords({
        query: {
          inputs: { text },
          topK,
          ...filter !== void 0 ? { filter } : {}
        },
        fields: ["*"]
      });
      return response.result.hits.map((hit) => {
        // Tolerate a hit that carries no `fields` object.
        const { text: hitText, ...rest } = hit.fields ?? {};
        const match = {
          id: hit._id,
          score: hit._score,
          text: typeof hitText === "string" ? hitText : ""
        };
        if (Object.keys(rest).length > 0) match.metadata = rest;
        return match;
      });
    },
    /**
     * Delete one id or an array of ids from the namespace.
     */
    async delete(ids) {
      await ns.deleteMany(Array.isArray(ids) ? ids : [ids]);
    }
  };
}
|
|
1375
1193
|
//#endregion
|
|
1376
1194
|
//#region host/unstorage-kv.ts
|
|
1377
|
-
/**
|
|
1378
|
-
* Key-value store backed by unstorage.
|
|
1379
|
-
*
|
|
1380
|
-
* Works with any unstorage driver (memory, fs, S3/R2, etc.).
|
|
1381
|
-
*/
|
|
1382
|
-
/**
|
|
1383
|
-
* Create a KV store backed by any unstorage driver.
|
|
1384
|
-
*
|
|
1385
|
-
* @param options - See {@link UnstorageKvOptions}.
|
|
1386
|
-
* @returns A {@link Kv} instance.
|
|
1387
|
-
*
|
|
1388
|
-
* @example
|
|
1389
|
-
* ```ts
|
|
1390
|
-
* import { createStorage } from "unstorage";
|
|
1391
|
-
* import { createUnstorageKv } from "@alexkroman1/aai/unstorage-kv";
|
|
1392
|
-
*
|
|
1393
|
-
* const kv = createUnstorageKv({ storage: createStorage() });
|
|
1394
|
-
* await kv.set("greeting", "hello");
|
|
1395
|
-
* const value = await kv.get<string>("greeting"); // "hello"
|
|
1396
|
-
* ```
|
|
1397
|
-
*/
|
|
1398
1195
|
function createUnstorageKv(options) {
|
|
1399
1196
|
const store = options.prefix ? prefixStorage(options.storage, options.prefix) : options.storage;
|
|
1400
1197
|
return {
|
|
@@ -1403,9 +1200,9 @@ function createUnstorageKv(options) {
|
|
|
1403
1200
|
},
|
|
1404
1201
|
async set(key, value, setOptions) {
|
|
1405
1202
|
if (JSON.stringify(value).length > 65536) throw new Error(`Value exceeds max size of ${MAX_VALUE_SIZE} bytes`);
|
|
1406
|
-
const
|
|
1407
|
-
|
|
1408
|
-
|
|
1203
|
+
const expireIn = setOptions?.expireIn;
|
|
1204
|
+
const ttlOption = expireIn && expireIn > 0 ? { ttl: Math.ceil(expireIn / 1e3) } : void 0;
|
|
1205
|
+
await store.setItem(key, value, ttlOption);
|
|
1409
1206
|
},
|
|
1410
1207
|
async delete(keys) {
|
|
1411
1208
|
const keyArray = Array.isArray(keys) ? keys : [keys];
|
|
@@ -1418,36 +1215,15 @@ function createUnstorageKv(options) {
|
|
|
1418
1215
|
}
|
|
1419
1216
|
//#endregion
|
|
1420
1217
|
//#region host/providers/resolve-kv.ts
|
|
1421
|
-
/**
|
|
1422
|
-
* Descriptor → concrete `Kv` resolver. Mirror of `resolveLlm` /
|
|
1423
|
-
* `resolveVector`. Always wraps the produced unstorage Storage in
|
|
1424
|
-
* `createUnstorageKv` with the provided per-tenant prefix so namespace
|
|
1425
|
-
* isolation is enforced regardless of backend choice.
|
|
1426
|
-
*/
|
|
1427
|
-
/**
|
|
1428
|
-
* Load a CJS unstorage driver factory. The CJS variants use
|
|
1429
|
-
* `module.exports = defineDriver(...)` so the require result is the
|
|
1430
|
-
* factory itself (not an object with `.default`).
|
|
1431
|
-
*
|
|
1432
|
-
* Delegates to loadProviderPackage (lazy-load via createRequire so the
|
|
1433
|
-
* driver is a true optional peer dep).
|
|
1434
|
-
*/
|
|
1435
1218
|
/**
 * Load a KV driver module through `loadProviderPackage`.
 *
 * @param {string} modulePath - Module specifier of the driver to load.
 * @param {string} label - Backend name; used to build the label
 *   `"<label> KV: driver"` that is passed along with the module path.
 * @returns Whatever `loadProviderPackage` returns for that module.
 */
function loadDriver(modulePath, label) {
  const packageLabel = `${label} KV: driver`;
  return loadProviderPackage(modulePath, packageLabel);
}
|
|
1438
|
-
/**
|
|
1439
|
-
* Build a lazy unstorage Driver that defers loading the real driver
|
|
1440
|
-
* factory until the first I/O operation. This is necessary for drivers
|
|
1441
|
-
* whose peer dependencies (e.g. `ioredis`) may not be installed on the
|
|
1442
|
-
* host at startup — the missing package will only surface when the agent
|
|
1443
|
-
* actually performs KV operations, not at session creation time.
|
|
1444
|
-
*/
|
|
1445
1221
|
function makeLazyDriver(modulePath, label, opts) {
|
|
1446
1222
|
let resolved = null;
|
|
1447
|
-
|
|
1223
|
+
function get() {
|
|
1448
1224
|
if (!resolved) resolved = loadDriver(modulePath, label)(opts);
|
|
1449
1225
|
return resolved;
|
|
1450
|
-
}
|
|
1226
|
+
}
|
|
1451
1227
|
return {
|
|
1452
1228
|
name: label.toLowerCase(),
|
|
1453
1229
|
hasItem: (key, txOpts) => get().hasItem(key, txOpts),
|
|
@@ -1458,10 +1234,9 @@ function makeLazyDriver(modulePath, label, opts) {
|
|
|
1458
1234
|
removeItem: (key, txOpts) => get().removeItem?.(key, txOpts),
|
|
1459
1235
|
getKeys: (base, txOpts) => get().getKeys(base, txOpts),
|
|
1460
1236
|
clear: (base, txOpts) => get().clear?.(base, txOpts),
|
|
1461
|
-
dispose: () => resolved
|
|
1237
|
+
dispose: () => resolved?.dispose?.()
|
|
1462
1238
|
};
|
|
1463
1239
|
}
|
|
1464
|
-
/** Resolve a {@link KvProvider} descriptor into a {@link Kv}. */
|
|
1465
1240
|
function resolveKv(descriptor, env, prefix) {
|
|
1466
1241
|
switch (descriptor.kind) {
|
|
1467
1242
|
case MEMORY_KV_KIND: return createUnstorageKv({
|
|
@@ -1508,24 +1283,16 @@ function resolveKv(descriptor, env, prefix) {
|
|
|
1508
1283
|
}
|
|
1509
1284
|
//#endregion
|
|
1510
1285
|
//#region host/providers/resolve-vector.ts
|
|
1511
|
-
/**
|
|
1512
|
-
* Descriptor → concrete `Vector` resolver. Mirror of `resolveLlm`.
|
|
1513
|
-
*
|
|
1514
|
-
* Pulls API keys from the agent env so descriptors stay
|
|
1515
|
-
* secret-free. Lazy-loads provider SDKs via `createRequire` so
|
|
1516
|
-
* unused providers never enter the bundle.
|
|
1517
|
-
*/
|
|
1518
|
-
/** Resolve a {@link VectorProvider} descriptor into a {@link Vector}. */
|
|
1519
1286
|
function resolveVector(descriptor, env, namespace) {
|
|
1520
1287
|
switch (descriptor.kind) {
|
|
1521
1288
|
case IN_MEMORY_VECTOR_KIND: return createMemoryVector({ namespace });
|
|
1522
1289
|
case PINECONE_VECTOR_KIND: {
|
|
1523
1290
|
const apiKey = resolveApiKey("PINECONE_API_KEY", env);
|
|
1524
1291
|
if (!apiKey) throw new Error("Pinecone Vector: missing API key. Set PINECONE_API_KEY in the agent env.");
|
|
1525
|
-
const
|
|
1292
|
+
const { index } = descriptor.options;
|
|
1526
1293
|
return createPineconeVector({
|
|
1527
1294
|
apiKey,
|
|
1528
|
-
index
|
|
1295
|
+
index,
|
|
1529
1296
|
namespace
|
|
1530
1297
|
});
|
|
1531
1298
|
}
|
|
@@ -1534,14 +1301,13 @@ function resolveVector(descriptor, env, namespace) {
|
|
|
1534
1301
|
}
|
|
1535
1302
|
//#endregion
|
|
1536
1303
|
//#region sdk/system-prompt.ts
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
}
|
|
1304
|
+
const DATE_FORMAT_OPTIONS = {
|
|
1305
|
+
weekday: "long",
|
|
1306
|
+
year: "numeric",
|
|
1307
|
+
month: "long",
|
|
1308
|
+
day: "numeric"
|
|
1309
|
+
};
|
|
1310
|
+
const TOOL_PREAMBLE = "\n\nWhen you decide to use a tool, ALWAYS say a brief natural phrase BEFORE the tool call (e.g. \"Let me look that up\" or \"One moment while I check\"). This fills silence while the tool executes. Keep preambles to one short sentence.";
|
|
1545
1311
|
const VOICE_RULES = "\n\nCRITICAL OUTPUT RULES — you MUST follow these for EVERY response:\nYour response will be spoken aloud by a TTS system and displayed as plain text.\n- NEVER use markdown: no **, no *, no _, no #, no `, no [](), no ---\n- NEVER use bullet points (-, *, •) or numbered lists (1., 2.)\n- NEVER use code blocks or inline code\n- NEVER mention tools, search, APIs, or technical failures to the user. If a tool returns no results, just answer naturally without explaining why.\n- Write exactly as you would say it out loud to a friend\n- Use short conversational sentences. To list things, say \"First,\" \"Next,\" \"Finally,\"\n- Keep responses concise — 1 to 3 sentences max";
|
|
1546
1312
|
/**
|
|
1547
1313
|
* Build the system prompt sent to the LLM from the agent configuration.
|
|
@@ -1557,11 +1323,10 @@ const VOICE_RULES = "\n\nCRITICAL OUTPUT RULES — you MUST follow these for EVE
|
|
|
1557
1323
|
* @returns The assembled system prompt string.
|
|
1558
1324
|
*/
|
|
1559
1325
|
function buildSystemPrompt(config, opts) {
  // Agent instructions are appended only when they differ from the default.
  const { systemPrompt } = config;
  let agentInstructions = "";
  if (systemPrompt && systemPrompt !== DEFAULT_SYSTEM_PROMPT) {
    agentInstructions = `\n\nAgent-Specific Instructions:\n${systemPrompt}`;
  }

  const toolPreamble = opts.hasTools ? TOOL_PREAMBLE : "";

  // Built-in tool guidance is one line per entry under its own heading.
  const { toolGuidance } = opts;
  let guidance = "";
  if (toolGuidance && toolGuidance.length > 0) {
    guidance = `\n\nBuilt-in Tool Usage:\n${toolGuidance.join("\n")}`;
  }

  const today = (/* @__PURE__ */ new Date()).toLocaleDateString("en-US", DATE_FORMAT_OPTIONS);
  const voiceRules = opts.voice ? VOICE_RULES : "";

  // Order: default prompt, date, agent instructions, tool preamble,
  // tool guidance, voice rules.
  return `${DEFAULT_SYSTEM_PROMPT}\n\nToday's date is ${today}.${agentInstructions}${toolPreamble}${guidance}${voiceRules}`;
}
|
|
1566
1331
|
//#endregion
|
|
1567
1332
|
//#region host/runtime-config.ts
|
|
@@ -1581,22 +1346,23 @@ const consoleLogger = {
|
|
|
1581
1346
|
error: consoleLog(console.error),
|
|
1582
1347
|
debug: consoleLog(console.debug)
|
|
1583
1348
|
};
|
|
1584
|
-
/**
|
|
1585
|
-
* Structured JSON logger for production diagnostics. Each log entry is a
|
|
1586
|
-
* single-line JSON object with `timestamp`, `level`, `msg`, and any
|
|
1587
|
-
* caller-provided context fields.
|
|
1588
|
-
*/
|
|
1589
1349
|
/**
 * Build a log function for one severity level that writes a single line of
 * JSON per call.
 *
 * `"error"` and `"warn"` go to `process.stderr`; every other level goes to
 * `process.stdout`. Each entry holds `timestamp` (ISO 8601), `level`, and
 * `msg`, followed by the caller's context fields.
 *
 * @param {string} level - Severity label stored in every entry.
 * @returns {(msg: string, ctx?: Record<string, unknown>) => void}
 */
function jsonLog(level) {
  const out = level === "error" || level === "warn" ? process.stderr : process.stdout;
  return (msg, ctx) => {
    const entry = {
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      level,
      msg
    };
    // Merge context without letting it overwrite the reserved fields; a
    // context key named `level`, `msg`, or `timestamp` would otherwise
    // produce a misleading log line.
    for (const [key, value] of Object.entries(ctx ?? {})) {
      if (!Object.hasOwn(entry, key)) entry[key] = value;
    }
    out.write(`${JSON.stringify(entry)}\n`);
  };
}
|
|
1361
|
+
/**
|
|
1362
|
+
* Structured JSON logger for production diagnostics. Each log entry is a
|
|
1363
|
+
* single-line JSON object with `timestamp`, `level`, `msg`, and any
|
|
1364
|
+
* caller-provided context fields.
|
|
1365
|
+
*/
|
|
1600
1366
|
const jsonLogger = {
|
|
1601
1367
|
info: jsonLog("info"),
|
|
1602
1368
|
warn: jsonLog("warn"),
|
|
@@ -1615,15 +1381,16 @@ const REPLY_DONE_SLOW_THRESHOLD_MS = 50;
|
|
|
1615
1381
|
function createSessionCore(opts) {
|
|
1616
1382
|
const log = opts.logger ?? consoleLogger;
|
|
1617
1383
|
const maxHistory = opts.maxHistory ?? 200;
|
|
1618
|
-
const
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
}
|
|
1384
|
+
const rawIdleMs = opts.agentConfig.idleTimeoutMs ?? 3e5;
|
|
1385
|
+
const idleMs = rawIdleMs === 0 || !Number.isFinite(rawIdleMs) ? 0 : rawIdleMs;
|
|
1386
|
+
function emptyReply() {
|
|
1387
|
+
return {
|
|
1388
|
+
currentReplyId: null,
|
|
1389
|
+
pendingTools: [],
|
|
1390
|
+
toolCallCount: 0
|
|
1391
|
+
};
|
|
1392
|
+
}
|
|
1393
|
+
let reply = emptyReply();
|
|
1627
1394
|
let history = [];
|
|
1628
1395
|
let turnPromise = null;
|
|
1629
1396
|
let idleTimer = null;
|
|
@@ -1645,18 +1412,13 @@ function createSessionCore(opts) {
|
|
|
1645
1412
|
}
|
|
1646
1413
|
function beginReply(replyId) {
|
|
1647
1414
|
reply = {
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
toolCallCount: 0
|
|
1415
|
+
...emptyReply(),
|
|
1416
|
+
currentReplyId: replyId
|
|
1651
1417
|
};
|
|
1652
1418
|
turnPromise = null;
|
|
1653
1419
|
}
|
|
1654
1420
|
function cancelReply() {
|
|
1655
|
-
reply =
|
|
1656
|
-
currentReplyId: null,
|
|
1657
|
-
pendingTools: [],
|
|
1658
|
-
toolCallCount: 0
|
|
1659
|
-
};
|
|
1421
|
+
reply = emptyReply();
|
|
1660
1422
|
}
|
|
1661
1423
|
function flushReply(startMs, hadTurnPromise) {
|
|
1662
1424
|
const stepsUsed = reply.toolCallCount;
|
|
@@ -1846,7 +1608,7 @@ function createSessionCore(opts) {
|
|
|
1846
1608
|
*/
|
|
1847
1609
|
const yieldTick = () => new Promise((r) => setTimeout(r, 0));
|
|
1848
1610
|
function buildToolContext(opts) {
|
|
1849
|
-
const { env, state, kv, vector, messages, sessionId } = opts;
|
|
1611
|
+
const { env, state, kv, vector, messages, sessionId, send } = opts;
|
|
1850
1612
|
return {
|
|
1851
1613
|
env,
|
|
1852
1614
|
state: state ?? {},
|
|
@@ -1861,14 +1623,21 @@ function buildToolContext(opts) {
|
|
|
1861
1623
|
messages: messages ?? [],
|
|
1862
1624
|
sessionId: sessionId ?? "",
|
|
1863
1625
|
send(event, data) {
|
|
1864
|
-
|
|
1626
|
+
send?.(event, data);
|
|
1865
1627
|
}
|
|
1866
1628
|
};
|
|
1867
1629
|
}
|
|
1630
|
+
/**
 * Render a validation error's issues as one comma-separated string.
 *
 * Each issue becomes `"<dotted.path>: <message>"`; an empty path yields a
 * leading `": "`. A missing error or missing `issues` list gives `""`.
 *
 * @param {{ issues?: Array<{ path: unknown[], message: string }> } | null | undefined} error
 * @returns {string}
 */
function formatZodIssues(error) {
  const issues = error?.issues ?? [];
  const describeIssue = (issue) => {
    const location = issue.path.map((segment) => String(segment)).join(".");
    return `${location}: ${issue.message}`;
  };
  return issues.map(describeIssue).join(", ");
}
|
|
1633
|
+
/**
 * Convert a tool's return value to a string.
 *
 * - `null` / `undefined` become `"null"`.
 * - Strings pass through unchanged.
 * - Everything else is JSON-encoded. Values JSON cannot represent
 *   (functions, symbols, a `toJSON()` that yields `undefined`) also become
 *   `"null"`, so the return value is always a string.
 *
 * @param {unknown} result - Value returned by a tool.
 * @returns {string}
 * @throws {TypeError} Propagated from `JSON.stringify` for circular
 *   structures or BigInt values.
 */
function stringifyResult(result) {
  if (result == null) return "null";
  if (typeof result === "string") return result;
  // JSON.stringify returns undefined (not a string) for unserializable
  // top-level values; fall back to the same text used for null.
  return JSON.stringify(result) ?? "null";
}
|
|
1868
1637
|
async function executeToolCall(name, args, options) {
|
|
1869
|
-
const { tool } = options;
|
|
1638
|
+
const { tool, logger } = options;
|
|
1870
1639
|
const parsed = (tool.parameters ?? EMPTY_PARAMS).safeParse(args);
|
|
1871
|
-
if (!parsed.success) return toolError(`Invalid arguments for tool "${name}": ${(parsed.error
|
|
1640
|
+
if (!parsed.success) return toolError(`Invalid arguments for tool "${name}": ${formatZodIssues(parsed.error)}`);
|
|
1872
1641
|
try {
|
|
1873
1642
|
const ctx = buildToolContext(options);
|
|
1874
1643
|
await yieldTick();
|
|
@@ -1877,11 +1646,9 @@ async function executeToolCall(name, args, options) {
|
|
|
1877
1646
|
message: `Tool "${name}" timed out after ${TOOL_EXECUTION_TIMEOUT_MS}ms`
|
|
1878
1647
|
});
|
|
1879
1648
|
await yieldTick();
|
|
1880
|
-
|
|
1881
|
-
return typeof result === "string" ? result : JSON.stringify(result);
|
|
1649
|
+
return stringifyResult(result);
|
|
1882
1650
|
} catch (err) {
|
|
1883
|
-
|
|
1884
|
-
if (log) log.warn("Tool execution failed", {
|
|
1651
|
+
if (logger) logger.warn("Tool execution failed", {
|
|
1885
1652
|
tool: name,
|
|
1886
1653
|
error: errorDetail(err)
|
|
1887
1654
|
});
|
|
@@ -1890,29 +1657,267 @@ async function executeToolCall(name, args, options) {
|
|
|
1890
1657
|
}
|
|
1891
1658
|
}
|
|
1892
1659
|
//#endregion
|
|
1660
|
+
//#region host/_base64.ts
|
|
1661
|
+
/**
 * Encode bytes as a base64 string.
 *
 * @param {Uint8Array} bytes - Bytes to encode.
 * @returns {string} Standard base64 text.
 */
function uint8ToBase64(bytes) {
  const asBuffer = Buffer.from(bytes);
  return asBuffer.toString("base64");
}
|
|
1664
|
+
/**
 * Decode a base64 string into a plain `Uint8Array`.
 *
 * The decoded bytes are copied, so the result is a standalone
 * `Uint8Array` rather than a `Buffer`.
 *
 * @param {string} base64 - Base64 text to decode.
 * @returns {Uint8Array} The decoded bytes.
 */
function base64ToUint8(base64) {
  const decoded = Buffer.from(base64, "base64");
  return Uint8Array.from(decoded);
}
|
|
1667
|
+
//#endregion
|
|
1668
|
+
//#region host/transports/openai-realtime-transport.ts
|
|
1669
|
+
const DEFAULT_MODEL = "gpt-realtime-2";
|
|
1670
|
+
const DEFAULT_VOICE = "alloy";
|
|
1671
|
+
const DEFAULT_URL = "wss://api.openai.com/v1/realtime";
|
|
1672
|
+
/**
 * Default WebSocket factory for the OpenAI Realtime transport: builds a
 * `WsWebSocket` for `url`, passing only the `headers` option through.
 */
const defaultCreateOpenaiRealtimeWebSocket = (url, opts) => {
  const { headers } = opts;
  return new WsWebSocket(url, { headers });
};
|
|
1673
|
+
function createOpenaiRealtimeTransport(opts) {
|
|
1674
|
+
const log = opts.logger ?? consoleLogger;
|
|
1675
|
+
const createWs = opts.createWebSocket ?? defaultCreateOpenaiRealtimeWebSocket;
|
|
1676
|
+
const model = opts.options.model ?? DEFAULT_MODEL;
|
|
1677
|
+
const voice = opts.options.voice ?? DEFAULT_VOICE;
|
|
1678
|
+
const baseUrl = opts.options.url ?? DEFAULT_URL;
|
|
1679
|
+
let ws = null;
|
|
1680
|
+
let closing = false;
|
|
1681
|
+
const agentTranscriptBuffers = /* @__PURE__ */ new Map();
|
|
1682
|
+
const toolBuffers = /* @__PURE__ */ new Map();
|
|
1683
|
+
let currentResponseId = null;
|
|
1684
|
+
function send(payload) {
|
|
1685
|
+
if (!ws || ws.readyState !== 1) {
|
|
1686
|
+
log.debug("OpenAI Realtime send dropped: socket not open", { type: payload.type });
|
|
1687
|
+
return;
|
|
1688
|
+
}
|
|
1689
|
+
ws.send(JSON.stringify(payload));
|
|
1690
|
+
}
|
|
1691
|
+
function sendSessionUpdate() {
|
|
1692
|
+
send({
|
|
1693
|
+
type: "session.update",
|
|
1694
|
+
session: {
|
|
1695
|
+
modalities: ["audio", "text"],
|
|
1696
|
+
voice,
|
|
1697
|
+
instructions: opts.sessionConfig.systemPrompt,
|
|
1698
|
+
input_audio_format: "pcm16",
|
|
1699
|
+
output_audio_format: "pcm16",
|
|
1700
|
+
input_audio_transcription: { model: "whisper-1" },
|
|
1701
|
+
turn_detection: { type: "server_vad" },
|
|
1702
|
+
tools: opts.toolSchemas,
|
|
1703
|
+
tool_choice: opts.toolChoice
|
|
1704
|
+
}
|
|
1705
|
+
});
|
|
1706
|
+
}
|
|
1707
|
+
async function start() {
|
|
1708
|
+
const url = `${baseUrl}?model=${encodeURIComponent(model)}`;
|
|
1709
|
+
log.info("OpenAI Realtime connecting", { url });
|
|
1710
|
+
return new Promise((resolve, reject) => {
|
|
1711
|
+
const sock = createWs(url, { headers: {
|
|
1712
|
+
Authorization: `Bearer ${opts.apiKey}`,
|
|
1713
|
+
"OpenAI-Beta": "realtime=v1"
|
|
1714
|
+
} });
|
|
1715
|
+
ws = sock;
|
|
1716
|
+
let opened = false;
|
|
1717
|
+
sock.addEventListener("open", () => {
|
|
1718
|
+
opened = true;
|
|
1719
|
+
sendSessionUpdate();
|
|
1720
|
+
resolve();
|
|
1721
|
+
});
|
|
1722
|
+
sock.addEventListener("message", (ev) => handleMessage(ev.data));
|
|
1723
|
+
sock.addEventListener("close", (ev) => handleClose(ev.code ?? 0, ev.reason ?? ""));
|
|
1724
|
+
sock.addEventListener("error", (ev) => {
|
|
1725
|
+
const msg = typeof ev.message === "string" ? ev.message : "WebSocket error";
|
|
1726
|
+
if (!opened) {
|
|
1727
|
+
reject(new Error(msg));
|
|
1728
|
+
return;
|
|
1729
|
+
}
|
|
1730
|
+
if (closing) {
|
|
1731
|
+
log.info("OpenAI Realtime error during close", { error: msg });
|
|
1732
|
+
return;
|
|
1733
|
+
}
|
|
1734
|
+
opts.callbacks.onError("internal", msg);
|
|
1735
|
+
});
|
|
1736
|
+
});
|
|
1737
|
+
}
|
|
1738
|
+
function asString(v) {
|
|
1739
|
+
return typeof v === "string" ? v : "";
|
|
1740
|
+
}
|
|
1741
|
+
function handleAudioDelta(obj) {
|
|
1742
|
+
if (typeof obj.delta === "string") opts.callbacks.onAudioChunk(base64ToUint8(obj.delta));
|
|
1743
|
+
}
|
|
1744
|
+
function handleUserTranscript(obj) {
|
|
1745
|
+
if (typeof obj.transcript === "string") opts.callbacks.onUserTranscript(obj.transcript);
|
|
1746
|
+
}
|
|
1747
|
+
function handleResponseCreated(obj) {
|
|
1748
|
+
const resp = obj.response;
|
|
1749
|
+
const id = asString(resp?.id);
|
|
1750
|
+
currentResponseId = id;
|
|
1751
|
+
opts.callbacks.onReplyStarted(id);
|
|
1752
|
+
}
|
|
1753
|
+
function handleAgentTranscriptDelta(obj) {
|
|
1754
|
+
const id = asString(obj.item_id);
|
|
1755
|
+
const delta = asString(obj.delta);
|
|
1756
|
+
agentTranscriptBuffers.set(id, (agentTranscriptBuffers.get(id) ?? "") + delta);
|
|
1757
|
+
}
|
|
1758
|
+
function handleAgentTranscriptDone(obj) {
|
|
1759
|
+
const id = asString(obj.item_id);
|
|
1760
|
+
const text = agentTranscriptBuffers.get(id) ?? "";
|
|
1761
|
+
agentTranscriptBuffers.delete(id);
|
|
1762
|
+
if (text) opts.callbacks.onAgentTranscript(text, false);
|
|
1763
|
+
}
|
|
1764
|
+
function clearTurnBuffers() {
|
|
1765
|
+
agentTranscriptBuffers.clear();
|
|
1766
|
+
toolBuffers.clear();
|
|
1767
|
+
}
|
|
1768
|
+
function handleResponseDone() {
|
|
1769
|
+
currentResponseId = null;
|
|
1770
|
+
clearTurnBuffers();
|
|
1771
|
+
opts.callbacks.onReplyDone();
|
|
1772
|
+
}
|
|
1773
|
+
function handleErrorEvent(obj) {
|
|
1774
|
+
const err = obj.error;
|
|
1775
|
+
const message = typeof err?.message === "string" ? err.message : "OpenAI Realtime error";
|
|
1776
|
+
clearTurnBuffers();
|
|
1777
|
+
opts.callbacks.onError("internal", message);
|
|
1778
|
+
}
|
|
1779
|
+
function handleOutputItemAdded(obj) {
|
|
1780
|
+
const item = obj.item;
|
|
1781
|
+
if (item?.type !== "function_call" || !item.id) return;
|
|
1782
|
+
toolBuffers.set(item.id, {
|
|
1783
|
+
callId: item.call_id ?? "",
|
|
1784
|
+
name: item.name ?? "",
|
|
1785
|
+
argsBuffer: ""
|
|
1786
|
+
});
|
|
1787
|
+
}
|
|
1788
|
+
/**
 * Appends an argument fragment to the tool-call buffer tracked for the
 * event's `item_id`. Fragments for untracked items are dropped.
 */
function handleFunctionCallArgsDelta(obj) {
  const itemId = asString(obj.item_id);
  const fragment = asString(obj.delta);
  const pending = toolBuffers.get(itemId);
  if (!pending) return;
  pending.argsBuffer += fragment;
}
|
|
1794
|
+
/**
 * Parses the JSON-encoded argument string of a tool call.
 *
 * @param {string} argsStr - Raw JSON text; an empty value yields `{}`.
 * @param {string} name - Tool name, used only as log context.
 * @param {string} callId - Tool call id, used only as log context.
 * @returns {object} The parsed value when it is a non-array object;
 *   otherwise `{}` (empty input, malformed JSON, or JSON of another shape).
 */
function parseToolArgs(argsStr, name, callId) {
  if (!argsStr) return {};
  let parsed;
  try {
    parsed = JSON.parse(argsStr);
  } catch {
    log.warn("OpenAI Realtime: invalid tool args JSON", {
      name,
      callId
    });
    return {};
  }
  if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) return parsed;
  // Well-formed JSON of the wrong shape (array, number, null, ...) is also
  // replaced by `{}`; log it so the substitution is visible, just like the
  // malformed-JSON case above.
  log.warn("OpenAI Realtime: tool args JSON is not an object", {
    name,
    callId
  });
  return {};
}
|
|
1807
|
+
/**
 * Completes a tool call. The tracked buffer for the event's `item_id` is
 * removed, and the call id, tool name and argument text are each taken from
 * the event when non-empty, falling back to the buffered value. The parsed
 * arguments are delivered through `opts.callbacks.onToolCall`.
 */
function handleFunctionCallArgsDone(obj) {
  const itemId = asString(obj.item_id);
  const pending = toolBuffers.get(itemId);
  toolBuffers.delete(itemId);
  // Prefer the value carried by the event; use the buffered one otherwise.
  const pick = (fromEvent, buffered) => asString(fromEvent) || (buffered ?? "");
  const callId = pick(obj.call_id, pending?.callId);
  const name = pick(obj.name, pending?.name);
  const rawArgs = pick(obj.arguments, pending?.argsBuffer);
  const args = parseToolArgs(rawArgs, name, callId);
  opts.callbacks.onToolCall(callId, name, args);
}
|
|
1816
|
+
/**
 * Entry point for a raw incoming message. The payload is stringified and
 * parsed as JSON; unparseable payloads are logged and dropped, and values
 * that are not objects are dropped silently. Recognised `type` values are
 * routed to their handler or callback; every other type is ignored.
 */
function handleMessage(data) {
  let event;
  try {
    event = JSON.parse(String(data));
  } catch {
    log.warn("OpenAI Realtime: invalid JSON");
    return;
  }
  if (event === null || typeof event !== "object") return;
  switch (event.type) {
    case "response.audio.delta":
      handleAudioDelta(event);
      break;
    case "response.audio.done":
      opts.callbacks.onAudioDone();
      break;
    case "input_audio_buffer.speech_started":
      opts.callbacks.onSpeechStarted();
      break;
    case "input_audio_buffer.speech_stopped":
      opts.callbacks.onSpeechStopped();
      break;
    case "conversation.item.input_audio_transcription.completed":
      handleUserTranscript(event);
      break;
    case "response.created":
      handleResponseCreated(event);
      break;
    case "response.audio_transcript.delta":
      handleAgentTranscriptDelta(event);
      break;
    case "response.audio_transcript.done":
      handleAgentTranscriptDone(event);
      break;
    case "response.done":
      handleResponseDone();
      break;
    case "response.output_item.added":
      handleOutputItemAdded(event);
      break;
    case "response.function_call_arguments.delta":
      handleFunctionCallArgsDelta(event);
      break;
    case "response.function_call_arguments.done":
      handleFunctionCallArgsDone(event);
      break;
    case "error":
      handleErrorEvent(event);
      break;
    default:
      break;
  }
}
|
|
1869
|
+
/**
 * Handles the socket closing. When `closing` is set (as `stop()` does), the
 * close is logged at info level and nothing else happens. Otherwise it is
 * logged as a warning and reported through `opts.callbacks.onError` with
 * code `"connection"`.
 */
function handleClose(code, reason) {
  const details = {
    code,
    reason
  };
  if (!closing) {
    log.warn("OpenAI Realtime closed unexpectedly", details);
    opts.callbacks.onError("connection", `OpenAI Realtime closed (code=${code})`);
    return;
  }
  log.info("OpenAI Realtime closed", details);
}
|
|
1883
|
+
/**
 * Shuts the transport down: flags the close as deliberate, closes the
 * socket if there is one, and drops the reference to it.
 */
async function stop() {
  closing = true;
  if (ws != null) ws.close();
  ws = null;
}
|
|
1888
|
+
return {
|
|
1889
|
+
start,
|
|
1890
|
+
stop,
|
|
1891
|
+
sendUserAudio(bytes) {
|
|
1892
|
+
if (!ws || ws.readyState !== 1) return;
|
|
1893
|
+
ws.send(`{"type":"input_audio_buffer.append","audio":"${uint8ToBase64(bytes)}"}`);
|
|
1894
|
+
},
|
|
1895
|
+
sendToolResult(callId, result) {
|
|
1896
|
+
send({
|
|
1897
|
+
type: "conversation.item.create",
|
|
1898
|
+
item: {
|
|
1899
|
+
type: "function_call_output",
|
|
1900
|
+
call_id: callId,
|
|
1901
|
+
output: result
|
|
1902
|
+
}
|
|
1903
|
+
});
|
|
1904
|
+
send({ type: "response.create" });
|
|
1905
|
+
},
|
|
1906
|
+
cancelReply() {
|
|
1907
|
+
if (currentResponseId === null) return;
|
|
1908
|
+
send({ type: "response.cancel" });
|
|
1909
|
+
currentResponseId = null;
|
|
1910
|
+
clearTurnBuffers();
|
|
1911
|
+
opts.callbacks.onCancelled();
|
|
1912
|
+
}
|
|
1913
|
+
};
|
|
1914
|
+
}
|
|
1915
|
+
//#endregion
|
|
1893
1916
|
//#region host/to-vercel-tools.ts
|
|
1894
1917
|
/**
|
|
1895
|
-
* Converts agent {@link ToolSchema}[] to Vercel AI SDK tools
|
|
1896
|
-
*
|
|
1897
|
-
*
|
|
1898
|
-
* The pipeline orchestrator passes the output to `streamText({ tools })`.
|
|
1899
|
-
* Each produced tool's `execute` closure calls
|
|
1900
|
-
* `ctx.executeTool(name, args, sessionId, messages(), { signal, toolCallId })`,
|
|
1901
|
-
* so the existing agent tool infrastructure (argument validation, KV, hooks,
|
|
1902
|
-
* timeout) remains the single source of truth for tool behavior.
|
|
1903
|
-
*
|
|
1904
|
-
* Per-call `options.abortSignal` (forwarded by `streamText` when the
|
|
1905
|
-
* outer turn is aborted, e.g. barge-in) takes precedence over the
|
|
1906
|
-
* bag-level `ctx.signal` so individual invocations respect streamText
|
|
1907
|
-
* aborts.
|
|
1908
|
-
*/
|
|
1909
|
-
/**
|
|
1910
|
-
* Convert an array of {@link ToolSchema} to a Vercel AI SDK `ToolSet`
|
|
1911
|
-
* (record keyed by tool name).
|
|
1912
|
-
*
|
|
1913
|
-
* Uses the v6 `tool()` helper with `inputSchema: jsonSchema(...)` wrapping
|
|
1914
|
-
* the agent's JSON Schema `parameters`. Execution is delegated to
|
|
1915
|
-
* `ctx.executeTool` so validation, KV, timeouts, and hooks keep working.
|
|
1918
|
+
* Converts agent {@link ToolSchema}[] to Vercel AI SDK tools, delegating
|
|
1919
|
+
* `execute` to the agent's {@link ExecuteTool} so validation, KV, hooks,
|
|
1920
|
+
* and timeouts remain the single source of truth for tool behavior.
|
|
1916
1921
|
*/
|
|
1917
1922
|
function toVercelTools(schemas, ctx) {
|
|
1918
1923
|
const out = {};
|
|
@@ -1925,7 +1930,8 @@ function toVercelTools(schemas, ctx) {
|
|
|
1925
1930
|
const opts = {};
|
|
1926
1931
|
if (signal !== void 0) opts.signal = signal;
|
|
1927
1932
|
if (options.toolCallId !== void 0) opts.toolCallId = options.toolCallId;
|
|
1928
|
-
|
|
1933
|
+
const history = ctx.messages().slice();
|
|
1934
|
+
return ctx.executeTool(schema.name, input, ctx.sessionId, history, opts);
|
|
1929
1935
|
}
|
|
1930
1936
|
});
|
|
1931
1937
|
return out;
|
|
@@ -1976,10 +1982,6 @@ function createPipelineTransport(opts) {
|
|
|
1976
1982
|
/** Passes an error code and message on to `callbacks.onError`. */
function emitError(code, message) {
  callbacks.onError(code, message);
}
|
|
1979
|
-
/**
|
|
1980
|
-
* Tear down after an unrecoverable provider error. Aborts the in-flight
|
|
1981
|
-
* turn, cancels TTS, signals providers to close. Idempotent.
|
|
1982
|
-
*/
|
|
1983
1985
|
function terminate() {
|
|
1984
1986
|
if (terminated) return;
|
|
1985
1987
|
terminated = true;
|
|
@@ -2115,16 +2117,10 @@ function createPipelineTransport(opts) {
|
|
|
2115
2117
|
}
|
|
2116
2118
|
};
|
|
2117
2119
|
}
|
|
2118
|
-
/**
|
|
2119
|
-
* Flush TTS and wait for drain. Resolves on:
|
|
2120
|
-
* - TTS emits `done`
|
|
2121
|
-
* - `signal` aborts (barge-in / provider error / session stop)
|
|
2122
|
-
* - PIPELINE_FLUSH_TIMEOUT_MS elapses
|
|
2123
|
-
* Resolves immediately if no TTS session.
|
|
2124
|
-
*/
|
|
2125
2120
|
function flushTtsAndWait(signal) {
|
|
2126
2121
|
const tts = ttsSession;
|
|
2127
2122
|
if (!tts) return Promise.resolve();
|
|
2123
|
+
if (signal.aborted) return Promise.resolve();
|
|
2128
2124
|
return new Promise((resolve) => {
|
|
2129
2125
|
let off = null;
|
|
2130
2126
|
let timer = null;
|
|
@@ -2144,10 +2140,6 @@ function createPipelineTransport(opts) {
|
|
|
2144
2140
|
resolve();
|
|
2145
2141
|
};
|
|
2146
2142
|
const onAbort = () => finish();
|
|
2147
|
-
if (signal.aborted) {
|
|
2148
|
-
resolve();
|
|
2149
|
-
return;
|
|
2150
|
-
}
|
|
2151
2143
|
signal.addEventListener("abort", onAbort, { once: true });
|
|
2152
2144
|
off = tts.on("done", finish);
|
|
2153
2145
|
timer = setTimeout(() => {
|
|
@@ -2301,8 +2293,7 @@ function createPipelineTransport(opts) {
|
|
|
2301
2293
|
},
|
|
2302
2294
|
sendUserAudio(bytes) {
|
|
2303
2295
|
if (terminated || !audioReady) return;
|
|
2304
|
-
const offset = bytes
|
|
2305
|
-
const length = bytes.byteLength;
|
|
2296
|
+
const { byteOffset: offset, byteLength: length } = bytes;
|
|
2306
2297
|
let pcm;
|
|
2307
2298
|
if (offset % 2 === 0 && length % 2 === 0) pcm = new Int16Array(bytes.buffer, offset, length / 2);
|
|
2308
2299
|
else {
|
|
@@ -2323,15 +2314,16 @@ function createPipelineTransport(opts) {
|
|
|
2323
2314
|
}
|
|
2324
2315
|
//#endregion
|
|
2325
2316
|
//#region host/s2s.ts
|
|
2326
|
-
const uint8ToBase64 = (bytes) => Buffer.from(bytes).toString("base64");
|
|
2327
|
-
const base64ToUint8 = (base64) => new Uint8Array(Buffer.from(base64, "base64"));
|
|
2328
2317
|
/** Default socket factory: opens a `WsWebSocket` to `url`, passing along `opts.headers`. */
const defaultCreateS2sWebSocket = (url, opts) => {
  const { headers } = opts;
  return new WsWebSocket(url, { headers });
};
|
|
2329
2318
|
const S2sMessageSchema = z.discriminatedUnion("type", [
|
|
2330
2319
|
z.object({
|
|
2331
2320
|
type: z.literal("session.ready"),
|
|
2332
2321
|
session_id: z.string()
|
|
2333
2322
|
}).passthrough(),
|
|
2334
|
-
z.object({
|
|
2323
|
+
z.object({
|
|
2324
|
+
type: z.literal("session.updated"),
|
|
2325
|
+
config: z.object({ id: z.string().optional() }).passthrough().optional()
|
|
2326
|
+
}).passthrough(),
|
|
2335
2327
|
z.object({ type: z.literal("input.speech.started") }),
|
|
2336
2328
|
z.object({ type: z.literal("input.speech.stopped") }),
|
|
2337
2329
|
z.object({
|
|
@@ -2374,12 +2366,17 @@ function parseS2sMessage(obj) {
|
|
|
2374
2366
|
const result = S2sMessageSchema.safeParse(obj);
|
|
2375
2367
|
return result.success ? result.data : void 0;
|
|
2376
2368
|
}
|
|
2369
|
+
/**
 * Builds the `sid` portion of a log payload: `{ sid }` when `ctx.sid` is
 * defined, otherwise an empty object.
 */
function sidFields(ctx) {
  if (ctx.sid === undefined) return {};
  return { sid: ctx.sid };
}
|
|
2377
2372
|
function dispatchS2sMessage(callbacks, msg, state, ctx) {
|
|
2378
2373
|
switch (msg.type) {
|
|
2379
2374
|
case "session.ready":
|
|
2380
2375
|
callbacks.onSessionReady(msg.session_id);
|
|
2381
2376
|
break;
|
|
2382
|
-
case "session.updated":
|
|
2377
|
+
case "session.updated":
|
|
2378
|
+
if (msg.config?.id !== void 0) callbacks.onSessionReady(msg.config.id);
|
|
2379
|
+
break;
|
|
2383
2380
|
case "input.speech.started":
|
|
2384
2381
|
if (!state.speechActive) {
|
|
2385
2382
|
state.speechActive = true;
|
|
@@ -2406,13 +2403,18 @@ function dispatchS2sMessage(callbacks, msg, state, ctx) {
|
|
|
2406
2403
|
break;
|
|
2407
2404
|
case "reply.done":
|
|
2408
2405
|
ctx.log.info("S2S << reply.done", {
|
|
2409
|
-
...ctx
|
|
2406
|
+
...sidFields(ctx),
|
|
2410
2407
|
status: msg.status ?? "completed"
|
|
2411
2408
|
});
|
|
2412
2409
|
if (msg.status === "interrupted") callbacks.onCancelled();
|
|
2413
2410
|
else callbacks.onReplyDone();
|
|
2414
2411
|
break;
|
|
2415
2412
|
case "session.error":
|
|
2413
|
+
ctx.log.warn("S2S << session.error", {
|
|
2414
|
+
...sidFields(ctx),
|
|
2415
|
+
code: msg.code,
|
|
2416
|
+
message: msg.message
|
|
2417
|
+
});
|
|
2416
2418
|
if (msg.code === "session_not_found" || msg.code === "session_forbidden") callbacks.onSessionExpired();
|
|
2417
2419
|
else callbacks.onError(new Error(msg.message));
|
|
2418
2420
|
break;
|
|
@@ -2439,8 +2441,8 @@ function connectS2s(opts) {
|
|
|
2439
2441
|
return;
|
|
2440
2442
|
}
|
|
2441
2443
|
const json = JSON.stringify(msg);
|
|
2442
|
-
if (msg.type
|
|
2443
|
-
else log.info(`S2S >> ${msg.type}`);
|
|
2444
|
+
if (msg.type === "session.update") log.info(`S2S >> ${msg.type}`, { payload: json });
|
|
2445
|
+
else if (msg.type !== "input.audio") log.info(`S2S >> ${msg.type}`);
|
|
2444
2446
|
ws.send(json);
|
|
2445
2447
|
}
|
|
2446
2448
|
const handle = {
|
|
@@ -2489,35 +2491,28 @@ function connectS2s(opts) {
|
|
|
2489
2491
|
log.info("S2S WebSocket open");
|
|
2490
2492
|
resolve(handle);
|
|
2491
2493
|
});
|
|
2492
|
-
function
|
|
2494
|
+
/**
 * Logs the type of an incoming message at info level, except for
 * `reply.audio`, `input.audio`, `reply.done` and `session.error`, which
 * are not logged here.
 */
function logIncoming(type) {
  switch (type) {
    case "reply.audio":
    case "input.audio":
    case "reply.done":
    case "session.error":
      return;
    default:
      log.info(`S2S << ${type}`);
  }
}
|
|
2498
|
+
ws.addEventListener("message", (ev) => {
|
|
2499
|
+
let raw;
|
|
2493
2500
|
try {
|
|
2494
|
-
|
|
2501
|
+
raw = JSON.parse(String(ev.data));
|
|
2495
2502
|
} catch {
|
|
2496
|
-
log.warn("S2S << invalid JSON", { data: String(data).slice(0, 200) });
|
|
2497
|
-
|
|
2498
|
-
}
|
|
2499
|
-
function handleAudioFastPath(obj) {
|
|
2500
|
-
if (obj.type === "reply.audio" && typeof obj.data === "string") {
|
|
2501
|
-
callbacks.onAudio(base64ToUint8(obj.data));
|
|
2502
|
-
return true;
|
|
2503
|
+
log.warn("S2S << invalid JSON", { data: String(ev.data).slice(0, 200) });
|
|
2504
|
+
return;
|
|
2503
2505
|
}
|
|
2504
|
-
return false;
|
|
2505
|
-
}
|
|
2506
|
-
function logIncoming(obj) {
|
|
2507
|
-
if (obj.type === "reply.audio" || obj.type === "input.audio") return;
|
|
2508
|
-
if (obj.type === "reply.done") return;
|
|
2509
|
-
log.info(`S2S << ${obj.type}`);
|
|
2510
|
-
}
|
|
2511
|
-
ws.addEventListener("message", (ev) => {
|
|
2512
|
-
const raw = tryParseJson(ev.data);
|
|
2513
|
-
if (raw === void 0) return;
|
|
2514
2506
|
if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
|
|
2515
2507
|
log.warn("S2S << non-object JSON message", { type: typeof raw });
|
|
2516
2508
|
return;
|
|
2517
2509
|
}
|
|
2518
2510
|
const obj = raw;
|
|
2519
|
-
logIncoming(obj);
|
|
2520
|
-
if (
|
|
2511
|
+
logIncoming(obj.type);
|
|
2512
|
+
if (obj.type === "reply.audio" && typeof obj.data === "string") {
|
|
2513
|
+
callbacks.onAudio(base64ToUint8(obj.data));
|
|
2514
|
+
return;
|
|
2515
|
+
}
|
|
2521
2516
|
const parsed = parseS2sMessage(obj);
|
|
2522
2517
|
if (!parsed) {
|
|
2523
2518
|
log.warn(`S2S << unrecognised message type: ${obj.type ?? JSON.stringify(raw).slice(0, 200)}`);
|
|
@@ -2550,9 +2545,9 @@ function connectS2s(opts) {
|
|
|
2550
2545
|
const _internals = { connectS2s };
|
|
2551
2546
|
/**
|
|
2552
2547
|
* Close codes worth attempting `session.resume` on. These are network/server
|
|
2553
|
-
* blips, not protocol or auth violations.
|
|
2554
|
-
*
|
|
2555
|
-
*
|
|
2548
|
+
* blips, not protocol or auth violations. AssemblyAI keeps the session
|
|
2549
|
+
* available for 30 s after disconnect; reconnect runs immediately on close,
|
|
2550
|
+
* so the resume request reliably lands inside that window.
|
|
2556
2551
|
*/
|
|
2557
2552
|
const TRANSIENT_CLOSE_CODES = new Set([
|
|
2558
2553
|
1005,
|
|
@@ -2560,48 +2555,29 @@ const TRANSIENT_CLOSE_CODES = new Set([
|
|
|
2560
2555
|
1011,
|
|
2561
2556
|
3005
|
|
2562
2557
|
]);
|
|
2563
|
-
/**
|
|
2564
|
-
* AssemblyAI keeps the session alive for 30 s after disconnect; we leave a
|
|
2565
|
-
* little headroom so the resume request still fits inside that window after
|
|
2566
|
-
* the new WebSocket finishes opening.
|
|
2567
|
-
*/
|
|
2568
|
-
const RESUME_WINDOW_MS = 25e3;
|
|
2569
2558
|
function createS2sTransport(opts) {
|
|
2570
2559
|
const log = opts.logger ?? consoleLogger;
|
|
2571
2560
|
const createWs = opts.createWebSocket ?? defaultCreateS2sWebSocket;
|
|
2572
2561
|
let handle = null;
|
|
2573
2562
|
let currentReplyId = null;
|
|
2574
|
-
/** Most recent `session.ready` ID — present once the upstream session is established. */
|
|
2575
2563
|
let providerSessionId = null;
|
|
2576
|
-
/** When the current session became ready; bounds the resume window. */
|
|
2577
|
-
let sessionReadyAt = 0;
|
|
2578
|
-
/** Set by `stop()` so a deliberate close doesn't trigger a reconnect. */
|
|
2579
2564
|
let closing = false;
|
|
2580
|
-
/**
|
|
2581
|
-
* True while a `session.resume` round-trip is in flight (between sending
|
|
2582
|
-
* resume and the next `session.ready`). Used to distinguish a resume failure
|
|
2583
|
-
* (close before ready) from a normal close.
|
|
2584
|
-
*/
|
|
2585
2565
|
let reconnecting = false;
|
|
2586
|
-
/**
|
|
2587
|
-
* Set when a reconnect attempt is kicked off, cleared once the resumed
|
|
2588
|
-
* session's `session.ready` arrives. Prevents back-to-back reconnect loops
|
|
2589
|
-
* when the freshly-resumed socket also drops before fully recovering.
|
|
2590
|
-
*/
|
|
2591
|
-
let reconnectInFlight = false;
|
|
2592
2566
|
function buildCallbacks() {
|
|
2593
2567
|
return {
|
|
2594
2568
|
onSessionReady: (id) => {
|
|
2569
|
+
const isFirstReady = providerSessionId === null;
|
|
2595
2570
|
providerSessionId = id;
|
|
2596
|
-
sessionReadyAt = Date.now();
|
|
2597
2571
|
if (reconnecting) {
|
|
2598
2572
|
reconnecting = false;
|
|
2599
|
-
reconnectInFlight = false;
|
|
2600
2573
|
log.info("S2S resumed", {
|
|
2601
2574
|
sid: opts.sid,
|
|
2602
2575
|
sessionId: id
|
|
2603
2576
|
});
|
|
2604
|
-
}
|
|
2577
|
+
} else if (isFirstReady) log.info("S2S session ready", {
|
|
2578
|
+
sid: opts.sid,
|
|
2579
|
+
sessionId: id
|
|
2580
|
+
});
|
|
2605
2581
|
opts.callbacks.onSessionReady?.(id);
|
|
2606
2582
|
},
|
|
2607
2583
|
onReplyStarted: (replyId) => {
|
|
@@ -2625,7 +2601,6 @@ function createS2sTransport(opts) {
|
|
|
2625
2601
|
onSessionExpired: () => {
|
|
2626
2602
|
if (reconnecting) {
|
|
2627
2603
|
reconnecting = false;
|
|
2628
|
-
reconnectInFlight = false;
|
|
2629
2604
|
log.warn("S2S resume rejected: session expired", { sid: opts.sid });
|
|
2630
2605
|
opts.callbacks.onError("connection", "S2S resume failed: session expired");
|
|
2631
2606
|
return;
|
|
@@ -2638,15 +2613,11 @@ function createS2sTransport(opts) {
|
|
|
2638
2613
|
};
|
|
2639
2614
|
}
|
|
2640
2615
|
/**
 * Decides whether a close with the given code should trigger a resume
 * attempt: the code must be in `TRANSIENT_CLOSE_CODES`, a provider session
 * id must be known, and no resume may already be in progress.
 */
function canResumeAfter(code) {
  if (!TRANSIENT_CLOSE_CODES.has(code)) return false;
  if (providerSessionId === null) return false;
  return !reconnecting;
}
|
|
2646
2618
|
function emitFatalClose(code, reason, wasReconnecting) {
|
|
2647
2619
|
if (wasReconnecting) {
|
|
2648
2620
|
reconnecting = false;
|
|
2649
|
-
reconnectInFlight = false;
|
|
2650
2621
|
opts.callbacks.onError("connection", `S2S resume failed (code=${code})`);
|
|
2651
2622
|
return;
|
|
2652
2623
|
}
|
|
@@ -2667,7 +2638,6 @@ function createS2sTransport(opts) {
|
|
|
2667
2638
|
});
|
|
2668
2639
|
}
|
|
2669
2640
|
function startResume(prevId, code, reason) {
|
|
2670
|
-
reconnectInFlight = true;
|
|
2671
2641
|
reconnecting = true;
|
|
2672
2642
|
log.warn("S2S unexpected close — attempting resume", {
|
|
2673
2643
|
sid: opts.sid,
|
|
@@ -2682,7 +2652,6 @@ function createS2sTransport(opts) {
|
|
|
2682
2652
|
}
|
|
2683
2653
|
resume(prevId).catch((err) => {
|
|
2684
2654
|
reconnecting = false;
|
|
2685
|
-
reconnectInFlight = false;
|
|
2686
2655
|
const msg = err instanceof Error ? err.message : String(err);
|
|
2687
2656
|
log.warn("S2S resume failed", {
|
|
2688
2657
|
sid: opts.sid,
|
|
@@ -2700,12 +2669,11 @@ function createS2sTransport(opts) {
|
|
|
2700
2669
|
return;
|
|
2701
2670
|
}
|
|
2702
2671
|
const wasReconnecting = reconnecting;
|
|
2703
|
-
|
|
2672
|
+
const prevId = providerSessionId;
|
|
2673
|
+
if (!canResumeAfter(code) || prevId === null) {
|
|
2704
2674
|
emitFatalClose(code, reason, wasReconnecting);
|
|
2705
2675
|
return;
|
|
2706
2676
|
}
|
|
2707
|
-
const prevId = providerSessionId;
|
|
2708
|
-
if (prevId === null) return;
|
|
2709
2677
|
startResume(prevId, code, reason);
|
|
2710
2678
|
}
|
|
2711
2679
|
async function resume(prevSessionId) {
|
|
@@ -2714,7 +2682,7 @@ function createS2sTransport(opts) {
|
|
|
2714
2682
|
config: opts.s2sConfig,
|
|
2715
2683
|
createWebSocket: createWs,
|
|
2716
2684
|
logger: log,
|
|
2717
|
-
|
|
2685
|
+
sid: opts.sid,
|
|
2718
2686
|
callbacks: buildCallbacks()
|
|
2719
2687
|
});
|
|
2720
2688
|
if (closing) {
|
|
@@ -2799,14 +2767,11 @@ function createClientSink(ws, log) {
|
|
|
2799
2767
|
}
|
|
2800
2768
|
};
|
|
2801
2769
|
}
|
|
2802
|
-
function
|
|
2770
|
+
function dispatchMessage(data, session, log, sid) {
|
|
2803
2771
|
if (data instanceof Uint8Array) {
|
|
2804
2772
|
session.onAudio(data);
|
|
2805
|
-
return
|
|
2773
|
+
return;
|
|
2806
2774
|
}
|
|
2807
|
-
return false;
|
|
2808
|
-
}
|
|
2809
|
-
function handleTextMessage(data, session, log, sid) {
|
|
2810
2775
|
if (typeof data !== "string") {
|
|
2811
2776
|
log.warn("ws: non-string, non-binary frame received; dropping", { sid });
|
|
2812
2777
|
return;
|
|
@@ -2869,10 +2834,7 @@ function wireSessionSocket(ws, opts) {
|
|
|
2869
2834
|
if (!(session && messageBuffer)) return;
|
|
2870
2835
|
const buf = messageBuffer;
|
|
2871
2836
|
messageBuffer = null;
|
|
2872
|
-
for (const event of buf)
|
|
2873
|
-
if (handleBinaryAudio(event.data, session)) continue;
|
|
2874
|
-
handleTextMessage(event.data, session, log, sid);
|
|
2875
|
-
}
|
|
2837
|
+
for (const event of buf) dispatchMessage(event.data, session, log, sid);
|
|
2876
2838
|
}
|
|
2877
2839
|
function onOpen() {
|
|
2878
2840
|
opts.onOpen?.();
|
|
@@ -2921,8 +2883,7 @@ function wireSessionSocket(ws, opts) {
|
|
|
2921
2883
|
if (messageBuffer && messageBuffer.length < 100) messageBuffer.push(event);
|
|
2922
2884
|
return;
|
|
2923
2885
|
}
|
|
2924
|
-
|
|
2925
|
-
handleTextMessage(event.data, session, log, sid);
|
|
2886
|
+
dispatchMessage(event.data, session, log, sid);
|
|
2926
2887
|
});
|
|
2927
2888
|
ws.addEventListener("close", () => {
|
|
2928
2889
|
log.info("Session disconnected", {
|
|
@@ -2953,27 +2914,19 @@ function wireSessionSocket(ws, opts) {
|
|
|
2953
2914
|
//#endregion
|
|
2954
2915
|
//#region host/runtime.ts
|
|
2955
2916
|
/**
|
|
2956
|
-
*
|
|
2957
|
-
*
|
|
2958
|
-
* Each STT provider uses its own env var (e.g. `ASSEMBLYAI_API_KEY`,
|
|
2959
|
-
* `DEEPGRAM_API_KEY`). We read the kind from the descriptor if it is one;
|
|
2960
|
-
* pre-resolved openers have no kind field so we fall back to AssemblyAI for
|
|
2961
|
-
* backward compatibility (openers supply their own key at open-time anyway).
|
|
2917
|
+
* Read the descriptor `kind` if present. Pre-resolved openers (test escape
|
|
2918
|
+
* hatch) have no `kind` field, so callers fall back to a default env var.
|
|
2962
2919
|
*/
|
|
2920
|
+
/**
 * Returns `value.kind` when it is a string, and `undefined` in every other
 * case (including a null or undefined `value`).
 */
function descriptorKind(value) {
  const candidate = value?.kind;
  if (typeof candidate !== "string") return undefined;
  return candidate;
}
|
|
2963
2924
|
/**
 * Resolves the API key for the STT provider: `DEEPGRAM_API_KEY` when the
 * descriptor kind is `"deepgram"`, otherwise `ASSEMBLYAI_API_KEY`.
 */
function resolveSttApiKey(stt, env) {
  const envVar = descriptorKind(stt) === "deepgram" ? "DEEPGRAM_API_KEY" : "ASSEMBLYAI_API_KEY";
  return resolveApiKey(envVar, env);
}
|
|
2967
|
-
/**
|
|
2968
|
-
* Resolve the API key env-var for the configured TTS provider.
|
|
2969
|
-
*
|
|
2970
|
-
* Each TTS provider uses its own env var (e.g. `CARTESIA_API_KEY`,
|
|
2971
|
-
* `RIME_API_KEY`). We read the kind from the descriptor if it is one;
|
|
2972
|
-
* pre-resolved openers have no kind field so we fall back to Cartesia for
|
|
2973
|
-
* backward compatibility (openers supply their own key at open-time anyway).
|
|
2974
|
-
*/
|
|
2975
2928
|
/**
 * Resolves the API key for the TTS provider: `RIME_API_KEY` when the
 * descriptor kind is `"rime"`, otherwise `CARTESIA_API_KEY`.
 */
function resolveTtsApiKey(tts, env) {
  const envVar = descriptorKind(tts) === "rime" ? "RIME_API_KEY" : "CARTESIA_API_KEY";
  return resolveApiKey(envVar, env);
}
|
|
2979
2932
|
/**
|
|
@@ -3014,7 +2967,7 @@ function createLocalVector(slug) {
|
|
|
3014
2967
|
* @public
|
|
3015
2968
|
*/
|
|
3016
2969
|
function createRuntime(opts) {
|
|
3017
|
-
const { agent, env, kv = createLocalKv(), vector, createWebSocket, logger = consoleLogger, s2sConfig = DEFAULT_S2S_CONFIG, sessionStartTimeoutMs, shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS } = opts;
|
|
2970
|
+
const { agent, env, kv = createLocalKv(), vector, createWebSocket, createOpenaiRealtimeWebSocket, logger = consoleLogger, s2sConfig = DEFAULT_S2S_CONFIG, sessionStartTimeoutMs, shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS } = opts;
|
|
3018
2971
|
const mode = assertProviderTriple(opts.stt, opts.llm, opts.tts);
|
|
3019
2972
|
const slug = agent.name ?? "local";
|
|
3020
2973
|
const resolvedKv = agent.kv ? resolveKv(agent.kv, env, "") : kv;
|
|
@@ -3083,49 +3036,20 @@ function createRuntime(opts) {
|
|
|
3083
3036
|
});
|
|
3084
3037
|
};
|
|
3085
3038
|
}
|
|
3086
|
-
|
|
3039
|
+
let pipelineProviders = null;
|
|
3040
|
+
if (mode === "pipeline" && opts.stt && opts.llm && opts.tts) pipelineProviders = {
|
|
3087
3041
|
stt: resolveSttIfDescriptor(opts.stt),
|
|
3088
3042
|
llm: resolveLlmIfDescriptor(opts.llm, env),
|
|
3089
3043
|
tts: resolveTtsIfDescriptor(opts.tts)
|
|
3090
|
-
}
|
|
3091
|
-
function
|
|
3092
|
-
|
|
3093
|
-
|
|
3094
|
-
const systemPrompt = buildSystemPrompt(agentConfig, {
|
|
3095
|
-
hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
|
|
3096
|
-
voice: true,
|
|
3097
|
-
toolGuidance
|
|
3098
|
-
});
|
|
3099
|
-
let core = null;
|
|
3100
|
-
function bindCore() {
|
|
3101
|
-
if (!core) throw new Error("SessionCore not yet created");
|
|
3102
|
-
return core;
|
|
3103
|
-
}
|
|
3104
|
-
const callbacks = {
|
|
3105
|
-
onReplyStarted: (replyId) => bindCore().onReplyStarted(replyId),
|
|
3106
|
-
onReplyDone: () => bindCore().onReplyDone(),
|
|
3107
|
-
onCancelled: () => bindCore().onCancelled(),
|
|
3108
|
-
onAudioChunk: (bytes) => bindCore().onAudioChunk(bytes),
|
|
3109
|
-
onAudioDone: () => bindCore().onAudioDone(),
|
|
3110
|
-
onUserTranscript: (text) => bindCore().onUserTranscript(text),
|
|
3111
|
-
onAgentTranscript: (text, interrupted) => bindCore().onAgentTranscript(text, interrupted),
|
|
3112
|
-
onToolCall: isPipeline ? (id, name, args) => sessionOpts.client.event({
|
|
3113
|
-
type: "tool_call",
|
|
3114
|
-
toolCallId: id,
|
|
3115
|
-
toolName: name,
|
|
3116
|
-
args
|
|
3117
|
-
}) : (id, name, args) => bindCore().onToolCall(id, name, args),
|
|
3118
|
-
onError: (code, message) => bindCore().onError(code, message),
|
|
3119
|
-
onSpeechStarted: () => bindCore().onSpeechStarted(),
|
|
3120
|
-
onSpeechStopped: () => bindCore().onSpeechStopped()
|
|
3121
|
-
};
|
|
3122
|
-
let transport;
|
|
3123
|
-
if (pipelineProviders) transport = createPipelineTransport({
|
|
3044
|
+
};
|
|
3045
|
+
function buildPipelineTransport(args) {
|
|
3046
|
+
const { sessionOpts, systemPrompt, callbacks, providers } = args;
|
|
3047
|
+
return createPipelineTransport({
|
|
3124
3048
|
sid: sessionOpts.id,
|
|
3125
3049
|
agent: sessionOpts.agent,
|
|
3126
|
-
stt:
|
|
3127
|
-
llm:
|
|
3128
|
-
tts:
|
|
3050
|
+
stt: providers.stt,
|
|
3051
|
+
llm: providers.llm,
|
|
3052
|
+
tts: providers.tts,
|
|
3129
3053
|
callbacks,
|
|
3130
3054
|
sessionConfig: {
|
|
3131
3055
|
systemPrompt,
|
|
@@ -3145,7 +3069,29 @@ function createRuntime(opts) {
|
|
|
3145
3069
|
skipGreeting: sessionOpts.skipGreeting ?? false,
|
|
3146
3070
|
logger
|
|
3147
3071
|
});
|
|
3148
|
-
|
|
3072
|
+
}
|
|
3073
|
+
/**
 * Creates the OpenAI Realtime transport for one session. The API key is
 * resolved from `OPENAI_API_KEY`, provider options come from
 * `agent.s2s.options` (defaulting to `{}`), and the greeting and the
 * custom WebSocket factory are included only when they are configured.
 */
function buildOpenaiRealtimeTransport(args) {
  const { sessionOpts, systemPrompt, callbacks } = args;
  const apiKey = resolveApiKey("OPENAI_API_KEY", env);
  const options = agent.s2s?.options ?? {};

  const sessionConfig = { systemPrompt };
  if (agentConfig.greeting !== void 0) sessionConfig.greeting = agentConfig.greeting;
  sessionConfig.tools = toolSchemas;

  const transportOpts = {
    apiKey,
    options,
    sessionConfig,
    toolSchemas,
    toolChoice: agentConfig.toolChoice ?? "auto",
    callbacks,
    sid: sessionOpts.id,
    agent: sessionOpts.agent
  };
  if (createOpenaiRealtimeWebSocket) transportOpts.createWebSocket = createOpenaiRealtimeWebSocket;
  transportOpts.logger = logger;
  return createOpenaiRealtimeTransport(transportOpts);
}
|
|
3092
|
+
function buildAssemblyS2sTransport(args) {
|
|
3093
|
+
const { sessionOpts, systemPrompt, callbacks } = args;
|
|
3094
|
+
return createS2sTransport({
|
|
3149
3095
|
apiKey: env.ASSEMBLYAI_API_KEY ?? "",
|
|
3150
3096
|
s2sConfig,
|
|
3151
3097
|
sessionConfig: {
|
|
@@ -3160,6 +3106,54 @@ function createRuntime(opts) {
|
|
|
3160
3106
|
...createWebSocket ? { createWebSocket } : {},
|
|
3161
3107
|
logger
|
|
3162
3108
|
});
|
|
3109
|
+
}
|
|
3110
|
+
/**
 * Picks the transport for a session. Pipeline providers take precedence;
 * otherwise an `agent.s2s` descriptor selects the speech-to-speech provider
 * by kind, and the AssemblyAI S2S transport is used when no descriptor is set.
 *
 * @throws {Error} When `agent.s2s` is set but its kind is not `"openai-realtime"`.
 */
function buildTransport(args) {
  if (pipelineProviders) {
    return buildPipelineTransport({
      ...args,
      providers: pipelineProviders
    });
  }
  if (agent.s2s === void 0) return buildAssemblyS2sTransport(args);
  const kind = descriptorKind(agent.s2s);
  if (kind !== "openai-realtime") throw new Error(`Unknown s2s provider kind: ${kind ?? "<missing>"}`);
  return buildOpenaiRealtimeTransport(args);
}
|
|
3122
|
+
function createSession(sessionOpts) {
|
|
3123
|
+
sinkMap.set(sessionOpts.id, sessionOpts.client);
|
|
3124
|
+
const isPipeline = Boolean(pipelineProviders);
|
|
3125
|
+
const systemPrompt = buildSystemPrompt(agentConfig, {
|
|
3126
|
+
hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
|
|
3127
|
+
voice: true,
|
|
3128
|
+
toolGuidance
|
|
3129
|
+
});
|
|
3130
|
+
let core = null;
|
|
3131
|
+
function bindCore() {
|
|
3132
|
+
if (!core) throw new Error("SessionCore not yet created");
|
|
3133
|
+
return core;
|
|
3134
|
+
}
|
|
3135
|
+
const transport = buildTransport({
|
|
3136
|
+
sessionOpts,
|
|
3137
|
+
systemPrompt,
|
|
3138
|
+
callbacks: {
|
|
3139
|
+
onReplyStarted: (replyId) => bindCore().onReplyStarted(replyId),
|
|
3140
|
+
onReplyDone: () => bindCore().onReplyDone(),
|
|
3141
|
+
onCancelled: () => bindCore().onCancelled(),
|
|
3142
|
+
onAudioChunk: (bytes) => bindCore().onAudioChunk(bytes),
|
|
3143
|
+
onAudioDone: () => bindCore().onAudioDone(),
|
|
3144
|
+
onUserTranscript: (text) => bindCore().onUserTranscript(text),
|
|
3145
|
+
onAgentTranscript: (text, interrupted) => bindCore().onAgentTranscript(text, interrupted),
|
|
3146
|
+
onToolCall: isPipeline ? (id, name, args) => sessionOpts.client.event({
|
|
3147
|
+
type: "tool_call",
|
|
3148
|
+
toolCallId: id,
|
|
3149
|
+
toolName: name,
|
|
3150
|
+
args
|
|
3151
|
+
}) : (id, name, args) => bindCore().onToolCall(id, name, args),
|
|
3152
|
+
onError: (code, message) => bindCore().onError(code, message),
|
|
3153
|
+
onSpeechStarted: () => bindCore().onSpeechStarted(),
|
|
3154
|
+
onSpeechStopped: () => bindCore().onSpeechStopped()
|
|
3155
|
+
}
|
|
3156
|
+
});
|
|
3163
3157
|
core = createSessionCore({
|
|
3164
3158
|
id: sessionOpts.id,
|
|
3165
3159
|
agent: sessionOpts.agent,
|
|
@@ -3228,6 +3222,11 @@ function createRuntime(opts) {
|
|
|
3228
3222
|
* **Internal module** — used by `aai-cli` dev server. Not a public API.
|
|
3229
3223
|
* Import via `aai/host`.
|
|
3230
3224
|
*/
|
|
3225
|
+
/** Response headers shared by every JSON reply. */
const JSON_HEADERS = { "Content-Type": "application/json" };
/**
 * Writes a JSON response: the status line and JSON content type first,
 * then `body` serialised with `JSON.stringify` as the payload.
 */
function sendJson(res, status, body) {
  res.writeHead(status, JSON_HEADERS);
  const payload = JSON.stringify(body);
  res.end(payload);
}
|
|
3231
3230
|
async function serveStatic(dir, req, res) {
|
|
3232
3231
|
const url = req.url?.split("?")[0] ?? "/";
|
|
3233
3232
|
const filePath = path.join(dir, url === "/" ? "index.html" : url);
|
|
@@ -3247,66 +3246,59 @@ async function serveStatic(dir, req, res) {
|
|
|
3247
3246
|
return false;
|
|
3248
3247
|
}
|
|
3249
3248
|
}
|
|
3250
|
-
function
|
|
3249
|
+
/**
 * Reads an entire request body and returns it as a UTF-8 string.
 *
 * Chunks are collected and decoded once at the end. Decoding each chunk
 * separately (as `body += chunk` does for Buffer chunks) corrupts any
 * multi-byte character whose bytes straddle a chunk boundary.
 *
 * @param {AsyncIterable<Buffer|Uint8Array|string>} req - Request stream (or any async iterable of chunks).
 * @returns {Promise<string>} The full body; `""` when the stream yields nothing.
 */
async function readBody(req) {
  const chunks = [];
  for await (const chunk of req) {
    // String chunks appear when an encoding was set on the stream.
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
  }
  return Buffer.concat(chunks).toString("utf8");
}
|
|
3254
|
+
async function handleVectorPost(vector, req, res) {
|
|
3255
|
+
try {
|
|
3256
|
+
const parsed = VectorRequestSchema.safeParse(JSON.parse(await readBody(req)));
|
|
3257
|
+
if (!parsed.success) {
|
|
3258
|
+
sendJson(res, 400, { error: parsed.error.message });
|
|
3259
|
+
return;
|
|
3288
3260
|
}
|
|
3289
|
-
|
|
3261
|
+
const op = parsed.data;
|
|
3262
|
+
let result;
|
|
3263
|
+
switch (op.op) {
|
|
3264
|
+
case "upsert":
|
|
3265
|
+
await vector.upsert(op.id, op.text, op.metadata);
|
|
3266
|
+
result = "OK";
|
|
3267
|
+
break;
|
|
3268
|
+
case "query":
|
|
3269
|
+
result = await vector.query(op.text, {
|
|
3270
|
+
...op.topK !== void 0 ? { topK: op.topK } : {},
|
|
3271
|
+
...op.filter !== void 0 ? { filter: op.filter } : {}
|
|
3272
|
+
});
|
|
3273
|
+
break;
|
|
3274
|
+
case "delete":
|
|
3275
|
+
await vector.delete(op.ids);
|
|
3276
|
+
result = "OK";
|
|
3277
|
+
break;
|
|
3278
|
+
default: return op;
|
|
3279
|
+
}
|
|
3280
|
+
sendJson(res, 200, { result });
|
|
3281
|
+
} catch (err) {
|
|
3282
|
+
sendJson(res, 500, { error: err instanceof Error ? err.message : String(err) });
|
|
3283
|
+
}
|
|
3290
3284
|
}
|
|
3291
|
-
function handleKvGet(kv, req, res) {
|
|
3285
|
+
async function handleKvGet(kv, req, res) {
|
|
3292
3286
|
const key = new URL(req.url ?? "/", "http://localhost").searchParams.get("key");
|
|
3293
3287
|
if (!key) {
|
|
3294
|
-
res
|
|
3295
|
-
res.end(JSON.stringify({ error: "Missing key query parameter" }));
|
|
3288
|
+
sendJson(res, 400, { error: "Missing key query parameter" });
|
|
3296
3289
|
return;
|
|
3297
3290
|
}
|
|
3298
|
-
|
|
3291
|
+
try {
|
|
3292
|
+
const value = await kv.get(key);
|
|
3299
3293
|
if (value === null) {
|
|
3300
|
-
res.writeHead(404,
|
|
3294
|
+
res.writeHead(404, JSON_HEADERS);
|
|
3301
3295
|
res.end("null");
|
|
3302
|
-
|
|
3303
|
-
res.writeHead(200, { "Content-Type": "application/json" });
|
|
3304
|
-
res.end(JSON.stringify(value));
|
|
3296
|
+
return;
|
|
3305
3297
|
}
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
res
|
|
3309
|
-
}
|
|
3298
|
+
sendJson(res, 200, value);
|
|
3299
|
+
} catch {
|
|
3300
|
+
sendJson(res, 500, { error: "KV error" });
|
|
3301
|
+
}
|
|
3310
3302
|
}
|
|
3311
3303
|
/**
|
|
3312
3304
|
* Create an HTTP + WebSocket server for an agent.
|
|
@@ -3317,8 +3309,17 @@ function createServer(options) {
|
|
|
3317
3309
|
const { runtime, clientHtml, clientDir, logger = consoleLogger, kv, vector } = options;
|
|
3318
3310
|
const name = options.name ?? "agent";
|
|
3319
3311
|
if (clientHtml && clientDir) throw new Error("clientHtml and clientDir are mutually exclusive");
|
|
3320
|
-
const
|
|
3321
|
-
|
|
3312
|
+
const defaultHtml = clientHtml ?? `<!DOCTYPE html><html><body><h1>${escapeHtml(name)}</h1><p>Agent server running.</p></body></html>`;
|
|
3313
|
+
async function handleRequest(req, res, url, method) {
|
|
3314
|
+
if (clientDir && await serveStatic(clientDir, req, res)) return;
|
|
3315
|
+
if (method === "GET" && url === "/") {
|
|
3316
|
+
res.writeHead(200, { "Content-Type": "text/html" });
|
|
3317
|
+
res.end(defaultHtml);
|
|
3318
|
+
return;
|
|
3319
|
+
}
|
|
3320
|
+
logger.error(`${method} ${url} 404`);
|
|
3321
|
+
sendJson(res, 404, { error: "Not found" });
|
|
3322
|
+
}
|
|
3322
3323
|
const httpServer = http.createServer((req, res) => {
|
|
3323
3324
|
const url = req.url?.split("?")[0] ?? "/";
|
|
3324
3325
|
const method = req.method ?? "GET";
|
|
@@ -3326,11 +3327,10 @@ function createServer(options) {
|
|
|
3326
3327
|
res.setHeader("X-Content-Type-Options", "nosniff");
|
|
3327
3328
|
res.setHeader("X-Frame-Options", "SAMEORIGIN");
|
|
3328
3329
|
if (method === "GET" && url === "/health") {
|
|
3329
|
-
res
|
|
3330
|
-
res.end(JSON.stringify({
|
|
3330
|
+
sendJson(res, 200, {
|
|
3331
3331
|
status: "ok",
|
|
3332
3332
|
name
|
|
3333
|
-
})
|
|
3333
|
+
});
|
|
3334
3334
|
return;
|
|
3335
3335
|
}
|
|
3336
3336
|
if (kv && method === "GET" && url === "/kv") {
|
|
@@ -3343,17 +3343,6 @@ function createServer(options) {
|
|
|
3343
3343
|
}
|
|
3344
3344
|
handleRequest(req, res, url, method);
|
|
3345
3345
|
});
|
|
3346
|
-
async function handleRequest(req, res, url, method) {
|
|
3347
|
-
if (clientDir && await serveStatic(clientDir, req, res)) return;
|
|
3348
|
-
if (method === "GET" && url === "/") {
|
|
3349
|
-
res.writeHead(200, { "Content-Type": "text/html" });
|
|
3350
|
-
res.end(defaultHtml);
|
|
3351
|
-
return;
|
|
3352
|
-
}
|
|
3353
|
-
logger.error(`${method} ${url} 404`);
|
|
3354
|
-
res.writeHead(404, { "Content-Type": "application/json" });
|
|
3355
|
-
res.end(JSON.stringify({ error: "Not found" }));
|
|
3356
|
-
}
|
|
3357
3346
|
const wss = new WebSocketServer({
|
|
3358
3347
|
noServer: true,
|
|
3359
3348
|
maxPayload: MAX_WS_PAYLOAD_BYTES
|