@alexkroman1/aai 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +17 -17
- package/CHANGELOG.md +18 -0
- package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
- package/dist/host/providers/resolve.d.ts +7 -1
- package/dist/host/providers/stt/elevenlabs.d.ts +16 -0
- package/dist/host/providers/stt/soniox.d.ts +25 -0
- package/dist/host/runtime-barrel.js +534 -77
- package/dist/sdk/_internal-types.d.ts +2 -0
- package/dist/sdk/manifest-barrel.js +1 -1
- package/dist/sdk/providers/llm/google.d.ts +22 -0
- package/dist/sdk/providers/llm/groq.d.ts +21 -0
- package/dist/sdk/providers/llm/mistral.d.ts +21 -0
- package/dist/sdk/providers/llm/openai.d.ts +21 -0
- package/dist/sdk/providers/llm/xai.d.ts +21 -0
- package/dist/sdk/providers/llm-barrel.d.ts +5 -0
- package/dist/sdk/providers/llm-barrel.js +2 -2
- package/dist/sdk/providers/stt/elevenlabs.d.ts +36 -0
- package/dist/sdk/providers/stt/soniox.d.ts +37 -0
- package/dist/sdk/providers/stt-barrel.d.ts +2 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/soniox-DCQ3GqJq.js +69 -0
- package/dist/xai-jfQsxxPZ.js +55 -0
- package/host/builtin-tools.ts +1 -0
- package/host/providers/resolve.test.ts +110 -0
- package/host/providers/resolve.ts +113 -10
- package/host/providers/stt/elevenlabs.test.ts +200 -0
- package/host/providers/stt/elevenlabs.ts +145 -0
- package/host/providers/stt/soniox.test.ts +338 -0
- package/host/providers/stt/soniox.ts +239 -0
- package/host/runtime.test.ts +3 -1
- package/host/to-vercel-tools.test.ts +9 -1
- package/host/transports/pipeline-transport.test.ts +93 -0
- package/host/transports/pipeline-transport.ts +53 -30
- package/host/transports/s2s-transport.test.ts +222 -2
- package/host/transports/s2s-transport.ts +176 -40
- package/package.json +35 -2
- package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
- package/sdk/_internal-types.ts +3 -0
- package/sdk/providers/llm/google.ts +30 -0
- package/sdk/providers/llm/groq.ts +29 -0
- package/sdk/providers/llm/mistral.ts +29 -0
- package/sdk/providers/llm/openai.ts +29 -0
- package/sdk/providers/llm/xai.ts +29 -0
- package/sdk/providers/llm-barrel.ts +10 -0
- package/sdk/providers/stt/elevenlabs.ts +44 -0
- package/sdk/providers/stt/soniox.ts +45 -0
- package/sdk/providers/stt-barrel.ts +4 -0
- package/sdk/schema-alignment.test.ts +18 -6
- package/dist/anthropic-CcLZygAr.js +0 -10
- package/dist/assemblyai-C969QGi4.js +0 -35
|
@@ -2,22 +2,24 @@ import { r as DEFAULT_SYSTEM_PROMPT } from "../types-KUgezM6u.js";
|
|
|
2
2
|
import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-C2nirZUI.js";
|
|
3
3
|
import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-BeOQ7fXL.js";
|
|
4
4
|
import { ClientMessageSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
|
|
5
|
-
import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-
|
|
6
|
-
import { r as DEEPGRAM_KIND, t as
|
|
5
|
+
import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-DFL07G3f.js";
|
|
6
|
+
import { a as ASSEMBLYAI_KIND, r as ELEVENLABS_KIND, s as DEEPGRAM_KIND, t as SONIOX_KIND } from "../soniox-DCQ3GqJq.js";
|
|
7
7
|
import { a as RIME_KIND, n as CARTESIA_KIND } from "../cartesia-BfQPOQ7Y.js";
|
|
8
|
-
import {
|
|
8
|
+
import { a as MISTRAL_KIND, d as ANTHROPIC_KIND, l as GOOGLE_KIND, r as OPENAI_KIND, s as GROQ_KIND } from "../xai-jfQsxxPZ.js";
|
|
9
|
+
import { createRequire } from "node:module";
|
|
9
10
|
import { z } from "zod";
|
|
10
11
|
import { convert } from "html-to-text";
|
|
11
12
|
import vm from "node:vm";
|
|
12
13
|
import pTimeout from "p-timeout";
|
|
13
14
|
import { createStorage, prefixStorage } from "unstorage";
|
|
14
|
-
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
15
15
|
import { AssemblyAI } from "assemblyai";
|
|
16
16
|
import { createNanoEvents } from "nanoevents";
|
|
17
17
|
import { DeepgramClient } from "@deepgram/sdk";
|
|
18
|
+
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
|
|
19
|
+
import { AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/elevenlabs-js/wrapper/realtime/index.js";
|
|
20
|
+
import WsWebSocket, { WebSocketServer } from "ws";
|
|
18
21
|
import { randomUUID } from "node:crypto";
|
|
19
22
|
import { Cartesia } from "@cartesia/cartesia-js";
|
|
20
|
-
import WsWebSocket, { WebSocketServer } from "ws";
|
|
21
23
|
import { jsonSchema, stepCountIs, streamText, tool } from "ai";
|
|
22
24
|
import fs from "node:fs";
|
|
23
25
|
import http from "node:http";
|
|
@@ -334,6 +336,7 @@ function resolveAllBuiltins(names, opts) {
|
|
|
334
336
|
for (const name of names) for (const [toolName, def] of resolveBuiltin(name, opts)) {
|
|
335
337
|
defs[toolName] = def;
|
|
336
338
|
schemas.push({
|
|
339
|
+
type: "function",
|
|
337
340
|
name: toolName,
|
|
338
341
|
description: def.description,
|
|
339
342
|
parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
|
|
@@ -564,6 +567,246 @@ function openDeepgram(opts = {}) {
|
|
|
564
567
|
};
|
|
565
568
|
}
|
|
566
569
|
//#endregion
|
|
570
|
+
//#region host/providers/stt/elevenlabs.ts
|
|
571
|
+
/**
|
|
572
|
+
* ElevenLabs Scribe streaming STT opener (host-only).
|
|
573
|
+
*
|
|
574
|
+
* The user-facing descriptor factory (`elevenlabs(...)`) lives in
|
|
575
|
+
* `sdk/providers/stt/elevenlabs.ts`. This module is the host-side
|
|
576
|
+
* counterpart: it takes the descriptor options + an API key and
|
|
577
|
+
* returns an {@link SttOpener} that the pipeline session drives.
|
|
578
|
+
*
|
|
579
|
+
* Default model: `"scribe_v2_realtime"`. Audio is sent as base64-encoded
|
|
580
|
+
* PCM_16000; partial transcripts arrive on `transcript`, finals on
|
|
581
|
+
* `committed_transcript`.
|
|
582
|
+
*/
|
|
583
|
+
/** Map a numeric sample rate to the SDK's `AudioFormat` enum. */
|
|
584
|
+
function audioFormatFor(sampleRate) {
|
|
585
|
+
switch (sampleRate) {
|
|
586
|
+
case 8e3: return AudioFormat.PCM_8000;
|
|
587
|
+
case 16e3: return AudioFormat.PCM_16000;
|
|
588
|
+
case 22050: return AudioFormat.PCM_22050;
|
|
589
|
+
case 24e3: return AudioFormat.PCM_24000;
|
|
590
|
+
case 44100: return AudioFormat.PCM_44100;
|
|
591
|
+
case 48e3: return AudioFormat.PCM_48000;
|
|
592
|
+
default: throw makeSttError("stt_connect_failed", `ElevenLabs STT: unsupported sample rate ${sampleRate}. Supported: 8000, 16000, 22050, 24000, 44100, 48000.`);
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
/** Build an {@link SttOpener} from resolved ElevenLabs descriptor options. */
|
|
596
|
+
function openElevenLabs(opts = {}) {
|
|
597
|
+
return {
|
|
598
|
+
name: "elevenlabs",
|
|
599
|
+
async open(openOpts) {
|
|
600
|
+
const apiKey = openOpts.apiKey || process.env.ELEVENLABS_API_KEY;
|
|
601
|
+
if (!apiKey) throw makeSttError("stt_auth_failed", "ElevenLabs STT: missing API key. Set ELEVENLABS_API_KEY in the agent env.");
|
|
602
|
+
const client = new ElevenLabsClient({ apiKey });
|
|
603
|
+
let connection;
|
|
604
|
+
try {
|
|
605
|
+
connection = await client.speechToText.realtime.connect({
|
|
606
|
+
modelId: opts.model ?? "scribe_v2_realtime",
|
|
607
|
+
audioFormat: audioFormatFor(openOpts.sampleRate),
|
|
608
|
+
sampleRate: openOpts.sampleRate,
|
|
609
|
+
commitStrategy: CommitStrategy.VAD,
|
|
610
|
+
...opts.languageCode ? { languageCode: opts.languageCode } : {}
|
|
611
|
+
});
|
|
612
|
+
} catch (cause) {
|
|
613
|
+
throw makeSttError("stt_connect_failed", `ElevenLabs STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
|
|
614
|
+
}
|
|
615
|
+
const emitter = createNanoEvents();
|
|
616
|
+
let closed = false;
|
|
617
|
+
connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (msg) => {
|
|
618
|
+
if (closed) return;
|
|
619
|
+
const text = msg.text ?? "";
|
|
620
|
+
if (text.length > 0) emitter.emit("partial", text);
|
|
621
|
+
});
|
|
622
|
+
connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (msg) => {
|
|
623
|
+
if (closed) return;
|
|
624
|
+
const text = msg.text ?? "";
|
|
625
|
+
if (text.length > 0) emitter.emit("final", text);
|
|
626
|
+
});
|
|
627
|
+
connection.on(RealtimeEvents.ERROR, (payload) => {
|
|
628
|
+
if (closed) return;
|
|
629
|
+
const msg = payload instanceof Error ? payload.message : payload.error ?? `${payload.message_type}`;
|
|
630
|
+
emitter.emit("error", makeSttError("stt_stream_error", msg));
|
|
631
|
+
});
|
|
632
|
+
connection.on(RealtimeEvents.AUTH_ERROR, (msg) => {
|
|
633
|
+
if (closed) return;
|
|
634
|
+
emitter.emit("error", makeSttError("stt_auth_failed", msg.error));
|
|
635
|
+
});
|
|
636
|
+
const close = async () => {
|
|
637
|
+
if (closed) return;
|
|
638
|
+
closed = true;
|
|
639
|
+
try {
|
|
640
|
+
connection.close();
|
|
641
|
+
} catch {}
|
|
642
|
+
};
|
|
643
|
+
if (openOpts.signal.aborted) close();
|
|
644
|
+
else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
|
|
645
|
+
return {
|
|
646
|
+
sendAudio(pcm) {
|
|
647
|
+
if (closed) return;
|
|
648
|
+
const bytes = Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
|
|
649
|
+
connection.send({ audioBase64: bytes.toString("base64") });
|
|
650
|
+
},
|
|
651
|
+
on(event, fn) {
|
|
652
|
+
return emitter.on(event, fn);
|
|
653
|
+
},
|
|
654
|
+
close
|
|
655
|
+
};
|
|
656
|
+
}
|
|
657
|
+
};
|
|
658
|
+
}
|
|
659
|
+
//#endregion
|
|
660
|
+
//#region host/providers/stt/soniox.ts
|
|
661
|
+
/**
|
|
662
|
+
* Soniox real-time STT opener (host-only).
|
|
663
|
+
*
|
|
664
|
+
* The user-facing descriptor factory (`soniox(...)`) lives in
|
|
665
|
+
* `sdk/providers/stt/soniox.ts`. This module is the host-side
|
|
666
|
+
* counterpart: it takes the descriptor options + an API key and
|
|
667
|
+
* returns an {@link SttOpener} that the pipeline session drives.
|
|
668
|
+
*
|
|
669
|
+
* Soniox's published JS client (`@soniox/speech-to-text-web`) is
|
|
670
|
+
* browser-only — it depends on `MediaRecorder` and `getUserMedia`. For
|
|
671
|
+
* server-side use we talk to the WebSocket directly:
|
|
672
|
+
* `wss://stt-rt.soniox.com/transcribe-websocket`
|
|
673
|
+
*
|
|
674
|
+
* Wire format:
|
|
675
|
+
* - First text frame: JSON config with api_key, model, audio_format,
|
|
676
|
+
* sample_rate, num_channels (and optional language hints).
|
|
677
|
+
* - Subsequent binary frames: 16-bit signed little-endian PCM audio.
|
|
678
|
+
* - Server replies: JSON `{ tokens: [{ text, is_final }] }` messages.
|
|
679
|
+
* Final tokens accumulate; non-final tokens are a rolling preview.
|
|
680
|
+
* - On error: `{ error_code, error_message }`.
|
|
681
|
+
*/
|
|
682
|
+
const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
683
|
+
/**
|
|
684
|
+
* Walk a batch of Soniox tokens, sending finals into `appendFinal` and
|
|
685
|
+
* returning the concatenated non-finals as a rolling preview string.
|
|
686
|
+
*/
|
|
687
|
+
function consumeTokens(tokens, appendFinal) {
|
|
688
|
+
let nonFinal = "";
|
|
689
|
+
for (const tok of tokens) {
|
|
690
|
+
const text = tok.text ?? "";
|
|
691
|
+
if (text.length === 0) continue;
|
|
692
|
+
if (tok.is_final) appendFinal(text);
|
|
693
|
+
else nonFinal += text;
|
|
694
|
+
}
|
|
695
|
+
return nonFinal;
|
|
696
|
+
}
|
|
697
|
+
/** Resolve once the WebSocket opens; reject on the first error. */
|
|
698
|
+
function waitForOpen$1(ws) {
|
|
699
|
+
return new Promise((resolve, reject) => {
|
|
700
|
+
const onOpen = () => {
|
|
701
|
+
ws.off("error", onErr);
|
|
702
|
+
resolve();
|
|
703
|
+
};
|
|
704
|
+
const onErr = (err) => {
|
|
705
|
+
ws.off("open", onOpen);
|
|
706
|
+
reject(err);
|
|
707
|
+
};
|
|
708
|
+
ws.once("open", onOpen);
|
|
709
|
+
ws.once("error", onErr);
|
|
710
|
+
});
|
|
711
|
+
}
|
|
712
|
+
/** Build the initial JSON config frame for a Soniox session. */
|
|
713
|
+
function buildConfigFrame(apiKey, opts, sampleRate) {
|
|
714
|
+
const config = {
|
|
715
|
+
api_key: apiKey,
|
|
716
|
+
model: opts.model ?? "stt-rt-v3",
|
|
717
|
+
audio_format: "pcm_s16le",
|
|
718
|
+
sample_rate: sampleRate,
|
|
719
|
+
num_channels: 1
|
|
720
|
+
};
|
|
721
|
+
if (opts.languageHints && opts.languageHints.length > 0) config.language_hints = [...opts.languageHints];
|
|
722
|
+
return config;
|
|
723
|
+
}
|
|
724
|
+
/** Parse a Soniox text frame into a {@link SonioxResponse}; returns null on garbage. */
|
|
725
|
+
function parseFrame(raw) {
|
|
726
|
+
try {
|
|
727
|
+
return JSON.parse(raw.toString());
|
|
728
|
+
} catch {
|
|
729
|
+
return null;
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
/**
|
|
733
|
+
* Handle one server response. Emits `error`, `final`, and `partial` events
|
|
734
|
+
* onto `emitter` based on the token batch and the running `finalBuf`. The
|
|
735
|
+
* caller owns `finalBuf` so it survives across messages and can be flushed
|
|
736
|
+
* on close.
|
|
737
|
+
*/
|
|
738
|
+
function handleResponse(res, emitter, finalBuf) {
|
|
739
|
+
if (res.error_code !== void 0) {
|
|
740
|
+
emitter.emit("error", makeSttError("stt_stream_error", `Soniox error ${res.error_code}: ${res.error_message ?? "unknown"}`));
|
|
741
|
+
return;
|
|
742
|
+
}
|
|
743
|
+
if (!res.tokens || res.tokens.length === 0) return;
|
|
744
|
+
const nonFinal = consumeTokens(res.tokens, (text) => {
|
|
745
|
+
finalBuf.value += text;
|
|
746
|
+
});
|
|
747
|
+
if (finalBuf.value.length > 0 && (nonFinal.length > 0 || res.finished)) {
|
|
748
|
+
emitter.emit("final", finalBuf.value);
|
|
749
|
+
finalBuf.value = "";
|
|
750
|
+
}
|
|
751
|
+
if (nonFinal.length > 0) emitter.emit("partial", nonFinal);
|
|
752
|
+
}
|
|
753
|
+
/** Build an {@link SttOpener} from resolved Soniox descriptor options. */
|
|
754
|
+
function openSoniox(opts = {}) {
|
|
755
|
+
return {
|
|
756
|
+
name: "soniox",
|
|
757
|
+
async open(openOpts) {
|
|
758
|
+
const apiKey = openOpts.apiKey || process.env.SONIOX_API_KEY;
|
|
759
|
+
if (!apiKey) throw makeSttError("stt_auth_failed", "Soniox STT: missing API key. Set SONIOX_API_KEY in the agent env.");
|
|
760
|
+
const ws = new WsWebSocket(SONIOX_WS_URL);
|
|
761
|
+
const emitter = createNanoEvents();
|
|
762
|
+
let closed = false;
|
|
763
|
+
const finalBuf = { value: "" };
|
|
764
|
+
try {
|
|
765
|
+
await waitForOpen$1(ws);
|
|
766
|
+
} catch (cause) {
|
|
767
|
+
throw makeSttError("stt_connect_failed", `Soniox STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
|
|
768
|
+
}
|
|
769
|
+
ws.send(JSON.stringify(buildConfigFrame(apiKey, opts, openOpts.sampleRate)));
|
|
770
|
+
ws.on("message", (raw) => {
|
|
771
|
+
if (closed) return;
|
|
772
|
+
const res = parseFrame(raw);
|
|
773
|
+
if (res) handleResponse(res, emitter, finalBuf);
|
|
774
|
+
});
|
|
775
|
+
ws.on("error", (err) => {
|
|
776
|
+
if (closed) return;
|
|
777
|
+
emitter.emit("error", makeSttError("stt_stream_error", err.message ?? String(err)));
|
|
778
|
+
});
|
|
779
|
+
ws.on("close", (code) => {
|
|
780
|
+
if (closed) return;
|
|
781
|
+
if (code !== 1e3) emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
|
|
782
|
+
});
|
|
783
|
+
const close = async () => {
|
|
784
|
+
if (closed) return;
|
|
785
|
+
closed = true;
|
|
786
|
+
if (finalBuf.value.length > 0) {
|
|
787
|
+
emitter.emit("final", finalBuf.value);
|
|
788
|
+
finalBuf.value = "";
|
|
789
|
+
}
|
|
790
|
+
try {
|
|
791
|
+
ws.close();
|
|
792
|
+
} catch {}
|
|
793
|
+
};
|
|
794
|
+
if (openOpts.signal.aborted) close();
|
|
795
|
+
else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
|
|
796
|
+
return {
|
|
797
|
+
sendAudio(pcm) {
|
|
798
|
+
if (closed || ws.readyState !== WsWebSocket.OPEN) return;
|
|
799
|
+
ws.send(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength), { binary: true });
|
|
800
|
+
},
|
|
801
|
+
on(event, fn) {
|
|
802
|
+
return emitter.on(event, fn);
|
|
803
|
+
},
|
|
804
|
+
close
|
|
805
|
+
};
|
|
806
|
+
}
|
|
807
|
+
};
|
|
808
|
+
}
|
|
809
|
+
//#endregion
|
|
567
810
|
//#region host/providers/tts/cartesia.ts
|
|
568
811
|
/**
|
|
569
812
|
* Cartesia TTS opener (host-only).
|
|
@@ -962,7 +1205,14 @@ function openRime(opts) {
|
|
|
962
1205
|
* The guest sandbox never imports these functions, which is how the agent
|
|
963
1206
|
* bundle stays free of `@ai-sdk/anthropic` / `assemblyai` /
|
|
964
1207
|
* `@cartesia/cartesia-js`.
|
|
1208
|
+
*
|
|
1209
|
+
* `@ai-sdk/*` packages are loaded via `createRequire` lazily so self-hosted
|
|
1210
|
+
* users only need to install the providers they actually reference. A
|
|
1211
|
+
* missing package is reported as a friendly "package not installed" error
|
|
1212
|
+
* at first session start, not as a module-load failure when the host
|
|
1213
|
+
* package itself is imported.
|
|
965
1214
|
*/
|
|
1215
|
+
const requireFromHere = createRequire(import.meta.url);
|
|
966
1216
|
/**
|
|
967
1217
|
* Look up a provider API key: agent env first (set via `aai secret put` or
|
|
968
1218
|
* `.env`), then the host's `process.env` as a fallback for self-hosted mode.
|
|
@@ -976,7 +1226,9 @@ function resolveStt(descriptor) {
|
|
|
976
1226
|
switch (descriptor.kind) {
|
|
977
1227
|
case ASSEMBLYAI_KIND: return openAssemblyAI(descriptor.options);
|
|
978
1228
|
case DEEPGRAM_KIND: return openDeepgram(descriptor.options);
|
|
979
|
-
|
|
1229
|
+
case ELEVENLABS_KIND: return openElevenLabs(descriptor.options);
|
|
1230
|
+
case SONIOX_KIND: return openSoniox(descriptor.options);
|
|
1231
|
+
default: throw new Error(`Unknown STT provider kind: "${descriptor.kind}". Supported: ${ASSEMBLYAI_KIND}, ${DEEPGRAM_KIND}, ${ELEVENLABS_KIND}, ${SONIOX_KIND}.`);
|
|
980
1232
|
}
|
|
981
1233
|
}
|
|
982
1234
|
/** Resolve a {@link TtsProvider} descriptor into a host-side opener. */
|
|
@@ -991,24 +1243,73 @@ function resolveTts(descriptor) {
|
|
|
991
1243
|
* Resolve an {@link LlmProvider} descriptor into a Vercel AI SDK
|
|
992
1244
|
* {@link LanguageModel}.
|
|
993
1245
|
*
|
|
994
|
-
* The API key is pulled from the agent's env (e.g. `
|
|
1246
|
+
* The API key is pulled from the agent's env (e.g. `OPENAI_API_KEY`).
|
|
995
1247
|
* Missing keys throw here — the pipeline session would fail on first
|
|
996
1248
|
* `streamText` call otherwise, and the error is clearer at construction.
|
|
997
1249
|
*/
|
|
998
1250
|
function resolveLlm(descriptor, env) {
|
|
999
1251
|
switch (descriptor.kind) {
|
|
1000
1252
|
case ANTHROPIC_KIND: {
|
|
1001
|
-
const
|
|
1002
|
-
const
|
|
1003
|
-
if (!apiKey) throw new Error("Anthropic LLM: missing API key. Set ANTHROPIC_API_KEY in the agent env.");
|
|
1253
|
+
const apiKey = requireKey(env, "ANTHROPIC_API_KEY", "Anthropic");
|
|
1254
|
+
const { createAnthropic } = loadProviderPackage("@ai-sdk/anthropic", "Anthropic");
|
|
1004
1255
|
return createAnthropic({
|
|
1005
1256
|
apiKey,
|
|
1006
1257
|
baseURL: "https://api.anthropic.com/v1"
|
|
1007
|
-
})(options.model);
|
|
1258
|
+
})(descriptor.options.model);
|
|
1259
|
+
}
|
|
1260
|
+
case OPENAI_KIND: {
|
|
1261
|
+
const apiKey = requireKey(env, "OPENAI_API_KEY", "OpenAI");
|
|
1262
|
+
const { createOpenAI } = loadProviderPackage("@ai-sdk/openai", "OpenAI");
|
|
1263
|
+
return createOpenAI({ apiKey })(descriptor.options.model);
|
|
1264
|
+
}
|
|
1265
|
+
case GOOGLE_KIND: {
|
|
1266
|
+
const apiKey = requireKey(env, "GOOGLE_GENERATIVE_AI_API_KEY", "Google");
|
|
1267
|
+
const { createGoogleGenerativeAI } = loadProviderPackage("@ai-sdk/google", "Google");
|
|
1268
|
+
return createGoogleGenerativeAI({ apiKey })(descriptor.options.model);
|
|
1008
1269
|
}
|
|
1009
|
-
|
|
1270
|
+
case MISTRAL_KIND: {
|
|
1271
|
+
const apiKey = requireKey(env, "MISTRAL_API_KEY", "Mistral");
|
|
1272
|
+
const { createMistral } = loadProviderPackage("@ai-sdk/mistral", "Mistral");
|
|
1273
|
+
return createMistral({ apiKey })(descriptor.options.model);
|
|
1274
|
+
}
|
|
1275
|
+
case "xai": {
|
|
1276
|
+
const apiKey = requireKey(env, "XAI_API_KEY", "xAI");
|
|
1277
|
+
const { createXai } = loadProviderPackage("@ai-sdk/xai", "xAI");
|
|
1278
|
+
return createXai({ apiKey })(descriptor.options.model);
|
|
1279
|
+
}
|
|
1280
|
+
case GROQ_KIND: {
|
|
1281
|
+
const apiKey = requireKey(env, "GROQ_API_KEY", "Groq");
|
|
1282
|
+
const { createGroq } = loadProviderPackage("@ai-sdk/groq", "Groq");
|
|
1283
|
+
return createGroq({ apiKey })(descriptor.options.model);
|
|
1284
|
+
}
|
|
1285
|
+
default: throw new Error(`Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}, ${OPENAI_KIND}, ${GOOGLE_KIND}, ${MISTRAL_KIND}, xai, ${GROQ_KIND}.`);
|
|
1010
1286
|
}
|
|
1011
1287
|
}
|
|
1288
|
+
function requireKey(env, name, label) {
|
|
1289
|
+
const key = resolveApiKey(name, env);
|
|
1290
|
+
if (!key) throw new Error(`${label} LLM: missing API key. Set ${name} in the agent env.`);
|
|
1291
|
+
return key;
|
|
1292
|
+
}
|
|
1293
|
+
/**
|
|
1294
|
+
* Synchronously load an optional `@ai-sdk/*` package via `createRequire`.
|
|
1295
|
+
* Throws a friendly install-hint error if the package isn't installed,
|
|
1296
|
+
* so users see "Run `pnpm add @ai-sdk/openai`" rather than a cryptic
|
|
1297
|
+
* Node module-resolution stack trace.
|
|
1298
|
+
*/
|
|
1299
|
+
function loadProviderPackage(name, label) {
|
|
1300
|
+
try {
|
|
1301
|
+
return requireFromHere(name);
|
|
1302
|
+
} catch (err) {
|
|
1303
|
+
if (isModuleNotFound(err, name)) throw new Error(`${label} LLM: package \`${name}\` is not installed. Run \`pnpm add ${name}\`.`, { cause: err });
|
|
1304
|
+
throw err;
|
|
1305
|
+
}
|
|
1306
|
+
}
|
|
1307
|
+
function isModuleNotFound(err, name) {
|
|
1308
|
+
if (!(err instanceof Error)) return false;
|
|
1309
|
+
const code = err.code;
|
|
1310
|
+
if (code !== "MODULE_NOT_FOUND" && code !== "ERR_MODULE_NOT_FOUND") return false;
|
|
1311
|
+
return err.message.includes(name);
|
|
1312
|
+
}
|
|
1012
1313
|
//#endregion
|
|
1013
1314
|
//#region host/runtime-config.ts
|
|
1014
1315
|
/**
|
|
@@ -1492,9 +1793,10 @@ function createPipelineTransport(opts) {
|
|
|
1492
1793
|
stopWhen: stepCountIs(maxSteps),
|
|
1493
1794
|
abortSignal: ctl.signal
|
|
1494
1795
|
});
|
|
1796
|
+
const handlePart = makeStreamPartHandler(onDelta);
|
|
1495
1797
|
for await (const part of result.fullStream) {
|
|
1496
1798
|
if (ctl.signal.aborted) break;
|
|
1497
|
-
|
|
1799
|
+
handlePart(part);
|
|
1498
1800
|
}
|
|
1499
1801
|
} catch (err) {
|
|
1500
1802
|
if (!ctl.signal.aborted) {
|
|
@@ -1507,31 +1809,54 @@ function createPipelineTransport(opts) {
|
|
|
1507
1809
|
}
|
|
1508
1810
|
}
|
|
1509
1811
|
}
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
sid: opts.sid
|
|
1529
|
-
});
|
|
1530
|
-
emitError("llm", msg);
|
|
1531
|
-
return;
|
|
1812
|
+
/**
|
|
1813
|
+
* Stateful per-turn handler for `streamText` `fullStream` parts.
|
|
1814
|
+
*
|
|
1815
|
+
* Tracks text-segment boundaries so that consecutive segments — which the
|
|
1816
|
+
* Vercel SDK emits across tool-call hops as `text-end` followed later by a
|
|
1817
|
+
* fresh `text-start` — don't fuse into "...up.Got it" when concatenated for
|
|
1818
|
+
* the transcript or streamed to TTS. When a boundary is crossed and neither
|
|
1819
|
+
* side carries whitespace, a single space is injected into both streams.
|
|
1820
|
+
*/
|
|
1821
|
+
function makeStreamPartHandler(onDelta) {
|
|
1822
|
+
let pendingSeparator = false;
|
|
1823
|
+
let lastChar = "";
|
|
1824
|
+
function emitText(delta) {
|
|
1825
|
+
if (delta.length === 0) return;
|
|
1826
|
+
let out = delta;
|
|
1827
|
+
if (pendingSeparator) {
|
|
1828
|
+
pendingSeparator = false;
|
|
1829
|
+
if (!(lastChar === "" || /\s/.test(lastChar) || /^\s/.test(out))) out = ` ${out}`;
|
|
1532
1830
|
}
|
|
1533
|
-
|
|
1831
|
+
lastChar = out.slice(-1);
|
|
1832
|
+
onDelta(out);
|
|
1833
|
+
ttsSession?.sendText(out);
|
|
1534
1834
|
}
|
|
1835
|
+
return function handlePart(part) {
|
|
1836
|
+
switch (part.type) {
|
|
1837
|
+
case "text-delta":
|
|
1838
|
+
emitText(part.text ?? "");
|
|
1839
|
+
return;
|
|
1840
|
+
case "text-end":
|
|
1841
|
+
pendingSeparator = true;
|
|
1842
|
+
return;
|
|
1843
|
+
case "tool-call": {
|
|
1844
|
+
const input = part.input ?? {};
|
|
1845
|
+
callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
|
|
1846
|
+
return;
|
|
1847
|
+
}
|
|
1848
|
+
case "error": {
|
|
1849
|
+
const msg = errorMessage(part.error);
|
|
1850
|
+
log.error("LLM stream error", {
|
|
1851
|
+
message: msg,
|
|
1852
|
+
sid: opts.sid
|
|
1853
|
+
});
|
|
1854
|
+
emitError("llm", msg);
|
|
1855
|
+
return;
|
|
1856
|
+
}
|
|
1857
|
+
default: return;
|
|
1858
|
+
}
|
|
1859
|
+
};
|
|
1535
1860
|
}
|
|
1536
1861
|
/**
|
|
1537
1862
|
* Flush TTS and wait for drain. Resolves on:
|
|
@@ -1969,11 +2294,182 @@ function connectS2s(opts) {
|
|
|
1969
2294
|
//#region host/transports/s2s-transport.ts
|
|
1970
2295
|
/** @internal Exposed for testing — allows spying on connectS2s in unit tests. */
|
|
1971
2296
|
const _internals = { connectS2s };
|
|
2297
|
+
/**
|
|
2298
|
+
* Close codes worth attempting `session.resume` on. These are network/server
|
|
2299
|
+
* blips, not protocol or auth violations. Per AssemblyAI's docs, sessions are
|
|
2300
|
+
* preserved for 30 s after disconnect, so resume is bounded by the window in
|
|
2301
|
+
* `RESUME_WINDOW_MS` below.
|
|
2302
|
+
*/
|
|
2303
|
+
const TRANSIENT_CLOSE_CODES = new Set([
|
|
2304
|
+
1005,
|
|
2305
|
+
1006,
|
|
2306
|
+
1011,
|
|
2307
|
+
3005
|
|
2308
|
+
]);
|
|
2309
|
+
/**
|
|
2310
|
+
* AssemblyAI keeps the session alive for 30 s after disconnect; we leave a
|
|
2311
|
+
* little headroom so the resume request still fits inside that window after
|
|
2312
|
+
* the new WebSocket finishes opening.
|
|
2313
|
+
*/
|
|
2314
|
+
const RESUME_WINDOW_MS = 25e3;
|
|
1972
2315
|
function createS2sTransport(opts) {
|
|
1973
2316
|
const log = opts.logger ?? consoleLogger;
|
|
1974
2317
|
const createWs = opts.createWebSocket ?? defaultCreateS2sWebSocket;
|
|
1975
2318
|
let handle = null;
|
|
1976
2319
|
let currentReplyId = null;
|
|
2320
|
+
/** Most recent `session.ready` ID — present once the upstream session is established. */
|
|
2321
|
+
let providerSessionId = null;
|
|
2322
|
+
/** When the current session became ready; bounds the resume window. */
|
|
2323
|
+
let sessionReadyAt = 0;
|
|
2324
|
+
/** Set by `stop()` so a deliberate close doesn't trigger a reconnect. */
|
|
2325
|
+
let closing = false;
|
|
2326
|
+
/**
|
|
2327
|
+
* True while a `session.resume` round-trip is in flight (between sending
|
|
2328
|
+
* resume and the next `session.ready`). Used to distinguish a resume failure
|
|
2329
|
+
* (close before ready) from a normal close.
|
|
2330
|
+
*/
|
|
2331
|
+
let reconnecting = false;
|
|
2332
|
+
/**
|
|
2333
|
+
* Set when a reconnect attempt is kicked off, cleared once the resumed
|
|
2334
|
+
* session's `session.ready` arrives. Prevents back-to-back reconnect loops
|
|
2335
|
+
* when the freshly-resumed socket also drops before fully recovering.
|
|
2336
|
+
*/
|
|
2337
|
+
let reconnectInFlight = false;
|
|
2338
|
+
function buildCallbacks() {
|
|
2339
|
+
return {
|
|
2340
|
+
onSessionReady: (id) => {
|
|
2341
|
+
providerSessionId = id;
|
|
2342
|
+
sessionReadyAt = Date.now();
|
|
2343
|
+
if (reconnecting) {
|
|
2344
|
+
reconnecting = false;
|
|
2345
|
+
reconnectInFlight = false;
|
|
2346
|
+
log.info("S2S resumed", {
|
|
2347
|
+
sid: opts.sid,
|
|
2348
|
+
sessionId: id
|
|
2349
|
+
});
|
|
2350
|
+
}
|
|
2351
|
+
opts.callbacks.onSessionReady?.(id);
|
|
2352
|
+
},
|
|
2353
|
+
onReplyStarted: (replyId) => {
|
|
2354
|
+
currentReplyId = replyId;
|
|
2355
|
+
opts.callbacks.onReplyStarted(replyId);
|
|
2356
|
+
},
|
|
2357
|
+
onReplyDone: () => {
|
|
2358
|
+
currentReplyId = null;
|
|
2359
|
+
opts.callbacks.onReplyDone();
|
|
2360
|
+
},
|
|
2361
|
+
onCancelled: () => {
|
|
2362
|
+
currentReplyId = null;
|
|
2363
|
+
opts.callbacks.onCancelled();
|
|
2364
|
+
},
|
|
2365
|
+
onAudio: (bytes) => opts.callbacks.onAudioChunk(bytes),
|
|
2366
|
+
onUserTranscript: opts.callbacks.onUserTranscript,
|
|
2367
|
+
onAgentTranscript: opts.callbacks.onAgentTranscript,
|
|
2368
|
+
onToolCall: opts.callbacks.onToolCall,
|
|
2369
|
+
onSpeechStarted: opts.callbacks.onSpeechStarted,
|
|
2370
|
+
onSpeechStopped: opts.callbacks.onSpeechStopped,
|
|
2371
|
+
onSessionExpired: () => {
|
|
2372
|
+
if (reconnecting) {
|
|
2373
|
+
reconnecting = false;
|
|
2374
|
+
reconnectInFlight = false;
|
|
2375
|
+
log.warn("S2S resume rejected: session expired", { sid: opts.sid });
|
|
2376
|
+
opts.callbacks.onError("connection", "S2S resume failed: session expired");
|
|
2377
|
+
return;
|
|
2378
|
+
}
|
|
2379
|
+
log.info("S2S session expired", { sid: opts.sid });
|
|
2380
|
+
handle?.close();
|
|
2381
|
+
},
|
|
2382
|
+
onError: (err) => opts.callbacks.onError("internal", err.message),
|
|
2383
|
+
onClose: (code, reason) => handleClose(code, reason)
|
|
2384
|
+
};
|
|
2385
|
+
}
|
|
2386
|
+
function canResumeAfter(code) {
|
|
2387
|
+
if (!TRANSIENT_CLOSE_CODES.has(code)) return false;
|
|
2388
|
+
if (providerSessionId === null) return false;
|
|
2389
|
+
if (reconnectInFlight) return false;
|
|
2390
|
+
return sessionReadyAt > 0 && Date.now() - sessionReadyAt < RESUME_WINDOW_MS;
|
|
2391
|
+
}
|
|
2392
|
+
function emitFatalClose(code, reason, wasReconnecting) {
|
|
2393
|
+
if (wasReconnecting) {
|
|
2394
|
+
reconnecting = false;
|
|
2395
|
+
reconnectInFlight = false;
|
|
2396
|
+
opts.callbacks.onError("connection", `S2S resume failed (code=${code})`);
|
|
2397
|
+
return;
|
|
2398
|
+
}
|
|
2399
|
+
if (currentReplyId !== null) {
|
|
2400
|
+
log.warn("S2S closed with active reply", {
|
|
2401
|
+
sid: opts.sid,
|
|
2402
|
+
agent: opts.agent,
|
|
2403
|
+
activeReplyId: currentReplyId,
|
|
2404
|
+
code,
|
|
2405
|
+
reason
|
|
2406
|
+
});
|
|
2407
|
+
opts.callbacks.onError("connection", `S2S closed mid-reply (code=${code})`);
|
|
2408
|
+
return;
|
|
2409
|
+
}
|
|
2410
|
+
log.info("S2S closed", {
|
|
2411
|
+
code,
|
|
2412
|
+
reason
|
|
2413
|
+
});
|
|
2414
|
+
}
|
|
2415
|
+
function startResume(prevId, code, reason) {
|
|
2416
|
+
reconnectInFlight = true;
|
|
2417
|
+
reconnecting = true;
|
|
2418
|
+
log.warn("S2S unexpected close — attempting resume", {
|
|
2419
|
+
sid: opts.sid,
|
|
2420
|
+
agent: opts.agent,
|
|
2421
|
+
code,
|
|
2422
|
+
reason,
|
|
2423
|
+
prevSessionId: prevId
|
|
2424
|
+
});
|
|
2425
|
+
if (currentReplyId !== null) {
|
|
2426
|
+
currentReplyId = null;
|
|
2427
|
+
opts.callbacks.onCancelled();
|
|
2428
|
+
}
|
|
2429
|
+
resume(prevId).catch((err) => {
|
|
2430
|
+
reconnecting = false;
|
|
2431
|
+
reconnectInFlight = false;
|
|
2432
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
2433
|
+
log.warn("S2S resume failed", {
|
|
2434
|
+
sid: opts.sid,
|
|
2435
|
+
error: msg
|
|
2436
|
+
});
|
|
2437
|
+
opts.callbacks.onError("connection", `S2S resume failed: ${msg}`);
|
|
2438
|
+
});
|
|
2439
|
+
}
|
|
2440
|
+
function handleClose(code, reason) {
|
|
2441
|
+
if (closing) {
|
|
2442
|
+
log.info("S2S closed", {
|
|
2443
|
+
code,
|
|
2444
|
+
reason
|
|
2445
|
+
});
|
|
2446
|
+
return;
|
|
2447
|
+
}
|
|
2448
|
+
const wasReconnecting = reconnecting;
|
|
2449
|
+
if (!canResumeAfter(code)) {
|
|
2450
|
+
emitFatalClose(code, reason, wasReconnecting);
|
|
2451
|
+
return;
|
|
2452
|
+
}
|
|
2453
|
+
const prevId = providerSessionId;
|
|
2454
|
+
if (prevId === null) return;
|
|
2455
|
+
startResume(prevId, code, reason);
|
|
2456
|
+
}
|
|
2457
|
+
async function resume(prevSessionId) {
|
|
2458
|
+
const newHandle = await _internals.connectS2s({
|
|
2459
|
+
apiKey: opts.apiKey,
|
|
2460
|
+
config: opts.s2sConfig,
|
|
2461
|
+
createWebSocket: createWs,
|
|
2462
|
+
logger: log,
|
|
2463
|
+
...opts.sid !== void 0 ? { sid: opts.sid } : {},
|
|
2464
|
+
callbacks: buildCallbacks()
|
|
2465
|
+
});
|
|
2466
|
+
if (closing) {
|
|
2467
|
+
newHandle.close();
|
|
2468
|
+
return;
|
|
2469
|
+
}
|
|
2470
|
+
handle = newHandle;
|
|
2471
|
+
newHandle.resumeSession(prevSessionId);
|
|
2472
|
+
}
|
|
1977
2473
|
async function start() {
|
|
1978
2474
|
handle = await _internals.connectS2s({
|
|
1979
2475
|
apiKey: opts.apiKey,
|
|
@@ -1981,51 +2477,12 @@ function createS2sTransport(opts) {
|
|
|
1981
2477
|
createWebSocket: createWs,
|
|
1982
2478
|
logger: log,
|
|
1983
2479
|
sid: opts.sid,
|
|
1984
|
-
callbacks:
|
|
1985
|
-
onSessionReady: (providerSessionId) => opts.callbacks.onSessionReady?.(providerSessionId),
|
|
1986
|
-
onReplyStarted: (replyId) => {
|
|
1987
|
-
currentReplyId = replyId;
|
|
1988
|
-
opts.callbacks.onReplyStarted(replyId);
|
|
1989
|
-
},
|
|
1990
|
-
onReplyDone: () => {
|
|
1991
|
-
currentReplyId = null;
|
|
1992
|
-
opts.callbacks.onReplyDone();
|
|
1993
|
-
},
|
|
1994
|
-
onCancelled: () => {
|
|
1995
|
-
currentReplyId = null;
|
|
1996
|
-
opts.callbacks.onCancelled();
|
|
1997
|
-
},
|
|
1998
|
-
onAudio: (bytes) => opts.callbacks.onAudioChunk(bytes),
|
|
1999
|
-
onUserTranscript: opts.callbacks.onUserTranscript,
|
|
2000
|
-
onAgentTranscript: opts.callbacks.onAgentTranscript,
|
|
2001
|
-
onToolCall: opts.callbacks.onToolCall,
|
|
2002
|
-
onSpeechStarted: opts.callbacks.onSpeechStarted,
|
|
2003
|
-
onSpeechStopped: opts.callbacks.onSpeechStopped,
|
|
2004
|
-
onSessionExpired: () => {
|
|
2005
|
-
log.info("S2S session expired", { sid: opts.sid });
|
|
2006
|
-
handle?.close();
|
|
2007
|
-
},
|
|
2008
|
-
onError: (err) => opts.callbacks.onError("internal", err.message),
|
|
2009
|
-
onClose: (code, reason) => {
|
|
2010
|
-
if (currentReplyId !== null) {
|
|
2011
|
-
log.warn("S2S closed with active reply", {
|
|
2012
|
-
sid: opts.sid,
|
|
2013
|
-
agent: opts.agent,
|
|
2014
|
-
activeReplyId: currentReplyId,
|
|
2015
|
-
code,
|
|
2016
|
-
reason
|
|
2017
|
-
});
|
|
2018
|
-
opts.callbacks.onError("connection", `S2S closed mid-reply (code=${code})`);
|
|
2019
|
-
} else log.info("S2S closed", {
|
|
2020
|
-
code,
|
|
2021
|
-
reason
|
|
2022
|
-
});
|
|
2023
|
-
}
|
|
2024
|
-
}
|
|
2480
|
+
callbacks: buildCallbacks()
|
|
2025
2481
|
});
|
|
2026
2482
|
handle.updateSession(opts.sessionConfig);
|
|
2027
2483
|
}
|
|
2028
2484
|
async function stop() {
|
|
2485
|
+
closing = true;
|
|
2029
2486
|
handle?.close();
|
|
2030
2487
|
handle = null;
|
|
2031
2488
|
}
|