@alexkroman1/aai 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.turbo/turbo-build.log +17 -17
  2. package/CHANGELOG.md +18 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/host/providers/resolve.d.ts +7 -1
  5. package/dist/host/providers/stt/elevenlabs.d.ts +16 -0
  6. package/dist/host/providers/stt/soniox.d.ts +25 -0
  7. package/dist/host/runtime-barrel.js +534 -77
  8. package/dist/sdk/_internal-types.d.ts +2 -0
  9. package/dist/sdk/manifest-barrel.js +1 -1
  10. package/dist/sdk/providers/llm/google.d.ts +22 -0
  11. package/dist/sdk/providers/llm/groq.d.ts +21 -0
  12. package/dist/sdk/providers/llm/mistral.d.ts +21 -0
  13. package/dist/sdk/providers/llm/openai.d.ts +21 -0
  14. package/dist/sdk/providers/llm/xai.d.ts +21 -0
  15. package/dist/sdk/providers/llm-barrel.d.ts +5 -0
  16. package/dist/sdk/providers/llm-barrel.js +2 -2
  17. package/dist/sdk/providers/stt/elevenlabs.d.ts +36 -0
  18. package/dist/sdk/providers/stt/soniox.d.ts +37 -0
  19. package/dist/sdk/providers/stt-barrel.d.ts +2 -0
  20. package/dist/sdk/providers/stt-barrel.js +2 -2
  21. package/dist/soniox-DCQ3GqJq.js +69 -0
  22. package/dist/xai-jfQsxxPZ.js +55 -0
  23. package/host/builtin-tools.ts +1 -0
  24. package/host/providers/resolve.test.ts +110 -0
  25. package/host/providers/resolve.ts +113 -10
  26. package/host/providers/stt/elevenlabs.test.ts +200 -0
  27. package/host/providers/stt/elevenlabs.ts +145 -0
  28. package/host/providers/stt/soniox.test.ts +338 -0
  29. package/host/providers/stt/soniox.ts +239 -0
  30. package/host/runtime.test.ts +3 -1
  31. package/host/to-vercel-tools.test.ts +9 -1
  32. package/host/transports/pipeline-transport.test.ts +93 -0
  33. package/host/transports/pipeline-transport.ts +53 -30
  34. package/host/transports/s2s-transport.test.ts +222 -2
  35. package/host/transports/s2s-transport.ts +176 -40
  36. package/package.json +35 -2
  37. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  38. package/sdk/_internal-types.ts +3 -0
  39. package/sdk/providers/llm/google.ts +30 -0
  40. package/sdk/providers/llm/groq.ts +29 -0
  41. package/sdk/providers/llm/mistral.ts +29 -0
  42. package/sdk/providers/llm/openai.ts +29 -0
  43. package/sdk/providers/llm/xai.ts +29 -0
  44. package/sdk/providers/llm-barrel.ts +10 -0
  45. package/sdk/providers/stt/elevenlabs.ts +44 -0
  46. package/sdk/providers/stt/soniox.ts +45 -0
  47. package/sdk/providers/stt-barrel.ts +4 -0
  48. package/sdk/schema-alignment.test.ts +18 -6
  49. package/dist/anthropic-CcLZygAr.js +0 -10
  50. package/dist/assemblyai-C969QGi4.js +0 -35
@@ -2,22 +2,24 @@ import { r as DEFAULT_SYSTEM_PROMPT } from "../types-KUgezM6u.js";
2
2
  import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-C2nirZUI.js";
3
3
  import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-BeOQ7fXL.js";
4
4
  import { ClientMessageSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
5
- import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-3p3OJZPb.js";
6
- import { r as DEEPGRAM_KIND, t as ASSEMBLYAI_KIND } from "../assemblyai-C969QGi4.js";
5
+ import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-DFL07G3f.js";
6
+ import { a as ASSEMBLYAI_KIND, r as ELEVENLABS_KIND, s as DEEPGRAM_KIND, t as SONIOX_KIND } from "../soniox-DCQ3GqJq.js";
7
7
  import { a as RIME_KIND, n as CARTESIA_KIND } from "../cartesia-BfQPOQ7Y.js";
8
- import { t as ANTHROPIC_KIND } from "../anthropic-CcLZygAr.js";
8
+ import { a as MISTRAL_KIND, d as ANTHROPIC_KIND, l as GOOGLE_KIND, r as OPENAI_KIND, s as GROQ_KIND } from "../xai-jfQsxxPZ.js";
9
+ import { createRequire } from "node:module";
9
10
  import { z } from "zod";
10
11
  import { convert } from "html-to-text";
11
12
  import vm from "node:vm";
12
13
  import pTimeout from "p-timeout";
13
14
  import { createStorage, prefixStorage } from "unstorage";
14
- import { createAnthropic } from "@ai-sdk/anthropic";
15
15
  import { AssemblyAI } from "assemblyai";
16
16
  import { createNanoEvents } from "nanoevents";
17
17
  import { DeepgramClient } from "@deepgram/sdk";
18
+ import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
19
+ import { AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/elevenlabs-js/wrapper/realtime/index.js";
20
+ import WsWebSocket, { WebSocketServer } from "ws";
18
21
  import { randomUUID } from "node:crypto";
19
22
  import { Cartesia } from "@cartesia/cartesia-js";
20
- import WsWebSocket, { WebSocketServer } from "ws";
21
23
  import { jsonSchema, stepCountIs, streamText, tool } from "ai";
22
24
  import fs from "node:fs";
23
25
  import http from "node:http";
@@ -334,6 +336,7 @@ function resolveAllBuiltins(names, opts) {
334
336
  for (const name of names) for (const [toolName, def] of resolveBuiltin(name, opts)) {
335
337
  defs[toolName] = def;
336
338
  schemas.push({
339
+ type: "function",
337
340
  name: toolName,
338
341
  description: def.description,
339
342
  parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
@@ -564,6 +567,246 @@ function openDeepgram(opts = {}) {
564
567
  };
565
568
  }
566
569
  //#endregion
570
+ //#region host/providers/stt/elevenlabs.ts
571
+ /**
572
+ * ElevenLabs Scribe streaming STT opener (host-only).
573
+ *
574
+ * The user-facing descriptor factory (`elevenlabs(...)`) lives in
575
+ * `sdk/providers/stt/elevenlabs.ts`. This module is the host-side
576
+ * counterpart: it takes the descriptor options + an API key and
577
+ * returns an {@link SttOpener} that the pipeline session drives.
578
+ *
579
+ * Default model: `"scribe_v2_realtime"`. Audio is sent as base64-encoded
580
+ * PCM_16000; partial transcripts arrive on `transcript`, finals on
581
+ * `committed_transcript`.
582
+ */
583
/**
 * Translate a numeric PCM sample rate into the matching member of the
 * SDK's `AudioFormat` enum.
 *
 * @param sampleRate Sample rate in Hz; must be one of the listed numbers.
 * @returns The `AudioFormat` member for that rate.
 * @throws `stt_connect_failed` (via `makeSttError`) for any other value.
 */
function audioFormatFor(sampleRate) {
	// A Map keeps the strict, type-sensitive matching a `switch` gives:
	// the string "16000" does not match the number 16000.
	const memberByRate = new Map([
		[8e3, "PCM_8000"],
		[16e3, "PCM_16000"],
		[22050, "PCM_22050"],
		[24e3, "PCM_24000"],
		[44100, "PCM_44100"],
		[48e3, "PCM_48000"]
	]);
	const member = memberByRate.get(sampleRate);
	if (member === void 0) throw makeSttError("stt_connect_failed", `ElevenLabs STT: unsupported sample rate ${sampleRate}. Supported: 8000, 16000, 22050, 24000, 44100, 48000.`);
	return AudioFormat[member];
}
595
/**
 * Build an {@link SttOpener} from resolved ElevenLabs descriptor options.
 *
 * @param opts Descriptor options. `opts.model` selects the model (defaults
 *   to `"scribe_v2_realtime"`); `opts.languageCode` is forwarded only when
 *   it is set.
 * @returns An opener `{ name, open }`. `open(openOpts)` resolves to a
 *   session exposing `sendAudio(pcm)`, `on(event, fn)` and `close()`.
 *   Partial transcripts are emitted as `partial`, committed transcripts as
 *   `final`, and stream/auth failures as `error`.
 * @throws From `open`: `stt_auth_failed` when neither `openOpts.apiKey` nor
 *   `ELEVENLABS_API_KEY` is available; `stt_connect_failed` for an
 *   unsupported sample rate or when the realtime connect call rejects.
 */
function openElevenLabs(opts = {}) {
	return {
		name: "elevenlabs",
		async open(openOpts) {
			const apiKey = openOpts.apiKey || process.env.ELEVENLABS_API_KEY;
			if (!apiKey) throw makeSttError("stt_auth_failed", "ElevenLabs STT: missing API key. Set ELEVENLABS_API_KEY in the agent env.");
			// Validate the sample rate before connecting. Inside the `try`
			// below, the descriptive "unsupported sample rate" error would be
			// caught and re-wrapped as a misleading "connect failed: ..." message.
			const audioFormat = audioFormatFor(openOpts.sampleRate);
			const client = new ElevenLabsClient({ apiKey });
			let connection;
			try {
				connection = await client.speechToText.realtime.connect({
					modelId: opts.model ?? "scribe_v2_realtime",
					audioFormat,
					sampleRate: openOpts.sampleRate,
					commitStrategy: CommitStrategy.VAD,
					...opts.languageCode ? { languageCode: opts.languageCode } : {}
				});
			} catch (cause) {
				throw makeSttError("stt_connect_failed", `ElevenLabs STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
			}
			const emitter = createNanoEvents();
			// Once set, every SDK callback and `sendAudio` becomes a no-op.
			let closed = false;
			connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (msg) => {
				if (closed) return;
				const text = msg.text ?? "";
				if (text.length > 0) emitter.emit("partial", text);
			});
			connection.on(RealtimeEvents.COMMITTED_TRANSCRIPT, (msg) => {
				if (closed) return;
				const text = msg.text ?? "";
				if (text.length > 0) emitter.emit("final", text);
			});
			connection.on(RealtimeEvents.ERROR, (payload) => {
				if (closed) return;
				// The payload is either an Error or a server message object.
				const msg = payload instanceof Error ? payload.message : payload.error ?? `${payload.message_type}`;
				emitter.emit("error", makeSttError("stt_stream_error", msg));
			});
			connection.on(RealtimeEvents.AUTH_ERROR, (msg) => {
				if (closed) return;
				emitter.emit("error", makeSttError("stt_auth_failed", msg.error));
			});
			/** Idempotent, best-effort shutdown of the upstream connection. */
			const close = async () => {
				if (closed) return;
				closed = true;
				try {
					connection.close();
				} catch {}
			};
			// `close` never rejects, so discarding its promise is safe.
			if (openOpts.signal.aborted) void close();
			else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
			return {
				sendAudio(pcm) {
					if (closed) return;
					// View the same bytes without copying, then base64-encode.
					const bytes = Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
					connection.send({ audioBase64: bytes.toString("base64") });
				},
				on(event, fn) {
					return emitter.on(event, fn);
				},
				close
			};
		}
	};
}
659
+ //#endregion
660
+ //#region host/providers/stt/soniox.ts
661
+ /**
662
+ * Soniox real-time STT opener (host-only).
663
+ *
664
+ * The user-facing descriptor factory (`soniox(...)`) lives in
665
+ * `sdk/providers/stt/soniox.ts`. This module is the host-side
666
+ * counterpart: it takes the descriptor options + an API key and
667
+ * returns an {@link SttOpener} that the pipeline session drives.
668
+ *
669
+ * Soniox's published JS client (`@soniox/speech-to-text-web`) is
670
+ * browser-only — it depends on `MediaRecorder` and `getUserMedia`. For
671
+ * server-side use we talk to the WebSocket directly:
672
+ * `wss://stt-rt.soniox.com/transcribe-websocket`
673
+ *
674
+ * Wire format:
675
+ * - First text frame: JSON config with api_key, model, audio_format,
676
+ * sample_rate, num_channels (and optional language hints).
677
+ * - Subsequent binary frames: 16-bit signed little-endian PCM audio.
678
+ * - Server replies: JSON `{ tokens: [{ text, is_final }] }` messages.
679
+ * Final tokens accumulate; non-final tokens are a rolling preview.
680
+ * - On error: `{ error_code, error_message }`.
681
+ */
682
+ const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
683
/**
 * Split one batch of Soniox tokens into finals and a preview.
 *
 * Every non-empty final token's text is handed to `appendFinal`, in order;
 * the text of the non-final tokens is concatenated and returned as the
 * rolling preview string. Tokens with missing or empty text are ignored.
 */
function consumeTokens(tokens, appendFinal) {
	let preview = "";
	for (const token of tokens) {
		const piece = token.text ?? "";
		if (piece.length !== 0) {
			if (!token.is_final) preview += piece;
			else appendFinal(piece);
		}
	}
	return preview;
}
697
/**
 * Wait for a WebSocket to finish connecting.
 *
 * Resolves (with no value) on the socket's `open` event and rejects with
 * the emitted error on the first `error` event. Whichever fires first
 * removes the other listener, so nothing is left attached afterwards.
 */
function waitForOpen$1(ws) {
	return new Promise((resolve, reject) => {
		function handleOpen() {
			ws.off("error", handleError);
			resolve();
		}
		function handleError(err) {
			ws.off("open", handleOpen);
			reject(err);
		}
		ws.once("open", handleOpen);
		ws.once("error", handleError);
	});
}
712
/**
 * Assemble the initial JSON config frame for a Soniox session.
 *
 * The frame always carries the API key, model (default `"stt-rt-v3"`),
 * the fixed `pcm_s16le` mono audio format and the sample rate. A copy of
 * `opts.languageHints` is attached as `language_hints` only when the list
 * is non-empty.
 */
function buildConfigFrame(apiKey, opts, sampleRate) {
	const hints = opts.languageHints;
	const hasHints = Boolean(hints && hints.length > 0);
	return {
		api_key: apiKey,
		model: opts.model ?? "stt-rt-v3",
		audio_format: "pcm_s16le",
		sample_rate: sampleRate,
		num_channels: 1,
		...(hasHints ? { language_hints: [...hints] } : {})
	};
}
724
/**
 * Parse a Soniox text frame into a {@link SonioxResponse}.
 *
 * @param raw Frame payload; anything whose `toString()` yields the JSON text.
 * @returns The parsed object, or `null` on garbage: text that is not valid
 *   JSON, or valid JSON that is not a plain object (numbers, strings,
 *   booleans, `null`, arrays), since a response is always a JSON object.
 */
function parseFrame(raw) {
	try {
		const parsed = JSON.parse(raw.toString());
		const isObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
		return isObject ? parsed : null;
	} catch {
		return null;
	}
}
732
/**
 * Handle one server response. Emits `error`, `final`, and `partial` events
 * onto `emitter` based on the token batch and the running `finalBuf`. The
 * caller owns `finalBuf` so it survives across messages and can be flushed
 * on close.
 *
 * - An `error_code` emits a single `stt_stream_error` and nothing else.
 * - Final tokens are appended to `finalBuf.value`.
 * - The buffered final text is emitted as `final` (and the buffer reset)
 *   when this batch also carries non-final text, or when the response is
 *   marked `finished`, including a `finished` response that carries no
 *   tokens at all.
 * - Non-final text, if any, is emitted afterwards as `partial`.
 *
 * @param res Parsed server response.
 * @param emitter Object with an `emit(event, payload)` method.
 * @param finalBuf Mutable `{ value: string }` accumulator for final text.
 */
function handleResponse(res, emitter, finalBuf) {
	if (res.error_code !== void 0) {
		emitter.emit("error", makeSttError("stt_stream_error", `Soniox error ${res.error_code}: ${res.error_message ?? "unknown"}`));
		return;
	}
	// Do not bail out on an empty batch: a `finished` response with no tokens
	// must still flush whatever final text is buffered.
	const tokens = res.tokens ?? [];
	const nonFinal = tokens.length > 0 ? consumeTokens(tokens, (text) => {
		finalBuf.value += text;
	}) : "";
	if (finalBuf.value.length > 0 && (nonFinal.length > 0 || res.finished)) {
		emitter.emit("final", finalBuf.value);
		finalBuf.value = "";
	}
	if (nonFinal.length > 0) emitter.emit("partial", nonFinal);
}
753
/**
 * Create the host-side {@link SttOpener} for Soniox from resolved
 * descriptor options.
 *
 * `open(openOpts)` connects to `SONIOX_WS_URL`, sends the JSON config frame,
 * and returns a session with `sendAudio(pcm)`, `on(event, fn)` and
 * `close()`. Server frames are routed through `handleResponse`; socket
 * errors and non-1000 closes surface as `stt_stream_error` events. Closing
 * (directly or via `openOpts.signal`) flushes any buffered final text first.
 */
function openSoniox(opts = {}) {
	async function open(openOpts) {
		const apiKey = openOpts.apiKey || process.env.SONIOX_API_KEY;
		if (!apiKey) throw makeSttError("stt_auth_failed", "Soniox STT: missing API key. Set SONIOX_API_KEY in the agent env.");
		const socket = new WsWebSocket(SONIOX_WS_URL);
		const events = createNanoEvents();
		// Final text accumulated across frames until the next flush.
		const pendingFinal = { value: "" };
		let isClosed = false;
		try {
			await waitForOpen$1(socket);
		} catch (cause) {
			const detail = cause instanceof Error ? cause.message : String(cause);
			throw makeSttError("stt_connect_failed", `Soniox STT: connect failed: ${detail}`);
		}
		// The first frame must be the JSON configuration.
		socket.send(JSON.stringify(buildConfigFrame(apiKey, opts, openOpts.sampleRate)));
		socket.on("message", (raw) => {
			if (isClosed) return;
			const response = parseFrame(raw);
			if (response) handleResponse(response, events, pendingFinal);
		});
		socket.on("error", (err) => {
			if (isClosed) return;
			events.emit("error", makeSttError("stt_stream_error", err.message ?? String(err)));
		});
		socket.on("close", (code) => {
			if (isClosed) return;
			// 1000 is a normal closure; anything else is reported.
			if (code !== 1e3) events.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
		});
		/** Idempotent shutdown: flush buffered finals, then close the socket. */
		async function close() {
			if (isClosed) return;
			isClosed = true;
			if (pendingFinal.value.length > 0) {
				events.emit("final", pendingFinal.value);
				pendingFinal.value = "";
			}
			try {
				socket.close();
			} catch {}
		}
		if (!openOpts.signal.aborted) openOpts.signal.addEventListener("abort", () => void close(), { once: true });
		else close();
		return {
			sendAudio(pcm) {
				if (isClosed || socket.readyState !== WsWebSocket.OPEN) return;
				const view = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
				socket.send(view, { binary: true });
			},
			on(event, fn) {
				return events.on(event, fn);
			},
			close
		};
	}
	return {
		name: "soniox",
		open
	};
}
809
+ //#endregion
567
810
  //#region host/providers/tts/cartesia.ts
568
811
  /**
569
812
  * Cartesia TTS opener (host-only).
@@ -962,7 +1205,14 @@ function openRime(opts) {
962
1205
  * The guest sandbox never imports these functions, which is how the agent
963
1206
  * bundle stays free of `@ai-sdk/anthropic` / `assemblyai` /
964
1207
  * `@cartesia/cartesia-js`.
1208
+ *
1209
+ * `@ai-sdk/*` packages are loaded via `createRequire` lazily so self-hosted
1210
+ * users only need to install the providers they actually reference. A
1211
+ * missing package is reported as a friendly "package not installed" error
1212
+ * at first session start, not as a module-load failure when the host
1213
+ * package itself is imported.
965
1214
  */
1215
+ const requireFromHere = createRequire(import.meta.url);
966
1216
  /**
967
1217
  * Look up a provider API key: agent env first (set via `aai secret put` or
968
1218
  * `.env`), then the host's `process.env` as a fallback for self-hosted mode.
@@ -976,7 +1226,9 @@ function resolveStt(descriptor) {
976
1226
  switch (descriptor.kind) {
977
1227
  case ASSEMBLYAI_KIND: return openAssemblyAI(descriptor.options);
978
1228
  case DEEPGRAM_KIND: return openDeepgram(descriptor.options);
979
- default: throw new Error(`Unknown STT provider kind: "${descriptor.kind}". Supported: ${ASSEMBLYAI_KIND}, ${DEEPGRAM_KIND}.`);
1229
+ case ELEVENLABS_KIND: return openElevenLabs(descriptor.options);
1230
+ case SONIOX_KIND: return openSoniox(descriptor.options);
1231
+ default: throw new Error(`Unknown STT provider kind: "${descriptor.kind}". Supported: ${ASSEMBLYAI_KIND}, ${DEEPGRAM_KIND}, ${ELEVENLABS_KIND}, ${SONIOX_KIND}.`);
980
1232
  }
981
1233
  }
982
1234
  /** Resolve a {@link TtsProvider} descriptor into a host-side opener. */
@@ -991,24 +1243,73 @@ function resolveTts(descriptor) {
991
1243
  * Resolve an {@link LlmProvider} descriptor into a Vercel AI SDK
992
1244
  * {@link LanguageModel}.
993
1245
  *
994
- * The API key is pulled from the agent's env (e.g. `ANTHROPIC_API_KEY`).
1246
+ * The API key is pulled from the agent's env (e.g. `OPENAI_API_KEY`).
995
1247
  * Missing keys throw here — the pipeline session would fail on first
996
1248
  * `streamText` call otherwise, and the error is clearer at construction.
997
1249
  */
998
1250
  function resolveLlm(descriptor, env) {
999
1251
  switch (descriptor.kind) {
1000
1252
  case ANTHROPIC_KIND: {
1001
- const options = descriptor.options;
1002
- const apiKey = resolveApiKey("ANTHROPIC_API_KEY", env);
1003
- if (!apiKey) throw new Error("Anthropic LLM: missing API key. Set ANTHROPIC_API_KEY in the agent env.");
1253
+ const apiKey = requireKey(env, "ANTHROPIC_API_KEY", "Anthropic");
1254
+ const { createAnthropic } = loadProviderPackage("@ai-sdk/anthropic", "Anthropic");
1004
1255
  return createAnthropic({
1005
1256
  apiKey,
1006
1257
  baseURL: "https://api.anthropic.com/v1"
1007
- })(options.model);
1258
+ })(descriptor.options.model);
1259
+ }
1260
+ case OPENAI_KIND: {
1261
+ const apiKey = requireKey(env, "OPENAI_API_KEY", "OpenAI");
1262
+ const { createOpenAI } = loadProviderPackage("@ai-sdk/openai", "OpenAI");
1263
+ return createOpenAI({ apiKey })(descriptor.options.model);
1264
+ }
1265
+ case GOOGLE_KIND: {
1266
+ const apiKey = requireKey(env, "GOOGLE_GENERATIVE_AI_API_KEY", "Google");
1267
+ const { createGoogleGenerativeAI } = loadProviderPackage("@ai-sdk/google", "Google");
1268
+ return createGoogleGenerativeAI({ apiKey })(descriptor.options.model);
1008
1269
  }
1009
- default: throw new Error(`Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}.`);
1270
+ case MISTRAL_KIND: {
1271
+ const apiKey = requireKey(env, "MISTRAL_API_KEY", "Mistral");
1272
+ const { createMistral } = loadProviderPackage("@ai-sdk/mistral", "Mistral");
1273
+ return createMistral({ apiKey })(descriptor.options.model);
1274
+ }
1275
+ case "xai": {
1276
+ const apiKey = requireKey(env, "XAI_API_KEY", "xAI");
1277
+ const { createXai } = loadProviderPackage("@ai-sdk/xai", "xAI");
1278
+ return createXai({ apiKey })(descriptor.options.model);
1279
+ }
1280
+ case GROQ_KIND: {
1281
+ const apiKey = requireKey(env, "GROQ_API_KEY", "Groq");
1282
+ const { createGroq } = loadProviderPackage("@ai-sdk/groq", "Groq");
1283
+ return createGroq({ apiKey })(descriptor.options.model);
1284
+ }
1285
+ default: throw new Error(`Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}, ${OPENAI_KIND}, ${GOOGLE_KIND}, ${MISTRAL_KIND}, xai, ${GROQ_KIND}.`);
1010
1286
  }
1011
1287
  }
1288
/**
 * Look up the API key `name` through `resolveApiKey` and insist that it
 * exists.
 *
 * @param env Agent env passed through to `resolveApiKey`.
 * @param name Environment variable name holding the key.
 * @param label Provider label used in the error message.
 * @returns The resolved key.
 * @throws Error when no key is found.
 */
function requireKey(env, name, label) {
	const apiKey = resolveApiKey(name, env);
	if (apiKey) return apiKey;
	throw new Error(`${label} LLM: missing API key. Set ${name} in the agent env.`);
}
1293
/**
 * Load an optional `@ai-sdk/*` package synchronously through the
 * module-local `require`.
 *
 * When the package itself cannot be found, the failure is replaced by an
 * install-hint error (with the original error attached as `cause`); any
 * other load failure is rethrown untouched.
 *
 * @param name Package specifier to load.
 * @param label Provider label used in the install-hint message.
 * @returns The loaded module.
 */
function loadProviderPackage(name, label) {
	let loaded;
	try {
		loaded = requireFromHere(name);
	} catch (err) {
		if (!isModuleNotFound(err, name)) throw err;
		throw new Error(`${label} LLM: package \`${name}\` is not installed. Run \`pnpm add ${name}\`.`, { cause: err });
	}
	return loaded;
}
1307
/**
 * Decide whether `err` means "the package `name` itself is not installed".
 *
 * Only the quoted specifier in Node's message (`Cannot find module 'x'` /
 * `Cannot find package 'x'`) is matched. A plain substring test would also
 * hit the "Require stack" paths and longer package names sharing the
 * prefix, misreporting a missing transitive dependency as the provider
 * package being absent.
 *
 * @param err Value caught from the module load.
 * @param name Exact package specifier that was requested.
 * @returns `true` only for a module-not-found error naming `name` exactly.
 */
function isModuleNotFound(err, name) {
	if (!(err instanceof Error)) return false;
	const code = err.code;
	if (code !== "MODULE_NOT_FOUND" && code !== "ERR_MODULE_NOT_FOUND") return false;
	return err.message.includes(`'${name}'`);
}
1012
1313
  //#endregion
1013
1314
  //#region host/runtime-config.ts
1014
1315
  /**
@@ -1492,9 +1793,10 @@ function createPipelineTransport(opts) {
1492
1793
  stopWhen: stepCountIs(maxSteps),
1493
1794
  abortSignal: ctl.signal
1494
1795
  });
1796
+ const handlePart = makeStreamPartHandler(onDelta);
1495
1797
  for await (const part of result.fullStream) {
1496
1798
  if (ctl.signal.aborted) break;
1497
- handleStreamPart(part, ctl, onDelta);
1799
+ handlePart(part);
1498
1800
  }
1499
1801
  } catch (err) {
1500
1802
  if (!ctl.signal.aborted) {
@@ -1507,31 +1809,54 @@ function createPipelineTransport(opts) {
1507
1809
  }
1508
1810
  }
1509
1811
  }
1510
- function handleStreamPart(part, _ctl, onDelta) {
1511
- switch (part.type) {
1512
- case "text-delta": {
1513
- const delta = part.text ?? "";
1514
- if (delta.length === 0) return;
1515
- onDelta(delta);
1516
- ttsSession?.sendText(delta);
1517
- return;
1518
- }
1519
- case "tool-call": {
1520
- const input = part.input ?? {};
1521
- callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
1522
- return;
1523
- }
1524
- case "error": {
1525
- const msg = errorMessage(part.error);
1526
- log.error("LLM stream error", {
1527
- message: msg,
1528
- sid: opts.sid
1529
- });
1530
- emitError("llm", msg);
1531
- return;
1812
+ /**
1813
+ * Stateful per-turn handler for `streamText` `fullStream` parts.
1814
+ *
1815
+ * Tracks text-segment boundaries so that consecutive segments — which the
1816
+ * Vercel SDK emits across tool-call hops as `text-end` followed later by a
1817
+ * fresh `text-start` — don't fuse into "...up.Got it" when concatenated for
1818
+ * the transcript or streamed to TTS. When a boundary is crossed and neither
1819
+ * side carries whitespace, a single space is injected into both streams.
1820
+ */
1821
+ function makeStreamPartHandler(onDelta) {
1822
+ let pendingSeparator = false;
1823
+ let lastChar = "";
1824
+ function emitText(delta) {
1825
+ if (delta.length === 0) return;
1826
+ let out = delta;
1827
+ if (pendingSeparator) {
1828
+ pendingSeparator = false;
1829
+ if (!(lastChar === "" || /\s/.test(lastChar) || /^\s/.test(out))) out = ` ${out}`;
1532
1830
  }
1533
- default: return;
1831
+ lastChar = out.slice(-1);
1832
+ onDelta(out);
1833
+ ttsSession?.sendText(out);
1534
1834
  }
1835
+ return function handlePart(part) {
1836
+ switch (part.type) {
1837
+ case "text-delta":
1838
+ emitText(part.text ?? "");
1839
+ return;
1840
+ case "text-end":
1841
+ pendingSeparator = true;
1842
+ return;
1843
+ case "tool-call": {
1844
+ const input = part.input ?? {};
1845
+ callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
1846
+ return;
1847
+ }
1848
+ case "error": {
1849
+ const msg = errorMessage(part.error);
1850
+ log.error("LLM stream error", {
1851
+ message: msg,
1852
+ sid: opts.sid
1853
+ });
1854
+ emitError("llm", msg);
1855
+ return;
1856
+ }
1857
+ default: return;
1858
+ }
1859
+ };
1535
1860
  }
1536
1861
  /**
1537
1862
  * Flush TTS and wait for drain. Resolves on:
@@ -1969,11 +2294,182 @@ function connectS2s(opts) {
1969
2294
  //#region host/transports/s2s-transport.ts
1970
2295
  /** @internal Exposed for testing — allows spying on connectS2s in unit tests. */
1971
2296
  const _internals = { connectS2s };
2297
+ /**
2298
+ * Close codes worth attempting `session.resume` on. These are network/server
2299
+ * blips, not protocol or auth violations. Per AssemblyAI's docs, sessions are
2300
+ * preserved for 30 s after disconnect, so resume is bounded by the window in
2301
+ * `RESUME_WINDOW_MS` below.
2302
+ */
2303
+ const TRANSIENT_CLOSE_CODES = new Set([
2304
+ 1005,
2305
+ 1006,
2306
+ 1011,
2307
+ 3005
2308
+ ]);
2309
+ /**
2310
+ * AssemblyAI keeps the session alive for 30 s after disconnect; we leave a
2311
+ * little headroom so the resume request still fits inside that window after
2312
+ * the new WebSocket finishes opening.
2313
+ */
2314
+ const RESUME_WINDOW_MS = 25e3;
1972
2315
  function createS2sTransport(opts) {
1973
2316
  const log = opts.logger ?? consoleLogger;
1974
2317
  const createWs = opts.createWebSocket ?? defaultCreateS2sWebSocket;
1975
2318
  let handle = null;
1976
2319
  let currentReplyId = null;
2320
+ /** Most recent `session.ready` ID — present once the upstream session is established. */
2321
+ let providerSessionId = null;
2322
+ /** When the current session became ready; bounds the resume window. */
2323
+ let sessionReadyAt = 0;
2324
+ /** Set by `stop()` so a deliberate close doesn't trigger a reconnect. */
2325
+ let closing = false;
2326
+ /**
2327
+ * True while a `session.resume` round-trip is in flight (between sending
2328
+ * resume and the next `session.ready`). Used to distinguish a resume failure
2329
+ * (close before ready) from a normal close.
2330
+ */
2331
+ let reconnecting = false;
2332
+ /**
2333
+ * Set when a reconnect attempt is kicked off, cleared once the resumed
2334
+ * session's `session.ready` arrives. Prevents back-to-back reconnect loops
2335
+ * when the freshly-resumed socket also drops before fully recovering.
2336
+ */
2337
+ let reconnectInFlight = false;
2338
+ function buildCallbacks() {
2339
+ return {
2340
+ onSessionReady: (id) => {
2341
+ providerSessionId = id;
2342
+ sessionReadyAt = Date.now();
2343
+ if (reconnecting) {
2344
+ reconnecting = false;
2345
+ reconnectInFlight = false;
2346
+ log.info("S2S resumed", {
2347
+ sid: opts.sid,
2348
+ sessionId: id
2349
+ });
2350
+ }
2351
+ opts.callbacks.onSessionReady?.(id);
2352
+ },
2353
+ onReplyStarted: (replyId) => {
2354
+ currentReplyId = replyId;
2355
+ opts.callbacks.onReplyStarted(replyId);
2356
+ },
2357
+ onReplyDone: () => {
2358
+ currentReplyId = null;
2359
+ opts.callbacks.onReplyDone();
2360
+ },
2361
+ onCancelled: () => {
2362
+ currentReplyId = null;
2363
+ opts.callbacks.onCancelled();
2364
+ },
2365
+ onAudio: (bytes) => opts.callbacks.onAudioChunk(bytes),
2366
+ onUserTranscript: opts.callbacks.onUserTranscript,
2367
+ onAgentTranscript: opts.callbacks.onAgentTranscript,
2368
+ onToolCall: opts.callbacks.onToolCall,
2369
+ onSpeechStarted: opts.callbacks.onSpeechStarted,
2370
+ onSpeechStopped: opts.callbacks.onSpeechStopped,
2371
+ onSessionExpired: () => {
2372
+ if (reconnecting) {
2373
+ reconnecting = false;
2374
+ reconnectInFlight = false;
2375
+ log.warn("S2S resume rejected: session expired", { sid: opts.sid });
2376
+ opts.callbacks.onError("connection", "S2S resume failed: session expired");
2377
+ return;
2378
+ }
2379
+ log.info("S2S session expired", { sid: opts.sid });
2380
+ handle?.close();
2381
+ },
2382
+ onError: (err) => opts.callbacks.onError("internal", err.message),
2383
+ onClose: (code, reason) => handleClose(code, reason)
2384
+ };
2385
+ }
2386
+ function canResumeAfter(code) {
2387
+ if (!TRANSIENT_CLOSE_CODES.has(code)) return false;
2388
+ if (providerSessionId === null) return false;
2389
+ if (reconnectInFlight) return false;
2390
+ return sessionReadyAt > 0 && Date.now() - sessionReadyAt < RESUME_WINDOW_MS;
2391
+ }
2392
+ function emitFatalClose(code, reason, wasReconnecting) {
2393
+ if (wasReconnecting) {
2394
+ reconnecting = false;
2395
+ reconnectInFlight = false;
2396
+ opts.callbacks.onError("connection", `S2S resume failed (code=${code})`);
2397
+ return;
2398
+ }
2399
+ if (currentReplyId !== null) {
2400
+ log.warn("S2S closed with active reply", {
2401
+ sid: opts.sid,
2402
+ agent: opts.agent,
2403
+ activeReplyId: currentReplyId,
2404
+ code,
2405
+ reason
2406
+ });
2407
+ opts.callbacks.onError("connection", `S2S closed mid-reply (code=${code})`);
2408
+ return;
2409
+ }
2410
+ log.info("S2S closed", {
2411
+ code,
2412
+ reason
2413
+ });
2414
+ }
2415
+ function startResume(prevId, code, reason) {
2416
+ reconnectInFlight = true;
2417
+ reconnecting = true;
2418
+ log.warn("S2S unexpected close — attempting resume", {
2419
+ sid: opts.sid,
2420
+ agent: opts.agent,
2421
+ code,
2422
+ reason,
2423
+ prevSessionId: prevId
2424
+ });
2425
+ if (currentReplyId !== null) {
2426
+ currentReplyId = null;
2427
+ opts.callbacks.onCancelled();
2428
+ }
2429
+ resume(prevId).catch((err) => {
2430
+ reconnecting = false;
2431
+ reconnectInFlight = false;
2432
+ const msg = err instanceof Error ? err.message : String(err);
2433
+ log.warn("S2S resume failed", {
2434
+ sid: opts.sid,
2435
+ error: msg
2436
+ });
2437
+ opts.callbacks.onError("connection", `S2S resume failed: ${msg}`);
2438
+ });
2439
+ }
2440
+ function handleClose(code, reason) {
2441
+ if (closing) {
2442
+ log.info("S2S closed", {
2443
+ code,
2444
+ reason
2445
+ });
2446
+ return;
2447
+ }
2448
+ const wasReconnecting = reconnecting;
2449
+ if (!canResumeAfter(code)) {
2450
+ emitFatalClose(code, reason, wasReconnecting);
2451
+ return;
2452
+ }
2453
+ const prevId = providerSessionId;
2454
+ if (prevId === null) return;
2455
+ startResume(prevId, code, reason);
2456
+ }
2457
+ async function resume(prevSessionId) {
2458
+ const newHandle = await _internals.connectS2s({
2459
+ apiKey: opts.apiKey,
2460
+ config: opts.s2sConfig,
2461
+ createWebSocket: createWs,
2462
+ logger: log,
2463
+ ...opts.sid !== void 0 ? { sid: opts.sid } : {},
2464
+ callbacks: buildCallbacks()
2465
+ });
2466
+ if (closing) {
2467
+ newHandle.close();
2468
+ return;
2469
+ }
2470
+ handle = newHandle;
2471
+ newHandle.resumeSession(prevSessionId);
2472
+ }
1977
2473
  async function start() {
1978
2474
  handle = await _internals.connectS2s({
1979
2475
  apiKey: opts.apiKey,
@@ -1981,51 +2477,12 @@ function createS2sTransport(opts) {
1981
2477
  createWebSocket: createWs,
1982
2478
  logger: log,
1983
2479
  sid: opts.sid,
1984
- callbacks: {
1985
- onSessionReady: (providerSessionId) => opts.callbacks.onSessionReady?.(providerSessionId),
1986
- onReplyStarted: (replyId) => {
1987
- currentReplyId = replyId;
1988
- opts.callbacks.onReplyStarted(replyId);
1989
- },
1990
- onReplyDone: () => {
1991
- currentReplyId = null;
1992
- opts.callbacks.onReplyDone();
1993
- },
1994
- onCancelled: () => {
1995
- currentReplyId = null;
1996
- opts.callbacks.onCancelled();
1997
- },
1998
- onAudio: (bytes) => opts.callbacks.onAudioChunk(bytes),
1999
- onUserTranscript: opts.callbacks.onUserTranscript,
2000
- onAgentTranscript: opts.callbacks.onAgentTranscript,
2001
- onToolCall: opts.callbacks.onToolCall,
2002
- onSpeechStarted: opts.callbacks.onSpeechStarted,
2003
- onSpeechStopped: opts.callbacks.onSpeechStopped,
2004
- onSessionExpired: () => {
2005
- log.info("S2S session expired", { sid: opts.sid });
2006
- handle?.close();
2007
- },
2008
- onError: (err) => opts.callbacks.onError("internal", err.message),
2009
- onClose: (code, reason) => {
2010
- if (currentReplyId !== null) {
2011
- log.warn("S2S closed with active reply", {
2012
- sid: opts.sid,
2013
- agent: opts.agent,
2014
- activeReplyId: currentReplyId,
2015
- code,
2016
- reason
2017
- });
2018
- opts.callbacks.onError("connection", `S2S closed mid-reply (code=${code})`);
2019
- } else log.info("S2S closed", {
2020
- code,
2021
- reason
2022
- });
2023
- }
2024
- }
2480
+ callbacks: buildCallbacks()
2025
2481
  });
2026
2482
  handle.updateSession(opts.sessionConfig);
2027
2483
  }
2028
2484
  async function stop() {
2485
+ closing = true;
2029
2486
  handle?.close();
2030
2487
  handle = null;
2031
2488
  }