@alexkroman1/aai 1.3.2 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/.turbo/turbo-build.log +18 -14
  2. package/CHANGELOG.md +8 -0
  3. package/dist/_internal-types-3p3OJZPb.js +145 -0
  4. package/dist/anthropic-BrUCPKUc.js +10 -0
  5. package/dist/assemblyai-Cxg9eobY.js +18 -0
  6. package/dist/cartesia-DwDk2tEu.js +10 -0
  7. package/dist/host/_pipeline-test-fakes.d.ts +5 -5
  8. package/dist/host/pipeline-session.d.ts +5 -5
  9. package/dist/host/providers/resolve.d.ts +34 -0
  10. package/dist/host/providers/stt/assemblyai.d.ts +9 -18
  11. package/dist/host/providers/tts/cartesia.d.ts +11 -18
  12. package/dist/host/runtime-barrel.js +348 -42
  13. package/dist/host/runtime.d.ts +13 -9
  14. package/dist/index.js +2 -91
  15. package/dist/sdk/_internal-types.d.ts +27 -1
  16. package/dist/sdk/manifest-barrel.d.ts +2 -0
  17. package/dist/sdk/manifest-barrel.js +2 -2
  18. package/dist/sdk/manifest.d.ts +13 -2
  19. package/dist/sdk/protocol.d.ts +3 -3
  20. package/dist/sdk/protocol.js +1 -1
  21. package/dist/sdk/providers/llm/anthropic.d.ts +23 -0
  22. package/dist/sdk/providers/llm-barrel.d.ts +9 -0
  23. package/dist/sdk/providers/llm-barrel.js +2 -0
  24. package/dist/sdk/providers/stt/assemblyai.d.ts +30 -0
  25. package/dist/sdk/providers/stt-barrel.d.ts +9 -0
  26. package/dist/sdk/providers/stt-barrel.js +2 -0
  27. package/dist/sdk/providers/tts/cartesia.d.ts +23 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +9 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -0
  30. package/dist/sdk/providers.d.ts +59 -11
  31. package/dist/types-KUgezM6u.js +128 -0
  32. package/host/_pipeline-test-fakes.ts +6 -6
  33. package/host/integration/pipeline-reference.integration.test.ts +4 -4
  34. package/host/pipeline-session.ts +6 -6
  35. package/host/providers/providers.test-d.ts +19 -10
  36. package/host/providers/resolve.ts +90 -0
  37. package/host/providers/stt/assemblyai.test.ts +2 -2
  38. package/host/providers/stt/assemblyai.ts +25 -47
  39. package/host/providers/tts/cartesia.test.ts +2 -2
  40. package/host/providers/tts/cartesia.ts +43 -73
  41. package/host/runtime.ts +66 -39
  42. package/package.json +13 -7
  43. package/sdk/__snapshots__/exports.test.ts.snap +2 -0
  44. package/sdk/__snapshots__/schema-shapes.test.ts.snap +4 -0
  45. package/sdk/_internal-types.ts +28 -1
  46. package/sdk/define.test.ts +12 -10
  47. package/sdk/manifest-barrel.ts +2 -0
  48. package/sdk/manifest.test.ts +6 -3
  49. package/sdk/manifest.ts +26 -18
  50. package/sdk/providers/llm/anthropic.ts +31 -0
  51. package/sdk/providers/llm-barrel.ts +12 -0
  52. package/sdk/providers/stt/assemblyai.ts +38 -0
  53. package/sdk/providers/stt-barrel.ts +12 -0
  54. package/sdk/providers/tts/cartesia.ts +31 -0
  55. package/sdk/providers/tts-barrel.ts +12 -0
  56. package/sdk/providers.ts +81 -17
  57. package/dist/_internal-types-CoDTiBd1.js +0 -61
  58. package/dist/host/providers/llm.d.ts +0 -2
  59. package/dist/host/providers/stt-barrel.d.ts +0 -8
  60. package/dist/host/providers/stt-barrel.js +0 -92
  61. package/dist/host/providers/stt.d.ts +0 -2
  62. package/dist/host/providers/tts-barrel.d.ts +0 -8
  63. package/dist/host/providers/tts-barrel.js +0 -182
  64. package/dist/host/providers/tts.d.ts +0 -2
  65. package/dist/types-Cfx_4QDK.js +0 -39
  66. package/host/providers/llm.ts +0 -3
  67. package/host/providers/stt-barrel.ts +0 -13
  68. package/host/providers/stt.ts +0 -3
  69. package/host/providers/tts-barrel.ts +0 -13
  70. package/host/providers/tts.ts +0 -3
  71. /package/dist/{constants-BL3nvg4I.js → constants-C2nirZUI.js} +0 -0
@@ -0,0 +1,90 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * Descriptor → concrete-provider resolution (host-only).
4
+ *
5
+ * User code (and the server, after extracting config from a bundled agent)
6
+ * holds `SttProvider` / `LlmProvider` / `TtsProvider` **descriptors** —
7
+ * plain `{ kind, options }` data. At session start the runtime calls the
8
+ * resolvers here to turn each descriptor into its openable / callable
9
+ * host-side counterpart, importing the third-party SDK only at that point.
10
+ *
11
+ * The guest sandbox never imports these functions, which is how the agent
12
+ * bundle stays free of `@ai-sdk/anthropic` / `assemblyai` /
13
+ * `@cartesia/cartesia-js`.
14
+ */
15
+
16
+ import { createAnthropic } from "@ai-sdk/anthropic";
17
+ import type { LanguageModel } from "ai";
18
+ import { ANTHROPIC_KIND, type AnthropicOptions } from "../../sdk/providers/llm/anthropic.ts";
19
+ import { ASSEMBLYAI_KIND, type AssemblyAIOptions } from "../../sdk/providers/stt/assemblyai.ts";
20
+ import { CARTESIA_KIND, type CartesiaOptions } from "../../sdk/providers/tts/cartesia.ts";
21
+ import type {
22
+ LlmProvider,
23
+ SttOpener,
24
+ SttProvider,
25
+ TtsOpener,
26
+ TtsProvider,
27
+ } from "../../sdk/providers.ts";
28
+ import { openAssemblyAI } from "./stt/assemblyai.ts";
29
+ import { openCartesia } from "./tts/cartesia.ts";
30
+
31
+ /**
32
+ * Look up a provider API key: agent env first (set via `aai secret put` or
33
+ * `.env`), then the host's `process.env` as a fallback for self-hosted mode.
34
+ * Returns `""` if neither has it — the caller decides whether that's fatal.
35
+ */
36
+ export function resolveApiKey(envVar: string, env: Record<string, string>): string {
37
+ return env[envVar] ?? process.env[envVar] ?? "";
38
+ }
39
+
40
+ /** Resolve an {@link SttProvider} descriptor into a host-side opener. */
41
+ export function resolveStt(descriptor: SttProvider): SttOpener {
42
+ switch (descriptor.kind) {
43
+ case ASSEMBLYAI_KIND:
44
+ return openAssemblyAI(descriptor.options as unknown as AssemblyAIOptions);
45
+ default:
46
+ throw new Error(
47
+ `Unknown STT provider kind: "${descriptor.kind}". Supported: ${ASSEMBLYAI_KIND}.`,
48
+ );
49
+ }
50
+ }
51
+
52
+ /** Resolve a {@link TtsProvider} descriptor into a host-side opener. */
53
+ export function resolveTts(descriptor: TtsProvider): TtsOpener {
54
+ switch (descriptor.kind) {
55
+ case CARTESIA_KIND:
56
+ return openCartesia(descriptor.options as unknown as CartesiaOptions);
57
+ default:
58
+ throw new Error(
59
+ `Unknown TTS provider kind: "${descriptor.kind}". Supported: ${CARTESIA_KIND}.`,
60
+ );
61
+ }
62
+ }
63
+
64
+ /**
65
+ * Resolve an {@link LlmProvider} descriptor into a Vercel AI SDK
66
+ * {@link LanguageModel}.
67
+ *
68
+ * The API key is pulled from the agent's env (e.g. `ANTHROPIC_API_KEY`).
69
+ * Missing keys throw here — the pipeline session would fail on first
70
+ * `streamText` call otherwise, and the error is clearer at construction.
71
+ */
72
+ export function resolveLlm(descriptor: LlmProvider, env: Record<string, string>): LanguageModel {
73
+ switch (descriptor.kind) {
74
+ case ANTHROPIC_KIND: {
75
+ const options = descriptor.options as unknown as AnthropicOptions;
76
+ const apiKey = resolveApiKey("ANTHROPIC_API_KEY", env);
77
+ if (!apiKey) {
78
+ throw new Error("Anthropic LLM: missing API key. Set ANTHROPIC_API_KEY in the agent env.");
79
+ }
80
+ // Pass baseURL explicitly so the SDK's loadOptionalSetting returns
81
+ // before reading process.env["ANTHROPIC_BASE_URL"]. Without this,
82
+ // the Deno platform server needs --allow-env to start a session.
83
+ return createAnthropic({ apiKey, baseURL: "https://api.anthropic.com/v1" })(options.model);
84
+ }
85
+ default:
86
+ throw new Error(
87
+ `Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}.`,
88
+ );
89
+ }
90
+ }
@@ -7,7 +7,7 @@ import { fileURLToPath } from "node:url";
7
7
  import type { TurnEvent } from "assemblyai";
8
8
  import { describe, expect, test, vi } from "vitest";
9
9
  import { flush } from "../../_test-utils.ts";
10
- import { type AssemblyAISession, assemblyAI } from "./assemblyai.ts";
10
+ import { type AssemblyAISession, openAssemblyAI } from "./assemblyai.ts";
11
11
 
12
12
  const here = dirname(fileURLToPath(import.meta.url));
13
13
 
@@ -66,7 +66,7 @@ describe("assemblyAI STT adapter — fixture replay", () => {
66
66
  await readFile(join(here, "fixtures/assemblyai/basic-turn.json"), "utf8"),
67
67
  ) as Record<string, unknown>[];
68
68
 
69
- const provider = assemblyAI({ model: "u3pro-rt", apiKey: "k" });
69
+ const provider = openAssemblyAI({ model: "u3pro-rt" });
70
70
  const controller = new AbortController();
71
71
  const session = (await provider.open({
72
72
  sampleRate: 16_000,
@@ -1,10 +1,11 @@
1
1
  // Copyright 2025 the AAI authors. MIT license.
2
2
  /**
3
- * AssemblyAI Universal-Streaming STT adapter.
3
+ * AssemblyAI Universal-Streaming STT opener (host-only).
4
4
  *
5
- * Wraps the `assemblyai` Node SDK's {@link StreamingTranscriber} and
6
- * normalizes its event surface onto the {@link SttProvider} /
7
- * {@link SttEvents} contract consumed by the pipeline orchestrator.
5
+ * The user-facing descriptor factory (`assemblyAI(...)`) lives in
6
+ * `sdk/providers/stt/assemblyai.ts`. This module is the host-side
7
+ * counterpart: it takes the descriptor options + an API key and
8
+ * returns an {@link SttOpener} that the pipeline session drives.
8
9
  *
9
10
  * Default model: `"u3pro-rt"` (Universal-3 Pro Real-Time). The adapter
10
11
  * maps that to the SDK's `"u3-rt-pro"` `speechModel` value; any other
@@ -13,57 +14,39 @@
13
14
 
14
15
  import { AssemblyAI, type StreamingTranscriber } from "assemblyai";
15
16
  import { createNanoEvents, type Emitter } from "nanoevents";
16
- import type {
17
- SttError,
18
- SttEvents,
19
- SttOpenOptions,
20
- SttProvider,
21
- SttSession,
17
+ import type { AssemblyAIOptions } from "../../../sdk/providers/stt/assemblyai.ts";
18
+ import {
19
+ makeSttError,
20
+ type SttEvents,
21
+ type SttOpener,
22
+ type SttOpenOptions,
23
+ type SttSession,
22
24
  } from "../../../sdk/providers.ts";
23
25
 
24
- export interface AssemblyAIOptions {
25
- /**
26
- * Streaming speech model. Defaults to `"u3pro-rt"` (Universal-3 Pro
27
- * Real-Time). Arbitrary strings are forwarded to the SDK unchanged.
28
- */
29
- model?: "u3pro-rt" | string;
30
- /**
31
- * AssemblyAI API key. Falls back to `SttOpenOptions.apiKey`, then
32
- * `process.env.ASSEMBLYAI_API_KEY`.
33
- */
34
- apiKey?: string;
35
- }
36
-
37
26
  /** Internal: SttSession with a test-only handle to the raw SDK transcriber. */
38
27
  export interface AssemblyAISession extends SttSession {
39
28
  /** @internal Test-only: exposes the underlying SDK transcriber for fixture replay. */
40
29
  readonly _transcriber: StreamingTranscriber;
41
30
  }
42
31
 
43
- /** Translate the adapter's model alias to the SDK's `speechModel` value. */
32
+ /** Translate the descriptor's model alias to the SDK's `speechModel` value. */
44
33
  function resolveSpeechModel(model: string): string {
45
34
  // Plan's public name is "u3pro-rt"; the SDK's enum uses "u3-rt-pro".
46
35
  if (model === "u3pro-rt") return "u3-rt-pro";
47
36
  return model;
48
37
  }
49
38
 
50
- function makeError(message: string): SttError {
51
- const err = new Error(message) as SttError & { code: SttError["code"] };
52
- (err as { code: SttError["code"] }).code = "stt_stream_error";
53
- return err;
54
- }
55
-
56
- export function assemblyAI(opts: AssemblyAIOptions = {}): SttProvider {
39
+ /** Build an {@link SttOpener} from resolved AssemblyAI descriptor options. */
40
+ export function openAssemblyAI(opts: AssemblyAIOptions = {}): SttOpener {
57
41
  return {
58
42
  name: "assemblyai",
59
43
  async open(openOpts: SttOpenOptions): Promise<SttSession> {
60
- const apiKey = opts.apiKey ?? openOpts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
44
+ const apiKey = openOpts.apiKey || process.env.ASSEMBLYAI_API_KEY;
61
45
  if (!apiKey) {
62
- const err = new Error(
63
- "AssemblyAI STT adapter: missing API key. Provide via the factory option, SttOpenOptions, or the ASSEMBLYAI_API_KEY environment variable.",
64
- ) as SttError & { code: SttError["code"] };
65
- (err as { code: SttError["code"] }).code = "stt_auth_failed";
66
- throw err;
46
+ throw makeSttError(
47
+ "stt_auth_failed",
48
+ "AssemblyAI STT: missing API key. Set ASSEMBLYAI_API_KEY in the agent env.",
49
+ );
67
50
  }
68
51
 
69
52
  const client = new AssemblyAI({ apiKey });
@@ -91,25 +74,24 @@ export function assemblyAI(opts: AssemblyAIOptions = {}): SttProvider {
91
74
 
92
75
  transcriber.on("error", (err) => {
93
76
  if (closed) return;
94
- emitter.emit("error", makeError(err?.message ?? String(err)));
77
+ emitter.emit("error", makeSttError("stt_stream_error", err?.message ?? String(err)));
95
78
  });
96
79
 
97
80
  transcriber.on("close", (code) => {
98
81
  if (closed) return;
99
82
  // 1000 = normal closure.
100
83
  if (code !== 1000) {
101
- emitter.emit("error", makeError(`socket closed ${code}`));
84
+ emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
102
85
  }
103
86
  });
104
87
 
105
88
  try {
106
89
  await transcriber.connect();
107
90
  } catch (cause) {
108
- const err = new Error(
91
+ throw makeSttError(
92
+ "stt_connect_failed",
109
93
  `AssemblyAI STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`,
110
- ) as SttError & { code: SttError["code"] };
111
- (err as { code: SttError["code"] }).code = "stt_connect_failed";
112
- throw err;
94
+ );
113
95
  }
114
96
 
115
97
  const close = async (): Promise<void> => {
@@ -122,7 +104,6 @@ export function assemblyAI(opts: AssemblyAIOptions = {}): SttProvider {
122
104
  }
123
105
  };
124
106
 
125
- // Wire session-level abort to close the SDK socket.
126
107
  if (openOpts.signal.aborted) {
127
108
  void close();
128
109
  } else {
@@ -134,9 +115,6 @@ export function assemblyAI(opts: AssemblyAIOptions = {}): SttProvider {
134
115
  const session: AssemblyAISession = {
135
116
  sendAudio(pcm: Int16Array) {
136
117
  if (closed) return;
137
- // The SDK's sendAudio accepts ArrayBufferLike. Forward a detached
138
- // copy of the PCM view's window so the consumer sees only this
139
- // chunk's bytes.
140
118
  const copy = new Uint8Array(pcm.byteLength);
141
119
  copy.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
142
120
  transcriber.sendAudio(copy.buffer);
@@ -3,7 +3,7 @@
3
3
 
4
4
  import { beforeEach, describe, expect, test, vi } from "vitest";
5
5
  import { flush } from "../../_test-utils.ts";
6
- import { type CartesiaSession, cartesia } from "./cartesia.ts";
6
+ import { type CartesiaSession, openCartesia } from "./cartesia.ts";
7
7
 
8
8
  // Recorded interactions on the fake `TTSWSContext` — one entry per method call.
9
9
  interface RecordedSend {
@@ -101,7 +101,7 @@ async function openSession(): Promise<{
101
101
  session: CartesiaSession;
102
102
  controller: AbortController;
103
103
  }> {
104
- const provider = cartesia({ voice: "voice-id", apiKey: "k" });
104
+ const provider = openCartesia({ voice: "voice-id" });
105
105
  const controller = new AbortController();
106
106
  const session = (await provider.open({
107
107
  sampleRate: 16_000,
@@ -1,10 +1,14 @@
1
1
  // Copyright 2025 the AAI authors. MIT license.
2
2
  /**
3
- * Cartesia TTS adapter — streaming WebSocket with per-turn `context_id`.
3
+ * Cartesia TTS opener (host-only).
4
+ *
5
+ * The user-facing descriptor factory (`cartesia(...)`) lives in
6
+ * `sdk/providers/tts/cartesia.ts`. This module is the host-side
7
+ * counterpart: it takes the descriptor options + an API key and
8
+ * returns a {@link TtsOpener} that the pipeline session drives.
4
9
  *
5
10
  * Wraps `@cartesia/cartesia-js`'s `TTSWS` / `TTSWSContext` and normalizes it
6
- * onto the {@link TtsProvider} / {@link TtsEvents} contract consumed by the
7
- * pipeline orchestrator.
11
+ * onto the {@link TtsEvents} contract consumed by the pipeline orchestrator.
8
12
  *
9
13
  * **Per-turn context lifecycle.** Each `sendText(...)` within the same turn
10
14
  * appends to the same Cartesia context. On `flush()` or `cancel()`, a new
@@ -20,28 +24,15 @@ import { randomUUID } from "node:crypto";
20
24
  import { Cartesia } from "@cartesia/cartesia-js";
21
25
  import type { TTSWS, TTSWSContext } from "@cartesia/cartesia-js/resources/tts";
22
26
  import { createNanoEvents, type Emitter } from "nanoevents";
23
- import type {
24
- TtsError,
25
- TtsEvents,
26
- TtsOpenOptions,
27
- TtsProvider,
28
- TtsSession,
27
+ import type { CartesiaOptions } from "../../../sdk/providers/tts/cartesia.ts";
28
+ import {
29
+ makeTtsError,
30
+ type TtsEvents,
31
+ type TtsOpener,
32
+ type TtsOpenOptions,
33
+ type TtsSession,
29
34
  } from "../../../sdk/providers.ts";
30
35
 
31
- export interface CartesiaOptions {
32
- /** Cartesia voice ID. Required. */
33
- voice: string;
34
- /** Model ID. Defaults to `"sonic-2"`. */
35
- model?: string;
36
- /**
37
- * Cartesia API key. Falls back to `TtsOpenOptions.apiKey`, then
38
- * `process.env.CARTESIA_API_KEY`.
39
- */
40
- apiKey?: string;
41
- /** Spoken language hint. Defaults to `"en"`. */
42
- language?: string;
43
- }
44
-
45
36
  /** Internal: TtsSession with a test-only handle to the raw SDK socket. */
46
37
  export interface CartesiaSession extends TtsSession {
47
38
  /** @internal Test-only: exposes the underlying SDK WebSocket wrapper. */
@@ -50,12 +41,6 @@ export interface CartesiaSession extends TtsSession {
50
41
  readonly _currentContextId: () => string;
51
42
  }
52
43
 
53
- function makeError(message: string): TtsError {
54
- const err = new Error(message) as TtsError & { code: TtsError["code"] };
55
- (err as { code: TtsError["code"] }).code = "tts_stream_error";
56
- return err;
57
- }
58
-
59
44
  /** PCM16 sample rates supported by Cartesia's `raw` output format. */
60
45
  const CARTESIA_PCM16_RATES = [
61
46
  8000, 16_000, 22_050, 24_000, 44_100, 48_000,
@@ -66,24 +51,23 @@ function assertSupportedSampleRate(rate: number): CartesiaSampleRate {
66
51
  if ((CARTESIA_PCM16_RATES as readonly number[]).includes(rate)) {
67
52
  return rate as CartesiaSampleRate;
68
53
  }
69
- const err = new Error(
70
- `Cartesia TTS adapter: unsupported sample rate ${rate}. Supported: ${CARTESIA_PCM16_RATES.join(", ")}.`,
71
- ) as TtsError & { code: TtsError["code"] };
72
- (err as { code: TtsError["code"] }).code = "tts_connect_failed";
73
- throw err;
54
+ throw makeTtsError(
55
+ "tts_connect_failed",
56
+ `Cartesia TTS: unsupported sample rate ${rate}. Supported: ${CARTESIA_PCM16_RATES.join(", ")}.`,
57
+ );
74
58
  }
75
59
 
76
- export function cartesia(opts: CartesiaOptions): TtsProvider {
60
+ /** Build a {@link TtsOpener} from resolved Cartesia descriptor options. */
61
+ export function openCartesia(opts: CartesiaOptions): TtsOpener {
77
62
  return {
78
63
  name: "cartesia",
79
64
  async open(openOpts: TtsOpenOptions): Promise<TtsSession> {
80
- const apiKey = opts.apiKey ?? openOpts.apiKey ?? process.env.CARTESIA_API_KEY;
65
+ const apiKey = openOpts.apiKey || process.env.CARTESIA_API_KEY;
81
66
  if (!apiKey) {
82
- const err = new Error(
83
- "Cartesia TTS adapter: missing API key. Provide via the factory option, TtsOpenOptions, or the CARTESIA_API_KEY environment variable.",
84
- ) as TtsError & { code: TtsError["code"] };
85
- (err as { code: TtsError["code"] }).code = "tts_auth_failed";
86
- throw err;
67
+ throw makeTtsError(
68
+ "tts_auth_failed",
69
+ "Cartesia TTS: missing API key. Set CARTESIA_API_KEY in the agent env.",
70
+ );
87
71
  }
88
72
 
89
73
  const sampleRate = assertSupportedSampleRate(openOpts.sampleRate);
@@ -95,11 +79,10 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
95
79
  try {
96
80
  ws = await client.tts.websocket();
97
81
  } catch (cause) {
98
- const err = new Error(
82
+ throw makeTtsError(
83
+ "tts_connect_failed",
99
84
  `Cartesia TTS: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`,
100
- ) as TtsError & { code: TtsError["code"] };
101
- (err as { code: TtsError["code"] }).code = "tts_connect_failed";
102
- throw err;
85
+ );
103
86
  }
104
87
 
105
88
  const emitter: Emitter<TtsEvents> = createNanoEvents<TtsEvents>();
@@ -134,17 +117,15 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
134
117
  emitter.emit("done");
135
118
  };
136
119
 
137
- // Route SDK events onto the adapter's event surface, filtering by the
138
- // currently-active `context_id`. The TTSWS EventEmitter fires globally
139
- // across all contexts on the socket; we only care about the active one.
120
+ // TTSWS fires events globally across all contexts on the shared
121
+ // socket; filter by the currently-active context_id.
140
122
  ws.on("chunk", (event) => {
141
123
  if (closed) return;
142
124
  if (event.context_id !== context.contextId) return;
143
- // SDK decodes base64 → Buffer on receipt (`event.audio`). Forward as
144
- // Int16Array over the same byte window.
145
125
  const buf = event.audio;
146
126
  if (!buf || buf.byteLength === 0) return;
147
- // Cartesia sends PCM16 little-endian with even byte counts. Be defensive.
127
+ // Cartesia sends PCM16 LE; be defensive about odd byte counts
128
+ // so `new Int16Array` never throws on a misaligned length.
148
129
  const evenBytes = buf.byteLength - (buf.byteLength % 2);
149
130
  if (evenBytes === 0) return;
150
131
  const pcm = new Int16Array(buf.buffer.slice(buf.byteOffset, buf.byteOffset + evenBytes));
@@ -159,7 +140,7 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
159
140
 
160
141
  ws.on("error", (err) => {
161
142
  if (closed) return;
162
- emitter.emit("error", makeError(err?.message ?? String(err)));
143
+ emitter.emit("error", makeTtsError("tts_stream_error", err?.message ?? String(err)));
163
144
  });
164
145
 
165
146
  const close = async (): Promise<void> => {
@@ -172,7 +153,6 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
172
153
  }
173
154
  };
174
155
 
175
- // Session-level abort → close the SDK socket.
176
156
  if (openOpts.signal.aborted) {
177
157
  void close();
178
158
  } else {
@@ -181,8 +161,6 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
181
161
  });
182
162
  }
183
163
 
184
- /** Static part of each generation request; only `transcript` and
185
- * `continue` vary per send. Pinned here so `language` threads through. */
186
164
  const baseRequest = {
187
165
  model_id: model,
188
166
  voice: { mode: "id" as const, id: opts.voice },
@@ -194,11 +172,6 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
194
172
  language,
195
173
  };
196
174
 
197
- /**
198
- * Swallow rejections from async SDK calls — the global `error`
199
- * listener on `ws` emits a normalized {@link TtsError}, so there's
200
- * nothing useful for the caller to do with per-send failures.
201
- */
202
175
  const ignoreRejection = (_err: unknown): void => {
203
176
  // intentionally empty
204
177
  };
@@ -206,22 +179,19 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
206
179
  const session: CartesiaSession = {
207
180
  sendText(text: string) {
208
181
  if (closed || text.length === 0) return;
209
- // Send a delta with `continue: true`, sharing the same
210
- // context_id across all deltas of this turn.
211
182
  void context
212
183
  .send({ ...baseRequest, transcript: text, continue: true })
213
184
  .catch(ignoreRejection);
214
185
  },
215
186
  flush() {
216
187
  if (closed) return;
217
- // Send an empty transcript with `continue: false` the canonical
218
- // end-of-turn signal. The server replies with a `done` event
219
- // tagged with this context's id, which drives `emitDoneOnce`. We
220
- // also microtask-emit `done` as a fallback so the orchestrator's
221
- // state machine can't wedge if the server event is dropped.
222
- // TODO: drop the microtask fallback once we've verified Cartesia
223
- // always emits a `done` for cleanly-flushed contexts. See
224
- // 2026-04-22-pluggable-providers-design.md → "Note on flush() timing".
188
+ // Empty transcript with `continue: false` is the canonical
189
+ // end-of-turn signal. Cartesia replies with a `done` tagged
190
+ // by context_id, driving `emitDoneOnce`. The microtask
191
+ // fallback guards against a dropped server event wedging
192
+ // the orchestrator's state machine.
193
+ // TODO: drop the microtask fallback once we've verified
194
+ // Cartesia always emits `done` for cleanly-flushed contexts.
225
195
  void context
226
196
  .send({ ...baseRequest, transcript: "", continue: false })
227
197
  .catch(ignoreRejection);
@@ -230,10 +200,10 @@ export function cartesia(opts: CartesiaOptions): TtsProvider {
230
200
  },
231
201
  cancel() {
232
202
  if (closed) return;
233
- // `cancel()` calls ws.cancelContext(contextId) under the hood.
234
203
  void context.cancel().catch(ignoreRejection);
235
- // Emit `done` synchronously the orchestrator's state machine
236
- // advances on `done`, and barge-in must not be delayed.
204
+ // Emit synchronously: barge-in advances the orchestrator's
205
+ // state machine on `done`, and delaying it would audibly
206
+ // stall subsequent turns.
237
207
  emitDoneOnce();
238
208
  rotateContext();
239
209
  },
package/host/runtime.ts CHANGED
@@ -7,6 +7,7 @@
7
7
  * lifecycle hooks, and session management.
8
8
  */
9
9
 
10
+ import type { LanguageModel } from "ai";
10
11
  import pTimeout from "p-timeout";
11
12
  import { createStorage } from "unstorage";
12
13
  import { agentToolsToSchemas, type ToolSchema, toAgentConfig } from "../sdk/_internal-types.ts";
@@ -14,11 +15,19 @@ import { DEFAULT_SHUTDOWN_TIMEOUT_MS } from "../sdk/constants.ts";
14
15
  import type { Kv } from "../sdk/kv.ts";
15
16
  import type { ClientSink } from "../sdk/protocol.ts";
16
17
  import { buildReadyConfig, type ReadyConfig } from "../sdk/protocol.ts";
17
- import type { LlmProvider, SttProvider, TtsProvider } from "../sdk/providers.ts";
18
+ import {
19
+ assertProviderTriple,
20
+ type LlmProvider,
21
+ type SttOpener,
22
+ type SttProvider,
23
+ type TtsOpener,
24
+ type TtsProvider,
25
+ } from "../sdk/providers.ts";
18
26
  import type { AgentDef } from "../sdk/types.ts";
19
27
  import { toolError } from "../sdk/utils.ts";
20
28
  import { resolveAllBuiltins } from "./builtin-tools.ts";
21
29
  import { createPipelineSession } from "./pipeline-session.ts";
30
+ import { resolveApiKey, resolveLlm, resolveStt, resolveTts } from "./providers/resolve.ts";
22
31
  import type { Logger, S2SConfig } from "./runtime-config.ts";
23
32
  import { consoleLogger, DEFAULT_S2S_CONFIG } from "./runtime-config.ts";
24
33
  import type { CreateS2sWebSocket } from "./s2s.ts";
@@ -55,23 +64,36 @@ export type AgentRuntime = {
55
64
 
56
65
  // ─── Runtime implementation ──────────────────────────────────────────────────
57
66
 
67
+ /**
68
+ * Distinguish a descriptor (`{ kind, options }`) from an already-resolved
69
+ * opener / `LanguageModel`. The production path always passes descriptors;
70
+ * openers are a test escape hatch (fakes in `_pipeline-test-fakes.ts`).
71
+ * STT/TTS openers are identified by the `open` method, `LanguageModel` by
72
+ * its `specificationVersion` field — both absent on descriptors.
73
+ */
74
+ function resolveSttIfDescriptor(value: SttProvider | SttOpener): SttOpener {
75
+ return "open" in value ? value : resolveStt(value);
76
+ }
77
+
78
+ function resolveTtsIfDescriptor(value: TtsProvider | TtsOpener): TtsOpener {
79
+ return "open" in value ? value : resolveTts(value);
80
+ }
81
+
82
+ function resolveLlmIfDescriptor(
83
+ value: LlmProvider | LanguageModel,
84
+ env: Record<string, string>,
85
+ ): LanguageModel {
86
+ // LanguageModel can be a string (model-id shortcut) or an object with
87
+ // `specificationVersion`; descriptors are plain `{ kind, options }` objects.
88
+ if (typeof value === "string") return value;
89
+ return "specificationVersion" in value ? value : resolveLlm(value, env);
90
+ }
91
+
58
92
  /** Create an in-memory KV store (default for self-hosted). */
59
93
  function createLocalKv(): Kv {
60
94
  return createUnstorageKv({ storage: createStorage() });
61
95
  }
62
96
 
63
- /**
64
- * Resolve an API key host-side for pipeline providers.
65
- *
66
- * Checks the agent's declared env first, then the host process env as a
67
- * fallback. Returns `""` when absent — pipeline providers surface a clear
68
- * `MissingCredentialsError` via their `open()` that the orchestrator
69
- * converts to a `session.error` wire event.
70
- */
71
- function resolveApiKey(envVar: string, env: Record<string, string>): string {
72
- return env[envVar] ?? process.env[envVar] ?? "";
73
- }
74
-
75
97
  /**
76
98
  * Configuration for {@link createRuntime}.
77
99
  *
@@ -126,21 +148,24 @@ export type RuntimeOptions = {
126
148
  */
127
149
  fetch?: typeof globalThis.fetch | undefined;
128
150
  /**
129
- * Pluggable STT provider. Must be set together with `llm` and `tts` to
151
+ * STT provider. Accepts either a descriptor ({@link SttProvider},
152
+ * the normal production path) or a pre-resolved {@link SttOpener}
153
+ * (test escape hatch). Must be set together with `llm` and `tts` to
130
154
  * route sessions through the pipeline path; leave all three unset for
131
155
  * the default AssemblyAI Streaming Speech-to-Speech (S2S) path.
132
156
  */
133
- stt?: SttProvider | undefined;
157
+ stt?: SttProvider | SttOpener | undefined;
134
158
  /**
135
- * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
136
- * together with `stt` and `tts` to route sessions through the pipeline path.
159
+ * LLM provider. Accepts either a descriptor ({@link LlmProvider},
160
+ * produced by factories like `anthropic(...)`) or a concrete Vercel AI
161
+ * SDK `LanguageModel` (self-hosted / test escape hatch).
137
162
  */
138
- llm?: LlmProvider | undefined;
163
+ llm?: LlmProvider | LanguageModel | undefined;
139
164
  /**
140
- * Pluggable TTS provider. Must be set together with `stt` and `llm` to
141
- * route sessions through the pipeline path.
165
+ * TTS provider. Accepts either a descriptor ({@link TtsProvider})
166
+ * or a pre-resolved {@link TtsOpener}.
142
167
  */
143
- tts?: TtsProvider | undefined;
168
+ tts?: TtsProvider | TtsOpener | undefined;
144
169
  };
145
170
 
146
171
  /**
@@ -190,14 +215,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
190
215
  sessionStartTimeoutMs,
191
216
  shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS,
192
217
  } = opts;
193
- // Derive session mode from the provider triple: all three set ⇒ pipeline,
194
- // none set ⇒ s2s. Anything in-between is a configuration error.
195
- const providerCount =
196
- (opts.stt != null ? 1 : 0) + (opts.llm != null ? 1 : 0) + (opts.tts != null ? 1 : 0);
197
- if (providerCount !== 0 && providerCount !== 3) {
198
- throw new Error("stt, llm, and tts must be set together");
199
- }
200
- const mode: "s2s" | "pipeline" = providerCount === 3 ? "pipeline" : "s2s";
218
+ const mode = assertProviderTriple(opts.stt, opts.llm, opts.tts);
201
219
  const agentConfig = toAgentConfig(agent);
202
220
  const sessions = new Map<string, Session>();
203
221
  const sinkMap = new Map<string, ClientSink>();
@@ -271,6 +289,21 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
271
289
  };
272
290
  }
273
291
 
292
+ // Resolve pipeline providers once per runtime (not per session). Each
293
+ // session reuses the same opener / LanguageModel — the opener's `open()`
294
+ // mints the per-session stream inside.
295
+ const pipelineProviders =
296
+ mode === "pipeline"
297
+ ? {
298
+ // biome-ignore lint/style/noNonNullAssertion: mode === "pipeline" ⇒ all three set
299
+ stt: resolveSttIfDescriptor(opts.stt!),
300
+ // biome-ignore lint/style/noNonNullAssertion: mode === "pipeline" ⇒ all three set
301
+ llm: resolveLlmIfDescriptor(opts.llm!, env),
302
+ // biome-ignore lint/style/noNonNullAssertion: mode === "pipeline" ⇒ all three set
303
+ tts: resolveTtsIfDescriptor(opts.tts!),
304
+ }
305
+ : null;
306
+
274
307
  function createSession(sessionOpts: {
275
308
  id: string;
276
309
  agent: string;
@@ -279,13 +312,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
279
312
  resumeFrom?: string;
280
313
  }): Session {
281
314
  sinkMap.set(sessionOpts.id, sessionOpts.client);
282
- if (mode === "pipeline") {
283
- // biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
284
- const stt = opts.stt!;
285
- // biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
286
- const llm = opts.llm!;
287
- // biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
288
- const tts = opts.tts!;
315
+ if (pipelineProviders) {
289
316
  return createPipelineSession({
290
317
  id: sessionOpts.id,
291
318
  agent: sessionOpts.agent,
@@ -294,9 +321,9 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
294
321
  toolSchemas,
295
322
  toolGuidance,
296
323
  executeTool,
297
- stt,
298
- llm,
299
- tts,
324
+ stt: pipelineProviders.stt,
325
+ llm: pipelineProviders.llm,
326
+ tts: pipelineProviders.tts,
300
327
  sttApiKey: resolveApiKey("ASSEMBLYAI_API_KEY", env),
301
328
  ttsApiKey: resolveApiKey("CARTESIA_API_KEY", env),
302
329
  logger,