@alexkroman1/aai 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.turbo/turbo-build.log +10 -10
  2. package/CHANGELOG.md +19 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/assemblyai-C969QGi4.js +35 -0
  5. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  6. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  7. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  8. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  9. package/dist/host/providers/tts/rime.d.ts +44 -0
  10. package/dist/host/runtime-barrel.d.ts +4 -2
  11. package/dist/host/runtime-barrel.js +1434 -1209
  12. package/dist/host/runtime.d.ts +2 -2
  13. package/dist/host/s2s.d.ts +16 -16
  14. package/dist/host/session-core.d.ts +37 -0
  15. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  16. package/dist/host/transports/s2s-transport.d.ts +19 -0
  17. package/dist/host/transports/types.d.ts +45 -0
  18. package/dist/host/ws-handler.d.ts +14 -10
  19. package/dist/sdk/_internal-types.d.ts +2 -0
  20. package/dist/sdk/manifest-barrel.js +1 -1
  21. package/dist/sdk/protocol.d.ts +6 -5
  22. package/dist/sdk/providers/llm-barrel.js +1 -1
  23. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  24. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  25. package/dist/sdk/providers/stt-barrel.js +2 -2
  26. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  27. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -2
  30. package/host/_pipeline-test-fakes.ts +6 -3
  31. package/host/_test-utils.ts +209 -128
  32. package/host/builtin-tools.ts +1 -0
  33. package/host/cleanup.test.ts +25 -298
  34. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  35. package/host/providers/resolve.ts +10 -2
  36. package/host/providers/stt/deepgram.test.ts +229 -0
  37. package/host/providers/stt/deepgram.ts +172 -0
  38. package/host/providers/tts/cartesia.ts +7 -3
  39. package/host/providers/tts/rime.test.ts +251 -0
  40. package/host/providers/tts/rime.ts +322 -0
  41. package/host/runtime-barrel.ts +4 -2
  42. package/host/runtime.test.ts +16 -47
  43. package/host/runtime.ts +131 -23
  44. package/host/s2s.test.ts +122 -131
  45. package/host/s2s.ts +44 -52
  46. package/host/session-core.test.ts +257 -0
  47. package/host/session-core.ts +262 -0
  48. package/host/to-vercel-tools.test.ts +9 -1
  49. package/host/transports/pipeline-transport.test.ts +653 -0
  50. package/host/transports/pipeline-transport.ts +532 -0
  51. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  52. package/host/transports/s2s-transport.test.ts +56 -0
  53. package/host/transports/s2s-transport.ts +116 -0
  54. package/host/transports/types.test.ts +22 -0
  55. package/host/transports/types.ts +51 -0
  56. package/host/ws-handler.test.ts +324 -242
  57. package/host/ws-handler.ts +56 -59
  58. package/package.json +2 -1
  59. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  60. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  61. package/sdk/_internal-types.ts +3 -0
  62. package/sdk/protocol-compat.test.ts +8 -0
  63. package/sdk/protocol.ts +6 -5
  64. package/sdk/providers/stt/deepgram.ts +43 -0
  65. package/sdk/providers/stt-barrel.ts +2 -0
  66. package/sdk/providers/tts/cartesia.ts +15 -5
  67. package/sdk/providers/tts/rime.ts +52 -0
  68. package/sdk/providers/tts-barrel.ts +2 -0
  69. package/sdk/schema-alignment.test.ts +18 -6
  70. package/dist/assemblyai-Cxg9eobY.js +0 -18
  71. package/dist/cartesia-DwDk2tEu.js +0 -10
  72. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  73. package/dist/host/pipeline-session.d.ts +0 -52
  74. package/dist/host/session-ctx.d.ts +0 -73
  75. package/dist/host/session.d.ts +0 -62
  76. package/host/pipeline-session-ctx.test.ts +0 -31
  77. package/host/pipeline-session-ctx.ts +0 -36
  78. package/host/pipeline-session.test.ts +0 -672
  79. package/host/pipeline-session.ts +0 -533
  80. package/host/s2s-fixtures.test.ts +0 -237
  81. package/host/session-ctx.test.ts +0 -387
  82. package/host/session-ctx.ts +0 -134
  83. package/host/session-fixture-replay.test.ts +0 -128
  84. package/host/session.test.ts +0 -634
  85. package/host/session.ts +0 -412
  86. /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
@@ -11,12 +11,11 @@ import {
11
11
  MAX_MESSAGE_BUFFER_SIZE,
12
12
  WS_OPEN,
13
13
  } from "../sdk/constants.ts";
14
- import type { ClientMessage, ClientSink, ReadyConfig } from "../sdk/protocol.ts";
15
- import { ClientMessageSchema, lenientParse } from "../sdk/protocol.ts";
16
- import { errorDetail, errorMessage } from "../sdk/utils.ts";
14
+ import { ClientMessageSchema, type ClientSink, lenientParse } from "../sdk/protocol.ts";
15
+ import { errorDetail } from "../sdk/utils.ts";
17
16
  import type { Logger } from "./runtime-config.ts";
18
17
  import { consoleLogger } from "./runtime-config.ts";
19
- import type { Session } from "./session.ts";
18
+ import type { SessionCore } from "./session-core.ts";
20
19
 
21
20
  /**
22
21
  * Minimal WebSocket interface accepted by {@link wireSessionSocket}.
@@ -34,11 +33,11 @@ export type SessionWebSocket = {
34
33
  /** Options for wiring a WebSocket to a session. */
35
34
  export type WsSessionOptions = {
36
35
  /** Map of active sessions (session is added on open, removed on close). */
37
- sessions: Map<string, Session>;
36
+ sessions: Map<string, SessionCore>;
38
37
  /** Factory function to create a session for a given ID and client sink. */
39
- createSession: (sessionId: string, client: ClientSink) => Session;
38
+ createSession: (sessionId: string, client: ClientSink) => SessionCore;
40
39
  /** Protocol config sent to the client immediately on connect. */
41
- readyConfig: ReadyConfig;
40
+ readyConfig: { audioFormat: "pcm16"; sampleRate: number; ttsSampleRate: number };
42
41
  /** Additional key-value pairs included in log messages. */
43
42
  logContext?: Record<string, string>;
44
43
  /** Callback invoked when the WebSocket connection opens. */
@@ -57,25 +56,27 @@ export type WsSessionOptions = {
57
56
  resumeFrom?: string;
58
57
  };
59
58
 
59
+ const AUDIO_DONE_FRAME = JSON.stringify({
60
+ type: "audio_done",
61
+ } satisfies { type: "audio_done" });
62
+
60
63
  /**
61
64
  * Creates a {@link ClientSink} backed by a plain WebSocket.
62
65
  *
63
- * Text events are sent as JSON text frames; audio chunks are sent as
64
- * binary frames (zero-copy).
66
+ * Session events are sent as JSON text frames; audio chunks are sent as raw
67
+ * PCM16 binary frames.
65
68
  */
66
69
  function createClientSink(ws: SessionWebSocket, log: Logger): ClientSink {
67
- /** Send data over ws, silently dropping if the socket is not open. */
68
- function safeSend(data: string | ArrayBuffer | Uint8Array): void {
70
+ function safeSend(data: string | Uint8Array): void {
69
71
  try {
70
72
  if (ws.readyState !== WS_OPEN) return;
71
73
  ws.send(data);
72
74
  } catch (err) {
73
75
  log.debug?.("safeSend: socket closed between readyState check and send", {
74
- error: errorMessage(err),
76
+ error: err instanceof Error ? err.message : String(err),
75
77
  });
76
78
  }
77
79
  }
78
-
79
80
  return {
80
81
  get open() {
81
82
  return ws.readyState === WS_OPEN;
@@ -87,48 +88,40 @@ function createClientSink(ws: SessionWebSocket, log: Logger): ClientSink {
87
88
  safeSend(chunk);
88
89
  },
89
90
  playAudioDone() {
90
- safeSend(JSON.stringify({ type: "audio_done" }));
91
+ safeSend(AUDIO_DONE_FRAME);
91
92
  },
92
93
  };
93
94
  }
94
95
 
95
- function handleBinaryAudio(data: unknown, session: Session): boolean {
96
- // Buffer extends Uint8Array in Node, so this catches Buffer too.
96
+ function handleBinaryAudio(data: unknown, session: SessionCore): boolean {
97
97
  if (data instanceof Uint8Array) {
98
98
  session.onAudio(data);
99
99
  return true;
100
100
  }
101
- if (data instanceof ArrayBuffer) {
102
- session.onAudio(new Uint8Array(data));
103
- return true;
104
- }
105
101
  return false;
106
102
  }
107
103
 
108
- function handleTextMessage(
109
- data: unknown,
110
- session: Session,
111
- log: Logger,
112
- ctx: Record<string, string>,
113
- sid: string,
114
- ): void {
115
- if (typeof data !== "string") return;
116
- let json: unknown;
104
+ function handleTextMessage(data: unknown, session: SessionCore, log: Logger, sid: string): void {
105
+ if (typeof data !== "string") {
106
+ log.warn("ws: non-string, non-binary frame received; dropping", { sid });
107
+ return;
108
+ }
109
+ let parsed: unknown;
117
110
  try {
118
- json = JSON.parse(data);
111
+ parsed = JSON.parse(data);
119
112
  } catch {
120
- log.warn("Invalid JSON from client", { ...ctx, sid });
113
+ log.warn("ws: invalid JSON; dropping", { sid, data: data.slice(0, 200) });
121
114
  return;
122
115
  }
123
-
124
- const parsed = lenientParse(ClientMessageSchema, json);
125
- if (!parsed.ok) {
126
- if (parsed.malformed) log.warn("Invalid client message", { ...ctx, sid, error: parsed.error });
116
+ const result = lenientParse(ClientMessageSchema, parsed);
117
+ if (!result.ok) {
118
+ if (result.malformed) {
119
+ log.warn("ws: malformed client message", { sid, error: result.error });
120
+ }
121
+ // else: unrecognised type — silently drop (rolling-upgrade tolerance)
127
122
  return;
128
123
  }
129
-
130
- const msg: ClientMessage = parsed.data;
131
- switch (msg.type) {
124
+ switch (result.data.type) {
132
125
  case "audio_ready":
133
126
  session.onAudioReady();
134
127
  break;
@@ -139,7 +132,7 @@ function handleTextMessage(
139
132
  session.onReset();
140
133
  break;
141
134
  case "history":
142
- session.onHistory(msg.messages);
135
+ session.onHistory(result.data.messages);
143
136
  break;
144
137
  default:
145
138
  break;
@@ -147,13 +140,13 @@ function handleTextMessage(
147
140
  }
148
141
 
149
142
  /**
150
- * Attaches session lifecycle handlers to a native WebSocket using
151
- * plain JSON text frames and binary audio frames.
143
+ * Attaches session lifecycle handlers to a native WebSocket using JSON text
144
+ * frames for control messages and raw PCM16 binary frames for audio.
152
145
  *
153
146
  * Connection flow:
154
- * 1. WebSocket opens → server sends `{ type: "config", ...ReadyConfig }`
155
- * 2. Client sets up audio → sends `{ type: "audio_ready" }`
156
- * 3. If reconnecting → client sends `{ type: "history", messages: [...] }`
147
+ * 1. WebSocket opens → server sends JSON CONFIG frame with sampleRate, ttsSampleRate, sessionId
148
+ * 2. Client sets up audio → sends JSON AUDIO_READY frame
149
+ * 3. If reconnecting → client sends JSON HISTORY frame with prior messages
157
150
  */
158
151
  export function wireSessionSocket(ws: SessionWebSocket, opts: WsSessionOptions): void {
159
152
  const { sessions, logger: log = consoleLogger } = opts;
@@ -161,10 +154,10 @@ export function wireSessionSocket(ws: SessionWebSocket, opts: WsSessionOptions):
161
154
  const sid = sessionId.slice(0, 8);
162
155
  const ctx = opts.logContext ?? {};
163
156
 
164
- let session: Session | null = null;
157
+ let session: SessionCore | null = null;
165
158
  /** Set to true once session.start() resolves. Messages arriving before
166
159
  * this flag is set are buffered and replayed once the session is ready,
167
- * preventing audio/text from being dispatched to a half-initialized session. */
160
+ * preventing audio/frames from being dispatched to a half-initialized session. */
168
161
  let sessionReady = false;
169
162
  let messageBuffer: { data: unknown }[] | null = [];
170
163
 
@@ -173,9 +166,8 @@ export function wireSessionSocket(ws: SessionWebSocket, opts: WsSessionOptions):
173
166
  const buf = messageBuffer;
174
167
  messageBuffer = null;
175
168
  for (const event of buf) {
176
- const { data } = event;
177
- if (handleBinaryAudio(data, session)) continue;
178
- handleTextMessage(data, session, log, ctx, sid);
169
+ if (handleBinaryAudio(event.data, session)) continue;
170
+ handleTextMessage(event.data, session, log, sid);
179
171
  }
180
172
  }
181
173
 
@@ -188,9 +180,17 @@ export function wireSessionSocket(ws: SessionWebSocket, opts: WsSessionOptions):
188
180
  sessions.set(sessionId, session);
189
181
  opts.onSinkCreated?.(sessionId, client);
190
182
 
191
- // Send config immediately — zero RTT. Include sessionId so the client
192
- // can reconnect with ?sessionId=<id> to resume a persisted session.
193
- ws.send(JSON.stringify({ type: "config", ...opts.readyConfig, sessionId }));
183
+ // Send config immediately — zero RTT. Include sessionId so the
184
+ // client can reconnect with ?sessionId=<id> to resume a persisted session.
185
+ ws.send(
186
+ JSON.stringify({
187
+ type: "config",
188
+ audioFormat: opts.readyConfig.audioFormat,
189
+ sampleRate: opts.readyConfig.sampleRate,
190
+ ttsSampleRate: opts.readyConfig.ttsSampleRate,
191
+ sessionId,
192
+ }),
193
+ );
194
194
 
195
195
  const timeoutMs = opts.sessionStartTimeoutMs ?? DEFAULT_SESSION_START_TIMEOUT_MS;
196
196
  const startWithTimeout = pTimeout(session.start(), {
@@ -222,17 +222,14 @@ export function wireSessionSocket(ws: SessionWebSocket, opts: WsSessionOptions):
222
222
  ws.addEventListener("message", (event) => {
223
223
  if (!session) return;
224
224
  // Buffer messages until session.start() completes to avoid dispatching
225
- // to a session whose S2S connection isn't established yet.
225
+ // to a session whose transport connection isn't established yet.
226
226
  if (!sessionReady) {
227
- if (messageBuffer && messageBuffer.length < MAX_MESSAGE_BUFFER_SIZE) {
227
+ if (messageBuffer && messageBuffer.length < MAX_MESSAGE_BUFFER_SIZE)
228
228
  messageBuffer.push(event);
229
- }
230
229
  return;
231
230
  }
232
- const { data } = event;
233
-
234
- if (handleBinaryAudio(data, session)) return;
235
- handleTextMessage(data, session, log, ctx, sid);
231
+ if (handleBinaryAudio(event.data, session)) return;
232
+ handleTextMessage(event.data, session, log, sid);
236
233
  });
237
234
 
238
235
  ws.addEventListener("close", () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@alexkroman1/aai",
3
- "version": "1.4.5",
3
+ "version": "1.5.1",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {
@@ -42,6 +42,7 @@
42
42
  "dependencies": {
43
43
  "@ai-sdk/anthropic": "^3.0.0",
44
44
  "@cartesia/cartesia-js": "^3.0.0",
45
+ "@deepgram/sdk": "^5.0.0",
45
46
  "ai": "^6.0.161",
46
47
  "assemblyai": "^4.30.0",
47
48
  "escape-html": "^1.0.3",
@@ -66,13 +66,13 @@ exports[`export surface stability > @alexkroman1/aai/protocol export 1`] = `
66
66
  exports[`export surface stability > @alexkroman1/aai/runtime export 1`] = `
67
67
  [
68
68
  "DEFAULT_S2S_CONFIG",
69
- "_buildBaseCtx",
70
69
  "_internals",
71
- "buildCtx",
72
70
  "consoleLogger",
71
+ "createPipelineTransport",
73
72
  "createRuntime",
74
- "createS2sSession",
73
+ "createS2sTransport",
75
74
  "createServer",
75
+ "createSessionCore",
76
76
  "createUnstorageKv",
77
77
  "executeInIsolate",
78
78
  "executeToolCall",
@@ -22,6 +22,7 @@ exports[`manifest schema shapes > ToolSchemaSchema shape 1`] = `
22
22
  "description",
23
23
  "name",
24
24
  "parameters",
25
+ "type",
25
26
  ]
26
27
  `;
27
28
 
@@ -128,6 +128,7 @@ export function toAgentConfig(src: AgentConfigSource): AgentConfig {
128
128
  * etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
129
129
  */
130
130
  export const ToolSchemaSchema = z.object({
131
+ type: z.literal("function"),
131
132
  name: z.string().min(1),
132
133
  description: z.string().min(1),
133
134
  parameters: z.record(z.string(), z.unknown()),
@@ -135,6 +136,7 @@ export const ToolSchemaSchema = z.object({
135
136
 
136
137
  /** Serialized tool schema — derived from {@link ToolSchemaSchema}. */
137
138
  export type ToolSchema = {
139
+ type: "function";
138
140
  name: string;
139
141
  description: string;
140
142
  parameters: JSONSchema7;
@@ -151,6 +153,7 @@ export const EMPTY_PARAMS = z.object({});
151
153
  */
152
154
  export function agentToolsToSchemas(tools: Readonly<Record<string, ToolDef>>): ToolSchema[] {
153
155
  return Object.entries(tools).map(([name, def]) => ({
156
+ type: "function",
154
157
  name,
155
158
  description: def.description,
156
159
  parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS) as JSONSchema7,
@@ -24,8 +24,16 @@ import {
24
24
  // ── Load fixtures ─────────────────────────────────────────────────────────
25
25
 
26
26
  const FIXTURE_DIR = join(import.meta.dirname, "compat-fixtures");
27
+ // Only load compat fixtures that have the expected schema-compat structure
28
+ // (ServerMessage, ClientMessage, KvRequest, constants). Wire-format fixtures
29
+ // like wire-v1.json use a different shape and are tested by wire.test.ts.
27
30
  const fixtureFiles = readdirSync(FIXTURE_DIR)
28
31
  .filter((f) => f.endsWith(".json"))
32
+ .filter((f) => {
33
+ const raw = readFileSync(join(FIXTURE_DIR, f), "utf-8");
34
+ const parsed = JSON.parse(raw) as Record<string, unknown>;
35
+ return "ServerMessage" in parsed && "ClientMessage" in parsed;
36
+ })
29
37
  .sort();
30
38
 
31
39
  type Fixture = {
package/sdk/protocol.ts CHANGED
@@ -141,16 +141,17 @@ export type ClientEvent = z.infer<typeof ClientEventSchema>;
141
141
  /**
142
142
  * Typed interface for pushing session events to a connected client.
143
143
  *
144
- * For WebSocket sessions this sends JSON text frames and binary audio frames.
144
+ * Events (`event`, `playAudioDone`) send JSON text frames. Audio chunks
145
+ * (`playAudioChunk`) send raw PCM16 binary frames.
145
146
  */
146
147
  export interface ClientSink {
147
- /** Whether the underlying connection is open and accepting calls. */
148
+ /** True when the underlying connection is open and will accept calls. */
148
149
  readonly open: boolean;
149
- /** Push a session event to the client. */
150
+ /** Push a session event (JSON text frame) to the client. */
150
151
  event(e: ClientEvent): void;
151
- /** Send a single TTS audio chunk to the client. */
152
+ /** Send a single PCM16 audio chunk (raw binary frame) to the client. */
152
153
  playAudioChunk(chunk: Uint8Array): void;
153
- /** Signal that TTS audio is complete. */
154
+ /** Signal that TTS audio is complete (JSON text frame). */
154
155
  playAudioDone(): void;
155
156
  }
156
157
 
@@ -0,0 +1,43 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ /**
3
+ * Deepgram Nova streaming STT factory — returns a pure descriptor.
4
+ *
5
+ * The descriptor flows through the bundle → server → runtime pipeline
6
+ * without importing the `@deepgram/sdk` package. The host-side resolver in
7
+ * `host/providers/resolve.ts` turns it into an openable {@link SttOpener}
8
+ * during `createRuntime`.
9
+ */
10
+
11
+ import type { SttProvider } from "../../providers.ts";
12
+
13
+ /** Kind tag recognised by the host-side resolver. */
14
+ export const DEEPGRAM_KIND = "deepgram" as const;
15
+
16
+ export interface DeepgramOptions {
17
+ /**
18
+ * Streaming speech model. Defaults to `"nova-3"`. Any string is forwarded
19
+ * to the SDK unchanged, which allows opt-in to future models.
20
+ */
21
+ model?: "nova-3" | "nova-2" | string;
22
+ /**
23
+ * BCP-47 language code for transcription. Defaults to `"en"`.
24
+ * Examples: `"en"`, `"es"`, `"fr"`, `"de"`.
25
+ */
26
+ language?: string;
27
+ }
28
+
29
+ export type DeepgramProvider = SttProvider & {
30
+ readonly kind: typeof DEEPGRAM_KIND;
31
+ readonly options: DeepgramOptions;
32
+ };
33
+
34
+ /**
35
+ * Build a Deepgram STT descriptor.
36
+ *
37
+ * The API key is resolved host-side from the agent's env
38
+ * (`DEEPGRAM_API_KEY`); there is no factory-time key parameter, so the
39
+ * descriptor stays free of secrets and safe to serialize.
40
+ */
41
+ export function deepgram(opts: DeepgramOptions = {}): DeepgramProvider {
42
+ return { kind: DEEPGRAM_KIND, options: { ...opts } };
43
+ }
@@ -10,3 +10,5 @@
10
10
  export type { SttError, SttEvents, SttOpenOptions, SttProvider, SttSession } from "../providers.ts";
11
11
  // biome-ignore lint/performance/noReExportAll: subpath barrel
12
12
  export * from "./stt/assemblyai.ts";
13
+ // biome-ignore lint/performance/noReExportAll: subpath barrel
14
+ export * from "./stt/deepgram.ts";
@@ -12,9 +12,16 @@ import type { TtsProvider } from "../../providers.ts";
12
12
 
13
13
  export const CARTESIA_KIND = "cartesia" as const;
14
14
 
15
+ /**
16
+ * Default voice used when callers invoke `cartesia()` with no `voice`. This
17
+ * is the same voice the example templates ship with, so a bare `cartesia()`
18
+ * works out of the box for new agents.
19
+ */
20
+ export const CARTESIA_DEFAULT_VOICE = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
21
+
15
22
  export interface CartesiaOptions {
16
- /** Cartesia voice ID. Required. */
17
- voice: string;
23
+ /** Cartesia voice ID. Defaults to {@link CARTESIA_DEFAULT_VOICE}. */
24
+ voice?: string;
18
25
  /** Model ID. Defaults to `"sonic-2"`. */
19
26
  model?: string;
20
27
  /** Spoken language hint. Defaults to `"en"`. */
@@ -23,9 +30,12 @@ export interface CartesiaOptions {
23
30
 
24
31
  export type CartesiaProvider = TtsProvider & {
25
32
  readonly kind: typeof CARTESIA_KIND;
26
- readonly options: CartesiaOptions;
33
+ readonly options: CartesiaOptions & { voice: string };
27
34
  };
28
35
 
29
- export function cartesia(opts: CartesiaOptions): CartesiaProvider {
30
- return { kind: CARTESIA_KIND, options: { ...opts } };
36
+ export function cartesia(opts: CartesiaOptions = {}): CartesiaProvider {
37
+ return {
38
+ kind: CARTESIA_KIND,
39
+ options: { ...opts, voice: opts.voice ?? CARTESIA_DEFAULT_VOICE },
40
+ };
31
41
  }
@@ -0,0 +1,52 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ /**
3
+ * Rime TTS factory — returns a pure descriptor.
4
+ *
5
+ * See `sdk/providers/stt/assemblyai.ts` for the descriptor/opener split;
6
+ * the host-side resolver in `host/providers/resolve.ts` turns this into an
7
+ * openable {@link TtsOpener} during `createRuntime` using the
8
+ * `RIME_API_KEY` from the agent's env.
9
+ *
10
+ * Language codes follow ISO 639-3 (three-letter): `"eng"`, `"fra"`, etc.
11
+ * This differs from many APIs that use ISO 639-1 two-letter codes like `"en"`.
12
+ */
13
+
14
+ import type { TtsProvider } from "../../providers.ts";
15
+
16
+ export const RIME_KIND = "rime" as const;
17
+
18
+ /**
19
+ * Default Rime speaker used when callers invoke `rime()` with no `voice`.
20
+ * `cove` is a `mistv2` speaker, matching the default model below — so a
21
+ * bare `rime()` works out of the box for new agents.
22
+ */
23
+ export const RIME_DEFAULT_VOICE = "cove";
24
+
25
+ export interface RimeOptions {
26
+ /** Rime speaker ID. Defaults to {@link RIME_DEFAULT_VOICE}. */
27
+ voice?: string;
28
+ /**
29
+ * Rime model ID. Defaults to `"mistv2"` (Rime's most compatible model).
30
+ * Common values: `"mistv2"`, `"arcana"`.
31
+ */
32
+ model?: "mistv2" | "arcana" | string;
33
+ /**
34
+ * Spoken language. Uses ISO 639-3 (three-letter codes).
35
+ * Defaults to `"eng"` (English).
36
+ *
37
+ * Note: Rime uses 3-letter codes — use `"eng"` not `"en"`.
38
+ */
39
+ language?: string;
40
+ }
41
+
42
+ export type RimeProvider = TtsProvider & {
43
+ readonly kind: typeof RIME_KIND;
44
+ readonly options: RimeOptions & { voice: string };
45
+ };
46
+
47
+ export function rime(opts: RimeOptions = {}): RimeProvider {
48
+ return {
49
+ kind: RIME_KIND,
50
+ options: { ...opts, voice: opts.voice ?? RIME_DEFAULT_VOICE },
51
+ };
52
+ }
@@ -10,3 +10,5 @@
10
10
  export type { TtsError, TtsEvents, TtsOpenOptions, TtsProvider, TtsSession } from "../providers.ts";
11
11
  // biome-ignore lint/performance/noReExportAll: subpath barrel
12
12
  export * from "./tts/cartesia.ts";
13
+ // biome-ignore lint/performance/noReExportAll: subpath barrel
14
+ export * from "./tts/rime.ts";
@@ -65,6 +65,7 @@ describe("AgentConfigSchema", () => {
65
65
  describe("ToolSchemaSchema", () => {
66
66
  test("accepts valid tool schema", () => {
67
67
  const valid = {
68
+ type: "function" as const,
68
69
  name: "get_weather",
69
70
  description: "Get weather",
70
71
  parameters: { type: "object", properties: { city: { type: "string" } } },
@@ -73,15 +74,25 @@ describe("ToolSchemaSchema", () => {
73
74
  });
74
75
 
75
76
  test("rejects empty name", () => {
76
- expect(ToolSchemaSchema.safeParse({ name: "", description: "d", parameters: {} }).success).toBe(
77
- false,
78
- );
77
+ expect(
78
+ ToolSchemaSchema.safeParse({
79
+ type: "function",
80
+ name: "",
81
+ description: "d",
82
+ parameters: {},
83
+ }).success,
84
+ ).toBe(false);
79
85
  });
80
86
 
81
87
  test("rejects empty description", () => {
82
- expect(ToolSchemaSchema.safeParse({ name: "n", description: "", parameters: {} }).success).toBe(
83
- false,
84
- );
88
+ expect(
89
+ ToolSchemaSchema.safeParse({
90
+ type: "function",
91
+ name: "n",
92
+ description: "",
93
+ parameters: {},
94
+ }).success,
95
+ ).toBe(false);
85
96
  });
86
97
 
87
98
  test("ToolSchema is assignable from schema inference", () => {
@@ -89,6 +100,7 @@ describe("ToolSchemaSchema", () => {
89
100
  // the runtime schema's Record<string, unknown>. Verify the direction:
90
101
  // a parsed result should be assignable to ToolSchema (narrow → wide).
91
102
  const parsed = ToolSchemaSchema.parse({
103
+ type: "function",
92
104
  name: "test",
93
105
  description: "test",
94
106
  parameters: { type: "object" },
@@ -1,18 +0,0 @@
1
- //#region sdk/providers/stt/assemblyai.ts
2
- /** Kind tag recognised by the host-side resolver. */
3
- const ASSEMBLYAI_KIND = "assemblyai";
4
- /**
5
- * Build an AssemblyAI STT descriptor.
6
- *
7
- * The API key is resolved host-side from the agent's env
8
- * (`ASSEMBLYAI_API_KEY`); there is no factory-time key parameter, so the
9
- * descriptor stays free of secrets and safe to serialize.
10
- */
11
- function assemblyAI(opts = {}) {
12
- return {
13
- kind: ASSEMBLYAI_KIND,
14
- options: { ...opts }
15
- };
16
- }
17
- //#endregion
18
- export { assemblyAI as n, ASSEMBLYAI_KIND as t };
@@ -1,10 +0,0 @@
1
- //#region sdk/providers/tts/cartesia.ts
2
- const CARTESIA_KIND = "cartesia";
3
- function cartesia(opts) {
4
- return {
5
- kind: CARTESIA_KIND,
6
- options: { ...opts }
7
- };
8
- }
9
- //#endregion
10
- export { cartesia as n, CARTESIA_KIND as t };
@@ -1,24 +0,0 @@
1
- /** Pipeline session context — base ctx + STT/TTS session slots. */
2
- import type { AgentConfig, ExecuteTool } from "../sdk/_internal-types.ts";
3
- import type { ClientSink } from "../sdk/protocol.ts";
4
- import type { SttSession, TtsSession } from "../sdk/providers.ts";
5
- import type { Logger } from "./runtime-config.ts";
6
- import { type BaseSessionCtx } from "./session-ctx.ts";
7
- /**
8
- * Pipeline session context — {@link BaseSessionCtx} plus STT/TTS provider
9
- * session handles. Replaces the S2S `s2s` field with decoupled `stt` + `tts`
10
- * slots so the pipeline orchestrator can drive independent providers.
11
- */
12
- export type PipelineSessionCtx = BaseSessionCtx & {
13
- stt: SttSession | null;
14
- tts: TtsSession | null;
15
- };
16
- export declare function buildPipelineCtx(opts: {
17
- id: string;
18
- agent: string;
19
- client: ClientSink;
20
- agentConfig: AgentConfig;
21
- executeTool: ExecuteTool;
22
- log: Logger;
23
- maxHistory?: number | undefined;
24
- }): PipelineSessionCtx;
@@ -1,52 +0,0 @@
1
- /**
2
- * Pipeline session — pluggable STT → LLM → TTS orchestrator.
3
- *
4
- * Alternative to the S2S session (see `session.ts`) that drives three
5
- * independent providers. A new partial STT event while the agent is replying
6
- * triggers barge-in (aborts the LLM stream and cancels TTS).
7
- */
8
- import type { LanguageModel } from "ai";
9
- import type { AgentConfig, ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
10
- import type { ClientSink } from "../sdk/protocol.ts";
11
- import type { SttOpener, TtsOpener } from "../sdk/providers.ts";
12
- import { type Logger } from "./runtime-config.ts";
13
- import type { Session } from "./session.ts";
14
- /** Configuration options for {@link createPipelineSession}. */
15
- export interface PipelineSessionOptions {
16
- /** Unique session identifier. */
17
- id: string;
18
- /** Agent slug. */
19
- agent: string;
20
- /** Sink for wire events + audio back to the browser client. */
21
- client: ClientSink;
22
- /** Serializable agent config (name, system prompt, maxSteps, etc.). */
23
- agentConfig: AgentConfig;
24
- /** JSON Schema definitions for the agent's tools. */
25
- toolSchemas: readonly ToolSchema[];
26
- /** Optional natural-language guidance appended to the system prompt. */
27
- toolGuidance?: readonly string[] | undefined;
28
- /** Function to invoke tools by name. */
29
- executeTool: ExecuteTool;
30
- /** STT opener (resolved from an {@link SttProvider} descriptor). */
31
- stt: SttOpener;
32
- /** LLM provider (Vercel AI SDK `LanguageModel`). */
33
- llm: LanguageModel;
34
- /** TTS opener (resolved from a {@link TtsProvider} descriptor). */
35
- tts: TtsOpener;
36
- /** STT API key. */
37
- sttApiKey: string;
38
- /** TTS API key. */
39
- ttsApiKey: string;
40
- /** STT audio sample rate (PCM16, Hz). Defaults to {@link DEFAULT_STT_SAMPLE_RATE}. */
41
- sttSampleRate?: number | undefined;
42
- /** TTS audio sample rate (PCM16, Hz). Must match the client's playback AudioContext rate. Defaults to {@link DEFAULT_TTS_SAMPLE_RATE}. */
43
- ttsSampleRate?: number | undefined;
44
- /** Skip the initial greeting audio on connect (used for session resume). */
45
- skipGreeting?: boolean | undefined;
46
- /** Logger. Defaults to the console logger. */
47
- logger?: Logger | undefined;
48
- /** Sliding-window conversation history size. */
49
- maxHistory?: number | undefined;
50
- }
51
- /** Create a pluggable-provider voice session. */
52
- export declare function createPipelineSession(opts: PipelineSessionOptions): Session;