@alexkroman1/aai 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/.turbo/turbo-build.log +10 -10
  2. package/CHANGELOG.md +19 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/assemblyai-C969QGi4.js +35 -0
  5. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  6. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  7. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  8. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  9. package/dist/host/providers/tts/rime.d.ts +44 -0
  10. package/dist/host/runtime-barrel.d.ts +4 -2
  11. package/dist/host/runtime-barrel.js +1434 -1209
  12. package/dist/host/runtime.d.ts +2 -2
  13. package/dist/host/s2s.d.ts +16 -16
  14. package/dist/host/session-core.d.ts +37 -0
  15. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  16. package/dist/host/transports/s2s-transport.d.ts +19 -0
  17. package/dist/host/transports/types.d.ts +45 -0
  18. package/dist/host/ws-handler.d.ts +14 -10
  19. package/dist/sdk/_internal-types.d.ts +2 -0
  20. package/dist/sdk/manifest-barrel.js +1 -1
  21. package/dist/sdk/protocol.d.ts +6 -5
  22. package/dist/sdk/providers/llm-barrel.js +1 -1
  23. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  24. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  25. package/dist/sdk/providers/stt-barrel.js +2 -2
  26. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  27. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -2
  30. package/host/_pipeline-test-fakes.ts +6 -3
  31. package/host/_test-utils.ts +209 -128
  32. package/host/builtin-tools.ts +1 -0
  33. package/host/cleanup.test.ts +25 -298
  34. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  35. package/host/providers/resolve.ts +10 -2
  36. package/host/providers/stt/deepgram.test.ts +229 -0
  37. package/host/providers/stt/deepgram.ts +172 -0
  38. package/host/providers/tts/cartesia.ts +7 -3
  39. package/host/providers/tts/rime.test.ts +251 -0
  40. package/host/providers/tts/rime.ts +322 -0
  41. package/host/runtime-barrel.ts +4 -2
  42. package/host/runtime.test.ts +16 -47
  43. package/host/runtime.ts +131 -23
  44. package/host/s2s.test.ts +122 -131
  45. package/host/s2s.ts +44 -52
  46. package/host/session-core.test.ts +257 -0
  47. package/host/session-core.ts +262 -0
  48. package/host/to-vercel-tools.test.ts +9 -1
  49. package/host/transports/pipeline-transport.test.ts +653 -0
  50. package/host/transports/pipeline-transport.ts +532 -0
  51. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  52. package/host/transports/s2s-transport.test.ts +56 -0
  53. package/host/transports/s2s-transport.ts +116 -0
  54. package/host/transports/types.test.ts +22 -0
  55. package/host/transports/types.ts +51 -0
  56. package/host/ws-handler.test.ts +324 -242
  57. package/host/ws-handler.ts +56 -59
  58. package/package.json +2 -1
  59. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  60. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  61. package/sdk/_internal-types.ts +3 -0
  62. package/sdk/protocol-compat.test.ts +8 -0
  63. package/sdk/protocol.ts +6 -5
  64. package/sdk/providers/stt/deepgram.ts +43 -0
  65. package/sdk/providers/stt-barrel.ts +2 -0
  66. package/sdk/providers/tts/cartesia.ts +15 -5
  67. package/sdk/providers/tts/rime.ts +52 -0
  68. package/sdk/providers/tts-barrel.ts +2 -0
  69. package/sdk/schema-alignment.test.ts +18 -6
  70. package/dist/assemblyai-Cxg9eobY.js +0 -18
  71. package/dist/cartesia-DwDk2tEu.js +0 -10
  72. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  73. package/dist/host/pipeline-session.d.ts +0 -52
  74. package/dist/host/session-ctx.d.ts +0 -73
  75. package/dist/host/session.d.ts +0 -62
  76. package/host/pipeline-session-ctx.test.ts +0 -31
  77. package/host/pipeline-session-ctx.ts +0 -36
  78. package/host/pipeline-session.test.ts +0 -672
  79. package/host/pipeline-session.ts +0 -533
  80. package/host/s2s-fixtures.test.ts +0 -237
  81. package/host/session-ctx.test.ts +0 -387
  82. package/host/session-ctx.ts +0 -134
  83. package/host/session-fixture-replay.test.ts +0 -128
  84. package/host/session.test.ts +0 -634
  85. package/host/session.ts +0 -412
  86. package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
package/host/session.ts DELETED
@@ -1,412 +0,0 @@
1
- // Copyright 2025 the AAI authors. MIT license.
2
- /** S2S session — relays audio between client and AssemblyAI S2S API. */
3
-
4
- import type { AgentConfig, ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
5
- import { DEFAULT_IDLE_TIMEOUT_MS, MAX_TOOL_RESULT_CHARS } from "../sdk/constants.ts";
6
- import type { ClientEvent, ClientSink } from "../sdk/protocol.ts";
7
- import { buildSystemPrompt } from "../sdk/system-prompt.ts";
8
- import { errorDetail, errorMessage, toolError } from "../sdk/utils.ts";
9
- import type { Logger, S2SConfig } from "./runtime-config.ts";
10
- import { consoleLogger } from "./runtime-config.ts";
11
- import {
12
- type CreateS2sWebSocket,
13
- connectS2s,
14
- defaultCreateS2sWebSocket,
15
- type S2sHandle,
16
- type S2sToolSchema,
17
- } from "./s2s.ts";
18
- import { buildCtx, type S2sSessionCtx } from "./session-ctx.ts";
19
-
/**
 * Handle for one client's Speech-to-Speech voice session.
 *
 * Instances are produced by {@link createS2sSession}. A session owns one S2S
 * WebSocket connection and relays audio between the browser client and
 * AssemblyAI.
 *
 * @internal Exported for use by `ws-handler.ts`, `server.ts`, and `runtime.ts`.
 */
export type Session = {
  /** Open the S2S connection and wire up its listeners. */
  start(): Promise<void>;
  /** Tear the session down; resolves once any in-flight turn has settled. */
  stop(): Promise<void>;
  /** Feed one chunk of client audio into the session. */
  onAudio(data: Uint8Array): void;
  /** Client signalled that its audio pipeline is ready. */
  onAudioReady(): void;
  /** Client asked to cancel the current reply. */
  onCancel(): void;
  /** Client asked to reset the conversation. */
  onReset(): void;
  /** Client supplied prior conversation messages. */
  onHistory(incoming: ReadonlyArray<{ role: "user" | "assistant"; content: string }>): void;
  /** Promise that settles when the currently tracked turn (if any) finishes. */
  waitForTurn(): Promise<void>;
};
38
-
/** Everything {@link createS2sSession} needs to build a new S2S voice session. */
export type S2sSessionOptions = {
  /** Session identifier; unique per session (KV scoping, logging). */
  id: string;
  /** Slug of the deployed agent this session belongs to. */
  agent: string;
  /** Outbound channel for events and audio going to the browser client. */
  client: ClientSink;
  /** Serializable agent configuration (name, system prompt, greeting, maxSteps, ...). */
  agentConfig: AgentConfig;
  /** JSON Schema descriptions of the agent's custom tools. */
  toolSchemas: ReadonlyArray<ToolSchema>;
  /** Extra natural-language tool-usage hints appended to the system prompt. */
  toolGuidance?: ReadonlyArray<string>;
  /** AssemblyAI API key. Host-side only; never forwarded to the guest sandbox. */
  apiKey: string;
  /** S2S connection settings (sample rates, model selection). */
  s2sConfig: S2SConfig;
  /** Tool dispatcher, invoked by tool name (direct executor or sandbox RPC). */
  executeTool: ExecuteTool;
  /** WebSocket factory override, intended for tests. */
  createWebSocket?: CreateS2sWebSocket;
  /** Agent environment variables (secrets), forwarded to the tool context. */
  env?: Record<string, string | undefined>;
  /** When true the initial greeting is suppressed (used for session resume). */
  skipGreeting?: boolean;
  /** Logger to use; `consoleLogger` when omitted. */
  logger?: Logger;
  /** Cap on retained conversation messages. Defaults to DEFAULT_MAX_HISTORY (200). */
  maxHistory?: number;
};
70
-
/**
 * Indirection over `connectS2s`; the session calls it through this object.
 *
 * @internal Not part of the public API. Exposed for testing only.
 */
export const _internals = { connectS2s: connectS2s };
73
-
/** Restartable inactivity timer: `reset()` (re)arms it, `clear()` disarms it. */
type IdleTimer = { reset(): void; clear(): void };

/**
 * Create an idle timer that closes the S2S connection after inactivity.
 *
 * Convention: a `timeoutMs` that is `<= 0`, `NaN`, or non-finite disables the
 * timer entirely (a no-op timer is returned). This lets agents opt out of the
 * idle timeout via `idleTimeoutMs: 0` in their config, and keeps an invalid
 * value from turning into an immediate disconnect.
 *
 * When the timer fires it logs, emits an `idle_timeout` event to the client,
 * and closes `opts.ctx.s2s` if one is currently set.
 *
 * @param opts.timeoutMs Inactivity window in milliseconds.
 * @param opts.agent Agent slug, included in the timeout log entry.
 * @param opts.log Logger used for the timeout log entry.
 * @param opts.client Sink that receives the `idle_timeout` event.
 * @param opts.ctx Holder whose current `s2s` connection is closed on timeout.
 * @returns A timer handle exposing `reset()` and `clear()`.
 */
function createIdleTimer(opts: {
  timeoutMs: number;
  agent: string;
  log: Logger;
  client: ClientSink;
  ctx: { s2s: { close(): void } | null };
}): IdleTimer {
  // `!(x > 0)` is also true for NaN, which a plain `x <= 0` test lets through.
  const disabled = !(opts.timeoutMs > 0) || !Number.isFinite(opts.timeoutMs);
  // biome-ignore lint/suspicious/noEmptyBlockStatements: intentional no-op timer
  if (disabled) return { reset() {}, clear() {} };

  // setTimeout stores the delay as a signed 32-bit integer; anything larger
  // overflows and fires after ~1 ms, so clamp to the largest safe delay.
  const maxDelayMs = 2 ** 31 - 1;
  const delayMs = Math.min(opts.timeoutMs, maxDelayMs);

  let timer: ReturnType<typeof setTimeout> | null = null;

  const clear = (): void => {
    if (timer !== null) {
      clearTimeout(timer);
      timer = null;
    }
  };

  return {
    reset() {
      clear();
      timer = setTimeout(() => {
        // The handle is spent once the callback runs; drop it.
        timer = null;
        opts.log.info("S2S idle timeout", { timeoutMs: opts.timeoutMs, agent: opts.agent });
        opts.client.event({ type: "idle_timeout" });
        opts.ctx.s2s?.close();
      }, delayMs);
    },
    clear,
  };
}
108
-
109
- // ─── Session event handlers ─────────────────────────────────────────────────
110
-
/**
 * Finalize a tool call.
 *
 * The client is always told the call is done, receiving the result capped at
 * `MAX_TOOL_RESULT_CHARS`. The result is additionally queued on
 * `ctx.reply.pendingTools`, but only when the reply that started the call is
 * still the active one. With a positive `ctx.maxHistory`, the oldest queued
 * entry is dropped once the queue grows past that size.
 *
 * NOTE(review): the queued entry carries the untruncated `result`, not the
 * capped text sent to the client — confirm this asymmetry is intended.
 *
 * @param ctx Session context.
 * @param callId Identifier of the tool call being completed.
 * @param result Raw tool output.
 * @param replyId Reply that was active when the call started, or `null`.
 */
function finishToolCall(
  ctx: S2sSessionCtx,
  callId: string,
  result: string,
  replyId: string | null,
): void {
  let clientResult = result;
  if (clientResult.length > MAX_TOOL_RESULT_CHARS) {
    clientResult = clientResult.slice(0, MAX_TOOL_RESULT_CHARS);
  }
  ctx.client.event({ type: "tool_call_done", toolCallId: callId, result: clientResult });

  const replyStillActive = replyId !== null && replyId === ctx.reply.currentReplyId;
  if (!replyStillActive) return;

  ctx.reply.pendingTools.push({ callId, result });
  const overCapacity = ctx.maxHistory > 0 && ctx.reply.pendingTools.length > ctx.maxHistory;
  if (overCapacity) {
    ctx.reply.pendingTools.shift();
  }
}
132
-
/**
 * Process a `tool_call` event coming from S2S.
 *
 * The event is forwarded to the client first. If `ctx.consumeToolCallStep`
 * returns a refusal, that text becomes the result and the tool is not run.
 * Otherwise the tool is executed; a thrown error is logged and converted to a
 * `toolError` result. Every path ends in `finishToolCall`.
 *
 * @param ctx Session context.
 * @param event The `tool_call` event to handle.
 */
async function handleToolCall(
  ctx: S2sSessionCtx,
  event: Extract<ClientEvent, { type: "tool_call" }>,
): Promise<void> {
  const callId = event.toolCallId;
  const toolName = event.toolName;
  const toolArgs = event.args;
  // Captured up front so a reply change during execution can be detected later.
  const replyId = ctx.reply.currentReplyId;

  ctx.client.event(event);

  const refusal = ctx.consumeToolCallStep(toolName, replyId);
  if (refusal !== null) {
    finishToolCall(ctx, callId, refusal, replyId);
    return;
  }

  ctx.log.info("S2S tool call", { tool: toolName, callId, args: toolArgs, agent: ctx.agent });

  let output: string;
  try {
    output = await ctx.executeTool(toolName, toolArgs, ctx.id, ctx.conversationMessages);
  } catch (failure: unknown) {
    const failureText = errorMessage(failure);
    ctx.log.error("Tool execution failed", { tool: toolName, error: errorDetail(failure) });
    output = toolError(failureText);
  }

  ctx.log.info("S2S tool result", { tool: toolName, callId, resultLength: output.length });
  finishToolCall(ctx, callId, output, replyId);
}
162
-
/** Log a user transcript, forward it to the client, and append it to the history. */
function handleUserTranscript(ctx: S2sSessionCtx, text: string): void {
  const { log, client } = ctx;
  log.info("S2S user transcript", { text });
  client.event({ type: "user_transcript", text });
  ctx.pushMessages({ role: "user", content: text });
}
168
-
/**
 * Forward an agent transcript to the client. The text is appended to the
 * history only when the reply was not interrupted.
 */
function handleAgentTranscript(ctx: S2sSessionCtx, text: string, interrupted: boolean): void {
  ctx.client.event({ type: "agent_transcript", text });
  if (interrupted) return;
  ctx.pushMessages({ role: "assistant", content: text });
}
175
-
/** React to an S2S barge-in: log it, cancel the current reply, and notify the client. */
function handleReplyCancelled(ctx: S2sSessionCtx): void {
  const { log, client } = ctx;
  log.info("S2S reply interrupted (barge-in)");
  ctx.cancelReply();
  client.event({ type: "cancelled" });
}
181
-
/**
 * Warn when the entry-to-emit time for a reply_done dispatch exceeds this.
 * Tool-less sessions should be sub-millisecond; sessions with pending tools
 * will legitimately spend time awaiting ctx.turnPromise. We log both (with
 * `hadTurnPromise`) so event-loop starvation is distinguishable from
 * genuine tool-call latency.
 */
const REPLY_DONE_SLOW_THRESHOLD_MS = 50;

/**
 * Handle a `reply_done` event from S2S.
 *
 * - If no reply is active the event is treated as a duplicate and dropped.
 * - If tool results are queued for the reply, they are sent back to S2S and
 *   the queue is emptied; the reply stays active.
 * - Otherwise the turn is finished: the client gets `playAudioDone()` and a
 *   `reply_done` event, and the active reply id is cleared.
 *
 * When a turn promise exists at entry, the dispatch is deferred until it
 * settles. If the active reply changed in the meantime, the queued results are
 * discarded instead of being sent.
 *
 * @param ctx Session context.
 */
function handleReplyDone(ctx: S2sSessionCtx): void {
  const startMs = Date.now();
  const doneReplyId = ctx.reply.currentReplyId;
  // Dedup duplicate reply.done events from the S2S service: once the reply
  // has been fully dispatched (or was never started), currentReplyId is null.
  if (doneReplyId === null) {
    ctx.log.debug("Dropping duplicate reply.done (no active reply)");
    return;
  }
  const hadTurnPromise = ctx.turnPromise !== null;
  const sendPending = (): void => {
    // A different reply took over while we waited; these results are stale.
    if (ctx.reply.currentReplyId !== doneReplyId) {
      ctx.reply.pendingTools = [];
      return;
    }
    if (ctx.reply.pendingTools.length > 0) {
      for (const tool of ctx.reply.pendingTools) ctx.s2s?.sendToolResult(tool.callId, tool.result);
      ctx.reply.pendingTools = [];
      return;
    }
    const stepsUsed = ctx.reply.toolCallCount;
    if (stepsUsed > 0) {
      ctx.log.info("Turn complete", { steps: stepsUsed, agent: ctx.agent });
    }
    ctx.client.playAudioDone();
    ctx.client.event({ type: "reply_done" });
    // Mark reply as finished so any repeated reply.done is dropped above.
    ctx.reply.currentReplyId = null;
    const durationMs = Date.now() - startMs;
    if (durationMs >= REPLY_DONE_SLOW_THRESHOLD_MS) {
      ctx.log.warn("slow reply_done dispatch", {
        sid: ctx.id,
        agent: ctx.agent,
        durationMs,
        hadTurnPromise,
      });
    }
  };
  if (hadTurnPromise) {
    // Fire-and-forget, but never let a failure surface as an unhandled
    // rejection: log it with enough context to trace the session.
    void ctx.turnPromise?.then(sendPending).catch((err: unknown) => {
      ctx.log.error("reply_done dispatch failed", {
        sid: ctx.id,
        agent: ctx.agent,
        error: errorMessage(err),
      });
    });
  } else {
    sendPending();
  }
}
235
-
/**
 * Attach all session listeners to a freshly connected S2S handle.
 *
 * Wires lifecycle events (`ready`, `replyStarted`, `sessionExpired`, `error`,
 * `close`), audio forwarding to the client, and dispatch of protocol events to
 * the per-event handlers. Event types without a dedicated handler are
 * forwarded to the client unchanged.
 *
 * The `close` listener only clears session state when the closing handle is
 * still the session's current connection (or no connection is set). A late
 * close from a handle that has already been replaced — e.g. the old socket
 * after a reset-triggered reconnect — must not null out the new connection or
 * cancel its reply.
 *
 * @param ctx Session context whose state the listeners update.
 * @param handle The S2S connection to listen on.
 */
function setupListeners(ctx: S2sSessionCtx, handle: S2sHandle): void {
  handle.on("ready", ({ sessionId }) => ctx.log.info("S2S session ready", { sessionId }));
  handle.on("replyStarted", ({ replyId }) => {
    ctx.beginReply(replyId);
  });
  handle.on("sessionExpired", () => {
    ctx.log.info("S2S session expired");
    handle.close();
  });
  handle.on("audio", ({ audio }) => ctx.client.playAudioChunk(audio));
  handle.on("error", (err) => {
    ctx.log.error("S2S error", { message: err.message });
    ctx.client.event({ type: "error", code: "internal", message: err.message });
    handle.close();
  });
  handle.on("close", (code, reason) => {
    // Superseded connection: the session already points at a newer handle.
    if (ctx.s2s !== null && ctx.s2s !== handle) {
      ctx.log.info("S2S closed", { code, reason, stale: true });
      return;
    }
    const activeReplyId = ctx.reply.currentReplyId;
    if (activeReplyId !== null) {
      // Silent drop — S2S socket closed while the server was still owed a
      // reply. Client stays in waitingForReply=true until a session timeout.
      ctx.log.warn("S2S closed with active reply", {
        sid: ctx.id,
        agent: ctx.agent,
        activeReplyId,
        code,
        reason,
      });
    } else {
      ctx.log.info("S2S closed", { code, reason });
    }
    ctx.s2s = null;
    ctx.cancelReply();
  });

  handle.on("event", (event) => {
    switch (event.type) {
      case "user_transcript":
        handleUserTranscript(ctx, event.text);
        break;
      case "agent_transcript":
        handleAgentTranscript(ctx, event.text, event._interrupted ?? false);
        break;
      case "tool_call": {
        // Tool calls run asynchronously; chain them onto the turn so
        // reply_done can wait for them.
        const p = handleToolCall(ctx, event).catch((err: unknown) => {
          ctx.log.error("Tool call handler failed", { err: errorMessage(err) });
        });
        ctx.chainTurn(p);
        break;
      }
      case "reply_done":
        handleReplyDone(ctx);
        break;
      case "cancelled":
        handleReplyCancelled(ctx);
        break;
      default:
        ctx.client.event(event);
    }
  });
}
296
-
297
- // ─── Main session factory ────────────────────────────────────────────────────
298
-
/**
 * Build a {@link Session} that relays audio between one client and the S2S API.
 *
 * The factory prepares the system prompt and tool schemas, creates the session
 * context and idle timer, and returns the session handle. No connection is
 * opened until `start()` is called.
 *
 * Connection failures are not thrown: they are logged and reported to the
 * client as an `error` event.
 *
 * @param opts Session configuration; see {@link S2sSessionOptions}.
 * @returns The session handle.
 */
export function createS2sSession(opts: S2sSessionOptions): Session {
  const {
    id,
    agent,
    client,
    toolSchemas,
    apiKey,
    s2sConfig,
    executeTool,
    createWebSocket = defaultCreateS2sWebSocket,
    logger: log = consoleLogger,
  } = opts;
  // Resumed sessions suppress the greeting by blanking it in a config copy.
  const agentConfig = opts.skipGreeting ? { ...opts.agentConfig, greeting: "" } : opts.agentConfig;
  const hasTools = toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0;
  const systemPrompt = buildSystemPrompt(agentConfig, {
    hasTools,
    voice: true,
    toolGuidance: opts.toolGuidance,
  });
  const s2sTools: S2sToolSchema[] = toolSchemas.map((ts) => ({
    type: "function" as const,
    name: ts.name,
    description: ts.description,
    parameters: ts.parameters,
  }));

  const sessionAbort = new AbortController();
  const ctx = buildCtx({
    id,
    agent,
    client,
    agentConfig,
    executeTool,
    log,
    maxHistory: opts.maxHistory,
  });

  // 0 or a non-finite value disables the idle timer.
  const rawTimeout = agentConfig.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS;
  const idleMs = rawTimeout === 0 || !Number.isFinite(rawTimeout) ? 0 : rawTimeout;
  const idle = createIdleTimer({ timeoutMs: idleMs, agent, log, client, ctx });

  // Bumped on every connect attempt so a slow, superseded attempt can tell it
  // is no longer the latest and discard its handle.
  let connectGeneration = 0;
  const sessionUpdatePayload = {
    systemPrompt,
    tools: s2sTools,
    ...(agentConfig.greeting ? { greeting: agentConfig.greeting } : {}),
  };

  /** Connect to S2S, attach listeners, and push the session configuration. */
  async function connectAndSetup(): Promise<void> {
    const generation = ++connectGeneration;
    try {
      const handle = await _internals.connectS2s({
        apiKey,
        config: s2sConfig,
        createWebSocket,
        logger: log,
        sid: id,
      });
      // The session was stopped or a newer connect started while we awaited.
      if (sessionAbort.signal.aborted || generation !== connectGeneration) {
        handle.close();
        return;
      }
      setupListeners(ctx, handle);
      handle.updateSession(sessionUpdatePayload);
      ctx.s2s = handle;
      idle.reset();
    } catch (err: unknown) {
      const msg = errorMessage(err);
      log.error("S2S connect failed", { error: errorDetail(err) });
      client.event({ type: "error", code: "internal", message: msg });
    }
  }

  return {
    async start(): Promise<void> {
      await connectAndSetup();
    },
    async stop(): Promise<void> {
      if (sessionAbort.signal.aborted) return;
      sessionAbort.abort();
      idle.clear();
      // Let any in-flight turn settle before closing the connection.
      if (ctx.turnPromise !== null) await ctx.turnPromise;
      ctx.s2s?.close();
    },
    onAudio(data: Uint8Array): void {
      // Do not re-arm the idle timer once stop() has begun: a late audio frame
      // would otherwise leave a live timer that fires `idle_timeout` on a
      // session that is already shut down.
      if (!sessionAbort.signal.aborted) idle.reset();
      ctx.s2s?.sendAudio(data);
    },
    onAudioReady(): void {
      /* S2S greeting comes automatically */
    },
    onCancel(): void {
      client.event({ type: "cancelled" });
    },
    onReset(): void {
      ctx.cancelReply();
      ctx.conversationMessages = [];
      ctx.reply.toolCallCount = 0;
      ctx.turnPromise = null;
      idle.clear();
      ctx.s2s?.close();
      client.event({ type: "reset" });
      connectAndSetup().catch((err: unknown) =>
        log.error("S2S reset reconnect failed", { error: errorMessage(err) }),
      );
    },
    onHistory(incoming: readonly { role: "user" | "assistant"; content: string }[]): void {
      ctx.pushMessages(...incoming.map((m) => ({ role: m.role, content: m.content })));
    },
    waitForTurn(): Promise<void> {
      return ctx.turnPromise ?? Promise.resolve();
    },
  };
}