@alexkroman1/aai 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/.turbo/turbo-build.log +10 -10
  2. package/CHANGELOG.md +19 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/assemblyai-C969QGi4.js +35 -0
  5. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  6. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  7. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  8. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  9. package/dist/host/providers/tts/rime.d.ts +44 -0
  10. package/dist/host/runtime-barrel.d.ts +4 -2
  11. package/dist/host/runtime-barrel.js +1434 -1209
  12. package/dist/host/runtime.d.ts +2 -2
  13. package/dist/host/s2s.d.ts +16 -16
  14. package/dist/host/session-core.d.ts +37 -0
  15. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  16. package/dist/host/transports/s2s-transport.d.ts +19 -0
  17. package/dist/host/transports/types.d.ts +45 -0
  18. package/dist/host/ws-handler.d.ts +14 -10
  19. package/dist/sdk/_internal-types.d.ts +2 -0
  20. package/dist/sdk/manifest-barrel.js +1 -1
  21. package/dist/sdk/protocol.d.ts +6 -5
  22. package/dist/sdk/providers/llm-barrel.js +1 -1
  23. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  24. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  25. package/dist/sdk/providers/stt-barrel.js +2 -2
  26. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  27. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -2
  30. package/host/_pipeline-test-fakes.ts +6 -3
  31. package/host/_test-utils.ts +209 -128
  32. package/host/builtin-tools.ts +1 -0
  33. package/host/cleanup.test.ts +25 -298
  34. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  35. package/host/providers/resolve.ts +10 -2
  36. package/host/providers/stt/deepgram.test.ts +229 -0
  37. package/host/providers/stt/deepgram.ts +172 -0
  38. package/host/providers/tts/cartesia.ts +7 -3
  39. package/host/providers/tts/rime.test.ts +251 -0
  40. package/host/providers/tts/rime.ts +322 -0
  41. package/host/runtime-barrel.ts +4 -2
  42. package/host/runtime.test.ts +16 -47
  43. package/host/runtime.ts +131 -23
  44. package/host/s2s.test.ts +122 -131
  45. package/host/s2s.ts +44 -52
  46. package/host/session-core.test.ts +257 -0
  47. package/host/session-core.ts +262 -0
  48. package/host/to-vercel-tools.test.ts +9 -1
  49. package/host/transports/pipeline-transport.test.ts +653 -0
  50. package/host/transports/pipeline-transport.ts +532 -0
  51. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  52. package/host/transports/s2s-transport.test.ts +56 -0
  53. package/host/transports/s2s-transport.ts +116 -0
  54. package/host/transports/types.test.ts +22 -0
  55. package/host/transports/types.ts +51 -0
  56. package/host/ws-handler.test.ts +324 -242
  57. package/host/ws-handler.ts +56 -59
  58. package/package.json +2 -1
  59. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  60. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  61. package/sdk/_internal-types.ts +3 -0
  62. package/sdk/protocol-compat.test.ts +8 -0
  63. package/sdk/protocol.ts +6 -5
  64. package/sdk/providers/stt/deepgram.ts +43 -0
  65. package/sdk/providers/stt-barrel.ts +2 -0
  66. package/sdk/providers/tts/cartesia.ts +15 -5
  67. package/sdk/providers/tts/rime.ts +52 -0
  68. package/sdk/providers/tts-barrel.ts +2 -0
  69. package/sdk/schema-alignment.test.ts +18 -6
  70. package/dist/assemblyai-Cxg9eobY.js +0 -18
  71. package/dist/cartesia-DwDk2tEu.js +0 -10
  72. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  73. package/dist/host/pipeline-session.d.ts +0 -52
  74. package/dist/host/session-ctx.d.ts +0 -73
  75. package/dist/host/session.d.ts +0 -62
  76. package/host/pipeline-session-ctx.test.ts +0 -31
  77. package/host/pipeline-session-ctx.ts +0 -36
  78. package/host/pipeline-session.test.ts +0 -672
  79. package/host/pipeline-session.ts +0 -533
  80. package/host/s2s-fixtures.test.ts +0 -237
  81. package/host/session-ctx.test.ts +0 -387
  82. package/host/session-ctx.ts +0 -134
  83. package/host/session-fixture-replay.test.ts +0 -128
  84. package/host/session.test.ts +0 -634
  85. package/host/session.ts +0 -412
  86. package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
package/host/runtime.ts CHANGED
@@ -15,6 +15,8 @@ import { DEFAULT_SHUTDOWN_TIMEOUT_MS } from "../sdk/constants.ts";
15
15
  import type { Kv } from "../sdk/kv.ts";
16
16
  import type { ClientSink } from "../sdk/protocol.ts";
17
17
  import { buildReadyConfig, type ReadyConfig } from "../sdk/protocol.ts";
18
+ import { DEEPGRAM_KIND } from "../sdk/providers/stt/deepgram.ts";
19
+ import { RIME_KIND } from "../sdk/providers/tts/rime.ts";
18
20
  import {
19
21
  assertProviderTriple,
20
22
  type LlmProvider,
@@ -23,19 +25,68 @@ import {
23
25
  type TtsOpener,
24
26
  type TtsProvider,
25
27
  } from "../sdk/providers.ts";
28
+ import { buildSystemPrompt } from "../sdk/system-prompt.ts";
26
29
  import type { AgentDef } from "../sdk/types.ts";
27
30
  import { toolError } from "../sdk/utils.ts";
28
31
  import { resolveAllBuiltins } from "./builtin-tools.ts";
29
- import { createPipelineSession } from "./pipeline-session.ts";
30
32
  import { resolveApiKey, resolveLlm, resolveStt, resolveTts } from "./providers/resolve.ts";
31
33
  import type { Logger, S2SConfig } from "./runtime-config.ts";
32
34
  import { consoleLogger, DEFAULT_S2S_CONFIG } from "./runtime-config.ts";
33
35
  import type { CreateS2sWebSocket } from "./s2s.ts";
34
- import { createS2sSession, type Session } from "./session.ts";
36
+ import { createSessionCore, type SessionCore } from "./session-core.ts";
35
37
  import { type ExecuteTool, executeToolCall } from "./tool-executor.ts";
38
+ import { createPipelineTransport } from "./transports/pipeline-transport.ts";
39
+ import { createS2sTransport } from "./transports/s2s-transport.ts";
40
+ import type { Transport, TransportCallbacks } from "./transports/types.ts";
36
41
  import { createUnstorageKv } from "./unstorage-kv.ts";
37
42
  import { type SessionWebSocket, wireSessionSocket } from "./ws-handler.ts";
38
43
 
44
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
45
+
46
+ /**
47
+ * Resolve the API key env-var for the configured STT provider.
48
+ *
49
+ * Each STT provider uses its own env var (e.g. `ASSEMBLYAI_API_KEY`,
50
+ * `DEEPGRAM_API_KEY`). We read the kind from the descriptor if it is one;
51
+ * pre-resolved openers have no kind field so we fall back to AssemblyAI for
52
+ * backward compatibility (openers supply their own key at open-time anyway).
53
+ */
54
+ function resolveSttApiKey(
55
+ stt: SttProvider | SttOpener | undefined,
56
+ env: Record<string, string>,
57
+ ): string {
58
+ // SttProvider descriptors carry a `kind` field; SttOpener does not.
59
+ const kind =
60
+ stt != null && "kind" in stt && typeof (stt as SttProvider).kind === "string"
61
+ ? (stt as SttProvider).kind
62
+ : undefined;
63
+ if (kind === DEEPGRAM_KIND) return resolveApiKey("DEEPGRAM_API_KEY", env);
64
+ // Default: ASSEMBLYAI_KIND or pre-resolved opener (backward compat).
65
+ return resolveApiKey("ASSEMBLYAI_API_KEY", env);
66
+ }
67
+
68
+ /**
69
+ * Resolve the API key env-var for the configured TTS provider.
70
+ *
71
+ * Each TTS provider uses its own env var (e.g. `CARTESIA_API_KEY`,
72
+ * `RIME_API_KEY`). We read the kind from the descriptor if it is one;
73
+ * pre-resolved openers have no kind field so we fall back to Cartesia for
74
+ * backward compatibility (openers supply their own key at open-time anyway).
75
+ */
76
+ function resolveTtsApiKey(
77
+ tts: TtsProvider | TtsOpener | undefined,
78
+ env: Record<string, string>,
79
+ ): string {
80
+ // TtsProvider descriptors carry a `kind` field; TtsOpener does not.
81
+ const kind =
82
+ tts != null && "kind" in tts && typeof (tts as TtsProvider).kind === "string"
83
+ ? (tts as TtsProvider).kind
84
+ : undefined;
85
+ if (kind === RIME_KIND) return resolveApiKey("RIME_API_KEY", env);
86
+ // Default: CARTESIA_KIND or pre-resolved opener (backward compat).
87
+ return resolveApiKey("CARTESIA_API_KEY", env);
88
+ }
89
+
39
90
  // ─── Runtime adapter (formerly adapter.ts) ──────────────────────────────────
40
91
 
41
92
  /** Per-session options passed to {@link AgentRuntime.startSession}. */
@@ -189,7 +240,7 @@ export type Runtime = AgentRuntime & {
189
240
  client: ClientSink;
190
241
  skipGreeting?: boolean;
191
242
  resumeFrom?: string;
192
- }): Session;
243
+ }): SessionCore;
193
244
  };
194
245
 
195
246
  /**
@@ -217,7 +268,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
217
268
  } = opts;
218
269
  const mode = assertProviderTriple(opts.stt, opts.llm, opts.tts);
219
270
  const agentConfig = toAgentConfig(agent);
220
- const sessions = new Map<string, Session>();
271
+ const sessions = new Map<string, SessionCore>();
221
272
  const sinkMap = new Map<string, ClientSink>();
222
273
  const readyConfig: ReadyConfig = buildReadyConfig(s2sConfig);
223
274
 
@@ -310,44 +361,101 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
310
361
  client: ClientSink;
311
362
  skipGreeting?: boolean;
312
363
  resumeFrom?: string;
313
- }): Session {
364
+ }): SessionCore {
314
365
  sinkMap.set(sessionOpts.id, sessionOpts.client);
366
+
367
+ const isPipeline = Boolean(pipelineProviders);
368
+ const hasTools = toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0;
369
+ const systemPrompt = buildSystemPrompt(agentConfig, {
370
+ hasTools,
371
+ voice: true,
372
+ toolGuidance,
373
+ });
374
+
375
+ // Late-bound reference: callbacks are constructed before SessionCore exists,
376
+ // so we capture a reference and fill it in below.
377
+ let core: SessionCore | null = null;
378
+ function bindCore(): SessionCore {
379
+ if (!core) throw new Error("SessionCore not yet created");
380
+ return core;
381
+ }
382
+
383
+ const callbacks: TransportCallbacks = {
384
+ onReplyStarted: (replyId) => bindCore().onReplyStarted(replyId),
385
+ onReplyDone: () => bindCore().onReplyDone(),
386
+ onCancelled: () => bindCore().onCancelled(),
387
+ onAudioChunk: (bytes) => bindCore().onAudioChunk(bytes),
388
+ onAudioDone: () => bindCore().onAudioDone(),
389
+ onUserTranscript: (text) => bindCore().onUserTranscript(text),
390
+ onAgentTranscript: (text, interrupted) => bindCore().onAgentTranscript(text, interrupted),
391
+ // Pipeline: tools execute inside streamText; forward the call to the
392
+ // client sink for UI observability only. Going through SessionCore.onToolCall
393
+ // would re-execute the tool and leave pendingTools non-empty, hanging the turn.
394
+ onToolCall: isPipeline
395
+ ? (id, name, args) =>
396
+ sessionOpts.client.event({ type: "tool_call", toolCallId: id, toolName: name, args })
397
+ : (id, name, args) => bindCore().onToolCall(id, name, args),
398
+ onError: (code, message) => bindCore().onError(code, message),
399
+ onSpeechStarted: () => bindCore().onSpeechStarted(),
400
+ onSpeechStopped: () => bindCore().onSpeechStopped(),
401
+ };
402
+
403
+ let transport: Transport;
315
404
  if (pipelineProviders) {
316
- return createPipelineSession({
317
- id: sessionOpts.id,
405
+ transport = createPipelineTransport({
406
+ sid: sessionOpts.id,
318
407
  agent: sessionOpts.agent,
319
- client: sessionOpts.client,
320
- agentConfig,
321
- toolSchemas,
322
- toolGuidance,
323
- executeTool,
324
408
  stt: pipelineProviders.stt,
325
409
  llm: pipelineProviders.llm,
326
410
  tts: pipelineProviders.tts,
327
- sttApiKey: resolveApiKey("ASSEMBLYAI_API_KEY", env),
328
- ttsApiKey: resolveApiKey("CARTESIA_API_KEY", env),
411
+ callbacks,
412
+ sessionConfig: {
413
+ systemPrompt,
414
+ greeting: agentConfig.greeting,
415
+ tools: toolSchemas,
416
+ },
417
+ toolSchemas,
418
+ executeTool,
419
+ providerKeys: {
420
+ stt: resolveSttApiKey(opts.stt, env),
421
+ tts: resolveTtsApiKey(opts.tts, env),
422
+ },
329
423
  sttSampleRate: s2sConfig.inputSampleRate,
330
424
  ttsSampleRate: s2sConfig.outputSampleRate,
425
+ maxSteps: agentConfig.maxSteps,
426
+ toolChoice: agentConfig.toolChoice,
331
427
  skipGreeting: sessionOpts.skipGreeting ?? false,
332
428
  logger,
333
429
  });
430
+ } else {
431
+ transport = createS2sTransport({
432
+ apiKey: env.ASSEMBLYAI_API_KEY ?? "",
433
+ s2sConfig,
434
+ sessionConfig: {
435
+ systemPrompt,
436
+ tools: toolSchemas as import("./s2s.ts").S2sToolSchema[],
437
+ ...(agentConfig.greeting !== undefined ? { greeting: agentConfig.greeting } : {}),
438
+ },
439
+ toolSchemas: toolSchemas as import("./s2s.ts").S2sToolSchema[],
440
+ callbacks,
441
+ sid: sessionOpts.id,
442
+ agent: sessionOpts.agent,
443
+ ...(createWebSocket ? { createWebSocket } : {}),
444
+ logger,
445
+ });
334
446
  }
335
- const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
336
- return createS2sSession({
447
+
448
+ core = createSessionCore({
337
449
  id: sessionOpts.id,
338
450
  agent: sessionOpts.agent,
339
451
  client: sessionOpts.client,
340
452
  agentConfig,
341
- toolSchemas,
342
- toolGuidance,
343
- apiKey,
344
- s2sConfig,
345
453
  executeTool,
346
- ...(createWebSocket ? { createWebSocket } : {}),
347
- skipGreeting: sessionOpts.skipGreeting ?? false,
454
+ transport,
348
455
  logger,
349
- ...(sessionOpts.resumeFrom ? { resumeFrom: sessionOpts.resumeFrom } : {}),
350
456
  });
457
+
458
+ return core;
351
459
  }
352
460
 
353
461
  // ── AgentRuntime methods ──────────────────────────────────────────────