@alexkroman1/aai 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.turbo/turbo-build.log +10 -10
  2. package/CHANGELOG.md +19 -0
  3. package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
  4. package/dist/assemblyai-C969QGi4.js +35 -0
  5. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  6. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  7. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  8. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  9. package/dist/host/providers/tts/rime.d.ts +44 -0
  10. package/dist/host/runtime-barrel.d.ts +4 -2
  11. package/dist/host/runtime-barrel.js +1434 -1209
  12. package/dist/host/runtime.d.ts +2 -2
  13. package/dist/host/s2s.d.ts +16 -16
  14. package/dist/host/session-core.d.ts +37 -0
  15. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  16. package/dist/host/transports/s2s-transport.d.ts +19 -0
  17. package/dist/host/transports/types.d.ts +45 -0
  18. package/dist/host/ws-handler.d.ts +14 -10
  19. package/dist/sdk/_internal-types.d.ts +2 -0
  20. package/dist/sdk/manifest-barrel.js +1 -1
  21. package/dist/sdk/protocol.d.ts +6 -5
  22. package/dist/sdk/providers/llm-barrel.js +1 -1
  23. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  24. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  25. package/dist/sdk/providers/stt-barrel.js +2 -2
  26. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  27. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -2
  30. package/host/_pipeline-test-fakes.ts +6 -3
  31. package/host/_test-utils.ts +209 -128
  32. package/host/builtin-tools.ts +1 -0
  33. package/host/cleanup.test.ts +25 -298
  34. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  35. package/host/providers/resolve.ts +10 -2
  36. package/host/providers/stt/deepgram.test.ts +229 -0
  37. package/host/providers/stt/deepgram.ts +172 -0
  38. package/host/providers/tts/cartesia.ts +7 -3
  39. package/host/providers/tts/rime.test.ts +251 -0
  40. package/host/providers/tts/rime.ts +322 -0
  41. package/host/runtime-barrel.ts +4 -2
  42. package/host/runtime.test.ts +16 -47
  43. package/host/runtime.ts +131 -23
  44. package/host/s2s.test.ts +122 -131
  45. package/host/s2s.ts +44 -52
  46. package/host/session-core.test.ts +257 -0
  47. package/host/session-core.ts +262 -0
  48. package/host/to-vercel-tools.test.ts +9 -1
  49. package/host/transports/pipeline-transport.test.ts +653 -0
  50. package/host/transports/pipeline-transport.ts +532 -0
  51. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  52. package/host/transports/s2s-transport.test.ts +56 -0
  53. package/host/transports/s2s-transport.ts +116 -0
  54. package/host/transports/types.test.ts +22 -0
  55. package/host/transports/types.ts +51 -0
  56. package/host/ws-handler.test.ts +324 -242
  57. package/host/ws-handler.ts +56 -59
  58. package/package.json +2 -1
  59. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  60. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  61. package/sdk/_internal-types.ts +3 -0
  62. package/sdk/protocol-compat.test.ts +8 -0
  63. package/sdk/protocol.ts +6 -5
  64. package/sdk/providers/stt/deepgram.ts +43 -0
  65. package/sdk/providers/stt-barrel.ts +2 -0
  66. package/sdk/providers/tts/cartesia.ts +15 -5
  67. package/sdk/providers/tts/rime.ts +52 -0
  68. package/sdk/providers/tts-barrel.ts +2 -0
  69. package/sdk/schema-alignment.test.ts +18 -6
  70. package/dist/assemblyai-Cxg9eobY.js +0 -18
  71. package/dist/cartesia-DwDk2tEu.js +0 -10
  72. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  73. package/dist/host/pipeline-session.d.ts +0 -52
  74. package/dist/host/session-ctx.d.ts +0 -73
  75. package/dist/host/session.d.ts +0 -62
  76. package/host/pipeline-session-ctx.test.ts +0 -31
  77. package/host/pipeline-session-ctx.ts +0 -36
  78. package/host/pipeline-session.test.ts +0 -672
  79. package/host/pipeline-session.ts +0 -533
  80. package/host/s2s-fixtures.test.ts +0 -237
  81. package/host/session-ctx.test.ts +0 -387
  82. package/host/session-ctx.ts +0 -134
  83. package/host/session-fixture-replay.test.ts +0 -128
  84. package/host/session.test.ts +0 -634
  85. package/host/session.ts +0 -412
  86. /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
@@ -14,7 +14,7 @@ import { type LlmProvider, type SttOpener, type SttProvider, type TtsOpener, typ
14
14
  import type { AgentDef } from "../sdk/types.ts";
15
15
  import type { Logger, S2SConfig } from "./runtime-config.ts";
16
16
  import type { CreateS2sWebSocket } from "./s2s.ts";
17
- import { type Session } from "./session.ts";
17
+ import { type SessionCore } from "./session-core.ts";
18
18
  import { type ExecuteTool } from "./tool-executor.ts";
19
19
  import { type SessionWebSocket } from "./ws-handler.ts";
20
20
  /** Per-session options passed to {@link AgentRuntime.startSession}. */
@@ -132,7 +132,7 @@ export type Runtime = AgentRuntime & {
132
132
  client: ClientSink;
133
133
  skipGreeting?: boolean;
134
134
  resumeFrom?: string;
135
- }): Session;
135
+ }): SessionCore;
136
136
  };
137
137
  /**
138
138
  * Create an agent runtime — the execution engine for a voice agent.
@@ -2,7 +2,6 @@
2
2
  * Speech-to-Speech WebSocket client for AssemblyAI's S2S API.
3
3
  */
4
4
  import type { JSONSchema7 } from "json-schema";
5
- import { type Unsubscribe } from "nanoevents";
6
5
  import type { ClientEvent } from "../sdk/protocol.ts";
7
6
  import type { Logger, S2SConfig } from "./runtime-config.ts";
8
7
  export type S2sWebSocket = {
@@ -44,23 +43,23 @@ export type S2sToolSchema = {
44
43
  description: string;
45
44
  parameters: JSONSchema7;
46
45
  };
47
- export type S2sEvents = {
48
- ready: (detail: {
49
- sessionId: string;
50
- }) => void;
51
- replyStarted: (detail: {
52
- replyId: string;
53
- }) => void;
54
- sessionExpired: () => void;
55
- event: (event: S2sEvent) => void;
56
- audio: (detail: {
57
- audio: Uint8Array;
58
- }) => void;
59
- error: (err: Error) => void;
60
- close: (code: number, reason: string) => void;
46
+ /** Callbacks fired into the owning session at construction time. */
47
+ export type S2sCallbacks = {
48
+ onSessionReady(sessionId: string): void;
49
+ onReplyStarted(replyId: string): void;
50
+ onReplyDone(): void;
51
+ onCancelled(): void;
52
+ onAudio(bytes: Uint8Array): void;
53
+ onUserTranscript(text: string): void;
54
+ onAgentTranscript(text: string, interrupted: boolean): void;
55
+ onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
56
+ onSpeechStarted(): void;
57
+ onSpeechStopped(): void;
58
+ onSessionExpired(): void;
59
+ onError(err: Error): void;
60
+ onClose(code: number, reason: string): void;
61
61
  };
62
62
  export type S2sHandle = {
63
- on<K extends keyof S2sEvents>(event: K, cb: S2sEvents[K]): Unsubscribe;
64
63
  sendAudio(audio: Uint8Array): void;
65
64
  /**
66
65
  * Send a pre-encoded audio wire frame. For perf-critical callers (load tests)
@@ -76,6 +75,7 @@ export type ConnectS2sOptions = {
76
75
  apiKey: string;
77
76
  config: S2SConfig;
78
77
  createWebSocket: CreateS2sWebSocket;
78
+ callbacks: S2sCallbacks;
79
79
  logger?: Logger;
80
80
  /**
81
81
  * Session id attached to diagnostic log lines (e.g. raw `reply.done`
@@ -0,0 +1,37 @@
1
+ import type { AgentConfig, ExecuteTool } from "../sdk/_internal-types.ts";
2
+ import type { ClientSink, SessionErrorCode } from "../sdk/protocol.ts";
3
+ import type { Message } from "../sdk/types.ts";
4
+ import type { Logger } from "./runtime-config.ts";
5
+ import type { Transport } from "./transports/types.ts";
6
+ export type SessionCoreOptions = {
7
+ id: string;
8
+ agent: string;
9
+ client: ClientSink;
10
+ agentConfig: AgentConfig;
11
+ executeTool: ExecuteTool;
12
+ transport: Transport;
13
+ logger?: Logger;
14
+ maxHistory?: number;
15
+ };
16
+ export type SessionCore = {
17
+ readonly id: string;
18
+ start(): Promise<void>;
19
+ stop(): Promise<void>;
20
+ onAudio(bytes: Uint8Array): void;
21
+ onAudioReady(): void;
22
+ onCancel(): void;
23
+ onReset(): void;
24
+ onHistory(messages: readonly Message[]): void;
25
+ onReplyStarted(replyId: string): void;
26
+ onReplyDone(): void;
27
+ onCancelled(): void;
28
+ onAudioChunk(bytes: Uint8Array): void;
29
+ onAudioDone(): void;
30
+ onUserTranscript(text: string): void;
31
+ onAgentTranscript(text: string, interrupted: boolean): void;
32
+ onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
33
+ onError(code: SessionErrorCode, message: string): void;
34
+ onSpeechStarted(): void;
35
+ onSpeechStopped(): void;
36
+ };
37
+ export declare function createSessionCore(opts: SessionCoreOptions): SessionCore;
@@ -0,0 +1,48 @@
1
+ import type { LanguageModel } from "ai";
2
+ import type { ExecuteTool, ToolSchema } from "../../sdk/_internal-types.ts";
3
+ import type { SttOpener, TtsOpener } from "../../sdk/providers.ts";
4
+ import type { ToolChoice } from "../../sdk/types.ts";
5
+ import { type Logger } from "../runtime-config.ts";
6
+ import type { Transport, TransportCallbacks, TransportSessionConfig } from "./types.ts";
7
+ /** Configuration for {@link createPipelineTransport}. */
8
+ export interface PipelineTransportOptions {
9
+ /** Unique session identifier. */
10
+ sid: string;
11
+ /** Agent slug. */
12
+ agent: string;
13
+ /** STT opener (resolved from an SttProvider descriptor). */
14
+ stt: SttOpener;
15
+ /** LLM provider (Vercel AI SDK LanguageModel). */
16
+ llm: LanguageModel;
17
+ /** TTS opener (resolved from a TtsProvider descriptor). */
18
+ tts: TtsOpener;
19
+ /** Transport-level callbacks into SessionCore. */
20
+ callbacks: TransportCallbacks;
21
+ /** Session config: systemPrompt, greeting, tools, history. */
22
+ sessionConfig: TransportSessionConfig;
23
+ /** Tool schemas (JSON Schema) for Vercel AI tool binding. */
24
+ toolSchemas?: readonly ToolSchema[];
25
+ /** Agent's tool-execution function. */
26
+ executeTool?: ExecuteTool;
27
+ /** Provider-specific API keys. */
28
+ providerKeys: {
29
+ stt: string;
30
+ tts: string;
31
+ };
32
+ /** STT audio input sample rate (PCM16, Hz). Defaults to DEFAULT_STT_SAMPLE_RATE. */
33
+ sttSampleRate?: number | undefined;
34
+ /** TTS audio output sample rate (PCM16, Hz). Defaults to DEFAULT_TTS_SAMPLE_RATE. */
35
+ ttsSampleRate?: number | undefined;
36
+ /** Optional STT prompt injected via SttOpenOptions.sttPrompt. */
37
+ sttPrompt?: string | undefined;
38
+ /** Max LLM tool-call steps per turn. Defaults to 5. */
39
+ maxSteps?: number | undefined;
40
+ /** Tool selection policy passed to `streamText`. Defaults to `"auto"`. */
41
+ toolChoice?: ToolChoice | undefined;
42
+ /** Logger. Defaults to consoleLogger. */
43
+ logger?: Logger | undefined;
44
+ /** Skip the initial greeting (used for session resume). */
45
+ skipGreeting?: boolean | undefined;
46
+ }
47
+ /** Create a pipeline-mode Transport (STT → LLM → TTS). */
48
+ export declare function createPipelineTransport(opts: PipelineTransportOptions): Transport;
@@ -0,0 +1,19 @@
1
+ import type { Logger, S2SConfig } from "../runtime-config.ts";
2
+ import { type CreateS2sWebSocket, connectS2s, type S2sSessionConfig, type S2sToolSchema } from "../s2s.ts";
3
+ import type { Transport, TransportCallbacks } from "./types.ts";
4
+ /** @internal Exposed for testing — allows spying on connectS2s in unit tests. */
5
+ export declare const _internals: {
6
+ connectS2s: typeof connectS2s;
7
+ };
8
+ export type S2sTransportOptions = {
9
+ apiKey: string;
10
+ s2sConfig: S2SConfig;
11
+ sessionConfig: S2sSessionConfig;
12
+ toolSchemas: S2sToolSchema[];
13
+ callbacks: TransportCallbacks;
14
+ sid: string;
15
+ agent: string;
16
+ createWebSocket?: CreateS2sWebSocket;
17
+ logger?: Logger;
18
+ };
19
+ export declare function createS2sTransport(opts: S2sTransportOptions): Transport;
@@ -0,0 +1,45 @@
1
+ import type { SessionErrorCode } from "../../sdk/protocol.ts";
2
+ import type { Message } from "../../sdk/types.ts";
3
+ /**
4
+ * Typed callbacks into the SessionCore. One per event the transport produces.
5
+ * Constructed at transport-creation time; no emitter.on-style indirection.
6
+ */
7
+ export type TransportCallbacks = {
8
+ onReplyStarted(replyId: string): void;
9
+ onReplyDone(): void;
10
+ onCancelled(): void;
11
+ onAudioChunk(bytes: Uint8Array): void;
12
+ onAudioDone(): void;
13
+ onUserTranscript(text: string): void;
14
+ onAgentTranscript(text: string, interrupted: boolean): void;
15
+ onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
16
+ onError(code: SessionErrorCode, message: string): void;
17
+ onSpeechStarted(): void;
18
+ onSpeechStopped(): void;
19
+ onSessionReady?(providerSessionId: string): void;
20
+ };
21
+ /** Minimal config a transport may receive at construction time. */
22
+ export type TransportSessionConfig = {
23
+ systemPrompt: string;
24
+ greeting?: string;
25
+ tools?: unknown[];
26
+ history?: Message[];
27
+ };
28
+ /**
29
+ * Transport abstraction — one implementation per provider strategy
30
+ * (see `s2s-transport.ts`, `pipeline-transport.ts`).
31
+ */
32
+ export interface Transport {
33
+ /** Open any underlying connections and send initial session config. */
34
+ start(): Promise<void>;
35
+ /** Tear down, flush, close. Idempotent. */
36
+ stop(): Promise<void>;
37
+ /** Forward user audio to the provider. */
38
+ sendUserAudio(bytes: Uint8Array): void;
39
+ /** Forward a tool result back to the provider's reply stream. */
40
+ sendToolResult(callId: string, result: string): void;
41
+ /** Cancel the currently in-flight reply (barge-in / client cancel). */
42
+ cancelReply(): void;
43
+ /** Re-send session config (S2S only; pipeline is a no-op). */
44
+ updateSession?(config: TransportSessionConfig): void;
45
+ }
@@ -3,9 +3,9 @@
3
3
  *
4
4
  * Audio validation is handled at the host transport layer (see server.ts).
5
5
  */
6
- import type { ClientSink, ReadyConfig } from "../sdk/protocol.ts";
6
+ import { type ClientSink } from "../sdk/protocol.ts";
7
7
  import type { Logger } from "./runtime-config.ts";
8
- import type { Session } from "./session.ts";
8
+ import type { SessionCore } from "./session-core.ts";
9
9
  /**
10
10
  * Minimal WebSocket interface accepted by {@link wireSessionSocket}.
11
11
  *
@@ -25,11 +25,15 @@ export type SessionWebSocket = {
25
25
  /** Options for wiring a WebSocket to a session. */
26
26
  export type WsSessionOptions = {
27
27
  /** Map of active sessions (session is added on open, removed on close). */
28
- sessions: Map<string, Session>;
28
+ sessions: Map<string, SessionCore>;
29
29
  /** Factory function to create a session for a given ID and client sink. */
30
- createSession: (sessionId: string, client: ClientSink) => Session;
30
+ createSession: (sessionId: string, client: ClientSink) => SessionCore;
31
31
  /** Protocol config sent to the client immediately on connect. */
32
- readyConfig: ReadyConfig;
32
+ readyConfig: {
33
+ audioFormat: "pcm16";
34
+ sampleRate: number;
35
+ ttsSampleRate: number;
36
+ };
33
37
  /** Additional key-value pairs included in log messages. */
34
38
  logContext?: Record<string, string>;
35
39
  /** Callback invoked when the WebSocket connection opens. */
@@ -48,12 +52,12 @@ export type WsSessionOptions = {
48
52
  resumeFrom?: string;
49
53
  };
50
54
  /**
51
- * Attaches session lifecycle handlers to a native WebSocket using
52
- * plain JSON text frames and binary audio frames.
55
+ * Attaches session lifecycle handlers to a native WebSocket using JSON text
56
+ * frames for control messages and raw PCM16 binary frames for audio.
53
57
  *
54
58
  * Connection flow:
55
- * 1. WebSocket opens → server sends `{ type: "config", ...ReadyConfig }`
56
- * 2. Client sets up audio → sends `{ type: "audio_ready" }`
57
- * 3. If reconnecting → client sends `{ type: "history", messages: [...] }`
59
+ * 1. WebSocket opens → server sends JSON CONFIG frame with sampleRate, ttsSampleRate, sessionId
60
+ * 2. Client sets up audio → sends JSON AUDIO_READY frame
61
+ * 3. If reconnecting → client sends JSON HISTORY frame with prior messages
58
62
  */
59
63
  export declare function wireSessionSocket(ws: SessionWebSocket, opts: WsSessionOptions): void;
@@ -104,12 +104,14 @@ export declare function toAgentConfig(src: AgentConfigSource): AgentConfig;
104
104
  * etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
105
105
  */
106
106
  export declare const ToolSchemaSchema: z.ZodObject<{
107
+ type: z.ZodLiteral<"function">;
107
108
  name: z.ZodString;
108
109
  description: z.ZodString;
109
110
  parameters: z.ZodRecord<z.ZodString, z.ZodUnknown>;
110
111
  }, z.core.$strip>;
111
112
  /** Serialized tool schema — derived from {@link ToolSchemaSchema}. */
112
113
  export type ToolSchema = {
114
+ type: "function";
113
115
  name: string;
114
116
  description: string;
115
117
  parameters: JSONSchema7;
@@ -1,2 +1,2 @@
1
- import { a as toAgentConfig, i as agentToolsToSchemas, n as EMPTY_PARAMS, o as ProviderDescriptorSchema, r as ToolSchemaSchema, s as assertProviderTriple, t as AgentConfigSchema } from "../_internal-types-3p3OJZPb.js";
1
+ import { a as toAgentConfig, i as agentToolsToSchemas, n as EMPTY_PARAMS, o as ProviderDescriptorSchema, r as ToolSchemaSchema, s as assertProviderTriple, t as AgentConfigSchema } from "../_internal-types-DFL07G3f.js";
2
2
  export { AgentConfigSchema, EMPTY_PARAMS, ProviderDescriptorSchema, ToolSchemaSchema, agentToolsToSchemas, assertProviderTriple, toAgentConfig };
@@ -127,16 +127,17 @@ export type ClientEvent = z.infer<typeof ClientEventSchema>;
127
127
  /**
128
128
  * Typed interface for pushing session events to a connected client.
129
129
  *
130
- * For WebSocket sessions this sends JSON text frames and binary audio frames.
130
+ * Events (`event`, `playAudioDone`) send JSON text frames. Audio chunks
131
+ * (`playAudioChunk`) send raw PCM16 binary frames.
131
132
  */
132
133
  export interface ClientSink {
133
- /** Whether the underlying connection is open and accepting calls. */
134
+ /** True when the underlying connection is open and will accept calls. */
134
135
  readonly open: boolean;
135
- /** Push a session event to the client. */
136
+ /** Push a session event (JSON text frame) to the client. */
136
137
  event(e: ClientEvent): void;
137
- /** Send a single TTS audio chunk to the client. */
138
+ /** Send a single PCM16 audio chunk (raw binary frame) to the client. */
138
139
  playAudioChunk(chunk: Uint8Array): void;
139
- /** Signal that TTS audio is complete. */
140
+ /** Signal that TTS audio is complete (JSON text frame). */
140
141
  playAudioDone(): void;
141
142
  }
142
143
  /** Zod schema for {@link ReadyConfig}. */
@@ -1,2 +1,2 @@
1
- import { n as anthropic, t as ANTHROPIC_KIND } from "../../anthropic-BrUCPKUc.js";
1
+ import { n as anthropic, t as ANTHROPIC_KIND } from "../../anthropic-CcLZygAr.js";
2
2
  export { ANTHROPIC_KIND, anthropic };
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Deepgram Nova streaming STT factory — returns a pure descriptor.
3
+ *
4
+ * The descriptor flows through the bundle → server → runtime pipeline
5
+ * without importing the `@deepgram/sdk` package. The host-side resolver in
6
+ * `host/providers/resolve.ts` turns it into an openable {@link SttOpener}
7
+ * during `createRuntime`.
8
+ */
9
+ import type { SttProvider } from "../../providers.ts";
10
+ /** Kind tag recognised by the host-side resolver. */
11
+ export declare const DEEPGRAM_KIND: "deepgram";
12
+ export interface DeepgramOptions {
13
+ /**
14
+ * Streaming speech model. Defaults to `"nova-3"`. Any string is forwarded
15
+ * to the SDK unchanged, which allows opt-in to future models.
16
+ */
17
+ model?: "nova-3" | "nova-2" | string;
18
+ /**
19
+ * BCP-47 language code for transcription. Defaults to `"en"`.
20
+ * Examples: `"en"`, `"es"`, `"fr"`, `"de"`.
21
+ */
22
+ language?: string;
23
+ }
24
+ export type DeepgramProvider = SttProvider & {
25
+ readonly kind: typeof DEEPGRAM_KIND;
26
+ readonly options: DeepgramOptions;
27
+ };
28
+ /**
29
+ * Build a Deepgram STT descriptor.
30
+ *
31
+ * The API key is resolved host-side from the agent's env
32
+ * (`DEEPGRAM_API_KEY`); there is no factory-time key parameter, so the
33
+ * descriptor stays free of secrets and safe to serialize.
34
+ */
35
+ export declare function deepgram(opts?: DeepgramOptions): DeepgramProvider;
@@ -7,3 +7,4 @@
7
7
  */
8
8
  export type { SttError, SttEvents, SttOpenOptions, SttProvider, SttSession } from "../providers.ts";
9
9
  export * from "./stt/assemblyai.ts";
10
+ export * from "./stt/deepgram.ts";
@@ -1,2 +1,2 @@
1
- import { n as assemblyAI, t as ASSEMBLYAI_KIND } from "../../assemblyai-Cxg9eobY.js";
2
- export { ASSEMBLYAI_KIND, assemblyAI };
1
+ import { i as deepgram, n as assemblyAI, r as DEEPGRAM_KIND, t as ASSEMBLYAI_KIND } from "../../assemblyai-C969QGi4.js";
2
+ export { ASSEMBLYAI_KIND, DEEPGRAM_KIND, assemblyAI, deepgram };
@@ -8,9 +8,15 @@
8
8
  */
9
9
  import type { TtsProvider } from "../../providers.ts";
10
10
  export declare const CARTESIA_KIND: "cartesia";
11
+ /**
12
+ * Default voice used when callers invoke `cartesia()` with no `voice`. This
13
+ * is the same voice the example templates ship with, so a bare `cartesia()`
14
+ * works out of the box for new agents.
15
+ */
16
+ export declare const CARTESIA_DEFAULT_VOICE = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
11
17
  export interface CartesiaOptions {
12
- /** Cartesia voice ID. Required. */
13
- voice: string;
18
+ /** Cartesia voice ID. Defaults to {@link CARTESIA_DEFAULT_VOICE}. */
19
+ voice?: string;
14
20
  /** Model ID. Defaults to `"sonic-2"`. */
15
21
  model?: string;
16
22
  /** Spoken language hint. Defaults to `"en"`. */
@@ -18,6 +24,8 @@ export interface CartesiaOptions {
18
24
  }
19
25
  export type CartesiaProvider = TtsProvider & {
20
26
  readonly kind: typeof CARTESIA_KIND;
21
- readonly options: CartesiaOptions;
27
+ readonly options: CartesiaOptions & {
28
+ voice: string;
29
+ };
22
30
  };
23
- export declare function cartesia(opts: CartesiaOptions): CartesiaProvider;
31
+ export declare function cartesia(opts?: CartesiaOptions): CartesiaProvider;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Rime TTS factory — returns a pure descriptor.
3
+ *
4
+ * See `sdk/providers/stt/assemblyai.ts` for the descriptor/opener split;
5
+ * the host-side resolver in `host/providers/resolve.ts` turns this into an
6
+ * openable {@link TtsOpener} during `createRuntime` using the
7
+ * `RIME_API_KEY` from the agent's env.
8
+ *
9
+ * Language codes follow ISO 639-3 (three-letter): `"eng"`, `"fra"`, etc.
10
+ * This differs from many APIs that use ISO 639-1 two-letter codes like `"en"`.
11
+ */
12
+ import type { TtsProvider } from "../../providers.ts";
13
+ export declare const RIME_KIND: "rime";
14
+ /**
15
+ * Default Rime speaker used when callers invoke `rime()` with no `voice`.
16
+ * `cove` is a `mistv2` speaker, matching the default model below — so a
17
+ * bare `rime()` works out of the box for new agents.
18
+ */
19
+ export declare const RIME_DEFAULT_VOICE = "cove";
20
+ export interface RimeOptions {
21
+ /** Rime speaker ID. Defaults to {@link RIME_DEFAULT_VOICE}. */
22
+ voice?: string;
23
+ /**
24
+ * Rime model ID. Defaults to `"mistv2"` (Rime's most compatible model).
25
+ * Common values: `"mistv2"`, `"arcana"`.
26
+ */
27
+ model?: "mistv2" | "arcana" | string;
28
+ /**
29
+ * Spoken language. Uses ISO 639-3 (three-letter codes).
30
+ * Defaults to `"eng"` (English).
31
+ *
32
+ * Note: Rime uses 3-letter codes — use `"eng"` not `"en"`.
33
+ */
34
+ language?: string;
35
+ }
36
+ export type RimeProvider = TtsProvider & {
37
+ readonly kind: typeof RIME_KIND;
38
+ readonly options: RimeOptions & {
39
+ voice: string;
40
+ };
41
+ };
42
+ export declare function rime(opts?: RimeOptions): RimeProvider;
@@ -7,3 +7,4 @@
7
7
  */
8
8
  export type { TtsError, TtsEvents, TtsOpenOptions, TtsProvider, TtsSession } from "../providers.ts";
9
9
  export * from "./tts/cartesia.ts";
10
+ export * from "./tts/rime.ts";
@@ -1,2 +1,2 @@
1
- import { n as cartesia, t as CARTESIA_KIND } from "../../cartesia-DwDk2tEu.js";
2
- export { CARTESIA_KIND, cartesia };
1
+ import { a as RIME_KIND, i as RIME_DEFAULT_VOICE, n as CARTESIA_KIND, o as rime, r as cartesia, t as CARTESIA_DEFAULT_VOICE } from "../../cartesia-BfQPOQ7Y.js";
2
+ export { CARTESIA_DEFAULT_VOICE, CARTESIA_KIND, RIME_DEFAULT_VOICE, RIME_KIND, cartesia, rime };
@@ -326,21 +326,24 @@ export function createFakeLanguageModel(
326
326
  options:
327
327
  | { script: ScriptedPart[]; delayMs?: number }
328
328
  | { steps: ScriptedPart[][]; delayMs?: number },
329
- ): LanguageModel {
329
+ ): LanguageModel & { readonly calls: readonly Record<string, unknown>[] } {
330
330
  const delayMs = options.delayMs;
331
331
  const steps: ScriptedPart[][] = "steps" in options ? options.steps : [options.script];
332
332
  let stepIndex = 0;
333
+ const calls: Record<string, unknown>[] = [];
333
334
  const model = {
334
335
  specificationVersion: "v3" as const,
335
336
  provider: "fake-llm",
336
337
  modelId: "fake-llm-1",
337
338
  supportedUrls: {} as Record<string, RegExp[]>,
339
+ calls,
338
340
  async doGenerate(): Promise<never> {
339
341
  throw new Error("fake LLM: doGenerate not implemented");
340
342
  },
341
- async doStream(opts: { abortSignal?: AbortSignal }): Promise<{
343
+ async doStream(opts: Record<string, unknown> & { abortSignal?: AbortSignal }): Promise<{
342
344
  stream: ReadableStream<StreamPart>;
343
345
  }> {
346
+ calls.push(opts);
344
347
  // Advance one step per call; after the last scripted step, keep
345
348
  // yielding an empty step so an unexpected extra call completes cleanly.
346
349
  const current = steps[stepIndex] ?? [];
@@ -353,5 +356,5 @@ export function createFakeLanguageModel(
353
356
  return { stream };
354
357
  },
355
358
  };
356
- return model as unknown as LanguageModel;
359
+ return model as unknown as LanguageModel & { readonly calls: readonly Record<string, unknown>[] };
357
360
  }