@alexkroman1/aai 1.4.5 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/CHANGELOG.md +19 -0
- package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
- package/dist/assemblyai-C969QGi4.js +35 -0
- package/dist/cartesia-BfQPOQ7Y.js +37 -0
- package/dist/host/_pipeline-test-fakes.d.ts +3 -1
- package/dist/host/providers/stt/deepgram.d.ts +28 -0
- package/dist/host/providers/tts/cartesia.d.ts +1 -1
- package/dist/host/providers/tts/rime.d.ts +44 -0
- package/dist/host/runtime-barrel.d.ts +4 -2
- package/dist/host/runtime-barrel.js +1434 -1209
- package/dist/host/runtime.d.ts +2 -2
- package/dist/host/s2s.d.ts +16 -16
- package/dist/host/session-core.d.ts +37 -0
- package/dist/host/transports/pipeline-transport.d.ts +48 -0
- package/dist/host/transports/s2s-transport.d.ts +19 -0
- package/dist/host/transports/types.d.ts +45 -0
- package/dist/host/ws-handler.d.ts +14 -10
- package/dist/sdk/_internal-types.d.ts +2 -0
- package/dist/sdk/manifest-barrel.js +1 -1
- package/dist/sdk/protocol.d.ts +6 -5
- package/dist/sdk/providers/llm-barrel.js +1 -1
- package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
- package/dist/sdk/providers/stt-barrel.d.ts +1 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
- package/dist/sdk/providers/tts/rime.d.ts +42 -0
- package/dist/sdk/providers/tts-barrel.d.ts +1 -0
- package/dist/sdk/providers/tts-barrel.js +2 -2
- package/host/_pipeline-test-fakes.ts +6 -3
- package/host/_test-utils.ts +209 -128
- package/host/builtin-tools.ts +1 -0
- package/host/cleanup.test.ts +25 -298
- package/host/integration/pipeline-reference.integration.test.ts +30 -35
- package/host/providers/resolve.ts +10 -2
- package/host/providers/stt/deepgram.test.ts +229 -0
- package/host/providers/stt/deepgram.ts +172 -0
- package/host/providers/tts/cartesia.ts +7 -3
- package/host/providers/tts/rime.test.ts +251 -0
- package/host/providers/tts/rime.ts +322 -0
- package/host/runtime-barrel.ts +4 -2
- package/host/runtime.test.ts +16 -47
- package/host/runtime.ts +131 -23
- package/host/s2s.test.ts +122 -131
- package/host/s2s.ts +44 -52
- package/host/session-core.test.ts +257 -0
- package/host/session-core.ts +262 -0
- package/host/to-vercel-tools.test.ts +9 -1
- package/host/transports/pipeline-transport.test.ts +653 -0
- package/host/transports/pipeline-transport.ts +532 -0
- package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
- package/host/transports/s2s-transport.test.ts +56 -0
- package/host/transports/s2s-transport.ts +116 -0
- package/host/transports/types.test.ts +22 -0
- package/host/transports/types.ts +51 -0
- package/host/ws-handler.test.ts +324 -242
- package/host/ws-handler.ts +56 -59
- package/package.json +2 -1
- package/sdk/__snapshots__/exports.test.ts.snap +3 -3
- package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
- package/sdk/_internal-types.ts +3 -0
- package/sdk/protocol-compat.test.ts +8 -0
- package/sdk/protocol.ts +6 -5
- package/sdk/providers/stt/deepgram.ts +43 -0
- package/sdk/providers/stt-barrel.ts +2 -0
- package/sdk/providers/tts/cartesia.ts +15 -5
- package/sdk/providers/tts/rime.ts +52 -0
- package/sdk/providers/tts-barrel.ts +2 -0
- package/sdk/schema-alignment.test.ts +18 -6
- package/dist/assemblyai-Cxg9eobY.js +0 -18
- package/dist/cartesia-DwDk2tEu.js +0 -10
- package/dist/host/pipeline-session-ctx.d.ts +0 -24
- package/dist/host/pipeline-session.d.ts +0 -52
- package/dist/host/session-ctx.d.ts +0 -73
- package/dist/host/session.d.ts +0 -62
- package/host/pipeline-session-ctx.test.ts +0 -31
- package/host/pipeline-session-ctx.ts +0 -36
- package/host/pipeline-session.test.ts +0 -672
- package/host/pipeline-session.ts +0 -533
- package/host/s2s-fixtures.test.ts +0 -237
- package/host/session-ctx.test.ts +0 -387
- package/host/session-ctx.ts +0 -134
- package/host/session-fixture-replay.test.ts +0 -128
- package/host/session.test.ts +0 -634
- package/host/session.ts +0 -412
- /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
package/dist/host/runtime.d.ts
CHANGED
|
@@ -14,7 +14,7 @@ import { type LlmProvider, type SttOpener, type SttProvider, type TtsOpener, typ
|
|
|
14
14
|
import type { AgentDef } from "../sdk/types.ts";
|
|
15
15
|
import type { Logger, S2SConfig } from "./runtime-config.ts";
|
|
16
16
|
import type { CreateS2sWebSocket } from "./s2s.ts";
|
|
17
|
-
import { type
|
|
17
|
+
import { type SessionCore } from "./session-core.ts";
|
|
18
18
|
import { type ExecuteTool } from "./tool-executor.ts";
|
|
19
19
|
import { type SessionWebSocket } from "./ws-handler.ts";
|
|
20
20
|
/** Per-session options passed to {@link AgentRuntime.startSession}. */
|
|
@@ -132,7 +132,7 @@ export type Runtime = AgentRuntime & {
|
|
|
132
132
|
client: ClientSink;
|
|
133
133
|
skipGreeting?: boolean;
|
|
134
134
|
resumeFrom?: string;
|
|
135
|
-
}):
|
|
135
|
+
}): SessionCore;
|
|
136
136
|
};
|
|
137
137
|
/**
|
|
138
138
|
* Create an agent runtime — the execution engine for a voice agent.
|
package/dist/host/s2s.d.ts
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
* Speech-to-Speech WebSocket client for AssemblyAI's S2S API.
|
|
3
3
|
*/
|
|
4
4
|
import type { JSONSchema7 } from "json-schema";
|
|
5
|
-
import { type Unsubscribe } from "nanoevents";
|
|
6
5
|
import type { ClientEvent } from "../sdk/protocol.ts";
|
|
7
6
|
import type { Logger, S2SConfig } from "./runtime-config.ts";
|
|
8
7
|
export type S2sWebSocket = {
|
|
@@ -44,23 +43,23 @@ export type S2sToolSchema = {
|
|
|
44
43
|
description: string;
|
|
45
44
|
parameters: JSONSchema7;
|
|
46
45
|
};
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
46
|
+
/** Callbacks fired into the owning session at construction time. */
|
|
47
|
+
export type S2sCallbacks = {
|
|
48
|
+
onSessionReady(sessionId: string): void;
|
|
49
|
+
onReplyStarted(replyId: string): void;
|
|
50
|
+
onReplyDone(): void;
|
|
51
|
+
onCancelled(): void;
|
|
52
|
+
onAudio(bytes: Uint8Array): void;
|
|
53
|
+
onUserTranscript(text: string): void;
|
|
54
|
+
onAgentTranscript(text: string, interrupted: boolean): void;
|
|
55
|
+
onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
|
|
56
|
+
onSpeechStarted(): void;
|
|
57
|
+
onSpeechStopped(): void;
|
|
58
|
+
onSessionExpired(): void;
|
|
59
|
+
onError(err: Error): void;
|
|
60
|
+
onClose(code: number, reason: string): void;
|
|
61
61
|
};
|
|
62
62
|
export type S2sHandle = {
|
|
63
|
-
on<K extends keyof S2sEvents>(event: K, cb: S2sEvents[K]): Unsubscribe;
|
|
64
63
|
sendAudio(audio: Uint8Array): void;
|
|
65
64
|
/**
|
|
66
65
|
* Send a pre-encoded audio wire frame. For perf-critical callers (load tests)
|
|
@@ -76,6 +75,7 @@ export type ConnectS2sOptions = {
|
|
|
76
75
|
apiKey: string;
|
|
77
76
|
config: S2SConfig;
|
|
78
77
|
createWebSocket: CreateS2sWebSocket;
|
|
78
|
+
callbacks: S2sCallbacks;
|
|
79
79
|
logger?: Logger;
|
|
80
80
|
/**
|
|
81
81
|
* Session id attached to diagnostic log lines (e.g. raw `reply.done`
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { AgentConfig, ExecuteTool } from "../sdk/_internal-types.ts";
|
|
2
|
+
import type { ClientSink, SessionErrorCode } from "../sdk/protocol.ts";
|
|
3
|
+
import type { Message } from "../sdk/types.ts";
|
|
4
|
+
import type { Logger } from "./runtime-config.ts";
|
|
5
|
+
import type { Transport } from "./transports/types.ts";
|
|
6
|
+
export type SessionCoreOptions = {
|
|
7
|
+
id: string;
|
|
8
|
+
agent: string;
|
|
9
|
+
client: ClientSink;
|
|
10
|
+
agentConfig: AgentConfig;
|
|
11
|
+
executeTool: ExecuteTool;
|
|
12
|
+
transport: Transport;
|
|
13
|
+
logger?: Logger;
|
|
14
|
+
maxHistory?: number;
|
|
15
|
+
};
|
|
16
|
+
export type SessionCore = {
|
|
17
|
+
readonly id: string;
|
|
18
|
+
start(): Promise<void>;
|
|
19
|
+
stop(): Promise<void>;
|
|
20
|
+
onAudio(bytes: Uint8Array): void;
|
|
21
|
+
onAudioReady(): void;
|
|
22
|
+
onCancel(): void;
|
|
23
|
+
onReset(): void;
|
|
24
|
+
onHistory(messages: readonly Message[]): void;
|
|
25
|
+
onReplyStarted(replyId: string): void;
|
|
26
|
+
onReplyDone(): void;
|
|
27
|
+
onCancelled(): void;
|
|
28
|
+
onAudioChunk(bytes: Uint8Array): void;
|
|
29
|
+
onAudioDone(): void;
|
|
30
|
+
onUserTranscript(text: string): void;
|
|
31
|
+
onAgentTranscript(text: string, interrupted: boolean): void;
|
|
32
|
+
onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
|
|
33
|
+
onError(code: SessionErrorCode, message: string): void;
|
|
34
|
+
onSpeechStarted(): void;
|
|
35
|
+
onSpeechStopped(): void;
|
|
36
|
+
};
|
|
37
|
+
export declare function createSessionCore(opts: SessionCoreOptions): SessionCore;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type { LanguageModel } from "ai";
|
|
2
|
+
import type { ExecuteTool, ToolSchema } from "../../sdk/_internal-types.ts";
|
|
3
|
+
import type { SttOpener, TtsOpener } from "../../sdk/providers.ts";
|
|
4
|
+
import type { ToolChoice } from "../../sdk/types.ts";
|
|
5
|
+
import { type Logger } from "../runtime-config.ts";
|
|
6
|
+
import type { Transport, TransportCallbacks, TransportSessionConfig } from "./types.ts";
|
|
7
|
+
/** Configuration for {@link createPipelineTransport}. */
|
|
8
|
+
export interface PipelineTransportOptions {
|
|
9
|
+
/** Unique session identifier. */
|
|
10
|
+
sid: string;
|
|
11
|
+
/** Agent slug. */
|
|
12
|
+
agent: string;
|
|
13
|
+
/** STT opener (resolved from an SttProvider descriptor). */
|
|
14
|
+
stt: SttOpener;
|
|
15
|
+
/** LLM provider (Vercel AI SDK LanguageModel). */
|
|
16
|
+
llm: LanguageModel;
|
|
17
|
+
/** TTS opener (resolved from a TtsProvider descriptor). */
|
|
18
|
+
tts: TtsOpener;
|
|
19
|
+
/** Transport-level callbacks into SessionCore. */
|
|
20
|
+
callbacks: TransportCallbacks;
|
|
21
|
+
/** Session config: systemPrompt, greeting, tools, history. */
|
|
22
|
+
sessionConfig: TransportSessionConfig;
|
|
23
|
+
/** Tool schemas (JSON Schema) for Vercel AI tool binding. */
|
|
24
|
+
toolSchemas?: readonly ToolSchema[];
|
|
25
|
+
/** Agent's tool-execution function. */
|
|
26
|
+
executeTool?: ExecuteTool;
|
|
27
|
+
/** Provider-specific API keys. */
|
|
28
|
+
providerKeys: {
|
|
29
|
+
stt: string;
|
|
30
|
+
tts: string;
|
|
31
|
+
};
|
|
32
|
+
/** STT audio input sample rate (PCM16, Hz). Defaults to DEFAULT_STT_SAMPLE_RATE. */
|
|
33
|
+
sttSampleRate?: number | undefined;
|
|
34
|
+
/** TTS audio output sample rate (PCM16, Hz). Defaults to DEFAULT_TTS_SAMPLE_RATE. */
|
|
35
|
+
ttsSampleRate?: number | undefined;
|
|
36
|
+
/** Optional STT prompt injected via SttOpenOptions.sttPrompt. */
|
|
37
|
+
sttPrompt?: string | undefined;
|
|
38
|
+
/** Max LLM tool-call steps per turn. Defaults to 5. */
|
|
39
|
+
maxSteps?: number | undefined;
|
|
40
|
+
/** Tool selection policy passed to `streamText`. Defaults to `"auto"`. */
|
|
41
|
+
toolChoice?: ToolChoice | undefined;
|
|
42
|
+
/** Logger. Defaults to consoleLogger. */
|
|
43
|
+
logger?: Logger | undefined;
|
|
44
|
+
/** Skip the initial greeting (used for session resume). */
|
|
45
|
+
skipGreeting?: boolean | undefined;
|
|
46
|
+
}
|
|
47
|
+
/** Create a pipeline-mode Transport (STT → LLM → TTS). */
|
|
48
|
+
export declare function createPipelineTransport(opts: PipelineTransportOptions): Transport;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { Logger, S2SConfig } from "../runtime-config.ts";
|
|
2
|
+
import { type CreateS2sWebSocket, connectS2s, type S2sSessionConfig, type S2sToolSchema } from "../s2s.ts";
|
|
3
|
+
import type { Transport, TransportCallbacks } from "./types.ts";
|
|
4
|
+
/** @internal Exposed for testing — allows spying on connectS2s in unit tests. */
|
|
5
|
+
export declare const _internals: {
|
|
6
|
+
connectS2s: typeof connectS2s;
|
|
7
|
+
};
|
|
8
|
+
export type S2sTransportOptions = {
|
|
9
|
+
apiKey: string;
|
|
10
|
+
s2sConfig: S2SConfig;
|
|
11
|
+
sessionConfig: S2sSessionConfig;
|
|
12
|
+
toolSchemas: S2sToolSchema[];
|
|
13
|
+
callbacks: TransportCallbacks;
|
|
14
|
+
sid: string;
|
|
15
|
+
agent: string;
|
|
16
|
+
createWebSocket?: CreateS2sWebSocket;
|
|
17
|
+
logger?: Logger;
|
|
18
|
+
};
|
|
19
|
+
export declare function createS2sTransport(opts: S2sTransportOptions): Transport;
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type { SessionErrorCode } from "../../sdk/protocol.ts";
|
|
2
|
+
import type { Message } from "../../sdk/types.ts";
|
|
3
|
+
/**
|
|
4
|
+
* Typed callbacks into the SessionCore. One per event the transport produces.
|
|
5
|
+
* Constructed at transport-creation time; no emitter.on-style indirection.
|
|
6
|
+
*/
|
|
7
|
+
export type TransportCallbacks = {
|
|
8
|
+
onReplyStarted(replyId: string): void;
|
|
9
|
+
onReplyDone(): void;
|
|
10
|
+
onCancelled(): void;
|
|
11
|
+
onAudioChunk(bytes: Uint8Array): void;
|
|
12
|
+
onAudioDone(): void;
|
|
13
|
+
onUserTranscript(text: string): void;
|
|
14
|
+
onAgentTranscript(text: string, interrupted: boolean): void;
|
|
15
|
+
onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
|
|
16
|
+
onError(code: SessionErrorCode, message: string): void;
|
|
17
|
+
onSpeechStarted(): void;
|
|
18
|
+
onSpeechStopped(): void;
|
|
19
|
+
onSessionReady?(providerSessionId: string): void;
|
|
20
|
+
};
|
|
21
|
+
/** Minimal config a transport may receive at construction time. */
|
|
22
|
+
export type TransportSessionConfig = {
|
|
23
|
+
systemPrompt: string;
|
|
24
|
+
greeting?: string;
|
|
25
|
+
tools?: unknown[];
|
|
26
|
+
history?: Message[];
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Transport abstraction — one implementation per provider strategy
|
|
30
|
+
* (see `s2s-transport.ts`, `pipeline-transport.ts`).
|
|
31
|
+
*/
|
|
32
|
+
export interface Transport {
|
|
33
|
+
/** Open any underlying connections and send initial session config. */
|
|
34
|
+
start(): Promise<void>;
|
|
35
|
+
/** Tear down, flush, close. Idempotent. */
|
|
36
|
+
stop(): Promise<void>;
|
|
37
|
+
/** Forward user audio to the provider. */
|
|
38
|
+
sendUserAudio(bytes: Uint8Array): void;
|
|
39
|
+
/** Forward a tool result back to the provider's reply stream. */
|
|
40
|
+
sendToolResult(callId: string, result: string): void;
|
|
41
|
+
/** Cancel the currently in-flight reply (barge-in / client cancel). */
|
|
42
|
+
cancelReply(): void;
|
|
43
|
+
/** Re-send session config (S2S only; pipeline is a no-op). */
|
|
44
|
+
updateSession?(config: TransportSessionConfig): void;
|
|
45
|
+
}
|
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Audio validation is handled at the host transport layer (see server.ts).
|
|
5
5
|
*/
|
|
6
|
-
import type
|
|
6
|
+
import { type ClientSink } from "../sdk/protocol.ts";
|
|
7
7
|
import type { Logger } from "./runtime-config.ts";
|
|
8
|
-
import type {
|
|
8
|
+
import type { SessionCore } from "./session-core.ts";
|
|
9
9
|
/**
|
|
10
10
|
* Minimal WebSocket interface accepted by {@link wireSessionSocket}.
|
|
11
11
|
*
|
|
@@ -25,11 +25,15 @@ export type SessionWebSocket = {
|
|
|
25
25
|
/** Options for wiring a WebSocket to a session. */
|
|
26
26
|
export type WsSessionOptions = {
|
|
27
27
|
/** Map of active sessions (session is added on open, removed on close). */
|
|
28
|
-
sessions: Map<string,
|
|
28
|
+
sessions: Map<string, SessionCore>;
|
|
29
29
|
/** Factory function to create a session for a given ID and client sink. */
|
|
30
|
-
createSession: (sessionId: string, client: ClientSink) =>
|
|
30
|
+
createSession: (sessionId: string, client: ClientSink) => SessionCore;
|
|
31
31
|
/** Protocol config sent to the client immediately on connect. */
|
|
32
|
-
readyConfig:
|
|
32
|
+
readyConfig: {
|
|
33
|
+
audioFormat: "pcm16";
|
|
34
|
+
sampleRate: number;
|
|
35
|
+
ttsSampleRate: number;
|
|
36
|
+
};
|
|
33
37
|
/** Additional key-value pairs included in log messages. */
|
|
34
38
|
logContext?: Record<string, string>;
|
|
35
39
|
/** Callback invoked when the WebSocket connection opens. */
|
|
@@ -48,12 +52,12 @@ export type WsSessionOptions = {
|
|
|
48
52
|
resumeFrom?: string;
|
|
49
53
|
};
|
|
50
54
|
/**
|
|
51
|
-
* Attaches session lifecycle handlers to a native WebSocket using
|
|
52
|
-
*
|
|
55
|
+
* Attaches session lifecycle handlers to a native WebSocket using JSON text
|
|
56
|
+
* frames for control messages and raw PCM16 binary frames for audio.
|
|
53
57
|
*
|
|
54
58
|
* Connection flow:
|
|
55
|
-
* 1. WebSocket opens → server sends
|
|
56
|
-
* 2. Client sets up audio → sends
|
|
57
|
-
* 3. If reconnecting → client sends
|
|
59
|
+
* 1. WebSocket opens → server sends JSON CONFIG frame with sampleRate, ttsSampleRate, sessionId
|
|
60
|
+
* 2. Client sets up audio → sends JSON AUDIO_READY frame
|
|
61
|
+
* 3. If reconnecting → client sends JSON HISTORY frame with prior messages
|
|
58
62
|
*/
|
|
59
63
|
export declare function wireSessionSocket(ws: SessionWebSocket, opts: WsSessionOptions): void;
|
|
@@ -104,12 +104,14 @@ export declare function toAgentConfig(src: AgentConfigSource): AgentConfig;
|
|
|
104
104
|
* etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
|
|
105
105
|
*/
|
|
106
106
|
export declare const ToolSchemaSchema: z.ZodObject<{
|
|
107
|
+
type: z.ZodLiteral<"function">;
|
|
107
108
|
name: z.ZodString;
|
|
108
109
|
description: z.ZodString;
|
|
109
110
|
parameters: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
110
111
|
}, z.core.$strip>;
|
|
111
112
|
/** Serialized tool schema — derived from {@link ToolSchemaSchema}. */
|
|
112
113
|
export type ToolSchema = {
|
|
114
|
+
type: "function";
|
|
113
115
|
name: string;
|
|
114
116
|
description: string;
|
|
115
117
|
parameters: JSONSchema7;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { a as toAgentConfig, i as agentToolsToSchemas, n as EMPTY_PARAMS, o as ProviderDescriptorSchema, r as ToolSchemaSchema, s as assertProviderTriple, t as AgentConfigSchema } from "../_internal-types-
|
|
1
|
+
import { a as toAgentConfig, i as agentToolsToSchemas, n as EMPTY_PARAMS, o as ProviderDescriptorSchema, r as ToolSchemaSchema, s as assertProviderTriple, t as AgentConfigSchema } from "../_internal-types-DFL07G3f.js";
|
|
2
2
|
export { AgentConfigSchema, EMPTY_PARAMS, ProviderDescriptorSchema, ToolSchemaSchema, agentToolsToSchemas, assertProviderTriple, toAgentConfig };
|
package/dist/sdk/protocol.d.ts
CHANGED
|
@@ -127,16 +127,17 @@ export type ClientEvent = z.infer<typeof ClientEventSchema>;
|
|
|
127
127
|
/**
|
|
128
128
|
* Typed interface for pushing session events to a connected client.
|
|
129
129
|
*
|
|
130
|
-
*
|
|
130
|
+
* Events (`event`, `playAudioDone`) send JSON text frames. Audio chunks
|
|
131
|
+
* (`playAudioChunk`) send raw PCM16 binary frames.
|
|
131
132
|
*/
|
|
132
133
|
export interface ClientSink {
|
|
133
|
-
/**
|
|
134
|
+
/** True when the underlying connection is open and will accept calls. */
|
|
134
135
|
readonly open: boolean;
|
|
135
|
-
/** Push a session event to the client. */
|
|
136
|
+
/** Push a session event (JSON text frame) to the client. */
|
|
136
137
|
event(e: ClientEvent): void;
|
|
137
|
-
/** Send a single
|
|
138
|
+
/** Send a single PCM16 audio chunk (raw binary frame) to the client. */
|
|
138
139
|
playAudioChunk(chunk: Uint8Array): void;
|
|
139
|
-
/** Signal that TTS audio is complete. */
|
|
140
|
+
/** Signal that TTS audio is complete (JSON text frame). */
|
|
140
141
|
playAudioDone(): void;
|
|
141
142
|
}
|
|
142
143
|
/** Zod schema for {@link ReadyConfig}. */
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as anthropic, t as ANTHROPIC_KIND } from "../../anthropic-
|
|
1
|
+
import { n as anthropic, t as ANTHROPIC_KIND } from "../../anthropic-CcLZygAr.js";
|
|
2
2
|
export { ANTHROPIC_KIND, anthropic };
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deepgram Nova streaming STT factory — returns a pure descriptor.
|
|
3
|
+
*
|
|
4
|
+
* The descriptor flows through the bundle → server → runtime pipeline
|
|
5
|
+
* without importing the `@deepgram/sdk` package. The host-side resolver in
|
|
6
|
+
* `host/providers/resolve.ts` turns it into an openable {@link SttOpener}
|
|
7
|
+
* during `createRuntime`.
|
|
8
|
+
*/
|
|
9
|
+
import type { SttProvider } from "../../providers.ts";
|
|
10
|
+
/** Kind tag recognised by the host-side resolver. */
|
|
11
|
+
export declare const DEEPGRAM_KIND: "deepgram";
|
|
12
|
+
export interface DeepgramOptions {
|
|
13
|
+
/**
|
|
14
|
+
* Streaming speech model. Defaults to `"nova-3"`. Any string is forwarded
|
|
15
|
+
* to the SDK unchanged, which allows opt-in to future models.
|
|
16
|
+
*/
|
|
17
|
+
model?: "nova-3" | "nova-2" | string;
|
|
18
|
+
/**
|
|
19
|
+
* BCP-47 language code for transcription. Defaults to `"en"`.
|
|
20
|
+
* Examples: `"en"`, `"es"`, `"fr"`, `"de"`.
|
|
21
|
+
*/
|
|
22
|
+
language?: string;
|
|
23
|
+
}
|
|
24
|
+
export type DeepgramProvider = SttProvider & {
|
|
25
|
+
readonly kind: typeof DEEPGRAM_KIND;
|
|
26
|
+
readonly options: DeepgramOptions;
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Build a Deepgram STT descriptor.
|
|
30
|
+
*
|
|
31
|
+
* The API key is resolved host-side from the agent's env
|
|
32
|
+
* (`DEEPGRAM_API_KEY`); there is no factory-time key parameter, so the
|
|
33
|
+
* descriptor stays free of secrets and safe to serialize.
|
|
34
|
+
*/
|
|
35
|
+
export declare function deepgram(opts?: DeepgramOptions): DeepgramProvider;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as assemblyAI, t as ASSEMBLYAI_KIND } from "../../assemblyai-
|
|
2
|
-
export { ASSEMBLYAI_KIND, assemblyAI };
|
|
1
|
+
import { i as deepgram, n as assemblyAI, r as DEEPGRAM_KIND, t as ASSEMBLYAI_KIND } from "../../assemblyai-C969QGi4.js";
|
|
2
|
+
export { ASSEMBLYAI_KIND, DEEPGRAM_KIND, assemblyAI, deepgram };
|
|
@@ -8,9 +8,15 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { TtsProvider } from "../../providers.ts";
|
|
10
10
|
export declare const CARTESIA_KIND: "cartesia";
|
|
11
|
+
/**
|
|
12
|
+
* Default voice used when callers invoke `cartesia()` with no `voice`. This
|
|
13
|
+
* is the same voice the example templates ship with, so a bare `cartesia()`
|
|
14
|
+
* works out of the box for new agents.
|
|
15
|
+
*/
|
|
16
|
+
export declare const CARTESIA_DEFAULT_VOICE = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
11
17
|
export interface CartesiaOptions {
|
|
12
|
-
/** Cartesia voice ID.
|
|
13
|
-
voice
|
|
18
|
+
/** Cartesia voice ID. Defaults to {@link CARTESIA_DEFAULT_VOICE}. */
|
|
19
|
+
voice?: string;
|
|
14
20
|
/** Model ID. Defaults to `"sonic-2"`. */
|
|
15
21
|
model?: string;
|
|
16
22
|
/** Spoken language hint. Defaults to `"en"`. */
|
|
@@ -18,6 +24,8 @@ export interface CartesiaOptions {
|
|
|
18
24
|
}
|
|
19
25
|
export type CartesiaProvider = TtsProvider & {
|
|
20
26
|
readonly kind: typeof CARTESIA_KIND;
|
|
21
|
-
readonly options: CartesiaOptions
|
|
27
|
+
readonly options: CartesiaOptions & {
|
|
28
|
+
voice: string;
|
|
29
|
+
};
|
|
22
30
|
};
|
|
23
|
-
export declare function cartesia(opts
|
|
31
|
+
export declare function cartesia(opts?: CartesiaOptions): CartesiaProvider;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rime TTS factory — returns a pure descriptor.
|
|
3
|
+
*
|
|
4
|
+
* See `sdk/providers/stt/assemblyai.ts` for the descriptor/opener split;
|
|
5
|
+
* the host-side resolver in `host/providers/resolve.ts` turns this into an
|
|
6
|
+
* openable {@link TtsOpener} during `createRuntime` using the
|
|
7
|
+
* `RIME_API_KEY` from the agent's env.
|
|
8
|
+
*
|
|
9
|
+
* Language codes follow ISO 639-3 (three-letter): `"eng"`, `"fra"`, etc.
|
|
10
|
+
* This differs from many APIs that use ISO 639-1 two-letter codes like `"en"`.
|
|
11
|
+
*/
|
|
12
|
+
import type { TtsProvider } from "../../providers.ts";
|
|
13
|
+
export declare const RIME_KIND: "rime";
|
|
14
|
+
/**
|
|
15
|
+
* Default Rime speaker used when callers invoke `rime()` with no `voice`.
|
|
16
|
+
* `cove` is a `mistv2` speaker, matching the default model below — so a
|
|
17
|
+
* bare `rime()` works out of the box for new agents.
|
|
18
|
+
*/
|
|
19
|
+
export declare const RIME_DEFAULT_VOICE = "cove";
|
|
20
|
+
export interface RimeOptions {
|
|
21
|
+
/** Rime speaker ID. Defaults to {@link RIME_DEFAULT_VOICE}. */
|
|
22
|
+
voice?: string;
|
|
23
|
+
/**
|
|
24
|
+
* Rime model ID. Defaults to `"mistv2"` (Rime's most compatible model).
|
|
25
|
+
* Common values: `"mistv2"`, `"arcana"`.
|
|
26
|
+
*/
|
|
27
|
+
model?: "mistv2" | "arcana" | string;
|
|
28
|
+
/**
|
|
29
|
+
* Spoken language. Uses ISO 639-3 (three-letter codes).
|
|
30
|
+
* Defaults to `"eng"` (English).
|
|
31
|
+
*
|
|
32
|
+
* Note: Rime uses 3-letter codes — use `"eng"` not `"en"`.
|
|
33
|
+
*/
|
|
34
|
+
language?: string;
|
|
35
|
+
}
|
|
36
|
+
export type RimeProvider = TtsProvider & {
|
|
37
|
+
readonly kind: typeof RIME_KIND;
|
|
38
|
+
readonly options: RimeOptions & {
|
|
39
|
+
voice: string;
|
|
40
|
+
};
|
|
41
|
+
};
|
|
42
|
+
export declare function rime(opts?: RimeOptions): RimeProvider;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as cartesia, t as
|
|
2
|
-
export { CARTESIA_KIND, cartesia };
|
|
1
|
+
import { a as RIME_KIND, i as RIME_DEFAULT_VOICE, n as CARTESIA_KIND, o as rime, r as cartesia, t as CARTESIA_DEFAULT_VOICE } from "../../cartesia-BfQPOQ7Y.js";
|
|
2
|
+
export { CARTESIA_DEFAULT_VOICE, CARTESIA_KIND, RIME_DEFAULT_VOICE, RIME_KIND, cartesia, rime };
|
|
@@ -326,21 +326,24 @@ export function createFakeLanguageModel(
|
|
|
326
326
|
options:
|
|
327
327
|
| { script: ScriptedPart[]; delayMs?: number }
|
|
328
328
|
| { steps: ScriptedPart[][]; delayMs?: number },
|
|
329
|
-
): LanguageModel {
|
|
329
|
+
): LanguageModel & { readonly calls: readonly Record<string, unknown>[] } {
|
|
330
330
|
const delayMs = options.delayMs;
|
|
331
331
|
const steps: ScriptedPart[][] = "steps" in options ? options.steps : [options.script];
|
|
332
332
|
let stepIndex = 0;
|
|
333
|
+
const calls: Record<string, unknown>[] = [];
|
|
333
334
|
const model = {
|
|
334
335
|
specificationVersion: "v3" as const,
|
|
335
336
|
provider: "fake-llm",
|
|
336
337
|
modelId: "fake-llm-1",
|
|
337
338
|
supportedUrls: {} as Record<string, RegExp[]>,
|
|
339
|
+
calls,
|
|
338
340
|
async doGenerate(): Promise<never> {
|
|
339
341
|
throw new Error("fake LLM: doGenerate not implemented");
|
|
340
342
|
},
|
|
341
|
-
async doStream(opts: { abortSignal?: AbortSignal }): Promise<{
|
|
343
|
+
async doStream(opts: Record<string, unknown> & { abortSignal?: AbortSignal }): Promise<{
|
|
342
344
|
stream: ReadableStream<StreamPart>;
|
|
343
345
|
}> {
|
|
346
|
+
calls.push(opts);
|
|
344
347
|
// Advance one step per call; after the last scripted step, keep
|
|
345
348
|
// yielding an empty step so an unexpected extra call completes cleanly.
|
|
346
349
|
const current = steps[stepIndex] ?? [];
|
|
@@ -353,5 +356,5 @@ export function createFakeLanguageModel(
|
|
|
353
356
|
return { stream };
|
|
354
357
|
},
|
|
355
358
|
};
|
|
356
|
-
return model as unknown as LanguageModel;
|
|
359
|
+
return model as unknown as LanguageModel & { readonly calls: readonly Record<string, unknown>[] };
|
|
357
360
|
}
|