@craftedxp/voice-js 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/node.d.ts ADDED
@@ -0,0 +1,270 @@
1
+ type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
2
+ type TranscriptEntry = {
3
+ id: string;
4
+ role: 'user';
5
+ text: string;
6
+ committed: boolean;
7
+ } | {
8
+ id: string;
9
+ role: 'agent';
10
+ text: string;
11
+ interrupted?: boolean;
12
+ } | {
13
+ id: string;
14
+ role: 'tool';
15
+ text: string;
16
+ } | {
17
+ id: string;
18
+ role: 'system';
19
+ text: string;
20
+ };
21
+ type CallErrorCode = 'missing_credentials' | 'forbidden' | 'mic_denied' | 'mic_start_failed' | 'audio_session_failed' | 'token_expired' | 'token_invalid' | 'unauthorized' | 'network_unreachable' | 'socket_error' | 'payment_required' | 'not_found' | 'silence_timeout' | 'server_error';
22
+ interface CallError {
23
+ code: CallErrorCode;
24
+ message: string;
25
+ }
26
+ type CallEndReason = 'agent_ended' | 'user_hangup' | 'timeout' | 'error';
27
+ interface CallEndEvent {
28
+ reason: CallEndReason;
29
+ errorCode?: CallErrorCode;
30
+ durationMs: number;
31
+ }
32
+ interface VolumeEvent {
33
+ input: number;
34
+ output: number;
35
+ }
36
+ type ServerMessage = Record<string, unknown> & {
37
+ type?: string;
38
+ };
39
+ interface ProtocolState {
40
+ state: CallState;
41
+ transcript: TranscriptEntry[];
42
+ agentBubbleId: string | null;
43
+ idCounter: number;
44
+ endReason: CallEndReason | null;
45
+ }
46
+ declare const createProtocolState: () => ProtocolState;
47
+ interface ProtocolCallbacks {
48
+ onState: (next: CallState) => void;
49
+ onTranscript: (entries: TranscriptEntry[]) => void;
50
+ onError: (err: CallError) => void;
51
+ onInterrupt: () => void;
52
+ onAgentTurnStart: () => void;
53
+ onCallEnd: (reason: CallEndReason) => void;
54
+ }
55
+ declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
56
+ interface BuildWsUrlArgs {
57
+ apiBase: string;
58
+ agentId: string;
59
+ token: string;
60
+ bargeIn?: boolean;
61
+ }
62
+ declare function buildWsUrl(args: BuildWsUrlArgs): string;
63
+
64
+ interface FetchTokenArgs {
65
+ /** The agent the SDK is about to call. */
66
+ agentId: string;
67
+ /**
68
+ * Optional consumer-side user identifier. Round-tripped to the server
69
+ * as `contactId` for Phase 11 contact memory. The SDK does not
70
+ * inspect this; your backend uses it to scope the token mint.
71
+ */
72
+ userId?: string;
73
+ /**
74
+ * Per-call structured context lowered into the agent's effective
75
+ * system prompt server-side at session open. Opaque to the SDK.
76
+ */
77
+ context?: Record<string, unknown>;
78
+ /**
79
+ * String key/value pairs round-tripped on the `call.ended` webhook.
80
+ * Capped at 1 KB total server-side. NOT lowered into the system prompt.
81
+ */
82
+ metadata?: Record<string, string>;
83
+ }
84
+ type FetchToken = (args: FetchTokenArgs) => Promise<string>;
85
+ interface VoiceClientConfig {
86
+ /**
87
+ * Full HTTPS URL of the Voxline server. The WebSocket scheme is
88
+ * derived: `https` → `wss`, `http` → `ws`. No trailing slash needed.
89
+ */
90
+ apiBase: string;
91
+ /**
92
+ * Called by the SDK whenever it needs a fresh `ct_` token (initial
93
+ * connect; mid-call refresh on `token_expired`). Your implementation
94
+ * should hit YOUR backend, which holds the `sk_` API key and mints
95
+ * via `POST /v1/call-tokens` (or `client.callTokens.mint` from
96
+ * @craftedxp/sdk-node). Never embed `sk_` in JS code that ships to a
97
+ * client.
98
+ */
99
+ fetchToken: FetchToken;
100
+ /**
101
+ * Optional metadata applied to EVERY startCall. Per-call `metadata`
102
+ * in `startCall` is merged on top (per-call wins on key conflicts).
103
+ * Useful for dashboard-wide tags like `{ surface: 'web', appVersion }`.
104
+ */
105
+ defaultMetadata?: Record<string, string>;
106
+ /**
107
+ * Optional context applied to EVERY startCall. Per-call `context` in
108
+ * `startCall` is merged on top. Useful for cross-call invariants like
109
+ * the signed-in user's locale.
110
+ */
111
+ defaultContext?: Record<string, unknown>;
112
+ }
113
+ interface StartCallOptions {
114
+ /** The agent to call. */
115
+ agentId: string;
116
+ /** Per-call user identifier. Passed through to fetchToken as `userId`. */
117
+ userId?: string;
118
+ /**
119
+ * Per-call structured context. Merged on top of `defaultContext`
120
+ * configured at factory time.
121
+ */
122
+ context?: Record<string, unknown>;
123
+ /**
124
+ * Per-call metadata. Merged on top of `defaultMetadata` configured
125
+ * at factory time.
126
+ */
127
+ metadata?: Record<string, string>;
128
+ /**
129
+ * When false, the SDK + server stay full-duplex but barge-in is
130
+ * suppressed. Useful for alarm-style flows where the user shouldn't
131
+ * accidentally interrupt the script. Default true.
132
+ */
133
+ bargeIn?: boolean;
134
+ /**
135
+ * Test-only escape hatch — pass a pre-minted `ct_` token directly and skip
136
+ * the `fetchToken` call. Don't use this in production code: tokens
137
+ * expire and the SDK can't re-mint without the callback.
138
+ */
139
+ token?: string;
140
+ onStateChange?: (state: CallState) => void;
141
+ onTranscript?: (entries: TranscriptEntry[]) => void;
142
+ onError?: (err: CallError) => void;
143
+ onEnd?: (end: CallEndEvent) => void;
144
+ /** Volume-meter event for VU UIs. ~10 Hz cadence (browser bundle only). */
145
+ onVolume?: (vol: VolumeEvent) => void;
146
+ }
147
+ interface Call {
148
+ /** Current state. Snapshot — subscribe via onStateChange for live updates. */
149
+ readonly state: CallState;
150
+ /** Full transcript so far. Snapshot — subscribe via onTranscript for live updates. */
151
+ readonly transcript: TranscriptEntry[];
152
+ /** True after `mute()` and before `unmute()`. */
153
+ readonly isMuted: boolean;
154
+ /** End the call locally. Closes the WS, stops the mic, fires onEnd. Idempotent. */
155
+ end: () => void;
156
+ /** Mute mic frames. Wire stays active so server endpointing doesn't false-positive. Idempotent. */
157
+ mute: () => void;
158
+ /** Unmute mic frames. Idempotent. */
159
+ unmute: () => void;
160
+ }
161
+ interface VoiceClientFactory {
162
+ /** Read back the resolved config (post trailing-slash normalisation). */
163
+ readonly config: VoiceClientConfig;
164
+ /**
165
+ * Open a fresh call. Resolves when the WS is open; rejects on
165
+ * pre-flight failure (missing config, fetchToken throw, etc.).
166
+ * Mid-call failures arrive via the per-call `onError` callback — they
167
+ * don't reject this promise.
169
+ */
170
+ startCall: (options: StartCallOptions) => Promise<Call>;
171
+ }
172
+
173
+ type RWSEvent = {
174
+ type: 'open';
175
+ } | {
176
+ type: 'reconnected';
177
+ } | {
178
+ type: 'message';
179
+ data: string | ArrayBuffer;
180
+ } | {
181
+ type: 'close';
182
+ code: number;
183
+ reason: string;
184
+ permanent: boolean;
185
+ } | {
186
+ type: 'error';
187
+ error: Error;
188
+ };
189
+ interface WebSocketLike {
190
+ binaryType: string;
191
+ readyState: number;
192
+ onopen: ((ev: unknown) => void) | null;
193
+ onmessage: ((ev: {
194
+ data: string | ArrayBuffer;
195
+ }) => void) | null;
196
+ onerror: ((ev: unknown) => void) | null;
197
+ onclose: ((ev: {
198
+ code: number;
199
+ reason: string;
200
+ }) => void) | null;
201
+ send: (data: string | ArrayBuffer | ArrayBufferView) => void;
202
+ close: (code?: number, reason?: string) => void;
203
+ }
204
+ type WebSocketFactory = (url: string) => WebSocketLike;
205
+ interface RWSOptions {
206
+ url: string;
207
+ wsFactory: WebSocketFactory;
208
+ maxRetries?: number;
209
+ initialBackoffMs?: number;
210
+ maxBackoffMs?: number;
211
+ }
212
+ declare const createReconnectingWebSocket: (options: RWSOptions, onEvent: (ev: RWSEvent) => void) => {
213
+ send: (data: string | ArrayBuffer | ArrayBufferView) => void;
214
+ close: (code?: number, reason?: string) => void;
215
+ readyState: () => number;
216
+ };
217
+ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
218
+
219
+ interface NodeStartCallOptions extends StartCallOptions {
220
+ /**
221
+ * Fires for each binary PCM frame the server pushes (Int16 LE mono
222
+ * @ 16 kHz — same as the browser playback path). Wire to your
223
+ * preferred output: write to a `sox -t raw -r 16000 -e signed -b 16
224
+ * -c 1 - default` subprocess, queue into PortAudio, relay over RTP,
225
+ * etc. If you don't supply this callback, agent audio is dropped on
226
+ * the floor.
227
+ */
228
+ onAudioChunk?: (pcm: ArrayBuffer) => void;
229
+ }
230
+ interface NodeCall extends Call {
231
+ /**
232
+ * Push one mic frame to the server. Expected: Int16 LE mono PCM @
233
+ * 16 kHz. Capture cadence ~100 ms / ~3.2 KB per frame is fine.
234
+ * Returns `false` if the WS isn't open yet (caller may want to
235
+ * back-pressure or drop).
236
+ */
237
+ sendAudioChunk: (pcm: ArrayBuffer | ArrayBufferView) => boolean;
238
+ }
239
+
240
+ /**
241
+ * One-time SDK setup for Node.js / Electron-main consumers. Returns a
242
+ * factory you call `startCall` on for every voice call. Same shape as
243
+ * the browser entry but the returned `Call` has an extra
244
+ * `sendAudioChunk` method for raw-PCM input, and `startCall` accepts
245
+ * an `onAudioChunk` callback for raw-PCM output.
246
+ *
247
+ * Example (vterm-style CLI, sox sub-process for I/O):
248
+ *
249
+ * import { configureVoiceClient } from '@craftedxp/voice-js/node'
250
+ * import { spawn } from 'child_process'
251
+ *
252
+ * const voice = configureVoiceClient({
253
+ * apiBase: 'https://api.your-server.com',
254
+ * fetchToken: async () => mintFromMyBackend(),
255
+ * })
256
+ *
257
+ * const mic = spawn('sox', [...recArgs, '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-t', 'raw', '-'])
258
+ * const spk = spawn('sox', ['-t', 'raw', '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-', ...playArgs])
259
+ *
260
+ * const call = await voice.startCall({
261
+ * agentId: 'agt_xxx',
262
+ * onAudioChunk: (pcm) => spk.stdin.write(Buffer.from(pcm)),
263
+ * onEnd: () => { mic.kill(); spk.stdin.end() },
264
+ * })
265
+ *
266
+ * mic.stdout.on('data', (chunk) => call.sendAudioChunk(chunk))
267
+ */
268
+ declare function configureVoiceClient(config: VoiceClientConfig): Omit<VoiceClientFactory, 'startCall'> & { startCall: (options: NodeStartCallOptions) => Promise<NodeCall> };
269
+
270
+ export { type BuildWsUrlArgs, type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type FetchToken, type FetchTokenArgs, type NodeCall, type NodeStartCallOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createProtocolState, createReconnectingWebSocket, handleServerMessage };