@craftedxp/voice-js 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CONSUMING.md CHANGED
@@ -42,8 +42,8 @@ For the landing dashboard in this repo:
42
42
  ```jsonc
43
43
  {
44
44
  "dependencies": {
45
- "@craftedxp/voice-js": "file:../sdk/voice-js"
46
- }
45
+ "@craftedxp/voice-js": "file:../sdk/voice-js",
46
+ },
47
47
  }
48
48
  ```
49
49
 
@@ -101,6 +101,7 @@ Browsers require a user gesture to start `AudioContext`. The SDK calls `audioCon
101
101
  ## CSP / mic permission
102
102
 
103
103
  For consumers running on a strict CSP, allow:
104
+
104
105
  - `connect-src wss://your-voxline-server.com`
105
106
  - `worker-src 'self' blob:` (the audio worklet is registered from a Blob URL)
106
107
 
@@ -113,6 +114,7 @@ The SDK doesn't log to the console by default. To see protocol-level events, wir
113
114
  ## Updating
114
115
 
115
116
  When the SDK changes:
117
+
116
118
  - **Tarball path:** re-`npm pack` then `npm install <newTgz>` in the consumer.
117
119
  - **`file:` path:** `npm run build` in `sdk/voice-js/` (refreshes `dist/`); the consumer picks it up on the next bundler refresh.
118
120
  - **Registry path:** bump the version in your `package.json` and `npm install`.
package/DEVELOPING.md CHANGED
@@ -26,6 +26,7 @@ Either way, the consumer's bundler (Webpack / Vite / esbuild / Next) picks up th
26
26
  - `dist/embed.iife.js` — minified IIFE for `<script>` embed; bundles the browser entry inline.
27
27
 
28
28
  Source files map to:
29
+
29
30
  - `src/browser.ts` — entry, factory implementation, public re-exports.
30
31
  - `src/node.ts` — entry, dynamic `ws` loader, factory implementation.
31
32
  - `src/VoiceClient.ts` — browser `BrowserVoiceClient` implementing the `Call` interface.
package/README.md CHANGED
@@ -4,7 +4,7 @@ JS SDK for embedding a voice agent call in any JS environment — browser tabs,
4
4
 
5
5
  Companion to [`@craftedxp/voice-rn`](https://www.npmjs.com/package/@craftedxp/voice-rn) (React Native) and [`@craftedxp/sdk-node`](https://www.npmjs.com/package/@craftedxp/sdk-node) (server-side `sk_` SDK).
6
6
 
7
- > **Internal testing release.** API surface may evolve before a stable release. **0.2.0** is a breaking rename + redesign of the previous `@voxline/web@0.1.0` — the singleton-`VoiceClient`-with-`apiKey` pattern is gone in favour of a `configureVoiceClient({ fetchToken })` factory that mirrors `voice-rn` 0.3.x. See [Migrating from `@voxline/web`](#migrating-from-voxlineweb) below.
7
+ > **Internal testing release.** API surface may evolve before a stable release. **0.3.0** adds [client tools](#client-tools) — handlers the agent's LLM can call on the consumer's machine. **0.2.0** was a breaking rename + redesign of the previous `@voxline/web@0.1.0` — the singleton-`VoiceClient`-with-`apiKey` pattern is gone in favour of a `configureVoiceClient({ fetchToken })` factory that mirrors `voice-rn` 0.3.x. See [Migrating from `@voxline/web`](#migrating-from-voxlineweb) below.
8
8
 
9
9
  ## Install
10
10
 
@@ -67,9 +67,9 @@ const call = await voice.startCall({
67
67
  onEnd: ({ reason, durationMs }) => log('ended', reason, durationMs),
68
68
  })
69
69
 
70
- call.mute() // gate mic frames (server still sees wire cadence)
70
+ call.mute() // gate mic frames (server still sees wire cadence)
71
71
  call.unmute()
72
- call.end() // close WS + stop mic + fire onEnd
72
+ call.end() // close WS + stop mic + fire onEnd
73
73
  ```
74
74
 
75
75
  ## Quick start (Node / Electron-main)
@@ -84,13 +84,42 @@ const voice = configureVoiceClient({
84
84
  })
85
85
 
86
86
  // Bring your own audio. Example: sox subprocesses for mic + speakers.
87
- const mic = spawn('sox', ['-d', '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-t', 'raw', '-'])
88
- const spk = spawn('sox', ['-t', 'raw', '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-', '-d'])
87
+ const mic = spawn('sox', [
88
+ '-d',
89
+ '-r',
90
+ '16000',
91
+ '-c',
92
+ '1',
93
+ '-b',
94
+ '16',
95
+ '-e',
96
+ 'signed',
97
+ '-t',
98
+ 'raw',
99
+ '-',
100
+ ])
101
+ const spk = spawn('sox', [
102
+ '-t',
103
+ 'raw',
104
+ '-r',
105
+ '16000',
106
+ '-c',
107
+ '1',
108
+ '-b',
109
+ '16',
110
+ '-e',
111
+ 'signed',
112
+ '-',
113
+ '-d',
114
+ ])
89
115
 
90
116
  const call = await voice.startCall({
91
117
  agentId: 'agt_xxx',
92
118
  onAudioChunk: (pcm) => spk.stdin.write(Buffer.from(pcm)),
93
- onEnd: () => { mic.kill(); spk.stdin.end() },
119
+ onEnd: () => {
120
+ mic.kill()
121
+ spk.stdin.end()
122
+ },
94
123
  })
95
124
 
96
125
  mic.stdout.on('data', (chunk) => call.sendAudioChunk(chunk))
@@ -102,30 +131,31 @@ The Node bundle has the same `configureVoiceClient` / `startCall` shape, plus an
102
131
 
103
132
  ### `configureVoiceClient(config)`
104
133
 
105
- | Field | Type | Notes |
106
- |---|---|---|
107
- | `apiBase` | `string` | Full HTTPS URL of the Voxline server. WS scheme derived: `https`→`wss`. Trailing slash optional. |
108
- | `fetchToken` | `(args) => Promise<string>` | Called by the SDK whenever it needs a fresh `ct_`. Mirrors `@craftedxp/voice-rn`'s shape exactly — `{ agentId, userId?, context?, metadata? }`. |
109
- | `defaultMetadata` | `Record<string, string>?` | Applied to every `startCall`. Per-call merges on top. |
110
- | `defaultContext` | `Record<string, unknown>?` | Applied to every `startCall`. Per-call merges on top. |
134
+ | Field | Type | Notes |
135
+ | ----------------- | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
136
+ | `apiBase` | `string` | Full HTTPS URL of the Voxline server. WS scheme derived: `https`→`wss`. Trailing slash optional. |
137
+ | `fetchToken` | `(args) => Promise<string>` | Called by the SDK whenever it needs a fresh `ct_`. Mirrors `@craftedxp/voice-rn`'s shape exactly — `{ agentId, userId?, context?, metadata? }`. |
138
+ | `defaultMetadata` | `Record<string, string>?` | Applied to every `startCall`. Per-call merges on top. |
139
+ | `defaultContext` | `Record<string, unknown>?` | Applied to every `startCall`. Per-call merges on top. |
111
140
 
112
141
  Returns a `VoiceClientFactory` with one method:
113
142
 
114
143
  ### `factory.startCall(options)`
115
144
 
116
- | Field | Type | Notes |
117
- |---|---|---|
118
- | `agentId` | `string` | Required. |
119
- | `userId` | `string?` | Round-tripped to fetchToken as `userId`; server uses it for contact memory. |
120
- | `context` | `Record<string, unknown>?` | Per-call structured context. Merged on top of `defaultContext`. Lowered into the agent's system prompt server-side. |
121
- | `metadata` | `Record<string, string>?` | Per-call key/value. Merged on top of `defaultMetadata`. Round-tripped on `call.ended` webhook. NOT lowered into the prompt. |
122
- | `bargeIn` | `boolean?` | Default `true`. Set `false` for alarm-style flows where the user shouldn't accidentally interrupt the script. |
123
- | `token` | `string?` | **Test-only escape hatch** pre-minted `ct_`, bypasses `fetchToken`. Don't use in production. |
124
- | `onStateChange` | `(state) => void` | Fires on every state machine transition. |
125
- | `onTranscript` | `(entries) => void` | Fires on every transcript update. |
126
- | `onVolume` | `({ input, output }) => void` | 0-1 RMS. ~10 Hz cadence. Browser bundle only. |
127
- | `onError` | `(err) => void` | Stable `code` from `CallErrorCode`; matches `voice-rn` codes where overlap. |
128
- | `onEnd` | `({ reason, errorCode?, durationMs }) => void` | Fires once when the call ends. |
145
+ | Field | Type | Notes |
146
+ | --------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
147
+ | `agentId` | `string` | Required. |
148
+ | `userId` | `string?` | Round-tripped to fetchToken as `userId`; server uses it for contact memory. |
149
+ | `context` | `Record<string, unknown>?` | Per-call structured context. Merged on top of `defaultContext`. Lowered into the agent's system prompt server-side. |
150
+ | `metadata` | `Record<string, string>?` | Per-call key/value. Merged on top of `defaultMetadata`. Round-tripped on `call.ended` webhook. NOT lowered into the prompt. |
151
+ | `bargeIn` | `boolean?` | Default `true`. Set `false` for alarm-style flows where the user shouldn't accidentally interrupt the script. |
152
+ | `clientTools` | `ClientToolMap?` | Per-call client tools the agent's LLM can invoke. See the [Client tools](#client-tools) section below. Validated synchronously at `startCall` — bad input throws. |
153
+ | `token` | `string?` | **Test-only escape hatch** — pre-minted `ct_`, bypasses `fetchToken`. Don't use in production. |
154
+ | `onStateChange` | `(state) => void` | Fires on every state machine transition. |
155
+ | `onTranscript` | `(entries) => void` | Fires on every transcript update. |
156
+ | `onVolume` | `({ input, output }) => void` | 0-1 RMS. ~10 Hz cadence. Browser bundle only. |
157
+ | `onError` | `(err) => void` | Stable `code` from `CallErrorCode`; matches `voice-rn` codes where overlap. |
158
+ | `onEnd` | `({ reason, errorCode?, durationMs }) => void` | Fires once when the call ends. |
129
159
 
130
160
  Resolves to a `Call` handle:
131
161
 
@@ -152,21 +182,75 @@ interface NodeCall extends Call {
152
182
 
153
183
  ```ts
154
184
  type CallState =
155
- | 'idle' | 'connecting' | 'listening'
156
- | 'user_speaking' | 'agent_speaking'
157
- | 'ended' | 'error'
185
+ | 'idle'
186
+ | 'connecting'
187
+ | 'listening'
188
+ | 'user_speaking'
189
+ | 'agent_speaking'
190
+ | 'ended'
191
+ | 'error'
158
192
 
159
193
  type CallErrorCode =
160
- | 'missing_credentials' | 'forbidden'
161
- | 'mic_denied' | 'mic_start_failed' | 'audio_session_failed'
162
- | 'token_expired' | 'token_invalid' | 'unauthorized'
163
- | 'network_unreachable' | 'socket_error'
164
- | 'payment_required' | 'not_found'
165
- | 'silence_timeout' | 'server_error'
194
+ | 'missing_credentials'
195
+ | 'forbidden'
196
+ | 'mic_denied'
197
+ | 'mic_start_failed'
198
+ | 'audio_session_failed'
199
+ | 'token_expired'
200
+ | 'token_invalid'
201
+ | 'unauthorized'
202
+ | 'network_unreachable'
203
+ | 'socket_error'
204
+ | 'payment_required'
205
+ | 'not_found'
206
+ | 'silence_timeout'
207
+ | 'server_error'
166
208
 
167
209
  type CallEndReason = 'agent_ended' | 'user_hangup' | 'timeout' | 'error'
168
210
  ```
169
211
 
212
+ ## Client tools
213
+
214
+ You can declare tools the agent's LLM can call **on the consumer's machine**. The
215
+ tool's handler runs in your app — the server has no access to it. Useful for
216
+ surface-only actions (read DOM state, hit a private API, mutate local storage,
217
+ control the UI).
218
+
219
+ ```ts
220
+ import { configureVoiceClient, type ClientToolMap } from '@craftedxp/voice-js'
221
+
222
+ const tools: ClientToolMap = {
223
+ addTodoItem: {
224
+ description: "Add an item to the user's todo list.",
225
+ parameters: {
226
+ type: 'object',
227
+ properties: { text: { type: 'string' } },
228
+ required: ['text'],
229
+ },
230
+ usage: 'Call when the user asks to add or capture a task.',
231
+ handler: async ({ text }) => {
232
+ await myAppApi.addTodo(String(text))
233
+ return `Added "${text}".`
234
+ },
235
+ },
236
+ }
237
+
238
+ const voice = configureVoiceClient({ apiBase: '...', fetchToken: async () => '...' })
239
+ const call = await voice.startCall({ agentId: 'agt_xxx', clientTools: tools })
240
+ ```
241
+
242
+ The SDK validates `clientTools` at `startCall` (sync, throws on malformed input),
243
+ then sends `client_tools_register` to the server right after `connected`. When
244
+ the agent's LLM invokes a registered tool, your handler runs and the SDK posts
245
+ the result back through the same WebSocket.
246
+
247
+ Handler return values are stringified (object → `JSON.stringify`) before being
248
+ sent back; throws become `{ error: ... }` frames. The server enforces a default
249
+ 10s timeout per tool call; set `timeoutMs` in your declaration to change it (maximum 30s).
250
+
251
+ For the full wire protocol, sequencing, and constraints see
252
+ [`docs/integration-echocheck.md`](../../docs/integration-echocheck.md#client-declared-tools).
253
+
170
254
  ## Migrating from `@voxline/web`
171
255
 
172
256
  ```diff
@@ -239,7 +323,8 @@ Renders a floating call button with a Shadow-DOM transcript panel. Pre-mint the
239
323
 
240
324
  ## Status
241
325
 
242
- - **0.2.0** (current) — first `@craftedxp/voice-js` release. Browser + Node dual bundle, `fetchToken` factory, voice-rn 0.3.x parity. Migration path from `@voxline/web@0.1.0` documented above.
326
+ - **0.3.0** (current) — adds client-tools support. New `clientTools` option on `startCall` accepts a `ClientToolMap` (description, parameters, handler, optional usage/timeoutMs/example). Browser and Node bundles both supported. Backwards-compatible — existing consumers see no change.
327
+ - 0.2.0 — first `@craftedxp/voice-js` release. Browser + Node dual bundle, `fetchToken` factory, voice-rn 0.3.x parity. Migration path from `@voxline/web@0.1.0` documented above.
243
328
  - 0.1.0 — `@voxline/web`. Singleton `VoiceClient` class, `apiKey` accepted. Retired in 0.2.0; never published to npm so no deprecation window.
244
329
 
245
330
  See [`CONSUMING.md`](CONSUMING.md) for the full setup walkthrough and [`DEVELOPING.md`](DEVELOPING.md) for SDK-author iteration.
@@ -1,3 +1,18 @@
1
+ interface ClientTool {
2
+ description: string;
3
+ parameters: Record<string, unknown>;
4
+ usage?: string;
5
+ timeoutMs?: number;
6
+ example?: string;
7
+ handler: (args: Record<string, unknown>) => Promise<string | object> | string | object;
8
+ }
9
+ type ClientToolMap = Record<string, ClientTool>;
10
+ interface ClientToolCallFrame {
11
+ toolCallId: string;
12
+ name: string;
13
+ args: Record<string, unknown>;
14
+ }
15
+
1
16
  type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
2
17
  type TranscriptEntry = {
3
18
  id: string;
@@ -51,6 +66,8 @@ interface ProtocolCallbacks {
51
66
  onInterrupt: () => void;
52
67
  onAgentTurnStart: () => void;
53
68
  onCallEnd: (reason: CallEndReason) => void;
69
+ onConnected: () => void;
70
+ onClientToolCall: (frame: ClientToolCallFrame) => void;
54
71
  }
55
72
  declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
56
73
  interface BuildWsUrlArgs {
@@ -131,6 +148,16 @@ interface StartCallOptions {
131
148
  * accidentally interrupt the script. Default true.
132
149
  */
133
150
  bargeIn?: boolean;
151
+ /**
152
+ * Client-side tools the agent's LLM can call mid-conversation. Each
153
+ * tool's handler runs on the consumer's side; result is fed back to
154
+ * the LLM through the existing call WebSocket. Schema and handler
155
+ * colocate. Validated synchronously at startCall — bad input throws.
156
+ *
157
+ * See docs/integration-echocheck.md for the wire protocol and the
158
+ * server-side guarantees.
159
+ */
160
+ clientTools?: ClientToolMap;
134
161
  /**
135
162
  * Test-only escape hatch — pass a pre-minted `ct_` directly and skip
136
163
  * the `fetchToken` call. Don't use this in production code: tokens
@@ -274,4 +301,4 @@ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
274
301
  */
275
302
  declare function configureVoiceClient(config: VoiceClientConfig): VoiceClientFactory;
276
303
 
277
- export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
304
+ export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type ClientTool, type ClientToolMap, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
package/dist/browser.d.ts CHANGED
@@ -1,3 +1,18 @@
1
+ interface ClientTool {
2
+ description: string;
3
+ parameters: Record<string, unknown>;
4
+ usage?: string;
5
+ timeoutMs?: number;
6
+ example?: string;
7
+ handler: (args: Record<string, unknown>) => Promise<string | object> | string | object;
8
+ }
9
+ type ClientToolMap = Record<string, ClientTool>;
10
+ interface ClientToolCallFrame {
11
+ toolCallId: string;
12
+ name: string;
13
+ args: Record<string, unknown>;
14
+ }
15
+
1
16
  type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
2
17
  type TranscriptEntry = {
3
18
  id: string;
@@ -51,6 +66,8 @@ interface ProtocolCallbacks {
51
66
  onInterrupt: () => void;
52
67
  onAgentTurnStart: () => void;
53
68
  onCallEnd: (reason: CallEndReason) => void;
69
+ onConnected: () => void;
70
+ onClientToolCall: (frame: ClientToolCallFrame) => void;
54
71
  }
55
72
  declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
56
73
  interface BuildWsUrlArgs {
@@ -131,6 +148,16 @@ interface StartCallOptions {
131
148
  * accidentally interrupt the script. Default true.
132
149
  */
133
150
  bargeIn?: boolean;
151
+ /**
152
+ * Client-side tools the agent's LLM can call mid-conversation. Each
153
+ * tool's handler runs on the consumer's side; result is fed back to
154
+ * the LLM through the existing call WebSocket. Schema and handler
155
+ * colocate. Validated synchronously at startCall — bad input throws.
156
+ *
157
+ * See docs/integration-echocheck.md for the wire protocol and the
158
+ * server-side guarantees.
159
+ */
160
+ clientTools?: ClientToolMap;
134
161
  /**
135
162
  * Test-only escape hatch — pass a pre-minted `ct_` directly and skip
136
163
  * the `fetchToken` call. Don't use this in production code: tokens
@@ -274,4 +301,4 @@ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
274
301
  */
275
302
  declare function configureVoiceClient(config: VoiceClientConfig): VoiceClientFactory;
276
303
 
277
- export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
304
+ export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type ClientTool, type ClientToolMap, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
package/dist/browser.js CHANGED
@@ -369,6 +369,7 @@ function handleServerMessage(raw, state, cb) {
369
369
  }
370
370
  switch (msg.type) {
371
371
  case "connected":
372
+ cb.onConnected();
372
373
  setState(state, "listening", cb);
373
374
  return;
374
375
  case "transcript": {
@@ -444,6 +445,14 @@ function handleServerMessage(raw, state, cb) {
444
445
  ];
445
446
  cb.onTranscript(state.transcript);
446
447
  return;
448
+ case "client_tool_call": {
449
+ const toolCallId = String(msg.toolCallId ?? "");
450
+ const name = String(msg.name ?? "");
451
+ const args = msg.args ?? {};
452
+ if (!toolCallId || !name) return;
453
+ cb.onClientToolCall({ toolCallId, name, args });
454
+ return;
455
+ }
447
456
  case "call_end": {
448
457
  const reasonRaw = String(msg.reason ?? "");
449
458
  const reason = mapEndReason(reasonRaw);
@@ -501,6 +510,87 @@ function buildWsUrl(args) {
501
510
  return `${proto}//${base.host}/v1/agents/${encodeURIComponent(args.agentId)}/call?token=${encodeURIComponent(args.token)}${bargeQS}`;
502
511
  }
503
512
 
513
+ // src/clientTools.ts
514
+ var NAME_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
515
+ var MAX_TOOLS = 64;
516
+ var MAX_USAGE = 500;
517
+ var MAX_TIMEOUT_MS = 3e4;
518
+ var validateClientToolMap = (tools) => {
519
+ if (tools === void 0) return;
520
+ if (typeof tools !== "object" || tools === null || Array.isArray(tools)) {
521
+ throw new Error("clientTools must be an object keyed by tool name");
522
+ }
523
+ const entries = Object.entries(tools);
524
+ if (entries.length > MAX_TOOLS) {
525
+ throw new Error(`clientTools may declare at most 64 tools (got ${entries.length})`);
526
+ }
527
+ for (const [name, def] of entries) {
528
+ if (!NAME_RE.test(name)) {
529
+ throw new Error(
530
+ `clientTools["${name}"]: name must be a valid identifier (^[a-zA-Z_][a-zA-Z0-9_]*$)`
531
+ );
532
+ }
533
+ if (!def || typeof def !== "object") {
534
+ throw new Error(`clientTools["${name}"]: must be an object`);
535
+ }
536
+ if (typeof def.description !== "string" || def.description.length === 0) {
537
+ throw new Error(`clientTools["${name}"]: must have a description`);
538
+ }
539
+ if (typeof def.handler !== "function") {
540
+ throw new Error(`clientTools["${name}"]: must have a handler function`);
541
+ }
542
+ if (def.usage !== void 0 && def.usage.length > MAX_USAGE) {
543
+ throw new Error(`clientTools["${name}"]: usage must be \u2264500 chars`);
544
+ }
545
+ if (def.timeoutMs !== void 0 && (!Number.isFinite(def.timeoutMs) || def.timeoutMs <= 0 || def.timeoutMs > MAX_TIMEOUT_MS)) {
546
+ throw new Error(`clientTools["${name}"]: timeoutMs must be in (0, 30000]`);
547
+ }
548
+ }
549
+ };
550
+ var buildRegisterFrame = (tools) => ({
551
+ type: "client_tools_register",
552
+ tools: Object.entries(tools).map(([name, def]) => ({
553
+ name,
554
+ description: def.description,
555
+ parameters: def.parameters,
556
+ ...def.usage !== void 0 ? { usage: def.usage } : {},
557
+ ...def.timeoutMs !== void 0 ? { timeoutMs: def.timeoutMs } : {}
558
+ }))
559
+ });
560
+ var dispatchClientToolCall = (send, tools, frame) => {
561
+ const safeSend = (payload) => {
562
+ try {
563
+ send(payload);
564
+ } catch {
565
+ }
566
+ };
567
+ const tool = tools[frame.name];
568
+ if (!tool) {
569
+ safeSend({
570
+ type: "client_tool_result",
571
+ toolCallId: frame.toolCallId,
572
+ error: `No handler for ${frame.name}`
573
+ });
574
+ return;
575
+ }
576
+ void (async () => {
577
+ try {
578
+ const out = await tool.handler(frame.args);
579
+ safeSend({
580
+ type: "client_tool_result",
581
+ toolCallId: frame.toolCallId,
582
+ result: typeof out === "string" ? out : JSON.stringify(out)
583
+ });
584
+ } catch (err) {
585
+ safeSend({
586
+ type: "client_tool_result",
587
+ toolCallId: frame.toolCallId,
588
+ error: err instanceof Error ? err.message : String(err)
589
+ });
590
+ }
591
+ })();
592
+ };
593
+
504
594
  // src/VoiceClient.ts
505
595
  var BrowserVoiceClient = class {
506
596
  constructor(args) {
@@ -529,6 +619,10 @@ var BrowserVoiceClient = class {
529
619
  // ---------------------------------------------------------------
530
620
  // Internal
531
621
  // ---------------------------------------------------------------
622
+ this.sendClientToolsRegister = () => {
623
+ const frame = buildRegisterFrame(this.args.options.clientTools ?? {});
624
+ this.rws?.send(JSON.stringify(frame));
625
+ };
532
626
  this.setState = (next) => {
533
627
  if (this.proto.state === next) return;
534
628
  this.proto.state = next;
@@ -558,7 +652,13 @@ var BrowserVoiceClient = class {
558
652
  onError: this.emitError,
559
653
  onInterrupt: () => this.playback?.flush(),
560
654
  onAgentTurnStart: () => void 0,
561
- onCallEnd: (reason) => this.teardown(reason)
655
+ onCallEnd: (reason) => this.teardown(reason),
656
+ onConnected: () => this.sendClientToolsRegister(),
657
+ onClientToolCall: (frame) => dispatchClientToolCall(
658
+ (f) => this.rws?.send(JSON.stringify(f)),
659
+ this.args.options.clientTools ?? {},
660
+ frame
661
+ )
562
662
  });
563
663
  } else {
564
664
  this.playback?.enqueue(ev.data);
@@ -623,6 +723,7 @@ var BrowserVoiceClient = class {
623
723
  };
624
724
  this.args = args;
625
725
  this.proto = createProtocolState();
726
+ validateClientToolMap(args.options.clientTools);
626
727
  }
627
728
  // ---------------------------------------------------------------
628
729
  // Call interface