@craftedxp/voice-js 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CONSUMING.md CHANGED
@@ -42,8 +42,8 @@ For the landing dashboard in this repo:
42
42
  ```jsonc
43
43
  {
44
44
  "dependencies": {
45
- "@craftedxp/voice-js": "file:../sdk/voice-js"
46
- }
45
+ "@craftedxp/voice-js": "file:../sdk/voice-js",
46
+ },
47
47
  }
48
48
  ```
49
49
 
@@ -101,6 +101,7 @@ Browsers require a user gesture to start `AudioContext`. The SDK calls `audioCon
101
101
  ## CSP / mic permission
102
102
 
103
103
  For consumers running on a strict CSP, allow:
104
+
104
105
  - `connect-src wss://your-voxline-server.com`
105
106
  - `worker-src 'self' blob:` (the audio worklet is registered from a Blob URL)
106
107
 
@@ -113,6 +114,7 @@ The SDK doesn't log to the console by default. To see protocol-level events, wir
113
114
  ## Updating
114
115
 
115
116
  When the SDK changes:
117
+
116
118
  - **Tarball path:** re-`npm pack` then `npm install <newTgz>` in the consumer.
117
119
  - **`file:` path:** `npm run build` in `sdk/voice-js/` (refreshes `dist/`); the consumer picks it up on the next bundler refresh.
118
120
  - **Registry path:** bump the version in your `package.json` and `npm install`.
package/DEVELOPING.md CHANGED
@@ -26,6 +26,7 @@ Either way, the consumer's bundler (Webpack / Vite / esbuild / Next) picks up th
26
26
  - `dist/embed.iife.js` — minified IIFE for `<script>` embed; bundles the browser entry inline.
27
27
 
28
28
  Source files map to:
29
+
29
30
  - `src/browser.ts` — entry, factory implementation, public re-exports.
30
31
  - `src/node.ts` — entry, dynamic `ws` loader, factory implementation.
31
32
  - `src/VoiceClient.ts` — browser `BrowserVoiceClient` implementing the `Call` interface.
package/README.md CHANGED
@@ -4,7 +4,7 @@ JS SDK for embedding a voice agent call in any JS environment — browser tabs,
4
4
 
5
5
  Companion to [`@craftedxp/voice-rn`](https://www.npmjs.com/package/@craftedxp/voice-rn) (React Native) and [`@craftedxp/sdk-node`](https://www.npmjs.com/package/@craftedxp/sdk-node) (server-side `sk_` SDK).
6
6
 
7
- > **Internal testing release.** API surface may evolve before a stable release. **0.2.0** is a breaking rename + redesign of the previous `@voxline/web@0.1.0` — the singleton-`VoiceClient`-with-`apiKey` pattern is gone in favour of a `configureVoiceClient({ fetchToken })` factory that mirrors `voice-rn` 0.3.x. See [Migrating from `@voxline/web`](#migrating-from-voxlineweb) below.
7
+ > **Internal testing release.** API surface may evolve before a stable release. **0.3.1** adds Node-consumer ergonomics: `onInterrupt`/`onAgentTurnStart` callbacks on `startCall`, and the `NodeVoiceClientFactory` return type from the Node entry. **0.3.0** added [client tools](#client-tools) — handlers the agent's LLM can call on the consumer's machine. **0.2.0** was a breaking rename + redesign of the previous `@voxline/web@0.1.0` — the singleton-`VoiceClient`-with-`apiKey` pattern is gone in favour of a `configureVoiceClient({ fetchToken })` factory that mirrors `voice-rn` 0.3.x. See [Migrating from `@voxline/web`](#migrating-from-voxlineweb) below.
8
8
 
9
9
  ## Install
10
10
 
@@ -67,9 +67,9 @@ const call = await voice.startCall({
67
67
  onEnd: ({ reason, durationMs }) => log('ended', reason, durationMs),
68
68
  })
69
69
 
70
- call.mute() // gate mic frames (server still sees wire cadence)
70
+ call.mute() // gate mic frames (server still sees wire cadence)
71
71
  call.unmute()
72
- call.end() // close WS + stop mic + fire onEnd
72
+ call.end() // close WS + stop mic + fire onEnd
73
73
  ```
74
74
 
75
75
  ## Quick start (Node / Electron-main)
@@ -84,13 +84,42 @@ const voice = configureVoiceClient({
84
84
  })
85
85
 
86
86
  // Bring your own audio. Example: sox subprocesses for mic + speakers.
87
- const mic = spawn('sox', ['-d', '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-t', 'raw', '-'])
88
- const spk = spawn('sox', ['-t', 'raw', '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-', '-d'])
87
+ const mic = spawn('sox', [
88
+ '-d',
89
+ '-r',
90
+ '16000',
91
+ '-c',
92
+ '1',
93
+ '-b',
94
+ '16',
95
+ '-e',
96
+ 'signed',
97
+ '-t',
98
+ 'raw',
99
+ '-',
100
+ ])
101
+ const spk = spawn('sox', [
102
+ '-t',
103
+ 'raw',
104
+ '-r',
105
+ '16000',
106
+ '-c',
107
+ '1',
108
+ '-b',
109
+ '16',
110
+ '-e',
111
+ 'signed',
112
+ '-',
113
+ '-d',
114
+ ])
89
115
 
90
116
  const call = await voice.startCall({
91
117
  agentId: 'agt_xxx',
92
118
  onAudioChunk: (pcm) => spk.stdin.write(Buffer.from(pcm)),
93
- onEnd: () => { mic.kill(); spk.stdin.end() },
119
+ onEnd: () => {
120
+ mic.kill()
121
+ spk.stdin.end()
122
+ },
94
123
  })
95
124
 
96
125
  mic.stdout.on('data', (chunk) => call.sendAudioChunk(chunk))
@@ -102,30 +131,33 @@ The Node bundle has the same `configureVoiceClient` / `startCall` shape, plus an
102
131
 
103
132
  ### `configureVoiceClient(config)`
104
133
 
105
- | Field | Type | Notes |
106
- |---|---|---|
107
- | `apiBase` | `string` | Full HTTPS URL of the Voxline server. WS scheme derived: `https`→`wss`. Trailing slash optional. |
108
- | `fetchToken` | `(args) => Promise<string>` | Called by the SDK whenever it needs a fresh `ct_`. Mirrors `@craftedxp/voice-rn`'s shape exactly — `{ agentId, userId?, context?, metadata? }`. |
109
- | `defaultMetadata` | `Record<string, string>?` | Applied to every `startCall`. Per-call merges on top. |
110
- | `defaultContext` | `Record<string, unknown>?` | Applied to every `startCall`. Per-call merges on top. |
134
+ | Field | Type | Notes |
135
+ | ----------------- | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
136
+ | `apiBase` | `string` | Full HTTPS URL of the Voxline server. WS scheme derived: `https`→`wss`. Trailing slash optional. |
137
+ | `fetchToken` | `(args) => Promise<string>` | Called by the SDK whenever it needs a fresh `ct_`. Mirrors `@craftedxp/voice-rn`'s shape exactly — `{ agentId, userId?, context?, metadata? }`. |
138
+ | `defaultMetadata` | `Record<string, string>?` | Applied to every `startCall`. Per-call merges on top. |
139
+ | `defaultContext` | `Record<string, unknown>?` | Applied to every `startCall`. Per-call merges on top. |
111
140
 
112
141
  Returns a `VoiceClientFactory` with one method:
113
142
 
114
143
  ### `factory.startCall(options)`
115
144
 
116
- | Field | Type | Notes |
117
- |---|---|---|
118
- | `agentId` | `string` | Required. |
119
- | `userId` | `string?` | Round-tripped to fetchToken as `userId`; server uses it for contact memory. |
120
- | `context` | `Record<string, unknown>?` | Per-call structured context. Merged on top of `defaultContext`. Lowered into the agent's system prompt server-side. |
121
- | `metadata` | `Record<string, string>?` | Per-call key/value. Merged on top of `defaultMetadata`. Round-tripped on `call.ended` webhook. NOT lowered into the prompt. |
122
- | `bargeIn` | `boolean?` | Default `true`. Set `false` for alarm-style flows where the user shouldn't accidentally interrupt the script. |
123
- | `token` | `string?` | **Test-only escape hatch** pre-minted `ct_`, bypasses `fetchToken`. Don't use in production. |
124
- | `onStateChange` | `(state) => void` | Fires on every state machine transition. |
125
- | `onTranscript` | `(entries) => void` | Fires on every transcript update. |
126
- | `onVolume` | `({ input, output }) => void` | 0-1 RMS. ~10 Hz cadence. Browser bundle only. |
127
- | `onError` | `(err) => void` | Stable `code` from `CallErrorCode`; matches `voice-rn` codes where overlap. |
128
- | `onEnd` | `({ reason, errorCode?, durationMs }) => void` | Fires once when the call ends. |
145
+ | Field | Type | Notes |
146
+ | ------------------ | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
147
+ | `agentId` | `string` | Required. |
148
+ | `userId` | `string?` | Round-tripped to fetchToken as `userId`; server uses it for contact memory. |
149
+ | `context` | `Record<string, unknown>?` | Per-call structured context. Merged on top of `defaultContext`. Lowered into the agent's system prompt server-side. |
150
+ | `metadata` | `Record<string, string>?` | Per-call key/value. Merged on top of `defaultMetadata`. Round-tripped on `call.ended` webhook. NOT lowered into the prompt. |
151
+ | `bargeIn` | `boolean?` | Default `true`. Set `false` for alarm-style flows where the user shouldn't accidentally interrupt the script. |
152
+ | `clientTools`      | `ClientToolMap?`                               | Per-call client tools the agent's LLM can invoke. See [Client tools](#client-tools) section below. Validated synchronously at `startCall` — bad input throws. |
153
+ | `token` | `string?` | **Test-only escape hatch** — pre-minted `ct_`, bypasses `fetchToken`. Don't use in production. |
154
+ | `onStateChange` | `(state) => void` | Fires on every state machine transition. |
155
+ | `onTranscript` | `(entries) => void` | Fires on every transcript update. |
156
+ | `onInterrupt` | `() => void` | Server signaled barge-in. Browser bundle auto-flushes built-in playback before this fires. Node consumers should drain their custom playback queue here. |
157
+ | `onAgentTurnStart` | `() => void` | New agent turn began. Use when you want a precise turn-start anchor without diffing `onStateChange`. |
158
+ | `onVolume` | `({ input, output }) => void` | 0-1 RMS. ~10 Hz cadence. Browser bundle only. |
159
+ | `onError`          | `(err) => void`                                | Stable `code` from `CallErrorCode`; matches `voice-rn` codes where they overlap.                                                                              |
160
+ | `onEnd` | `({ reason, errorCode?, durationMs }) => void` | Fires once when the call ends. |
129
161
 
130
162
  Resolves to a `Call` handle:
131
163
 
@@ -152,21 +184,75 @@ interface NodeCall extends Call {
152
184
 
153
185
  ```ts
154
186
  type CallState =
155
- | 'idle' | 'connecting' | 'listening'
156
- | 'user_speaking' | 'agent_speaking'
157
- | 'ended' | 'error'
187
+ | 'idle'
188
+ | 'connecting'
189
+ | 'listening'
190
+ | 'user_speaking'
191
+ | 'agent_speaking'
192
+ | 'ended'
193
+ | 'error'
158
194
 
159
195
  type CallErrorCode =
160
- | 'missing_credentials' | 'forbidden'
161
- | 'mic_denied' | 'mic_start_failed' | 'audio_session_failed'
162
- | 'token_expired' | 'token_invalid' | 'unauthorized'
163
- | 'network_unreachable' | 'socket_error'
164
- | 'payment_required' | 'not_found'
165
- | 'silence_timeout' | 'server_error'
196
+ | 'missing_credentials'
197
+ | 'forbidden'
198
+ | 'mic_denied'
199
+ | 'mic_start_failed'
200
+ | 'audio_session_failed'
201
+ | 'token_expired'
202
+ | 'token_invalid'
203
+ | 'unauthorized'
204
+ | 'network_unreachable'
205
+ | 'socket_error'
206
+ | 'payment_required'
207
+ | 'not_found'
208
+ | 'silence_timeout'
209
+ | 'server_error'
166
210
 
167
211
  type CallEndReason = 'agent_ended' | 'user_hangup' | 'timeout' | 'error'
168
212
  ```
169
213
 
214
+ ## Client tools
215
+
216
+ You can declare tools the agent's LLM can call **on the consumer's machine**. The
217
+ tool's handler runs in your app — server side has no access to it. Useful for
218
+ surface-only actions (read DOM state, hit a private API, mutate local storage,
219
+ control the UI).
220
+
221
+ ```ts
222
+ import { configureVoiceClient, type ClientToolMap } from '@craftedxp/voice-js'
223
+
224
+ const tools: ClientToolMap = {
225
+ addTodoItem: {
226
+ description: "Add an item to the user's todo list.",
227
+ parameters: {
228
+ type: 'object',
229
+ properties: { text: { type: 'string' } },
230
+ required: ['text'],
231
+ },
232
+ usage: 'Call when the user asks to add or capture a task.',
233
+ handler: async ({ text }) => {
234
+ await myAppApi.addTodo(String(text))
235
+ return `Added "${text}".`
236
+ },
237
+ },
238
+ }
239
+
240
+ const voice = configureVoiceClient({ apiBase: '...', fetchToken: async () => '...' })
241
+ const call = await voice.startCall({ agentId: 'agt_xxx', clientTools: tools })
242
+ ```
243
+
244
+ The SDK validates `clientTools` at `startCall` (sync, throws on malformed input),
245
+ then sends `client_tools_register` to the server right after `connected`. When
246
+ the agent's LLM invokes a registered tool, your handler runs and the SDK posts
247
+ the result back through the same WebSocket.
248
+
249
+ Handler return values are stringified (object → `JSON.stringify`) before being
250
+ sent back; throws become `{ error: ... }` frames. The server enforces a default
251
+ 10s / max 30s timeout per `timeoutMs` in your declaration.
252
+
253
+ For the full wire protocol, sequencing, and constraints see
254
+ [`docs/integration-echocheck.md`](../../docs/integration-echocheck.md#client-declared-tools).
255
+
170
256
  ## Migrating from `@voxline/web`
171
257
 
172
258
  ```diff
@@ -221,6 +307,25 @@ Three semantic shifts to be aware of:
221
307
 
222
308
  The embed widget (`<script src="embed.js" data-token="ct_...">`) keeps the same HTML API, but the `data-api-key` attribute is no longer accepted — mint server-side and inject `data-token` instead.
223
309
 
310
+ ## Troubleshooting
311
+
312
+ **Agent's last syllable cuts off and plays into the next agent message.** Almost
313
+ always a misfiring barge-in (acoustic echo from a laptop speaker → mic, or a
314
+ false-positive VAD on background noise). Three quick fixes, in order:
315
+
316
+ 1. **Test with headphones.** Eliminates acoustic echo. If the symptom disappears,
317
+ it was echo. Tell production users to wear headphones, or fall back to (3).
318
+ 2. **Check for a phantom user turn between two agent turns** in the transcript
319
+ that contains words the agent just said. That confirms STT is hearing the
320
+ agent's voice through the mic.
321
+ 3. **Pass `bargeIn: false` on `startCall`** for non-conversational flows. Adds
322
+ `?barge=off` to the WS URL and the SDK ignores `interrupt` events
323
+ client-side. Tradeoff: user can't interrupt mid-sentence.
324
+
325
+ For the full diagnostic walkthrough (including the rarer Gemini-Live
326
+ stale-audio-leak case and audio-handling guidance for Node/Electron consumers),
327
+ see [`docs/integration-echocheck.md` → Audio quality](../../docs/integration-echocheck.md#audio-quality--the-cut-off-syllable-trap).
328
+
224
329
  ## Embed widget
225
330
 
226
331
  For drop-in `<script>` consumers (landing pages, no-build embeds):
@@ -239,7 +344,9 @@ Renders a floating call button with a Shadow-DOM transcript panel. Pre-mint the
239
344
 
240
345
  ## Status
241
346
 
242
- - **0.2.0** (current) — first `@craftedxp/voice-js` release. Browser + Node dual bundle, `fetchToken` factory, voice-rn 0.3.x parity. Migration path from `@voxline/web@0.1.0` documented above.
347
+ - **0.3.1** (current) — adds `onInterrupt` / `onAgentTurnStart` callbacks on `StartCallOptions` and a proper `NodeVoiceClientFactory` return type for the Node entry. Backwards-compatible.
348
+ - 0.3.0 — adds client-tools support. New `clientTools` option on `startCall` accepts a `ClientToolMap` (description, parameters, handler, optional usage/timeoutMs/example). Browser and Node bundles both supported. Backwards-compatible — existing consumers see no change.
349
+ - 0.2.0 — first `@craftedxp/voice-js` release. Browser + Node dual bundle, `fetchToken` factory, voice-rn 0.3.x parity. Migration path from `@voxline/web@0.1.0` documented above.
243
350
  - 0.1.0 — `@voxline/web`. Singleton `VoiceClient` class, `apiKey` accepted. Retired in 0.2.0; never published to npm so no deprecation window.
244
351
 
245
352
  See [`CONSUMING.md`](CONSUMING.md) for the full setup walkthrough and [`DEVELOPING.md`](DEVELOPING.md) for SDK-author iteration.
@@ -1,3 +1,18 @@
1
+ interface ClientTool {
2
+ description: string;
3
+ parameters: Record<string, unknown>;
4
+ usage?: string;
5
+ timeoutMs?: number;
6
+ example?: string;
7
+ handler: (args: Record<string, unknown>) => Promise<string | object> | string | object;
8
+ }
9
+ type ClientToolMap = Record<string, ClientTool>;
10
+ interface ClientToolCallFrame {
11
+ toolCallId: string;
12
+ name: string;
13
+ args: Record<string, unknown>;
14
+ }
15
+
1
16
  type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
2
17
  type TranscriptEntry = {
3
18
  id: string;
@@ -51,6 +66,8 @@ interface ProtocolCallbacks {
51
66
  onInterrupt: () => void;
52
67
  onAgentTurnStart: () => void;
53
68
  onCallEnd: (reason: CallEndReason) => void;
69
+ onConnected: () => void;
70
+ onClientToolCall: (frame: ClientToolCallFrame) => void;
54
71
  }
55
72
  declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
56
73
  interface BuildWsUrlArgs {
@@ -131,6 +148,16 @@ interface StartCallOptions {
131
148
  * accidentally interrupt the script. Default true.
132
149
  */
133
150
  bargeIn?: boolean;
151
+ /**
152
+ * Client-side tools the agent's LLM can call mid-conversation. Each
153
+ * tool's handler runs on the consumer's side; result is fed back to
154
+ * the LLM through the existing call WebSocket. Schema and handler
155
+ * colocate. Validated synchronously at startCall — bad input throws.
156
+ *
157
+ * See docs/integration-echocheck.md for the wire protocol and the
158
+ * server-side guarantees.
159
+ */
160
+ clientTools?: ClientToolMap;
134
161
  /**
135
162
  * Test-only escape hatch — pass a pre-minted `ct_` directly and skip
136
163
  * the `fetchToken` call. Don't use this in production code: tokens
@@ -143,6 +170,23 @@ interface StartCallOptions {
143
170
  onEnd?: (end: CallEndEvent) => void;
144
171
  /** Volume-meter event for VU UIs. ~10 Hz cadence (browser bundle only). */
145
172
  onVolume?: (vol: VolumeEvent) => void;
173
+ /**
174
+ * Fires when the server signals barge-in (the user started talking
175
+ * mid-agent-turn). The browser bundle automatically flushes its
176
+ * built-in audio playback before this callback runs; the callback is
177
+ * fired regardless. Node / Electron consumers with custom playback
178
+ * should drain their audio queue here so the agent goes silent
179
+ * immediately.
180
+ */
181
+ onInterrupt?: () => void;
182
+ /**
183
+ * Fires on `agent_turn_start` — the server has begun a new agent
184
+ * turn. The state-machine transition to `agent_speaking` happens at
185
+ * the same moment via `onStateChange`; use this when you want a
186
+ * precise turn anchor (e.g. "agent has been speaking for N ms" UIs)
187
+ * without diffing state.
188
+ */
189
+ onAgentTurnStart?: () => void;
146
190
  }
147
191
  interface Call {
148
192
  /** Current state. Snapshot — subscribe via onStateChange for live updates. */
@@ -274,4 +318,4 @@ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
274
318
  */
275
319
  declare function configureVoiceClient(config: VoiceClientConfig): VoiceClientFactory;
276
320
 
277
- export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
321
+ export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type ClientTool, type ClientToolMap, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
package/dist/browser.d.ts CHANGED
@@ -1,3 +1,18 @@
1
+ interface ClientTool {
2
+ description: string;
3
+ parameters: Record<string, unknown>;
4
+ usage?: string;
5
+ timeoutMs?: number;
6
+ example?: string;
7
+ handler: (args: Record<string, unknown>) => Promise<string | object> | string | object;
8
+ }
9
+ type ClientToolMap = Record<string, ClientTool>;
10
+ interface ClientToolCallFrame {
11
+ toolCallId: string;
12
+ name: string;
13
+ args: Record<string, unknown>;
14
+ }
15
+
1
16
  type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
2
17
  type TranscriptEntry = {
3
18
  id: string;
@@ -51,6 +66,8 @@ interface ProtocolCallbacks {
51
66
  onInterrupt: () => void;
52
67
  onAgentTurnStart: () => void;
53
68
  onCallEnd: (reason: CallEndReason) => void;
69
+ onConnected: () => void;
70
+ onClientToolCall: (frame: ClientToolCallFrame) => void;
54
71
  }
55
72
  declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
56
73
  interface BuildWsUrlArgs {
@@ -131,6 +148,16 @@ interface StartCallOptions {
131
148
  * accidentally interrupt the script. Default true.
132
149
  */
133
150
  bargeIn?: boolean;
151
+ /**
152
+ * Client-side tools the agent's LLM can call mid-conversation. Each
153
+ * tool's handler runs on the consumer's side; result is fed back to
154
+ * the LLM through the existing call WebSocket. Schema and handler
155
+ * colocate. Validated synchronously at startCall — bad input throws.
156
+ *
157
+ * See docs/integration-echocheck.md for the wire protocol and the
158
+ * server-side guarantees.
159
+ */
160
+ clientTools?: ClientToolMap;
134
161
  /**
135
162
  * Test-only escape hatch — pass a pre-minted `ct_` directly and skip
136
163
  * the `fetchToken` call. Don't use this in production code: tokens
@@ -143,6 +170,23 @@ interface StartCallOptions {
143
170
  onEnd?: (end: CallEndEvent) => void;
144
171
  /** Volume-meter event for VU UIs. ~10 Hz cadence (browser bundle only). */
145
172
  onVolume?: (vol: VolumeEvent) => void;
173
+ /**
174
+ * Fires when the server signals barge-in (the user started talking
175
+ * mid-agent-turn). The browser bundle automatically flushes its
176
+ * built-in audio playback before this callback runs; the callback is
177
+ * fired regardless. Node / Electron consumers with custom playback
178
+ * should drain their audio queue here so the agent goes silent
179
+ * immediately.
180
+ */
181
+ onInterrupt?: () => void;
182
+ /**
183
+ * Fires on `agent_turn_start` — the server has begun a new agent
184
+ * turn. The state-machine transition to `agent_speaking` happens at
185
+ * the same moment via `onStateChange`; use this when you want a
186
+ * precise turn anchor (e.g. "agent has been speaking for N ms" UIs)
187
+ * without diffing state.
188
+ */
189
+ onAgentTurnStart?: () => void;
146
190
  }
147
191
  interface Call {
148
192
  /** Current state. Snapshot — subscribe via onStateChange for live updates. */
@@ -274,4 +318,4 @@ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
274
318
  */
275
319
  declare function configureVoiceClient(config: VoiceClientConfig): VoiceClientFactory;
276
320
 
277
- export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
321
+ export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type ClientTool, type ClientToolMap, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
package/dist/browser.js CHANGED
@@ -369,6 +369,7 @@ function handleServerMessage(raw, state, cb) {
369
369
  }
370
370
  switch (msg.type) {
371
371
  case "connected":
372
+ cb.onConnected();
372
373
  setState(state, "listening", cb);
373
374
  return;
374
375
  case "transcript": {
@@ -444,6 +445,14 @@ function handleServerMessage(raw, state, cb) {
444
445
  ];
445
446
  cb.onTranscript(state.transcript);
446
447
  return;
448
+ case "client_tool_call": {
449
+ const toolCallId = String(msg.toolCallId ?? "");
450
+ const name = String(msg.name ?? "");
451
+ const args = msg.args ?? {};
452
+ if (!toolCallId || !name) return;
453
+ cb.onClientToolCall({ toolCallId, name, args });
454
+ return;
455
+ }
447
456
  case "call_end": {
448
457
  const reasonRaw = String(msg.reason ?? "");
449
458
  const reason = mapEndReason(reasonRaw);
@@ -501,6 +510,87 @@ function buildWsUrl(args) {
501
510
  return `${proto}//${base.host}/v1/agents/${encodeURIComponent(args.agentId)}/call?token=${encodeURIComponent(args.token)}${bargeQS}`;
502
511
  }
503
512
 
513
+ // src/clientTools.ts
514
+ var NAME_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
515
+ var MAX_TOOLS = 64;
516
+ var MAX_USAGE = 500;
517
+ var MAX_TIMEOUT_MS = 3e4;
518
+ var validateClientToolMap = (tools) => {
519
+ if (tools === void 0) return;
520
+ if (typeof tools !== "object" || tools === null || Array.isArray(tools)) {
521
+ throw new Error("clientTools must be an object keyed by tool name");
522
+ }
523
+ const entries = Object.entries(tools);
524
+ if (entries.length > MAX_TOOLS) {
525
+ throw new Error(`clientTools may declare at most 64 tools (got ${entries.length})`);
526
+ }
527
+ for (const [name, def] of entries) {
528
+ if (!NAME_RE.test(name)) {
529
+ throw new Error(
530
+ `clientTools["${name}"]: name must be a valid identifier (^[a-zA-Z_][a-zA-Z0-9_]*$)`
531
+ );
532
+ }
533
+ if (!def || typeof def !== "object") {
534
+ throw new Error(`clientTools["${name}"]: must be an object`);
535
+ }
536
+ if (typeof def.description !== "string" || def.description.length === 0) {
537
+ throw new Error(`clientTools["${name}"]: must have a description`);
538
+ }
539
+ if (typeof def.handler !== "function") {
540
+ throw new Error(`clientTools["${name}"]: must have a handler function`);
541
+ }
542
+ if (def.usage !== void 0 && def.usage.length > MAX_USAGE) {
543
+ throw new Error(`clientTools["${name}"]: usage must be \u2264500 chars`);
544
+ }
545
+ if (def.timeoutMs !== void 0 && (!Number.isFinite(def.timeoutMs) || def.timeoutMs <= 0 || def.timeoutMs > MAX_TIMEOUT_MS)) {
546
+ throw new Error(`clientTools["${name}"]: timeoutMs must be in (0, 30000]`);
547
+ }
548
+ }
549
+ };
550
+ var buildRegisterFrame = (tools) => ({
551
+ type: "client_tools_register",
552
+ tools: Object.entries(tools).map(([name, def]) => ({
553
+ name,
554
+ description: def.description,
555
+ parameters: def.parameters,
556
+ ...def.usage !== void 0 ? { usage: def.usage } : {},
557
+ ...def.timeoutMs !== void 0 ? { timeoutMs: def.timeoutMs } : {}
558
+ }))
559
+ });
560
+ var dispatchClientToolCall = (send, tools, frame) => {
561
+ const safeSend = (payload) => {
562
+ try {
563
+ send(payload);
564
+ } catch {
565
+ }
566
+ };
567
+ const tool = tools[frame.name];
568
+ if (!tool) {
569
+ safeSend({
570
+ type: "client_tool_result",
571
+ toolCallId: frame.toolCallId,
572
+ error: `No handler for ${frame.name}`
573
+ });
574
+ return;
575
+ }
576
+ void (async () => {
577
+ try {
578
+ const out = await tool.handler(frame.args);
579
+ safeSend({
580
+ type: "client_tool_result",
581
+ toolCallId: frame.toolCallId,
582
+ result: typeof out === "string" ? out : JSON.stringify(out)
583
+ });
584
+ } catch (err) {
585
+ safeSend({
586
+ type: "client_tool_result",
587
+ toolCallId: frame.toolCallId,
588
+ error: err instanceof Error ? err.message : String(err)
589
+ });
590
+ }
591
+ })();
592
+ };
593
+
504
594
  // src/VoiceClient.ts
505
595
  var BrowserVoiceClient = class {
506
596
  constructor(args) {
@@ -529,6 +619,10 @@ var BrowserVoiceClient = class {
529
619
  // ---------------------------------------------------------------
530
620
  // Internal
531
621
  // ---------------------------------------------------------------
622
+ this.sendClientToolsRegister = () => {
623
+ const frame = buildRegisterFrame(this.args.options.clientTools ?? {});
624
+ this.rws?.send(JSON.stringify(frame));
625
+ };
532
626
  this.setState = (next) => {
533
627
  if (this.proto.state === next) return;
534
628
  this.proto.state = next;
@@ -556,9 +650,18 @@ var BrowserVoiceClient = class {
556
650
  onState: this.setState,
557
651
  onTranscript: (entries) => this.args.options.onTranscript?.(entries),
558
652
  onError: this.emitError,
559
- onInterrupt: () => this.playback?.flush(),
560
- onAgentTurnStart: () => void 0,
561
- onCallEnd: (reason) => this.teardown(reason)
653
+ onInterrupt: () => {
654
+ this.playback?.flush();
655
+ this.args.options.onInterrupt?.();
656
+ },
657
+ onAgentTurnStart: () => this.args.options.onAgentTurnStart?.(),
658
+ onCallEnd: (reason) => this.teardown(reason),
659
+ onConnected: () => this.sendClientToolsRegister(),
660
+ onClientToolCall: (frame) => dispatchClientToolCall(
661
+ (f) => this.rws?.send(JSON.stringify(f)),
662
+ this.args.options.clientTools ?? {},
663
+ frame
664
+ )
562
665
  });
563
666
  } else {
564
667
  this.playback?.enqueue(ev.data);
@@ -623,6 +726,7 @@ var BrowserVoiceClient = class {
623
726
  };
624
727
  this.args = args;
625
728
  this.proto = createProtocolState();
729
+ validateClientToolMap(args.options.clientTools);
626
730
  }
627
731
  // ---------------------------------------------------------------
628
732
  // Call interface