@craftedxp/voice-js 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONSUMING.md +4 -2
- package/DEVELOPING.md +1 -0
- package/README.md +120 -35
- package/dist/browser.d.mts +28 -1
- package/dist/browser.d.ts +28 -1
- package/dist/browser.js +102 -1
- package/dist/browser.js.map +1 -1
- package/dist/browser.mjs +102 -1
- package/dist/browser.mjs.map +1 -1
- package/dist/embed.iife.js +3 -3
- package/dist/node.d.mts +28 -1
- package/dist/node.d.ts +28 -1
- package/dist/node.js +102 -1
- package/dist/node.js.map +1 -1
- package/dist/node.mjs +102 -1
- package/dist/node.mjs.map +1 -1
- package/package.json +3 -1
package/CONSUMING.md
CHANGED
|
@@ -42,8 +42,8 @@ For the landing dashboard in this repo:
|
|
|
42
42
|
```jsonc
|
|
43
43
|
{
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"@craftedxp/voice-js": "file:../sdk/voice-js"
|
|
46
|
-
}
|
|
45
|
+
"@craftedxp/voice-js": "file:../sdk/voice-js",
|
|
46
|
+
},
|
|
47
47
|
}
|
|
48
48
|
```
|
|
49
49
|
|
|
@@ -101,6 +101,7 @@ Browsers require a user gesture to start `AudioContext`. The SDK calls `audioCon
|
|
|
101
101
|
## CSP / mic permission
|
|
102
102
|
|
|
103
103
|
For consumers running on a strict CSP, allow:
|
|
104
|
+
|
|
104
105
|
- `connect-src wss://your-voxline-server.com`
|
|
105
106
|
- `worker-src 'self' blob:` (the audio worklet is registered from a Blob URL)
|
|
106
107
|
|
|
@@ -113,6 +114,7 @@ The SDK doesn't log to the console by default. To see protocol-level events, wir
|
|
|
113
114
|
## Updating
|
|
114
115
|
|
|
115
116
|
When the SDK changes:
|
|
117
|
+
|
|
116
118
|
- **Tarball path:** re-`npm pack` then `npm install <newTgz>` in the consumer.
|
|
117
119
|
- **`file:` path:** `npm run build` in `sdk/voice-js/` (refreshes `dist/`); the consumer picks it up on the next bundler refresh.
|
|
118
120
|
- **Registry path:** bump the version in your `package.json` and `npm install`.
|
package/DEVELOPING.md
CHANGED
|
@@ -26,6 +26,7 @@ Either way, the consumer's bundler (Webpack / Vite / esbuild / Next) picks up th
|
|
|
26
26
|
- `dist/embed.iife.js` — minified IIFE for `<script>` embed; bundles the browser entry inline.
|
|
27
27
|
|
|
28
28
|
Source files map to:
|
|
29
|
+
|
|
29
30
|
- `src/browser.ts` — entry, factory implementation, public re-exports.
|
|
30
31
|
- `src/node.ts` — entry, dynamic `ws` loader, factory implementation.
|
|
31
32
|
- `src/VoiceClient.ts` — browser `BrowserVoiceClient` implementing the `Call` interface.
|
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ JS SDK for embedding a voice agent call in any JS environment — browser tabs,
|
|
|
4
4
|
|
|
5
5
|
Companion to [`@craftedxp/voice-rn`](https://www.npmjs.com/package/@craftedxp/voice-rn) (React Native) and [`@craftedxp/sdk-node`](https://www.npmjs.com/package/@craftedxp/sdk-node) (server-side `sk_` SDK).
|
|
6
6
|
|
|
7
|
-
> **Internal testing release.** API surface may evolve before a stable release. **0.2.0**
|
|
7
|
+
> **Internal testing release.** API surface may evolve before a stable release. **0.3.0** adds [client tools](#client-tools) — handlers the agent's LLM can call on the consumer's machine. **0.2.0** was a breaking rename + redesign of the previous `@voxline/web@0.1.0` — the singleton-`VoiceClient`-with-`apiKey` pattern is gone in favour of a `configureVoiceClient({ fetchToken })` factory that mirrors `voice-rn` 0.3.x. See [Migrating from `@voxline/web`](#migrating-from-voxlineweb) below.
|
|
8
8
|
|
|
9
9
|
## Install
|
|
10
10
|
|
|
@@ -67,9 +67,9 @@ const call = await voice.startCall({
|
|
|
67
67
|
onEnd: ({ reason, durationMs }) => log('ended', reason, durationMs),
|
|
68
68
|
})
|
|
69
69
|
|
|
70
|
-
call.mute()
|
|
70
|
+
call.mute() // gate mic frames (server still sees wire cadence)
|
|
71
71
|
call.unmute()
|
|
72
|
-
call.end()
|
|
72
|
+
call.end() // close WS + stop mic + fire onEnd
|
|
73
73
|
```
|
|
74
74
|
|
|
75
75
|
## Quick start (Node / Electron-main)
|
|
@@ -84,13 +84,42 @@ const voice = configureVoiceClient({
|
|
|
84
84
|
})
|
|
85
85
|
|
|
86
86
|
// Bring your own audio. Example: sox subprocesses for mic + speakers.
|
|
87
|
-
const mic = spawn('sox', [
|
|
88
|
-
|
|
87
|
+
const mic = spawn('sox', [
|
|
88
|
+
'-d',
|
|
89
|
+
'-r',
|
|
90
|
+
'16000',
|
|
91
|
+
'-c',
|
|
92
|
+
'1',
|
|
93
|
+
'-b',
|
|
94
|
+
'16',
|
|
95
|
+
'-e',
|
|
96
|
+
'signed',
|
|
97
|
+
'-t',
|
|
98
|
+
'raw',
|
|
99
|
+
'-',
|
|
100
|
+
])
|
|
101
|
+
const spk = spawn('sox', [
|
|
102
|
+
'-t',
|
|
103
|
+
'raw',
|
|
104
|
+
'-r',
|
|
105
|
+
'16000',
|
|
106
|
+
'-c',
|
|
107
|
+
'1',
|
|
108
|
+
'-b',
|
|
109
|
+
'16',
|
|
110
|
+
'-e',
|
|
111
|
+
'signed',
|
|
112
|
+
'-',
|
|
113
|
+
'-d',
|
|
114
|
+
])
|
|
89
115
|
|
|
90
116
|
const call = await voice.startCall({
|
|
91
117
|
agentId: 'agt_xxx',
|
|
92
118
|
onAudioChunk: (pcm) => spk.stdin.write(Buffer.from(pcm)),
|
|
93
|
-
onEnd: () => {
|
|
119
|
+
onEnd: () => {
|
|
120
|
+
mic.kill()
|
|
121
|
+
spk.stdin.end()
|
|
122
|
+
},
|
|
94
123
|
})
|
|
95
124
|
|
|
96
125
|
mic.stdout.on('data', (chunk) => call.sendAudioChunk(chunk))
|
|
@@ -102,30 +131,31 @@ The Node bundle has the same `configureVoiceClient` / `startCall` shape, plus an
|
|
|
102
131
|
|
|
103
132
|
### `configureVoiceClient(config)`
|
|
104
133
|
|
|
105
|
-
| Field
|
|
106
|
-
|
|
107
|
-
| `apiBase`
|
|
108
|
-
| `fetchToken`
|
|
109
|
-
| `defaultMetadata` | `Record<string, string>?`
|
|
110
|
-
| `defaultContext`
|
|
134
|
+
| Field | Type | Notes |
|
|
135
|
+
| ----------------- | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
136
|
+
| `apiBase` | `string` | Full HTTPS URL of the Voxline server. WS scheme derived: `https`→`wss`. Trailing slash optional. |
|
|
137
|
+
| `fetchToken` | `(args) => Promise<string>` | Called by the SDK whenever it needs a fresh `ct_`. Mirrors `@craftedxp/voice-rn`'s shape exactly — `{ agentId, userId?, context?, metadata? }`. |
|
|
138
|
+
| `defaultMetadata` | `Record<string, string>?` | Applied to every `startCall`. Per-call merges on top. |
|
|
139
|
+
| `defaultContext` | `Record<string, unknown>?` | Applied to every `startCall`. Per-call merges on top. |
|
|
111
140
|
|
|
112
141
|
Returns a `VoiceClientFactory` with one method:
|
|
113
142
|
|
|
114
143
|
### `factory.startCall(options)`
|
|
115
144
|
|
|
116
|
-
| Field
|
|
117
|
-
|
|
118
|
-
| `agentId`
|
|
119
|
-
| `userId`
|
|
120
|
-
| `context`
|
|
121
|
-
| `metadata`
|
|
122
|
-
| `bargeIn`
|
|
123
|
-
| `
|
|
124
|
-
| `
|
|
125
|
-
| `
|
|
126
|
-
| `
|
|
127
|
-
| `
|
|
128
|
-
| `
|
|
145
|
+
| Field | Type | Notes |
|
|
146
|
+
| --------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
147
|
+
| `agentId` | `string` | Required. |
|
|
148
|
+
| `userId` | `string?` | Round-tripped to fetchToken as `userId`; server uses it for contact memory. |
|
|
149
|
+
| `context` | `Record<string, unknown>?` | Per-call structured context. Merged on top of `defaultContext`. Lowered into the agent's system prompt server-side. |
|
|
150
|
+
| `metadata` | `Record<string, string>?` | Per-call key/value. Merged on top of `defaultMetadata`. Round-tripped on `call.ended` webhook. NOT lowered into the prompt. |
|
|
151
|
+
| `bargeIn` | `boolean?` | Default `true`. Set `false` for alarm-style flows where the user shouldn't accidentally interrupt the script. |
|
|
152
|
+
| `clientTools` | `ClientToolMap?` | Per-call client tools the agent's LLM can invoke. See [Client tools](#client-tools) section below. Validated synchronously at `startCall` — bad input throws. |
|
|
153
|
+
| `token` | `string?` | **Test-only escape hatch** — pre-minted `ct_`, bypasses `fetchToken`. Don't use in production. |
|
|
154
|
+
| `onStateChange` | `(state) => void` | Fires on every state machine transition. |
|
|
155
|
+
| `onTranscript` | `(entries) => void` | Fires on every transcript update. |
|
|
156
|
+
| `onVolume` | `({ input, output }) => void` | 0-1 RMS. ~10 Hz cadence. Browser bundle only. |
|
|
157
|
+
| `onError` | `(err) => void` | Stable `code` from `CallErrorCode`; matches `voice-rn` codes where overlap. |
|
|
158
|
+
| `onEnd` | `({ reason, errorCode?, durationMs }) => void` | Fires once when the call ends. |
|
|
129
159
|
|
|
130
160
|
Resolves to a `Call` handle:
|
|
131
161
|
|
|
@@ -152,21 +182,75 @@ interface NodeCall extends Call {
|
|
|
152
182
|
|
|
153
183
|
```ts
|
|
154
184
|
type CallState =
|
|
155
|
-
| 'idle'
|
|
156
|
-
| '
|
|
157
|
-
| '
|
|
185
|
+
| 'idle'
|
|
186
|
+
| 'connecting'
|
|
187
|
+
| 'listening'
|
|
188
|
+
| 'user_speaking'
|
|
189
|
+
| 'agent_speaking'
|
|
190
|
+
| 'ended'
|
|
191
|
+
| 'error'
|
|
158
192
|
|
|
159
193
|
type CallErrorCode =
|
|
160
|
-
| 'missing_credentials'
|
|
161
|
-
| '
|
|
162
|
-
| '
|
|
163
|
-
| '
|
|
164
|
-
| '
|
|
165
|
-
| '
|
|
194
|
+
| 'missing_credentials'
|
|
195
|
+
| 'forbidden'
|
|
196
|
+
| 'mic_denied'
|
|
197
|
+
| 'mic_start_failed'
|
|
198
|
+
| 'audio_session_failed'
|
|
199
|
+
| 'token_expired'
|
|
200
|
+
| 'token_invalid'
|
|
201
|
+
| 'unauthorized'
|
|
202
|
+
| 'network_unreachable'
|
|
203
|
+
| 'socket_error'
|
|
204
|
+
| 'payment_required'
|
|
205
|
+
| 'not_found'
|
|
206
|
+
| 'silence_timeout'
|
|
207
|
+
| 'server_error'
|
|
166
208
|
|
|
167
209
|
type CallEndReason = 'agent_ended' | 'user_hangup' | 'timeout' | 'error'
|
|
168
210
|
```
|
|
169
211
|
|
|
212
|
+
## Client tools
|
|
213
|
+
|
|
214
|
+
You can declare tools the agent's LLM can call **on the consumer's machine**. The
|
|
215
|
+
tool's handler runs in your app — the server has no access to it. Useful for
|
|
216
|
+
surface-only actions (read DOM state, hit a private API, mutate local storage,
|
|
217
|
+
control the UI).
|
|
218
|
+
|
|
219
|
+
```ts
|
|
220
|
+
import { configureVoiceClient, type ClientToolMap } from '@craftedxp/voice-js'
|
|
221
|
+
|
|
222
|
+
const tools: ClientToolMap = {
|
|
223
|
+
addTodoItem: {
|
|
224
|
+
description: "Add an item to the user's todo list.",
|
|
225
|
+
parameters: {
|
|
226
|
+
type: 'object',
|
|
227
|
+
properties: { text: { type: 'string' } },
|
|
228
|
+
required: ['text'],
|
|
229
|
+
},
|
|
230
|
+
usage: 'Call when the user asks to add or capture a task.',
|
|
231
|
+
handler: async ({ text }) => {
|
|
232
|
+
await myAppApi.addTodo(String(text))
|
|
233
|
+
return `Added "${text}".`
|
|
234
|
+
},
|
|
235
|
+
},
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
const voice = configureVoiceClient({ apiBase: '...', fetchToken: async () => '...' })
|
|
239
|
+
const call = await voice.startCall({ agentId: 'agt_xxx', clientTools: tools })
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
The SDK validates `clientTools` at `startCall` (sync, throws on malformed input),
|
|
243
|
+
then sends `client_tools_register` to the server right after `connected`. When
|
|
244
|
+
the agent's LLM invokes a registered tool, your handler runs and the SDK posts
|
|
245
|
+
the result back through the same WebSocket.
|
|
246
|
+
|
|
247
|
+
Handler return values are stringified (object → `JSON.stringify`) before being
|
|
248
|
+
sent back; throws become `{ error: ... }` frames. The server enforces a default
|
|
249
|
+
10s timeout per tool call; set `timeoutMs` in your declaration to override it (max 30s).
|
|
250
|
+
|
|
251
|
+
For the full wire protocol, sequencing, and constraints see
|
|
252
|
+
[`docs/integration-echocheck.md`](../../docs/integration-echocheck.md#client-declared-tools).
|
|
253
|
+
|
|
170
254
|
## Migrating from `@voxline/web`
|
|
171
255
|
|
|
172
256
|
```diff
|
|
@@ -239,7 +323,8 @@ Renders a floating call button with a Shadow-DOM transcript panel. Pre-mint the
|
|
|
239
323
|
|
|
240
324
|
## Status
|
|
241
325
|
|
|
242
|
-
- **0.
|
|
326
|
+
- **0.3.0** (current) — adds client-tools support. New `clientTools` option on `startCall` accepts a `ClientToolMap` (description, parameters, handler, optional usage/timeoutMs/example). Browser and Node bundles both supported. Backwards-compatible — existing consumers see no change.
|
|
327
|
+
- 0.2.0 — first `@craftedxp/voice-js` release. Browser + Node dual bundle, `fetchToken` factory, voice-rn 0.3.x parity. Migration path from `@voxline/web@0.1.0` documented above.
|
|
243
328
|
- 0.1.0 — `@voxline/web`. Singleton `VoiceClient` class, `apiKey` accepted. Retired in 0.2.0; never published to npm so no deprecation window.
|
|
244
329
|
|
|
245
330
|
See [`CONSUMING.md`](CONSUMING.md) for the full setup walkthrough and [`DEVELOPING.md`](DEVELOPING.md) for SDK-author iteration.
|
package/dist/browser.d.mts
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
/**
 * Declaration of a single client-side tool: the schema advertised to the
 * agent's LLM together with the handler that runs on the consumer's side.
 */
interface ClientTool {
    /** Human-readable description of what the tool does. Must be non-empty. */
    description: string;
    /** Schema object describing the tool's arguments (e.g. a JSON-Schema-style object). */
    parameters: Record<string, unknown>;
    /**
     * Invoked with the arguments supplied by the agent. May be sync or async.
     * String results are sent back as-is; other values are JSON-stringified.
     */
    handler: (args: Record<string, unknown>) => string | object | Promise<string | object>;
    /** Optional hint on when the tool should be called (at most 500 characters). */
    usage?: string;
    /** Optional per-tool timeout in milliseconds; must be in (0, 30000]. */
    timeoutMs?: number;
    /** Optional example for the tool. */
    example?: string;
}
/** Client tool declarations keyed by tool name. */
type ClientToolMap = Record<string, ClientTool>;
/** A `client_tool_call` request received from the server. */
interface ClientToolCallFrame {
    /** Name of the tool being invoked. */
    name: string;
    /** Identifier echoed back on the matching `client_tool_result` frame. */
    toolCallId: string;
    /** Arguments passed to the tool's handler. */
    args: Record<string, unknown>;
}
|
|
15
|
+
|
|
1
16
|
type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
|
|
2
17
|
type TranscriptEntry = {
|
|
3
18
|
id: string;
|
|
@@ -51,6 +66,8 @@ interface ProtocolCallbacks {
|
|
|
51
66
|
onInterrupt: () => void;
|
|
52
67
|
onAgentTurnStart: () => void;
|
|
53
68
|
onCallEnd: (reason: CallEndReason) => void;
|
|
69
|
+
onConnected: () => void;
|
|
70
|
+
onClientToolCall: (frame: ClientToolCallFrame) => void;
|
|
54
71
|
}
|
|
55
72
|
declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
|
|
56
73
|
interface BuildWsUrlArgs {
|
|
@@ -131,6 +148,16 @@ interface StartCallOptions {
|
|
|
131
148
|
* accidentally interrupt the script. Default true.
|
|
132
149
|
*/
|
|
133
150
|
bargeIn?: boolean;
|
|
151
|
+
/**
|
|
152
|
+
* Client-side tools the agent's LLM can call mid-conversation. Each
|
|
153
|
+
* tool's handler runs on the consumer's side; result is fed back to
|
|
154
|
+
* the LLM through the existing call WebSocket. Schema and handler
|
|
155
|
+
* colocate. Validated synchronously at startCall — bad input throws.
|
|
156
|
+
*
|
|
157
|
+
* See docs/integration-echocheck.md for the wire protocol and the
|
|
158
|
+
* server-side guarantees.
|
|
159
|
+
*/
|
|
160
|
+
clientTools?: ClientToolMap;
|
|
134
161
|
/**
|
|
135
162
|
* Test-only escape hatch — pass a pre-minted `ct_` directly and skip
|
|
136
163
|
* the `fetchToken` call. Don't use this in production code: tokens
|
|
@@ -274,4 +301,4 @@ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
|
|
|
274
301
|
*/
|
|
275
302
|
declare function configureVoiceClient(config: VoiceClientConfig): VoiceClientFactory;
|
|
276
303
|
|
|
277
|
-
export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
|
|
304
|
+
export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type ClientTool, type ClientToolMap, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
|
package/dist/browser.d.ts
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
/**
 * Declaration of a single client-side tool: the schema advertised to the
 * agent's LLM together with the handler that runs on the consumer's side.
 */
interface ClientTool {
    /** Human-readable description of what the tool does. Must be non-empty. */
    description: string;
    /** Schema object describing the tool's arguments (e.g. a JSON-Schema-style object). */
    parameters: Record<string, unknown>;
    /**
     * Invoked with the arguments supplied by the agent. May be sync or async.
     * String results are sent back as-is; other values are JSON-stringified.
     */
    handler: (args: Record<string, unknown>) => string | object | Promise<string | object>;
    /** Optional hint on when the tool should be called (at most 500 characters). */
    usage?: string;
    /** Optional per-tool timeout in milliseconds; must be in (0, 30000]. */
    timeoutMs?: number;
    /** Optional example for the tool. */
    example?: string;
}
/** Client tool declarations keyed by tool name. */
type ClientToolMap = Record<string, ClientTool>;
/** A `client_tool_call` request received from the server. */
interface ClientToolCallFrame {
    /** Name of the tool being invoked. */
    name: string;
    /** Identifier echoed back on the matching `client_tool_result` frame. */
    toolCallId: string;
    /** Arguments passed to the tool's handler. */
    args: Record<string, unknown>;
}
|
|
15
|
+
|
|
1
16
|
type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
|
|
2
17
|
type TranscriptEntry = {
|
|
3
18
|
id: string;
|
|
@@ -51,6 +66,8 @@ interface ProtocolCallbacks {
|
|
|
51
66
|
onInterrupt: () => void;
|
|
52
67
|
onAgentTurnStart: () => void;
|
|
53
68
|
onCallEnd: (reason: CallEndReason) => void;
|
|
69
|
+
onConnected: () => void;
|
|
70
|
+
onClientToolCall: (frame: ClientToolCallFrame) => void;
|
|
54
71
|
}
|
|
55
72
|
declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
|
|
56
73
|
interface BuildWsUrlArgs {
|
|
@@ -131,6 +148,16 @@ interface StartCallOptions {
|
|
|
131
148
|
* accidentally interrupt the script. Default true.
|
|
132
149
|
*/
|
|
133
150
|
bargeIn?: boolean;
|
|
151
|
+
/**
|
|
152
|
+
* Client-side tools the agent's LLM can call mid-conversation. Each
|
|
153
|
+
* tool's handler runs on the consumer's side; result is fed back to
|
|
154
|
+
* the LLM through the existing call WebSocket. Schema and handler
|
|
155
|
+
* colocate. Validated synchronously at startCall — bad input throws.
|
|
156
|
+
*
|
|
157
|
+
* See docs/integration-echocheck.md for the wire protocol and the
|
|
158
|
+
* server-side guarantees.
|
|
159
|
+
*/
|
|
160
|
+
clientTools?: ClientToolMap;
|
|
134
161
|
/**
|
|
135
162
|
* Test-only escape hatch — pass a pre-minted `ct_` directly and skip
|
|
136
163
|
* the `fetchToken` call. Don't use this in production code: tokens
|
|
@@ -274,4 +301,4 @@ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
|
|
|
274
301
|
*/
|
|
275
302
|
declare function configureVoiceClient(config: VoiceClientConfig): VoiceClientFactory;
|
|
276
303
|
|
|
277
|
-
export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
|
|
304
|
+
export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type CaptureController, type CaptureOptions, type ClientTool, type ClientToolMap, type FetchToken, type FetchTokenArgs, type OnAgentSpeakingChange, type OnChunk, type OnError, type OnVolume$1 as OnVolume, type PlaybackController, type PlaybackOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createAudioCapture, createAudioPlayback, createProtocolState, createReconnectingWebSocket, handleServerMessage };
|
package/dist/browser.js
CHANGED
|
@@ -369,6 +369,7 @@ function handleServerMessage(raw, state, cb) {
|
|
|
369
369
|
}
|
|
370
370
|
switch (msg.type) {
|
|
371
371
|
case "connected":
|
|
372
|
+
cb.onConnected();
|
|
372
373
|
setState(state, "listening", cb);
|
|
373
374
|
return;
|
|
374
375
|
case "transcript": {
|
|
@@ -444,6 +445,14 @@ function handleServerMessage(raw, state, cb) {
|
|
|
444
445
|
];
|
|
445
446
|
cb.onTranscript(state.transcript);
|
|
446
447
|
return;
|
|
448
|
+
case "client_tool_call": {
|
|
449
|
+
const toolCallId = String(msg.toolCallId ?? "");
|
|
450
|
+
const name = String(msg.name ?? "");
|
|
451
|
+
const args = msg.args ?? {};
|
|
452
|
+
if (!toolCallId || !name) return;
|
|
453
|
+
cb.onClientToolCall({ toolCallId, name, args });
|
|
454
|
+
return;
|
|
455
|
+
}
|
|
447
456
|
case "call_end": {
|
|
448
457
|
const reasonRaw = String(msg.reason ?? "");
|
|
449
458
|
const reason = mapEndReason(reasonRaw);
|
|
@@ -501,6 +510,87 @@ function buildWsUrl(args) {
|
|
|
501
510
|
return `${proto}//${base.host}/v1/agents/${encodeURIComponent(args.agentId)}/call?token=${encodeURIComponent(args.token)}${bargeQS}`;
|
|
502
511
|
}
|
|
503
512
|
|
|
513
|
+
// src/clientTools.ts
|
|
514
|
+
// Tool names must be plain ASCII identifiers.
var NAME_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
// Maximum number of tools a single clientTools map may declare.
var MAX_TOOLS = 64;
// Maximum length of a tool's optional `usage` hint (string `.length`).
var MAX_USAGE = 500;
// Inclusive upper bound for a tool's optional `timeoutMs`.
var MAX_TIMEOUT_MS = 3e4;
/**
 * Synchronously validates a `clientTools` map, throwing on the first problem.
 *
 * `undefined` is accepted (no tools declared). Otherwise the value must be a
 * plain, non-array object with at most MAX_TOOLS entries, and every entry must:
 *   - be keyed by a name matching NAME_RE,
 *   - be an object with a non-empty string `description`,
 *   - have a `handler` function,
 *   - if `usage` is given, have it be a string of at most MAX_USAGE chars,
 *   - if `timeoutMs` is given, have it be a finite number in (0, MAX_TIMEOUT_MS].
 *
 * @param {unknown} tools - Candidate clientTools value.
 * @returns {void}
 * @throws {Error} Describing the first invalid entry.
 */
var validateClientToolMap = (tools) => {
  if (tools === void 0) return;
  if (typeof tools !== "object" || tools === null || Array.isArray(tools)) {
    throw new Error("clientTools must be an object keyed by tool name");
  }
  const entries = Object.entries(tools);
  if (entries.length > MAX_TOOLS) {
    throw new Error(`clientTools may declare at most ${MAX_TOOLS} tools (got ${entries.length})`);
  }
  for (const [name, def] of entries) {
    if (!NAME_RE.test(name)) {
      throw new Error(
        `clientTools["${name}"]: name must be a valid identifier (^[a-zA-Z_][a-zA-Z0-9_]*$)`
      );
    }
    if (!def || typeof def !== "object") {
      throw new Error(`clientTools["${name}"]: must be an object`);
    }
    if (typeof def.description !== "string" || def.description.length === 0) {
      throw new Error(`clientTools["${name}"]: must have a description`);
    }
    if (typeof def.handler !== "function") {
      throw new Error(`clientTools["${name}"]: must have a handler function`);
    }
    if (def.usage !== void 0) {
      // Check the type before reading `.length`: `null` would otherwise crash
      // with a raw TypeError, and non-strings would skip the length limit.
      if (typeof def.usage !== "string") {
        throw new Error(`clientTools["${name}"]: usage must be a string`);
      }
      if (def.usage.length > MAX_USAGE) {
        throw new Error(`clientTools["${name}"]: usage must be \u2264${MAX_USAGE} chars`);
      }
    }
    if (def.timeoutMs !== void 0 && (!Number.isFinite(def.timeoutMs) || def.timeoutMs <= 0 || def.timeoutMs > MAX_TIMEOUT_MS)) {
      throw new Error(`clientTools["${name}"]: timeoutMs must be in (0, ${MAX_TIMEOUT_MS}]`);
    }
  }
};
|
|
550
|
+
/**
 * Builds the `client_tools_register` frame that advertises the declared tools.
 *
 * Each tool contributes its name, description and parameters; `usage` and
 * `timeoutMs` are included only when they are defined. Handlers (and any
 * other fields) are not part of the frame.
 *
 * @param {object} tools - Tool declarations keyed by tool name.
 * @returns {{ type: string, tools: object[] }} The register frame.
 */
var buildRegisterFrame = (tools) => {
  const declarations = [];
  for (const [name, def] of Object.entries(tools)) {
    const declaration = {
      name,
      description: def.description,
      parameters: def.parameters
    };
    if (def.usage !== void 0) {
      declaration.usage = def.usage;
    }
    if (def.timeoutMs !== void 0) {
      declaration.timeoutMs = def.timeoutMs;
    }
    declarations.push(declaration);
  }
  return { type: "client_tools_register", tools: declarations };
};
|
|
560
|
+
/**
 * Runs the handler for a `client_tool_call` frame and reports the outcome
 * through `send` as a `client_tool_result` frame.
 *
 * - Unknown tool name: an `error` result is sent immediately.
 * - Handler resolves: a `result` string is sent (strings as-is, other values
 *   JSON-stringified; values with no JSON form, such as `undefined`, become
 *   the string "null" so the frame always carries a `result`).
 * - Handler throws or rejects: an `error` result with the error message.
 *
 * The handler runs in a detached async task; this function returns without
 * waiting for it and never throws.
 *
 * @param {(payload: object) => void} send - Sends one result frame.
 * @param {object} tools - Tool declarations keyed by tool name.
 * @param {{ toolCallId: string, name: string, args: object }} frame - The call.
 * @returns {void}
 */
var dispatchClientToolCall = (send, tools, frame) => {
  // Best-effort delivery: a failing `send` must not surface as an unhandled
  // rejection from the detached task below.
  const safeSend = (payload) => {
    try {
      send(payload);
    } catch {
    }
  };
  // Own-property lookup only: `frame.name` comes from the wire, and names like
  // "constructor" would otherwise resolve to inherited Object.prototype members.
  const tool = Object.prototype.hasOwnProperty.call(tools, frame.name) ? tools[frame.name] : void 0;
  if (!tool) {
    safeSend({
      type: "client_tool_result",
      toolCallId: frame.toolCallId,
      error: `No handler for ${frame.name}`
    });
    return;
  }
  void (async () => {
    try {
      const out = await tool.handler(frame.args);
      // JSON.stringify returns undefined for undefined/functions/symbols; a
      // frame serialized with `result: undefined` would carry no result at all.
      const serialized = typeof out === "string" ? out : JSON.stringify(out);
      safeSend({
        type: "client_tool_result",
        toolCallId: frame.toolCallId,
        result: serialized ?? "null"
      });
    } catch (err) {
      safeSend({
        type: "client_tool_result",
        toolCallId: frame.toolCallId,
        error: err instanceof Error ? err.message : String(err)
      });
    }
  })();
};
|
|
593
|
+
|
|
504
594
|
// src/VoiceClient.ts
|
|
505
595
|
var BrowserVoiceClient = class {
|
|
506
596
|
constructor(args) {
|
|
@@ -529,6 +619,10 @@ var BrowserVoiceClient = class {
|
|
|
529
619
|
// ---------------------------------------------------------------
|
|
530
620
|
// Internal
|
|
531
621
|
// ---------------------------------------------------------------
|
|
622
|
+
this.sendClientToolsRegister = () => {
|
|
623
|
+
const frame = buildRegisterFrame(this.args.options.clientTools ?? {});
|
|
624
|
+
this.rws?.send(JSON.stringify(frame));
|
|
625
|
+
};
|
|
532
626
|
this.setState = (next) => {
|
|
533
627
|
if (this.proto.state === next) return;
|
|
534
628
|
this.proto.state = next;
|
|
@@ -558,7 +652,13 @@ var BrowserVoiceClient = class {
|
|
|
558
652
|
onError: this.emitError,
|
|
559
653
|
onInterrupt: () => this.playback?.flush(),
|
|
560
654
|
onAgentTurnStart: () => void 0,
|
|
561
|
-
onCallEnd: (reason) => this.teardown(reason)
|
|
655
|
+
onCallEnd: (reason) => this.teardown(reason),
|
|
656
|
+
onConnected: () => this.sendClientToolsRegister(),
|
|
657
|
+
onClientToolCall: (frame) => dispatchClientToolCall(
|
|
658
|
+
(f) => this.rws?.send(JSON.stringify(f)),
|
|
659
|
+
this.args.options.clientTools ?? {},
|
|
660
|
+
frame
|
|
661
|
+
)
|
|
562
662
|
});
|
|
563
663
|
} else {
|
|
564
664
|
this.playback?.enqueue(ev.data);
|
|
@@ -623,6 +723,7 @@ var BrowserVoiceClient = class {
|
|
|
623
723
|
};
|
|
624
724
|
this.args = args;
|
|
625
725
|
this.proto = createProtocolState();
|
|
726
|
+
validateClientToolMap(args.options.clientTools);
|
|
626
727
|
}
|
|
627
728
|
// ---------------------------------------------------------------
|
|
628
729
|
// Call interface
|