@drawdream/livespeech 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/dist/index.d.mts +161 -7
- package/dist/index.d.ts +161 -7
- package/dist/index.js +56 -16
- package/dist/index.mjs +56 -16
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -74,6 +74,7 @@ client.disconnect();
|
|
|
74
74
|
connect() → startSession() → audioStart() → sendAudioChunk()* → audioEnd() → endSession()
|
|
75
75
|
↓
|
|
76
76
|
sendSystemMessage() (optional, during live session)
|
|
77
|
+
sendToolResponse() (when toolCall received)
|
|
77
78
|
```
|
|
78
79
|
|
|
79
80
|
| Step | Description |
|
|
@@ -83,6 +84,7 @@ connect() → startSession() → audioStart() → sendAudioChunk()* → audioEnd
|
|
|
83
84
|
| `audioStart()` | Begin audio streaming |
|
|
84
85
|
| `sendAudioChunk(data)` | Send PCM16 audio (call multiple times) |
|
|
85
86
|
| `sendSystemMessage(msg)` | Inject context or trigger AI response (optional) |
|
|
87
|
+
| `sendToolResponse(id, result)` | Send function result back to AI (after toolCall) |
|
|
86
88
|
| `audioEnd()` | End streaming, triggers AI response |
|
|
87
89
|
| `endSession()` | End conversation |
|
|
88
90
|
| `disconnect()` | Close connection |
|
|
@@ -105,6 +107,7 @@ await client.startSession({
|
|
|
105
107
|
pipelineMode: 'live', // 'live' (default) or 'composed'
|
|
106
108
|
aiSpeaksFirst: false, // AI speaks first (live mode only)
|
|
107
109
|
allowHarmCategory: false, // Disable safety filtering (use with caution)
|
|
110
|
+
tools: [{ name: 'func', description: 'desc', parameters: {...} }], // Function calling
|
|
108
111
|
});
|
|
109
112
|
```
|
|
110
113
|
|
|
@@ -117,6 +120,7 @@ await client.startSession({
|
|
|
117
120
|
| `pipelineMode` | `'live' \| 'composed'` | `'live'` | Audio processing mode |
|
|
118
121
|
| `aiSpeaksFirst` | `boolean` | `false` | AI initiates conversation (live mode only) |
|
|
119
122
|
| `allowHarmCategory` | `boolean` | `false` | Disable content safety filtering |
|
|
123
|
+
| `tools` | `Tool[]` | `undefined` | Function definitions for AI to call |
|
|
120
124
|
|
|
121
125
|
### Pipeline Modes
|
|
122
126
|
|
|
@@ -152,6 +156,73 @@ await client.startSession({
|
|
|
152
156
|
|
|
153
157
|
> ⚠️ **Warning**: Only use in controlled environments where content moderation is handled by other means.
|
|
154
158
|
|
|
159
|
+
## Function Calling (Tool Use)
|
|
160
|
+
|
|
161
|
+
Define functions that the AI can call during conversation. When the AI decides to call a function, you receive a `toolCall` event and must respond with `sendToolResponse()`.
|
|
162
|
+
|
|
163
|
+
### Define Tools
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
const tools = [
|
|
167
|
+
{
|
|
168
|
+
name: 'open_login',
|
|
169
|
+
description: 'Opens Google Login popup when user wants to sign in',
|
|
170
|
+
parameters: { type: 'OBJECT', properties: {}, required: [] }
|
|
171
|
+
},
|
|
172
|
+
{
|
|
173
|
+
name: 'get_price',
|
|
174
|
+
description: 'Gets product price by ID',
|
|
175
|
+
parameters: {
|
|
176
|
+
type: 'OBJECT',
|
|
177
|
+
properties: {
|
|
178
|
+
productId: { type: 'string', description: 'Product ID' }
|
|
179
|
+
},
|
|
180
|
+
required: ['productId']
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
];
|
|
184
|
+
|
|
185
|
+
await client.startSession({
|
|
186
|
+
prePrompt: 'You are a helpful assistant. Use tools when appropriate.',
|
|
187
|
+
tools,
|
|
188
|
+
});
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Handle Tool Calls
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
client.on('toolCall', (event) => {
|
|
195
|
+
console.log('AI wants to call:', event.name);
|
|
196
|
+
console.log('With arguments:', event.args);
|
|
197
|
+
|
|
198
|
+
if (event.name === 'open_login') {
|
|
199
|
+
showLoginModal();
|
|
200
|
+
client.sendToolResponse(event.id, { success: true });
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
if (event.name === 'get_price') {
|
|
204
|
+
const price = getProductPrice(event.args.productId);
|
|
205
|
+
client.sendToolResponse(event.id, { price, currency: 'USD' });
|
|
206
|
+
}
|
|
207
|
+
});
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Tool Interface
|
|
211
|
+
|
|
212
|
+
```typescript
|
|
213
|
+
interface Tool {
|
|
214
|
+
name: string; // Function name
|
|
215
|
+
description: string; // When AI should use this
|
|
216
|
+
parameters?: {
|
|
217
|
+
type: 'OBJECT';
|
|
218
|
+
properties: Record<string, unknown>;
|
|
219
|
+
required?: string[];
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
> ⚠️ **Note**: Function calling only works with `pipelineMode: 'live'`.
|
|
225
|
+
|
|
155
226
|
## System Messages
|
|
156
227
|
|
|
157
228
|
During an active live session, you can inject text messages to the AI using `sendSystemMessage()`. This is useful for:
|
|
@@ -223,6 +294,7 @@ const client = new LiveSpeechClient({
|
|
|
223
294
|
| `response` | AI's response text | `text`, `isFinal` |
|
|
224
295
|
| `audio` | AI's audio output | `data`, `sampleRate` |
|
|
225
296
|
| `turnComplete` | AI finished speaking | `timestamp` |
|
|
297
|
+
| `toolCall` | AI wants to call a function | `id`, `name`, `args` |
|
|
226
298
|
| `error` | Error occurred | `code`, `message` |
|
|
227
299
|
|
|
228
300
|
### Simple Handlers
|
|
@@ -249,6 +321,13 @@ client.setAudioHandler((data: Uint8Array) => {
|
|
|
249
321
|
client.setErrorHandler((error) => {
|
|
250
322
|
console.error(`Error [${error.code}]: ${error.message}`);
|
|
251
323
|
});
|
|
324
|
+
|
|
325
|
+
// Tool calls (function calling)
|
|
326
|
+
client.on('toolCall', (event) => {
|
|
327
|
+
// Execute function and send result
|
|
328
|
+
const result = executeFunction(event.name, event.args);
|
|
329
|
+
client.sendToolResponse(event.id, result);
|
|
330
|
+
});
|
|
252
331
|
```
|
|
253
332
|
|
|
254
333
|
### Full Event API
|
|
@@ -283,6 +362,14 @@ client.on('turnComplete', () => {
|
|
|
283
362
|
client.on('error', (event) => {
|
|
284
363
|
console.error('Error:', event.code, event.message);
|
|
285
364
|
});
|
|
365
|
+
|
|
366
|
+
client.on('toolCall', (event) => {
|
|
367
|
+
// event.id: string - use with sendToolResponse
|
|
368
|
+
// event.name: string - function name
|
|
369
|
+
// event.args: object - function arguments
|
|
370
|
+
const result = handleToolCall(event.name, event.args);
|
|
371
|
+
client.sendToolResponse(event.id, result);
|
|
372
|
+
});
|
|
286
373
|
```
|
|
287
374
|
|
|
288
375
|
## Audio Format
|
|
@@ -434,8 +521,10 @@ import type {
|
|
|
434
521
|
ResponseEvent,
|
|
435
522
|
AudioEvent,
|
|
436
523
|
TurnCompleteEvent,
|
|
524
|
+
ToolCallEvent,
|
|
437
525
|
ErrorEvent,
|
|
438
526
|
ErrorCode,
|
|
527
|
+
Tool,
|
|
439
528
|
} from '@drawdream/livespeech';
|
|
440
529
|
```
|
|
441
530
|
|
package/dist/index.d.mts
CHANGED
|
@@ -82,6 +82,58 @@ interface LiveSpeechConfig {
|
|
|
82
82
|
*/
|
|
83
83
|
debug?: boolean;
|
|
84
84
|
}
|
|
85
|
+
/**
|
|
86
|
+
* Parameters schema for a function (for function calling)
|
|
87
|
+
*/
|
|
88
|
+
interface FunctionParameters {
|
|
89
|
+
/**
|
|
90
|
+
* Type of the parameters object
|
|
91
|
+
* @default "OBJECT"
|
|
92
|
+
*/
|
|
93
|
+
type: 'OBJECT';
|
|
94
|
+
/**
|
|
95
|
+
* Properties of the parameters object
|
|
96
|
+
* Each key is a parameter name, and the value describes its type and description
|
|
97
|
+
* @example { "productId": { "type": "string", "description": "The product ID to look up" } }
|
|
98
|
+
*/
|
|
99
|
+
properties: Record<string, unknown>;
|
|
100
|
+
/**
|
|
101
|
+
* List of required parameter names
|
|
102
|
+
*/
|
|
103
|
+
required?: string[];
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Tool definition for function calling
|
|
107
|
+
*
|
|
108
|
+
* Tools allow the AI to call functions defined by your application during conversation.
|
|
109
|
+
* When the AI decides to call a function, you'll receive a `toolCall` event.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* const tools: Tool[] = [{
|
|
113
|
+
* name: 'open_google_login',
|
|
114
|
+
* description: 'Opens the Google Login popup when user wants to sign in',
|
|
115
|
+
* parameters: {
|
|
116
|
+
* type: 'OBJECT',
|
|
117
|
+
* properties: {},
|
|
118
|
+
* required: []
|
|
119
|
+
* }
|
|
120
|
+
* }];
|
|
121
|
+
*/
|
|
122
|
+
interface Tool {
|
|
123
|
+
/**
|
|
124
|
+
* Name of the function (must be unique within the session)
|
|
125
|
+
*/
|
|
126
|
+
name: string;
|
|
127
|
+
/**
|
|
128
|
+
* Description of what the function does and when the AI should use it
|
|
129
|
+
*/
|
|
130
|
+
description: string;
|
|
131
|
+
/**
|
|
132
|
+
* Parameters schema for the function
|
|
133
|
+
* If the function takes no parameters, use { type: 'OBJECT', properties: {}, required: [] }
|
|
134
|
+
*/
|
|
135
|
+
parameters?: FunctionParameters;
|
|
136
|
+
}
|
|
85
137
|
/**
|
|
86
138
|
* Session configuration options
|
|
87
139
|
*/
|
|
@@ -114,6 +166,23 @@ interface SessionConfig {
|
|
|
114
166
|
* @default false
|
|
115
167
|
*/
|
|
116
168
|
allowHarmCategory?: boolean;
|
|
169
|
+
/**
|
|
170
|
+
* Tools (functions) that the AI can call during conversation.
|
|
171
|
+
* When the AI calls a tool, you'll receive a `toolCall` event.
|
|
172
|
+
* You must respond with `sendToolResponse()` to continue the conversation.
|
|
173
|
+
*
|
|
174
|
+
* @example
|
|
175
|
+
* tools: [{
|
|
176
|
+
* name: 'get_price',
|
|
177
|
+
* description: 'Get the current price of a product',
|
|
178
|
+
* parameters: {
|
|
179
|
+
* type: 'OBJECT',
|
|
180
|
+
* properties: { productId: { type: 'string', description: 'Product ID' } },
|
|
181
|
+
* required: ['productId']
|
|
182
|
+
* }
|
|
183
|
+
* }]
|
|
184
|
+
*/
|
|
185
|
+
tools?: Tool[];
|
|
117
186
|
}
|
|
118
187
|
/**
|
|
119
188
|
* Internal resolved configuration with defaults applied
|
|
@@ -132,7 +201,7 @@ interface ResolvedConfig {
|
|
|
132
201
|
/**
|
|
133
202
|
* Event types emitted by the LiveSpeech client
|
|
134
203
|
*/
|
|
135
|
-
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error';
|
|
204
|
+
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error';
|
|
136
205
|
/**
|
|
137
206
|
* Event payload for 'connected' event
|
|
138
207
|
*/
|
|
@@ -237,10 +306,40 @@ interface TurnCompleteEvent {
|
|
|
237
306
|
type: 'turnComplete';
|
|
238
307
|
timestamp: string;
|
|
239
308
|
}
|
|
309
|
+
/**
|
|
310
|
+
* Event payload for 'toolCall' event (function calling)
|
|
311
|
+
* Indicates the AI wants to call a function defined by your application.
|
|
312
|
+
* You must respond with `sendToolResponse()` to continue the conversation.
|
|
313
|
+
*
|
|
314
|
+
* @example
|
|
315
|
+
 * client.on('toolCall', async (event) => {
|
|
316
|
+
* if (event.name === 'get_price') {
|
|
317
|
+
* const price = await lookupPrice(event.args.productId);
|
|
318
|
+
* client.sendToolResponse(event.id, { price });
|
|
319
|
+
* }
|
|
320
|
+
* });
|
|
321
|
+
*/
|
|
322
|
+
interface ToolCallEvent {
|
|
323
|
+
type: 'toolCall';
|
|
324
|
+
/**
|
|
325
|
+
* Unique identifier for this tool call.
|
|
326
|
+
* Must be passed to `sendToolResponse()` to respond.
|
|
327
|
+
*/
|
|
328
|
+
id: string;
|
|
329
|
+
/**
|
|
330
|
+
* Name of the function the AI wants to call.
|
|
331
|
+
*/
|
|
332
|
+
name: string;
|
|
333
|
+
/**
|
|
334
|
+
* Arguments passed by the AI to the function.
|
|
335
|
+
*/
|
|
336
|
+
args: Record<string, unknown>;
|
|
337
|
+
timestamp: string;
|
|
338
|
+
}
|
|
240
339
|
/**
|
|
241
340
|
* Union type of all event payloads
|
|
242
341
|
*/
|
|
243
|
-
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ErrorEvent;
|
|
342
|
+
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | ErrorEvent;
|
|
244
343
|
/**
|
|
245
344
|
* Simplified event handlers for common use cases
|
|
246
345
|
*/
|
|
@@ -252,11 +351,11 @@ type ErrorHandler = (error: ErrorEvent) => void;
|
|
|
252
351
|
/**
|
|
253
352
|
* WebSocket message types sent from client to server
|
|
254
353
|
*/
|
|
255
|
-
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'ping';
|
|
354
|
+
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'ping';
|
|
256
355
|
/**
|
|
257
356
|
* WebSocket message types received from server
|
|
258
357
|
*/
|
|
259
|
-
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error' | 'pong';
|
|
358
|
+
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error' | 'pong';
|
|
260
359
|
/**
|
|
261
360
|
* Base interface for client messages
|
|
262
361
|
*/
|
|
@@ -271,6 +370,9 @@ interface StartSessionMessage extends BaseClientMessage {
|
|
|
271
370
|
prePrompt?: string;
|
|
272
371
|
language?: string;
|
|
273
372
|
pipelineMode?: 'live' | 'composed';
|
|
373
|
+
aiSpeaksFirst?: boolean;
|
|
374
|
+
allowHarmCategory?: boolean;
|
|
375
|
+
tools?: Tool[];
|
|
274
376
|
}
|
|
275
377
|
/**
|
|
276
378
|
* End session message
|
|
@@ -319,10 +421,26 @@ interface SystemMessageMessage extends BaseClientMessage {
|
|
|
319
421
|
action: 'systemMessage';
|
|
320
422
|
payload: SystemMessagePayload;
|
|
321
423
|
}
|
|
424
|
+
/**
|
|
425
|
+
* Tool response payload
|
|
426
|
+
*/
|
|
427
|
+
interface ToolResponsePayload {
|
|
428
|
+
/** The tool call ID from the toolCall event */
|
|
429
|
+
id: string;
|
|
430
|
+
/** The result of the function execution */
|
|
431
|
+
response: unknown;
|
|
432
|
+
}
|
|
433
|
+
/**
|
|
434
|
+
* Tool response - send function execution result back to Gemini
|
|
435
|
+
*/
|
|
436
|
+
interface ToolResponseMessage extends BaseClientMessage {
|
|
437
|
+
action: 'toolResponse';
|
|
438
|
+
payload: ToolResponsePayload;
|
|
439
|
+
}
|
|
322
440
|
/**
|
|
323
441
|
* Union type of all client messages
|
|
324
442
|
*/
|
|
325
|
-
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | PingMessage;
|
|
443
|
+
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | PingMessage;
|
|
326
444
|
/**
|
|
327
445
|
* Base interface for server messages
|
|
328
446
|
*/
|
|
@@ -396,10 +514,23 @@ interface ServerTurnCompleteMessage extends BaseServerMessage {
|
|
|
396
514
|
interface ServerReadyMessage extends BaseServerMessage {
|
|
397
515
|
type: 'ready';
|
|
398
516
|
}
|
|
517
|
+
/**
|
|
518
|
+
* Tool call message from server (function calling)
|
|
519
|
+
* Indicates the AI wants to call a function defined by the SDK user
|
|
520
|
+
*/
|
|
521
|
+
interface ServerToolCallMessage extends BaseServerMessage {
|
|
522
|
+
type: 'toolCall';
|
|
523
|
+
/** Unique identifier for this tool call */
|
|
524
|
+
id: string;
|
|
525
|
+
/** Name of the function to call */
|
|
526
|
+
name: string;
|
|
527
|
+
/** Arguments passed to the function */
|
|
528
|
+
args: Record<string, unknown>;
|
|
529
|
+
}
|
|
399
530
|
/**
|
|
400
531
|
* Union type of all server messages
|
|
401
532
|
*/
|
|
402
|
-
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerErrorMessage | ServerPongMessage;
|
|
533
|
+
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerErrorMessage | ServerPongMessage;
|
|
403
534
|
|
|
404
535
|
/**
|
|
405
536
|
* Connection state
|
|
@@ -420,6 +551,7 @@ type LiveSpeechEventMap = {
|
|
|
420
551
|
response: ResponseEvent;
|
|
421
552
|
audio: AudioEvent;
|
|
422
553
|
turnComplete: TurnCompleteEvent;
|
|
554
|
+
toolCall: ToolCallEvent;
|
|
423
555
|
error: ErrorEvent;
|
|
424
556
|
};
|
|
425
557
|
/**
|
|
@@ -515,6 +647,28 @@ declare class LiveSpeechClient {
|
|
|
515
647
|
text: string;
|
|
516
648
|
triggerResponse?: boolean;
|
|
517
649
|
}): void;
|
|
650
|
+
/**
|
|
651
|
+
* Send a tool response back to the AI after executing a function
|
|
652
|
+
*
|
|
653
|
+
* When you receive a `toolCall` event, you must execute the function and send
|
|
654
|
+
* the result back using this method. The AI will use the result to continue
|
|
655
|
+
* the conversation.
|
|
656
|
+
*
|
|
657
|
+
* @param id - The tool call ID from the `toolCall` event
|
|
658
|
+
* @param response - The result of the function execution (will be JSON serialized)
|
|
659
|
+
*
|
|
660
|
+
* @example
|
|
661
|
+
* client.on('toolCall', async (event) => {
|
|
662
|
+
* if (event.name === 'get_price') {
|
|
663
|
+
* const price = await lookupPrice(event.args.productId);
|
|
664
|
+
* client.sendToolResponse(event.id, { price, currency: 'USD' });
|
|
665
|
+
* } else if (event.name === 'open_login') {
|
|
666
|
+
* showLoginModal();
|
|
667
|
+
* client.sendToolResponse(event.id, { success: true });
|
|
668
|
+
* }
|
|
669
|
+
* });
|
|
670
|
+
*/
|
|
671
|
+
sendToolResponse(id: string, response?: unknown): void;
|
|
518
672
|
/**
|
|
519
673
|
* Add event listener
|
|
520
674
|
*/
|
|
@@ -636,4 +790,4 @@ declare class AudioEncoder {
|
|
|
636
790
|
wrapWav(data: Uint8Array): Uint8Array;
|
|
637
791
|
}
|
|
638
792
|
|
|
639
|
-
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
|
|
793
|
+
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
|
package/dist/index.d.ts
CHANGED
|
@@ -82,6 +82,58 @@ interface LiveSpeechConfig {
|
|
|
82
82
|
*/
|
|
83
83
|
debug?: boolean;
|
|
84
84
|
}
|
|
85
|
+
/**
|
|
86
|
+
* Parameters schema for a function (for function calling)
|
|
87
|
+
*/
|
|
88
|
+
interface FunctionParameters {
|
|
89
|
+
/**
|
|
90
|
+
* Type of the parameters object
|
|
91
|
+
* @default "OBJECT"
|
|
92
|
+
*/
|
|
93
|
+
type: 'OBJECT';
|
|
94
|
+
/**
|
|
95
|
+
* Properties of the parameters object
|
|
96
|
+
* Each key is a parameter name, and the value describes its type and description
|
|
97
|
+
* @example { "productId": { "type": "string", "description": "The product ID to look up" } }
|
|
98
|
+
*/
|
|
99
|
+
properties: Record<string, unknown>;
|
|
100
|
+
/**
|
|
101
|
+
* List of required parameter names
|
|
102
|
+
*/
|
|
103
|
+
required?: string[];
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Tool definition for function calling
|
|
107
|
+
*
|
|
108
|
+
* Tools allow the AI to call functions defined by your application during conversation.
|
|
109
|
+
* When the AI decides to call a function, you'll receive a `toolCall` event.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* const tools: Tool[] = [{
|
|
113
|
+
* name: 'open_google_login',
|
|
114
|
+
* description: 'Opens the Google Login popup when user wants to sign in',
|
|
115
|
+
* parameters: {
|
|
116
|
+
* type: 'OBJECT',
|
|
117
|
+
* properties: {},
|
|
118
|
+
* required: []
|
|
119
|
+
* }
|
|
120
|
+
* }];
|
|
121
|
+
*/
|
|
122
|
+
interface Tool {
|
|
123
|
+
/**
|
|
124
|
+
* Name of the function (must be unique within the session)
|
|
125
|
+
*/
|
|
126
|
+
name: string;
|
|
127
|
+
/**
|
|
128
|
+
* Description of what the function does and when the AI should use it
|
|
129
|
+
*/
|
|
130
|
+
description: string;
|
|
131
|
+
/**
|
|
132
|
+
* Parameters schema for the function
|
|
133
|
+
* If the function takes no parameters, use { type: 'OBJECT', properties: {}, required: [] }
|
|
134
|
+
*/
|
|
135
|
+
parameters?: FunctionParameters;
|
|
136
|
+
}
|
|
85
137
|
/**
|
|
86
138
|
* Session configuration options
|
|
87
139
|
*/
|
|
@@ -114,6 +166,23 @@ interface SessionConfig {
|
|
|
114
166
|
* @default false
|
|
115
167
|
*/
|
|
116
168
|
allowHarmCategory?: boolean;
|
|
169
|
+
/**
|
|
170
|
+
* Tools (functions) that the AI can call during conversation.
|
|
171
|
+
* When the AI calls a tool, you'll receive a `toolCall` event.
|
|
172
|
+
* You must respond with `sendToolResponse()` to continue the conversation.
|
|
173
|
+
*
|
|
174
|
+
* @example
|
|
175
|
+
* tools: [{
|
|
176
|
+
* name: 'get_price',
|
|
177
|
+
* description: 'Get the current price of a product',
|
|
178
|
+
* parameters: {
|
|
179
|
+
* type: 'OBJECT',
|
|
180
|
+
* properties: { productId: { type: 'string', description: 'Product ID' } },
|
|
181
|
+
* required: ['productId']
|
|
182
|
+
* }
|
|
183
|
+
* }]
|
|
184
|
+
*/
|
|
185
|
+
tools?: Tool[];
|
|
117
186
|
}
|
|
118
187
|
/**
|
|
119
188
|
* Internal resolved configuration with defaults applied
|
|
@@ -132,7 +201,7 @@ interface ResolvedConfig {
|
|
|
132
201
|
/**
|
|
133
202
|
* Event types emitted by the LiveSpeech client
|
|
134
203
|
*/
|
|
135
|
-
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error';
|
|
204
|
+
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error';
|
|
136
205
|
/**
|
|
137
206
|
* Event payload for 'connected' event
|
|
138
207
|
*/
|
|
@@ -237,10 +306,40 @@ interface TurnCompleteEvent {
|
|
|
237
306
|
type: 'turnComplete';
|
|
238
307
|
timestamp: string;
|
|
239
308
|
}
|
|
309
|
+
/**
|
|
310
|
+
* Event payload for 'toolCall' event (function calling)
|
|
311
|
+
* Indicates the AI wants to call a function defined by your application.
|
|
312
|
+
* You must respond with `sendToolResponse()` to continue the conversation.
|
|
313
|
+
*
|
|
314
|
+
* @example
|
|
315
|
+
 * client.on('toolCall', async (event) => {
|
|
316
|
+
* if (event.name === 'get_price') {
|
|
317
|
+
* const price = await lookupPrice(event.args.productId);
|
|
318
|
+
* client.sendToolResponse(event.id, { price });
|
|
319
|
+
* }
|
|
320
|
+
* });
|
|
321
|
+
*/
|
|
322
|
+
interface ToolCallEvent {
|
|
323
|
+
type: 'toolCall';
|
|
324
|
+
/**
|
|
325
|
+
* Unique identifier for this tool call.
|
|
326
|
+
* Must be passed to `sendToolResponse()` to respond.
|
|
327
|
+
*/
|
|
328
|
+
id: string;
|
|
329
|
+
/**
|
|
330
|
+
* Name of the function the AI wants to call.
|
|
331
|
+
*/
|
|
332
|
+
name: string;
|
|
333
|
+
/**
|
|
334
|
+
* Arguments passed by the AI to the function.
|
|
335
|
+
*/
|
|
336
|
+
args: Record<string, unknown>;
|
|
337
|
+
timestamp: string;
|
|
338
|
+
}
|
|
240
339
|
/**
|
|
241
340
|
* Union type of all event payloads
|
|
242
341
|
*/
|
|
243
|
-
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ErrorEvent;
|
|
342
|
+
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | ErrorEvent;
|
|
244
343
|
/**
|
|
245
344
|
* Simplified event handlers for common use cases
|
|
246
345
|
*/
|
|
@@ -252,11 +351,11 @@ type ErrorHandler = (error: ErrorEvent) => void;
|
|
|
252
351
|
/**
|
|
253
352
|
* WebSocket message types sent from client to server
|
|
254
353
|
*/
|
|
255
|
-
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'ping';
|
|
354
|
+
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'ping';
|
|
256
355
|
/**
|
|
257
356
|
* WebSocket message types received from server
|
|
258
357
|
*/
|
|
259
|
-
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error' | 'pong';
|
|
358
|
+
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error' | 'pong';
|
|
260
359
|
/**
|
|
261
360
|
* Base interface for client messages
|
|
262
361
|
*/
|
|
@@ -271,6 +370,9 @@ interface StartSessionMessage extends BaseClientMessage {
|
|
|
271
370
|
prePrompt?: string;
|
|
272
371
|
language?: string;
|
|
273
372
|
pipelineMode?: 'live' | 'composed';
|
|
373
|
+
aiSpeaksFirst?: boolean;
|
|
374
|
+
allowHarmCategory?: boolean;
|
|
375
|
+
tools?: Tool[];
|
|
274
376
|
}
|
|
275
377
|
/**
|
|
276
378
|
* End session message
|
|
@@ -319,10 +421,26 @@ interface SystemMessageMessage extends BaseClientMessage {
|
|
|
319
421
|
action: 'systemMessage';
|
|
320
422
|
payload: SystemMessagePayload;
|
|
321
423
|
}
|
|
424
|
+
/**
|
|
425
|
+
* Tool response payload
|
|
426
|
+
*/
|
|
427
|
+
interface ToolResponsePayload {
|
|
428
|
+
/** The tool call ID from the toolCall event */
|
|
429
|
+
id: string;
|
|
430
|
+
/** The result of the function execution */
|
|
431
|
+
response: unknown;
|
|
432
|
+
}
|
|
433
|
+
/**
|
|
434
|
+
* Tool response - send function execution result back to Gemini
|
|
435
|
+
*/
|
|
436
|
+
interface ToolResponseMessage extends BaseClientMessage {
|
|
437
|
+
action: 'toolResponse';
|
|
438
|
+
payload: ToolResponsePayload;
|
|
439
|
+
}
|
|
322
440
|
/**
|
|
323
441
|
* Union type of all client messages
|
|
324
442
|
*/
|
|
325
|
-
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | PingMessage;
|
|
443
|
+
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | PingMessage;
|
|
326
444
|
/**
|
|
327
445
|
* Base interface for server messages
|
|
328
446
|
*/
|
|
@@ -396,10 +514,23 @@ interface ServerTurnCompleteMessage extends BaseServerMessage {
|
|
|
396
514
|
interface ServerReadyMessage extends BaseServerMessage {
|
|
397
515
|
type: 'ready';
|
|
398
516
|
}
|
|
517
|
+
/**
|
|
518
|
+
* Tool call message from server (function calling)
|
|
519
|
+
* Indicates the AI wants to call a function defined by the SDK user
|
|
520
|
+
*/
|
|
521
|
+
interface ServerToolCallMessage extends BaseServerMessage {
|
|
522
|
+
type: 'toolCall';
|
|
523
|
+
/** Unique identifier for this tool call */
|
|
524
|
+
id: string;
|
|
525
|
+
/** Name of the function to call */
|
|
526
|
+
name: string;
|
|
527
|
+
/** Arguments passed to the function */
|
|
528
|
+
args: Record<string, unknown>;
|
|
529
|
+
}
|
|
399
530
|
/**
|
|
400
531
|
* Union type of all server messages
|
|
401
532
|
*/
|
|
402
|
-
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerErrorMessage | ServerPongMessage;
|
|
533
|
+
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerErrorMessage | ServerPongMessage;
|
|
403
534
|
|
|
404
535
|
/**
|
|
405
536
|
* Connection state
|
|
@@ -420,6 +551,7 @@ type LiveSpeechEventMap = {
|
|
|
420
551
|
response: ResponseEvent;
|
|
421
552
|
audio: AudioEvent;
|
|
422
553
|
turnComplete: TurnCompleteEvent;
|
|
554
|
+
toolCall: ToolCallEvent;
|
|
423
555
|
error: ErrorEvent;
|
|
424
556
|
};
|
|
425
557
|
/**
|
|
@@ -515,6 +647,28 @@ declare class LiveSpeechClient {
|
|
|
515
647
|
text: string;
|
|
516
648
|
triggerResponse?: boolean;
|
|
517
649
|
}): void;
|
|
650
|
+
/**
|
|
651
|
+
* Send a tool response back to the AI after executing a function
|
|
652
|
+
*
|
|
653
|
+
* When you receive a `toolCall` event, you must execute the function and send
|
|
654
|
+
* the result back using this method. The AI will use the result to continue
|
|
655
|
+
* the conversation.
|
|
656
|
+
*
|
|
657
|
+
* @param id - The tool call ID from the `toolCall` event
|
|
658
|
+
* @param response - The result of the function execution (will be JSON serialized)
|
|
659
|
+
*
|
|
660
|
+
* @example
|
|
661
|
+
* client.on('toolCall', async (event) => {
|
|
662
|
+
* if (event.name === 'get_price') {
|
|
663
|
+
* const price = await lookupPrice(event.args.productId);
|
|
664
|
+
* client.sendToolResponse(event.id, { price, currency: 'USD' });
|
|
665
|
+
* } else if (event.name === 'open_login') {
|
|
666
|
+
* showLoginModal();
|
|
667
|
+
* client.sendToolResponse(event.id, { success: true });
|
|
668
|
+
* }
|
|
669
|
+
* });
|
|
670
|
+
*/
|
|
671
|
+
sendToolResponse(id: string, response?: unknown): void;
|
|
518
672
|
/**
|
|
519
673
|
* Add event listener
|
|
520
674
|
*/
|
|
@@ -636,4 +790,4 @@ declare class AudioEncoder {
|
|
|
636
790
|
wrapWav(data: Uint8Array): Uint8Array;
|
|
637
791
|
}
|
|
638
792
|
|
|
639
|
-
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
|
|
793
|
+
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
|
package/dist/index.js
CHANGED
|
@@ -45,7 +45,7 @@ var Region = {
|
|
|
45
45
|
US_WEST_2: "us-west-2"
|
|
46
46
|
};
|
|
47
47
|
var REGION_ENDPOINTS = {
|
|
48
|
-
"ap-northeast-2": "
|
|
48
|
+
"ap-northeast-2": "ws://localhost:9090",
|
|
49
49
|
"us-west-2": "wss://talk.drawdream.ca"
|
|
50
50
|
// Coming soon
|
|
51
51
|
};
|
|
@@ -728,21 +728,15 @@ var LiveSpeechClient = class {
|
|
|
728
728
|
};
|
|
729
729
|
this.on("sessionStarted", onSessionStarted);
|
|
730
730
|
this.on("error", onError);
|
|
731
|
-
|
|
732
|
-
action: "startSession"
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
}
|
|
740
|
-
startMessage.pipelineMode = config?.pipelineMode ?? "live";
|
|
741
|
-
if (config?.aiSpeaksFirst) {
|
|
742
|
-
startMessage.aiSpeaksFirst = config.aiSpeaksFirst;
|
|
743
|
-
}
|
|
744
|
-
startMessage.allowHarmCategory = config?.allowHarmCategory ?? false;
|
|
745
|
-
this.connection.send(startMessage);
|
|
731
|
+
this.connection.send({
|
|
732
|
+
action: "startSession",
|
|
733
|
+
...config?.prePrompt && { prePrompt: config.prePrompt },
|
|
734
|
+
...config?.language && { language: config.language },
|
|
735
|
+
pipelineMode: config?.pipelineMode ?? "live",
|
|
736
|
+
...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
|
|
737
|
+
allowHarmCategory: config?.allowHarmCategory ?? false,
|
|
738
|
+
...config?.tools && config.tools.length > 0 && { tools: config.tools }
|
|
739
|
+
});
|
|
746
740
|
});
|
|
747
741
|
}
|
|
748
742
|
/**
|
|
@@ -849,6 +843,40 @@ var LiveSpeechClient = class {
|
|
|
849
843
|
payload
|
|
850
844
|
});
|
|
851
845
|
}
|
|
846
|
+
/**
|
|
847
|
+
* Send a tool response back to the AI after executing a function
|
|
848
|
+
*
|
|
849
|
+
* When you receive a `toolCall` event, you must execute the function and send
|
|
850
|
+
* the result back using this method. The AI will use the result to continue
|
|
851
|
+
* the conversation.
|
|
852
|
+
*
|
|
853
|
+
* @param id - The tool call ID from the `toolCall` event
|
|
854
|
+
* @param response - The result of the function execution (will be JSON serialized)
|
|
855
|
+
*
|
|
856
|
+
* @example
|
|
857
|
+
* client.on('toolCall', async (event) => {
|
|
858
|
+
* if (event.name === 'get_price') {
|
|
859
|
+
* const price = await lookupPrice(event.args.productId);
|
|
860
|
+
* client.sendToolResponse(event.id, { price, currency: 'USD' });
|
|
861
|
+
* } else if (event.name === 'open_login') {
|
|
862
|
+
* showLoginModal();
|
|
863
|
+
* client.sendToolResponse(event.id, { success: true });
|
|
864
|
+
* }
|
|
865
|
+
* });
|
|
866
|
+
*/
|
|
867
|
+
sendToolResponse(id, response = {}) {
|
|
868
|
+
if (!this.isConnected) {
|
|
869
|
+
throw new Error("Not connected");
|
|
870
|
+
}
|
|
871
|
+
if (!this.isStreaming) {
|
|
872
|
+
throw new Error("No active Live session. Call audioStart() first.");
|
|
873
|
+
}
|
|
874
|
+
this.logger.info("Sending tool response:", id);
|
|
875
|
+
this.connection.send({
|
|
876
|
+
action: "toolResponse",
|
|
877
|
+
payload: { id, response }
|
|
878
|
+
});
|
|
879
|
+
}
|
|
852
880
|
// ==================== Event System ====================
|
|
853
881
|
/**
|
|
854
882
|
* Add event listener
|
|
@@ -1014,6 +1042,18 @@ var LiveSpeechClient = class {
|
|
|
1014
1042
|
this.emit("turnComplete", turnCompleteEvent);
|
|
1015
1043
|
break;
|
|
1016
1044
|
}
|
|
1045
|
+
case "toolCall": {
|
|
1046
|
+
const toolCallEvent = {
|
|
1047
|
+
type: "toolCall",
|
|
1048
|
+
id: message.id,
|
|
1049
|
+
name: message.name,
|
|
1050
|
+
args: message.args ?? {},
|
|
1051
|
+
timestamp: message.timestamp
|
|
1052
|
+
};
|
|
1053
|
+
this.logger.info("Tool call received:", toolCallEvent.name);
|
|
1054
|
+
this.emit("toolCall", toolCallEvent);
|
|
1055
|
+
break;
|
|
1056
|
+
}
|
|
1017
1057
|
case "error":
|
|
1018
1058
|
this.handleError(message.code, message.message);
|
|
1019
1059
|
break;
|
package/dist/index.mjs
CHANGED
|
@@ -6,7 +6,7 @@ var Region = {
|
|
|
6
6
|
US_WEST_2: "us-west-2"
|
|
7
7
|
};
|
|
8
8
|
var REGION_ENDPOINTS = {
|
|
9
|
-
"ap-northeast-2": "
|
|
9
|
+
"ap-northeast-2": "ws://localhost:9090",
|
|
10
10
|
"us-west-2": "wss://talk.drawdream.ca"
|
|
11
11
|
// Coming soon
|
|
12
12
|
};
|
|
@@ -689,21 +689,15 @@ var LiveSpeechClient = class {
|
|
|
689
689
|
};
|
|
690
690
|
this.on("sessionStarted", onSessionStarted);
|
|
691
691
|
this.on("error", onError);
|
|
692
|
-
|
|
693
|
-
action: "startSession"
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
}
|
|
701
|
-
startMessage.pipelineMode = config?.pipelineMode ?? "live";
|
|
702
|
-
if (config?.aiSpeaksFirst) {
|
|
703
|
-
startMessage.aiSpeaksFirst = config.aiSpeaksFirst;
|
|
704
|
-
}
|
|
705
|
-
startMessage.allowHarmCategory = config?.allowHarmCategory ?? false;
|
|
706
|
-
this.connection.send(startMessage);
|
|
692
|
+
this.connection.send({
|
|
693
|
+
action: "startSession",
|
|
694
|
+
...config?.prePrompt && { prePrompt: config.prePrompt },
|
|
695
|
+
...config?.language && { language: config.language },
|
|
696
|
+
pipelineMode: config?.pipelineMode ?? "live",
|
|
697
|
+
...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
|
|
698
|
+
allowHarmCategory: config?.allowHarmCategory ?? false,
|
|
699
|
+
...config?.tools && config.tools.length > 0 && { tools: config.tools }
|
|
700
|
+
});
|
|
707
701
|
});
|
|
708
702
|
}
|
|
709
703
|
/**
|
|
@@ -810,6 +804,40 @@ var LiveSpeechClient = class {
|
|
|
810
804
|
payload
|
|
811
805
|
});
|
|
812
806
|
}
|
|
807
|
+
/**
|
|
808
|
+
* Send a tool response back to the AI after executing a function
|
|
809
|
+
*
|
|
810
|
+
* When you receive a `toolCall` event, you must execute the function and send
|
|
811
|
+
* the result back using this method. The AI will use the result to continue
|
|
812
|
+
* the conversation.
|
|
813
|
+
*
|
|
814
|
+
* @param id - The tool call ID from the `toolCall` event
|
|
815
|
+
* @param response - The result of the function execution (will be JSON serialized)
|
|
816
|
+
*
|
|
817
|
+
* @example
|
|
818
|
+
* client.on('toolCall', async (event) => {
|
|
819
|
+
* if (event.name === 'get_price') {
|
|
820
|
+
* const price = await lookupPrice(event.args.productId);
|
|
821
|
+
* client.sendToolResponse(event.id, { price, currency: 'USD' });
|
|
822
|
+
* } else if (event.name === 'open_login') {
|
|
823
|
+
* showLoginModal();
|
|
824
|
+
* client.sendToolResponse(event.id, { success: true });
|
|
825
|
+
* }
|
|
826
|
+
* });
|
|
827
|
+
*/
|
|
828
|
+
sendToolResponse(id, response = {}) {
|
|
829
|
+
if (!this.isConnected) {
|
|
830
|
+
throw new Error("Not connected");
|
|
831
|
+
}
|
|
832
|
+
if (!this.isStreaming) {
|
|
833
|
+
throw new Error("No active Live session. Call audioStart() first.");
|
|
834
|
+
}
|
|
835
|
+
this.logger.info("Sending tool response:", id);
|
|
836
|
+
this.connection.send({
|
|
837
|
+
action: "toolResponse",
|
|
838
|
+
payload: { id, response }
|
|
839
|
+
});
|
|
840
|
+
}
|
|
813
841
|
// ==================== Event System ====================
|
|
814
842
|
/**
|
|
815
843
|
* Add event listener
|
|
@@ -975,6 +1003,18 @@ var LiveSpeechClient = class {
|
|
|
975
1003
|
this.emit("turnComplete", turnCompleteEvent);
|
|
976
1004
|
break;
|
|
977
1005
|
}
|
|
1006
|
+
case "toolCall": {
|
|
1007
|
+
const toolCallEvent = {
|
|
1008
|
+
type: "toolCall",
|
|
1009
|
+
id: message.id,
|
|
1010
|
+
name: message.name,
|
|
1011
|
+
args: message.args ?? {},
|
|
1012
|
+
timestamp: message.timestamp
|
|
1013
|
+
};
|
|
1014
|
+
this.logger.info("Tool call received:", toolCallEvent.name);
|
|
1015
|
+
this.emit("toolCall", toolCallEvent);
|
|
1016
|
+
break;
|
|
1017
|
+
}
|
|
978
1018
|
case "error":
|
|
979
1019
|
this.handleError(message.code, message.message);
|
|
980
1020
|
break;
|