@drawdream/livespeech 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -74,6 +74,7 @@ client.disconnect();
74
74
  connect() → startSession() → audioStart() → sendAudioChunk()* → audioEnd() → endSession()
75
75
 
76
76
  sendSystemMessage() (optional, during live session)
77
+ sendToolResponse() (when toolCall received)
77
78
  ```
78
79
 
79
80
  | Step | Description |
@@ -83,6 +84,7 @@ connect() → startSession() → audioStart() → sendAudioChunk()* → audioEnd
83
84
  | `audioStart()` | Begin audio streaming |
84
85
  | `sendAudioChunk(data)` | Send PCM16 audio (call multiple times) |
85
86
  | `sendSystemMessage(msg)` | Inject context or trigger AI response (optional) |
87
+ | `sendToolResponse(id, result)` | Send function result back to AI (after toolCall) |
86
88
  | `audioEnd()` | End streaming, triggers AI response |
87
89
  | `endSession()` | End conversation |
88
90
  | `disconnect()` | Close connection |
@@ -105,6 +107,7 @@ await client.startSession({
105
107
  pipelineMode: 'live', // 'live' (default) or 'composed'
106
108
  aiSpeaksFirst: false, // AI speaks first (live mode only)
107
109
  allowHarmCategory: false, // Disable safety filtering (use with caution)
110
+ tools: [{ name: 'func', description: 'desc', parameters: {...} }], // Function calling
108
111
  });
109
112
  ```
110
113
 
@@ -117,6 +120,7 @@ await client.startSession({
117
120
  | `pipelineMode` | `'live' \| 'composed'` | `'live'` | Audio processing mode |
118
121
  | `aiSpeaksFirst` | `boolean` | `false` | AI initiates conversation (live mode only) |
119
122
  | `allowHarmCategory` | `boolean` | `false` | Disable content safety filtering |
123
+ | `tools` | `Tool[]` | `undefined` | Function definitions for AI to call |
120
124
 
121
125
  ### Pipeline Modes
122
126
 
@@ -152,6 +156,73 @@ await client.startSession({
152
156
 
153
157
  > ⚠️ **Warning**: Only use in controlled environments where content moderation is handled by other means.
154
158
 
159
+ ## Function Calling (Tool Use)
160
+
161
+ Define functions that the AI can call during conversation. When the AI decides to call a function, you receive a `toolCall` event and must respond with `sendToolResponse()`.
162
+
163
+ ### Define Tools
164
+
165
+ ```typescript
166
+ const tools = [
167
+ {
168
+ name: 'open_login',
169
+ description: 'Opens Google Login popup when user wants to sign in',
170
+ parameters: { type: 'OBJECT', properties: {}, required: [] }
171
+ },
172
+ {
173
+ name: 'get_price',
174
+ description: 'Gets product price by ID',
175
+ parameters: {
176
+ type: 'OBJECT',
177
+ properties: {
178
+ productId: { type: 'string', description: 'Product ID' }
179
+ },
180
+ required: ['productId']
181
+ }
182
+ }
183
+ ];
184
+
185
+ await client.startSession({
186
+ prePrompt: 'You are a helpful assistant. Use tools when appropriate.',
187
+ tools,
188
+ });
189
+ ```
190
+
191
+ ### Handle Tool Calls
192
+
193
+ ```typescript
194
+ client.on('toolCall', (event) => {
195
+ console.log('AI wants to call:', event.name);
196
+ console.log('With arguments:', event.args);
197
+
198
+ if (event.name === 'open_login') {
199
+ showLoginModal();
200
+ client.sendToolResponse(event.id, { success: true });
201
+ }
202
+
203
+ if (event.name === 'get_price') {
204
+ const price = getProductPrice(event.args.productId);
205
+ client.sendToolResponse(event.id, { price, currency: 'USD' });
206
+ }
207
+ });
208
+ ```
209
+
210
+ ### Tool Interface
211
+
212
+ ```typescript
213
+ interface Tool {
214
+ name: string; // Function name
215
+ description: string; // When AI should use this
216
+ parameters?: {
217
+ type: 'OBJECT';
218
+ properties: Record<string, unknown>;
219
+ required?: string[];
220
+ };
221
+ }
222
+ ```
223
+
224
+ > ⚠️ **Note**: Function calling only works with `pipelineMode: 'live'`
225
+
155
226
  ## System Messages
156
227
 
157
228
  During an active live session, you can inject text messages to the AI using `sendSystemMessage()`. This is useful for:
@@ -223,6 +294,7 @@ const client = new LiveSpeechClient({
223
294
  | `response` | AI's response text | `text`, `isFinal` |
224
295
  | `audio` | AI's audio output | `data`, `sampleRate` |
225
296
  | `turnComplete` | AI finished speaking | `timestamp` |
297
+ | `toolCall` | AI wants to call a function | `id`, `name`, `args` |
226
298
  | `error` | Error occurred | `code`, `message` |
227
299
 
228
300
  ### Simple Handlers
@@ -249,6 +321,13 @@ client.setAudioHandler((data: Uint8Array) => {
249
321
  client.setErrorHandler((error) => {
250
322
  console.error(`Error [${error.code}]: ${error.message}`);
251
323
  });
324
+
325
+ // Tool calls (function calling)
326
+ client.on('toolCall', (event) => {
327
+ // Execute function and send result
328
+ const result = executeFunction(event.name, event.args);
329
+ client.sendToolResponse(event.id, result);
330
+ });
252
331
  ```
253
332
 
254
333
  ### Full Event API
@@ -283,6 +362,14 @@ client.on('turnComplete', () => {
283
362
  client.on('error', (event) => {
284
363
  console.error('Error:', event.code, event.message);
285
364
  });
365
+
366
+ client.on('toolCall', (event) => {
367
+ // event.id: string - use with sendToolResponse
368
+ // event.name: string - function name
369
+ // event.args: object - function arguments
370
+ const result = handleToolCall(event.name, event.args);
371
+ client.sendToolResponse(event.id, result);
372
+ });
286
373
  ```
287
374
 
288
375
  ## Audio Format
@@ -434,8 +521,10 @@ import type {
434
521
  ResponseEvent,
435
522
  AudioEvent,
436
523
  TurnCompleteEvent,
524
+ ToolCallEvent,
437
525
  ErrorEvent,
438
526
  ErrorCode,
527
+ Tool,
439
528
  } from '@drawdream/livespeech';
440
529
  ```
441
530
 
package/dist/index.d.mts CHANGED
@@ -82,6 +82,58 @@ interface LiveSpeechConfig {
82
82
  */
83
83
  debug?: boolean;
84
84
  }
85
+ /**
86
+ * Parameters schema for a function (for function calling)
87
+ */
88
+ interface FunctionParameters {
89
+ /**
90
+ * Type of the parameters object
91
+ * Must be the literal "OBJECT" (this field is required)
92
+ */
93
+ type: 'OBJECT';
94
+ /**
95
+ * Properties of the parameters object
96
+ * Each key is a parameter name, and the value describes its type and description
97
+ * @example { "productId": { "type": "string", "description": "The product ID to look up" } }
98
+ */
99
+ properties: Record<string, unknown>;
100
+ /**
101
+ * List of required parameter names
102
+ */
103
+ required?: string[];
104
+ }
105
+ /**
106
+ * Tool definition for function calling
107
+ *
108
+ * Tools allow the AI to call functions defined by your application during conversation.
109
+ * When the AI decides to call a function, you'll receive a `toolCall` event.
110
+ *
111
+ * @example
112
+ * const tools: Tool[] = [{
113
+ * name: 'open_google_login',
114
+ * description: 'Opens the Google Login popup when user wants to sign in',
115
+ * parameters: {
116
+ * type: 'OBJECT',
117
+ * properties: {},
118
+ * required: []
119
+ * }
120
+ * }];
121
+ */
122
+ interface Tool {
123
+ /**
124
+ * Name of the function (must be unique within the session)
125
+ */
126
+ name: string;
127
+ /**
128
+ * Description of what the function does and when the AI should use it
129
+ */
130
+ description: string;
131
+ /**
132
+ * Parameters schema for the function
133
+ * If the function takes no parameters, use { type: 'OBJECT', properties: {}, required: [] }
134
+ */
135
+ parameters?: FunctionParameters;
136
+ }
85
137
  /**
86
138
  * Session configuration options
87
139
  */
@@ -114,6 +166,23 @@ interface SessionConfig {
114
166
  * @default false
115
167
  */
116
168
  allowHarmCategory?: boolean;
169
+ /**
170
+ * Tools (functions) that the AI can call during conversation.
171
+ * When the AI calls a tool, you'll receive a `toolCall` event.
172
+ * You must respond with `sendToolResponse()` to continue the conversation.
173
+ *
174
+ * @example
175
+ * tools: [{
176
+ * name: 'get_price',
177
+ * description: 'Get the current price of a product',
178
+ * parameters: {
179
+ * type: 'OBJECT',
180
+ * properties: { productId: { type: 'string', description: 'Product ID' } },
181
+ * required: ['productId']
182
+ * }
183
+ * }]
184
+ */
185
+ tools?: Tool[];
117
186
  }
118
187
  /**
119
188
  * Internal resolved configuration with defaults applied
@@ -132,7 +201,7 @@ interface ResolvedConfig {
132
201
  /**
133
202
  * Event types emitted by the LiveSpeech client
134
203
  */
135
- type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error';
204
+ type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error';
136
205
  /**
137
206
  * Event payload for 'connected' event
138
207
  */
@@ -237,10 +306,40 @@ interface TurnCompleteEvent {
237
306
  type: 'turnComplete';
238
307
  timestamp: string;
239
308
  }
309
+ /**
310
+ * Event payload for 'toolCall' event (function calling)
311
+ * Indicates the AI wants to call a function defined by your application.
312
+ * You must respond with `sendToolResponse()` to continue the conversation.
313
+ *
314
+ * @example
315
+ * client.on('toolCall', async (event) => {
316
+ * if (event.name === 'get_price') {
317
+ * const price = await lookupPrice(event.args.productId);
318
+ * client.sendToolResponse(event.id, { price });
319
+ * }
320
+ * });
321
+ */
322
+ interface ToolCallEvent {
323
+ type: 'toolCall';
324
+ /**
325
+ * Unique identifier for this tool call.
326
+ * Must be passed to `sendToolResponse()` to respond.
327
+ */
328
+ id: string;
329
+ /**
330
+ * Name of the function the AI wants to call.
331
+ */
332
+ name: string;
333
+ /**
334
+ * Arguments passed by the AI to the function.
335
+ */
336
+ args: Record<string, unknown>;
337
+ timestamp: string;
338
+ }
240
339
  /**
241
340
  * Union type of all event payloads
242
341
  */
243
- type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ErrorEvent;
342
+ type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | ErrorEvent;
244
343
  /**
245
344
  * Simplified event handlers for common use cases
246
345
  */
@@ -252,11 +351,11 @@ type ErrorHandler = (error: ErrorEvent) => void;
252
351
  /**
253
352
  * WebSocket message types sent from client to server
254
353
  */
255
- type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'ping';
354
+ type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'ping';
256
355
  /**
257
356
  * WebSocket message types received from server
258
357
  */
259
- type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error' | 'pong';
358
+ type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error' | 'pong';
260
359
  /**
261
360
  * Base interface for client messages
262
361
  */
@@ -271,6 +370,9 @@ interface StartSessionMessage extends BaseClientMessage {
271
370
  prePrompt?: string;
272
371
  language?: string;
273
372
  pipelineMode?: 'live' | 'composed';
373
+ aiSpeaksFirst?: boolean;
374
+ allowHarmCategory?: boolean;
375
+ tools?: Tool[];
274
376
  }
275
377
  /**
276
378
  * End session message
@@ -319,10 +421,26 @@ interface SystemMessageMessage extends BaseClientMessage {
319
421
  action: 'systemMessage';
320
422
  payload: SystemMessagePayload;
321
423
  }
424
+ /**
425
+ * Tool response payload
426
+ */
427
+ interface ToolResponsePayload {
428
+ /** The tool call ID from the toolCall event */
429
+ id: string;
430
+ /** The result of the function execution */
431
+ response: unknown;
432
+ }
433
+ /**
434
+ * Tool response - send function execution result back to the AI
435
+ */
436
+ interface ToolResponseMessage extends BaseClientMessage {
437
+ action: 'toolResponse';
438
+ payload: ToolResponsePayload;
439
+ }
322
440
  /**
323
441
  * Union type of all client messages
324
442
  */
325
- type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | PingMessage;
443
+ type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | PingMessage;
326
444
  /**
327
445
  * Base interface for server messages
328
446
  */
@@ -396,10 +514,23 @@ interface ServerTurnCompleteMessage extends BaseServerMessage {
396
514
  interface ServerReadyMessage extends BaseServerMessage {
397
515
  type: 'ready';
398
516
  }
517
+ /**
518
+ * Tool call message from server (function calling)
519
+ * Indicates the AI wants to call a function defined by the SDK user
520
+ */
521
+ interface ServerToolCallMessage extends BaseServerMessage {
522
+ type: 'toolCall';
523
+ /** Unique identifier for this tool call */
524
+ id: string;
525
+ /** Name of the function to call */
526
+ name: string;
527
+ /** Arguments passed to the function */
528
+ args: Record<string, unknown>;
529
+ }
399
530
  /**
400
531
  * Union type of all server messages
401
532
  */
402
- type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerErrorMessage | ServerPongMessage;
533
+ type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerErrorMessage | ServerPongMessage;
403
534
 
404
535
  /**
405
536
  * Connection state
@@ -420,6 +551,7 @@ type LiveSpeechEventMap = {
420
551
  response: ResponseEvent;
421
552
  audio: AudioEvent;
422
553
  turnComplete: TurnCompleteEvent;
554
+ toolCall: ToolCallEvent;
423
555
  error: ErrorEvent;
424
556
  };
425
557
  /**
@@ -515,6 +647,28 @@ declare class LiveSpeechClient {
515
647
  text: string;
516
648
  triggerResponse?: boolean;
517
649
  }): void;
650
+ /**
651
+ * Send a tool response back to the AI after executing a function
652
+ *
653
+ * When you receive a `toolCall` event, you must execute the function and send
654
+ * the result back using this method. The AI will use the result to continue
655
+ * the conversation.
656
+ *
657
+ * @param id - The tool call ID from the `toolCall` event
658
+ * @param response - The result of the function execution (will be JSON serialized; defaults to `{}` when omitted)
659
+ *
660
+ * @example
661
+ * client.on('toolCall', async (event) => {
662
+ * if (event.name === 'get_price') {
663
+ * const price = await lookupPrice(event.args.productId);
664
+ * client.sendToolResponse(event.id, { price, currency: 'USD' });
665
+ * } else if (event.name === 'open_login') {
666
+ * showLoginModal();
667
+ * client.sendToolResponse(event.id, { success: true });
668
+ * }
669
+ * });
670
+ */
671
+ sendToolResponse(id: string, response?: unknown): void;
518
672
  /**
519
673
  * Add event listener
520
674
  */
@@ -636,4 +790,4 @@ declare class AudioEncoder {
636
790
  wrapWav(data: Uint8Array): Uint8Array;
637
791
  }
638
792
 
639
- export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
793
+ export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
package/dist/index.d.ts CHANGED
@@ -82,6 +82,58 @@ interface LiveSpeechConfig {
82
82
  */
83
83
  debug?: boolean;
84
84
  }
85
+ /**
86
+ * Parameters schema for a function (for function calling)
87
+ */
88
+ interface FunctionParameters {
89
+ /**
90
+ * Type of the parameters object
91
+ * Must be the literal "OBJECT" (this field is required)
92
+ */
93
+ type: 'OBJECT';
94
+ /**
95
+ * Properties of the parameters object
96
+ * Each key is a parameter name, and the value describes its type and description
97
+ * @example { "productId": { "type": "string", "description": "The product ID to look up" } }
98
+ */
99
+ properties: Record<string, unknown>;
100
+ /**
101
+ * List of required parameter names
102
+ */
103
+ required?: string[];
104
+ }
105
+ /**
106
+ * Tool definition for function calling
107
+ *
108
+ * Tools allow the AI to call functions defined by your application during conversation.
109
+ * When the AI decides to call a function, you'll receive a `toolCall` event.
110
+ *
111
+ * @example
112
+ * const tools: Tool[] = [{
113
+ * name: 'open_google_login',
114
+ * description: 'Opens the Google Login popup when user wants to sign in',
115
+ * parameters: {
116
+ * type: 'OBJECT',
117
+ * properties: {},
118
+ * required: []
119
+ * }
120
+ * }];
121
+ */
122
+ interface Tool {
123
+ /**
124
+ * Name of the function (must be unique within the session)
125
+ */
126
+ name: string;
127
+ /**
128
+ * Description of what the function does and when the AI should use it
129
+ */
130
+ description: string;
131
+ /**
132
+ * Parameters schema for the function
133
+ * If the function takes no parameters, use { type: 'OBJECT', properties: {}, required: [] }
134
+ */
135
+ parameters?: FunctionParameters;
136
+ }
85
137
  /**
86
138
  * Session configuration options
87
139
  */
@@ -114,6 +166,23 @@ interface SessionConfig {
114
166
  * @default false
115
167
  */
116
168
  allowHarmCategory?: boolean;
169
+ /**
170
+ * Tools (functions) that the AI can call during conversation.
171
+ * When the AI calls a tool, you'll receive a `toolCall` event.
172
+ * You must respond with `sendToolResponse()` to continue the conversation.
173
+ *
174
+ * @example
175
+ * tools: [{
176
+ * name: 'get_price',
177
+ * description: 'Get the current price of a product',
178
+ * parameters: {
179
+ * type: 'OBJECT',
180
+ * properties: { productId: { type: 'string', description: 'Product ID' } },
181
+ * required: ['productId']
182
+ * }
183
+ * }]
184
+ */
185
+ tools?: Tool[];
117
186
  }
118
187
  /**
119
188
  * Internal resolved configuration with defaults applied
@@ -132,7 +201,7 @@ interface ResolvedConfig {
132
201
  /**
133
202
  * Event types emitted by the LiveSpeech client
134
203
  */
135
- type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error';
204
+ type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error';
136
205
  /**
137
206
  * Event payload for 'connected' event
138
207
  */
@@ -237,10 +306,40 @@ interface TurnCompleteEvent {
237
306
  type: 'turnComplete';
238
307
  timestamp: string;
239
308
  }
309
+ /**
310
+ * Event payload for 'toolCall' event (function calling)
311
+ * Indicates the AI wants to call a function defined by your application.
312
+ * You must respond with `sendToolResponse()` to continue the conversation.
313
+ *
314
+ * @example
315
+ * client.on('toolCall', async (event) => {
316
+ * if (event.name === 'get_price') {
317
+ * const price = await lookupPrice(event.args.productId);
318
+ * client.sendToolResponse(event.id, { price });
319
+ * }
320
+ * });
321
+ */
322
+ interface ToolCallEvent {
323
+ type: 'toolCall';
324
+ /**
325
+ * Unique identifier for this tool call.
326
+ * Must be passed to `sendToolResponse()` to respond.
327
+ */
328
+ id: string;
329
+ /**
330
+ * Name of the function the AI wants to call.
331
+ */
332
+ name: string;
333
+ /**
334
+ * Arguments passed by the AI to the function.
335
+ */
336
+ args: Record<string, unknown>;
337
+ timestamp: string;
338
+ }
240
339
  /**
241
340
  * Union type of all event payloads
242
341
  */
243
- type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ErrorEvent;
342
+ type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | ErrorEvent;
244
343
  /**
245
344
  * Simplified event handlers for common use cases
246
345
  */
@@ -252,11 +351,11 @@ type ErrorHandler = (error: ErrorEvent) => void;
252
351
  /**
253
352
  * WebSocket message types sent from client to server
254
353
  */
255
- type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'ping';
354
+ type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'ping';
256
355
  /**
257
356
  * WebSocket message types received from server
258
357
  */
259
- type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error' | 'pong';
358
+ type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'error' | 'pong';
260
359
  /**
261
360
  * Base interface for client messages
262
361
  */
@@ -271,6 +370,9 @@ interface StartSessionMessage extends BaseClientMessage {
271
370
  prePrompt?: string;
272
371
  language?: string;
273
372
  pipelineMode?: 'live' | 'composed';
373
+ aiSpeaksFirst?: boolean;
374
+ allowHarmCategory?: boolean;
375
+ tools?: Tool[];
274
376
  }
275
377
  /**
276
378
  * End session message
@@ -319,10 +421,26 @@ interface SystemMessageMessage extends BaseClientMessage {
319
421
  action: 'systemMessage';
320
422
  payload: SystemMessagePayload;
321
423
  }
424
+ /**
425
+ * Tool response payload
426
+ */
427
+ interface ToolResponsePayload {
428
+ /** The tool call ID from the toolCall event */
429
+ id: string;
430
+ /** The result of the function execution */
431
+ response: unknown;
432
+ }
433
+ /**
434
+ * Tool response - send function execution result back to the AI
435
+ */
436
+ interface ToolResponseMessage extends BaseClientMessage {
437
+ action: 'toolResponse';
438
+ payload: ToolResponsePayload;
439
+ }
322
440
  /**
323
441
  * Union type of all client messages
324
442
  */
325
- type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | PingMessage;
443
+ type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | PingMessage;
326
444
  /**
327
445
  * Base interface for server messages
328
446
  */
@@ -396,10 +514,23 @@ interface ServerTurnCompleteMessage extends BaseServerMessage {
396
514
  interface ServerReadyMessage extends BaseServerMessage {
397
515
  type: 'ready';
398
516
  }
517
+ /**
518
+ * Tool call message from server (function calling)
519
+ * Indicates the AI wants to call a function defined by the SDK user
520
+ */
521
+ interface ServerToolCallMessage extends BaseServerMessage {
522
+ type: 'toolCall';
523
+ /** Unique identifier for this tool call */
524
+ id: string;
525
+ /** Name of the function to call */
526
+ name: string;
527
+ /** Arguments passed to the function */
528
+ args: Record<string, unknown>;
529
+ }
399
530
  /**
400
531
  * Union type of all server messages
401
532
  */
402
- type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerErrorMessage | ServerPongMessage;
533
+ type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerErrorMessage | ServerPongMessage;
403
534
 
404
535
  /**
405
536
  * Connection state
@@ -420,6 +551,7 @@ type LiveSpeechEventMap = {
420
551
  response: ResponseEvent;
421
552
  audio: AudioEvent;
422
553
  turnComplete: TurnCompleteEvent;
554
+ toolCall: ToolCallEvent;
423
555
  error: ErrorEvent;
424
556
  };
425
557
  /**
@@ -515,6 +647,28 @@ declare class LiveSpeechClient {
515
647
  text: string;
516
648
  triggerResponse?: boolean;
517
649
  }): void;
650
+ /**
651
+ * Send a tool response back to the AI after executing a function
652
+ *
653
+ * When you receive a `toolCall` event, you must execute the function and send
654
+ * the result back using this method. The AI will use the result to continue
655
+ * the conversation.
656
+ *
657
+ * @param id - The tool call ID from the `toolCall` event
658
+ * @param response - The result of the function execution (will be JSON serialized; defaults to `{}` when omitted)
659
+ *
660
+ * @example
661
+ * client.on('toolCall', async (event) => {
662
+ * if (event.name === 'get_price') {
663
+ * const price = await lookupPrice(event.args.productId);
664
+ * client.sendToolResponse(event.id, { price, currency: 'USD' });
665
+ * } else if (event.name === 'open_login') {
666
+ * showLoginModal();
667
+ * client.sendToolResponse(event.id, { success: true });
668
+ * }
669
+ * });
670
+ */
671
+ sendToolResponse(id: string, response?: unknown): void;
518
672
  /**
519
673
  * Add event listener
520
674
  */
@@ -636,4 +790,4 @@ declare class AudioEncoder {
636
790
  wrapWav(data: Uint8Array): Uint8Array;
637
791
  }
638
792
 
639
- export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
793
+ export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
package/dist/index.js CHANGED
@@ -45,7 +45,7 @@ var Region = {
45
45
  US_WEST_2: "us-west-2"
46
46
  };
47
47
  var REGION_ENDPOINTS = {
48
- "ap-northeast-2": "wss://talk.drawdream.co.kr",
48
+ "ap-northeast-2": "ws://localhost:9090",
49
49
  "us-west-2": "wss://talk.drawdream.ca"
50
50
  // Coming soon
51
51
  };
@@ -728,21 +728,15 @@ var LiveSpeechClient = class {
728
728
  };
729
729
  this.on("sessionStarted", onSessionStarted);
730
730
  this.on("error", onError);
731
- const startMessage = {
732
- action: "startSession"
733
- };
734
- if (config?.prePrompt) {
735
- startMessage.prePrompt = config.prePrompt;
736
- }
737
- if (config?.language) {
738
- startMessage.language = config.language;
739
- }
740
- startMessage.pipelineMode = config?.pipelineMode ?? "live";
741
- if (config?.aiSpeaksFirst) {
742
- startMessage.aiSpeaksFirst = config.aiSpeaksFirst;
743
- }
744
- startMessage.allowHarmCategory = config?.allowHarmCategory ?? false;
745
- this.connection.send(startMessage);
731
+ this.connection.send({
732
+ action: "startSession",
733
+ ...config?.prePrompt && { prePrompt: config.prePrompt },
734
+ ...config?.language && { language: config.language },
735
+ pipelineMode: config?.pipelineMode ?? "live",
736
+ ...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
737
+ allowHarmCategory: config?.allowHarmCategory ?? false,
738
+ ...config?.tools && config.tools.length > 0 && { tools: config.tools }
739
+ });
746
740
  });
747
741
  }
748
742
  /**
@@ -849,6 +843,40 @@ var LiveSpeechClient = class {
849
843
  payload
850
844
  });
851
845
  }
846
+ /**
847
+ * Send a tool response back to the AI after executing a function
848
+ *
849
+ * When you receive a `toolCall` event, you must execute the function and send
850
+ * the result back using this method. The AI will use the result to continue
851
+ * the conversation.
852
+ *
853
+ * @param id - The tool call ID from the `toolCall` event
854
+ * @param response - The result of the function execution (will be JSON serialized)
855
+ *
856
+ * @example
857
+ * client.on('toolCall', async (event) => {
858
+ * if (event.name === 'get_price') {
859
+ * const price = await lookupPrice(event.args.productId);
860
+ * client.sendToolResponse(event.id, { price, currency: 'USD' });
861
+ * } else if (event.name === 'open_login') {
862
+ * showLoginModal();
863
+ * client.sendToolResponse(event.id, { success: true });
864
+ * }
865
+ * });
866
+ */
867
+ sendToolResponse(id, response = {}) {
868
+ if (!this.isConnected) {
869
+ throw new Error("Not connected");
870
+ }
871
+ if (!this.isStreaming) {
872
+ throw new Error("No active Live session. Call audioStart() first.");
873
+ }
874
+ this.logger.info("Sending tool response:", id);
875
+ this.connection.send({
876
+ action: "toolResponse",
877
+ payload: { id, response }
878
+ });
879
+ }
852
880
  // ==================== Event System ====================
853
881
  /**
854
882
  * Add event listener
@@ -1014,6 +1042,18 @@ var LiveSpeechClient = class {
1014
1042
  this.emit("turnComplete", turnCompleteEvent);
1015
1043
  break;
1016
1044
  }
1045
+ case "toolCall": {
1046
+ const toolCallEvent = {
1047
+ type: "toolCall",
1048
+ id: message.id,
1049
+ name: message.name,
1050
+ args: message.args ?? {},
1051
+ timestamp: message.timestamp
1052
+ };
1053
+ this.logger.info("Tool call received:", toolCallEvent.name);
1054
+ this.emit("toolCall", toolCallEvent);
1055
+ break;
1056
+ }
1017
1057
  case "error":
1018
1058
  this.handleError(message.code, message.message);
1019
1059
  break;
package/dist/index.mjs CHANGED
@@ -6,7 +6,7 @@ var Region = {
6
6
  US_WEST_2: "us-west-2"
7
7
  };
8
8
  var REGION_ENDPOINTS = {
9
- "ap-northeast-2": "wss://talk.drawdream.co.kr",
9
+ "ap-northeast-2": "ws://localhost:9090",
10
10
  "us-west-2": "wss://talk.drawdream.ca"
11
11
  // Coming soon
12
12
  };
@@ -689,21 +689,15 @@ var LiveSpeechClient = class {
689
689
  };
690
690
  this.on("sessionStarted", onSessionStarted);
691
691
  this.on("error", onError);
692
- const startMessage = {
693
- action: "startSession"
694
- };
695
- if (config?.prePrompt) {
696
- startMessage.prePrompt = config.prePrompt;
697
- }
698
- if (config?.language) {
699
- startMessage.language = config.language;
700
- }
701
- startMessage.pipelineMode = config?.pipelineMode ?? "live";
702
- if (config?.aiSpeaksFirst) {
703
- startMessage.aiSpeaksFirst = config.aiSpeaksFirst;
704
- }
705
- startMessage.allowHarmCategory = config?.allowHarmCategory ?? false;
706
- this.connection.send(startMessage);
692
+ this.connection.send({
693
+ action: "startSession",
694
+ ...config?.prePrompt && { prePrompt: config.prePrompt },
695
+ ...config?.language && { language: config.language },
696
+ pipelineMode: config?.pipelineMode ?? "live",
697
+ ...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
698
+ allowHarmCategory: config?.allowHarmCategory ?? false,
699
+ ...config?.tools && config.tools.length > 0 && { tools: config.tools }
700
+ });
707
701
  });
708
702
  }
709
703
  /**
@@ -810,6 +804,40 @@ var LiveSpeechClient = class {
810
804
  payload
811
805
  });
812
806
  }
807
+ /**
808
+ * Send a tool response back to the AI after executing a function
809
+ *
810
+ * When you receive a `toolCall` event, you must execute the function and send
811
+ * the result back using this method. The AI will use the result to continue
812
+ * the conversation.
813
+ *
814
+ * @param id - The tool call ID from the `toolCall` event
815
+ * @param response - The result of the function execution (will be JSON serialized)
816
+ *
817
+ * @example
818
+ * client.on('toolCall', async (event) => {
819
+ * if (event.name === 'get_price') {
820
+ * const price = await lookupPrice(event.args.productId);
821
+ * client.sendToolResponse(event.id, { price, currency: 'USD' });
822
+ * } else if (event.name === 'open_login') {
823
+ * showLoginModal();
824
+ * client.sendToolResponse(event.id, { success: true });
825
+ * }
826
+ * });
827
+ */
828
+ sendToolResponse(id, response = {}) {
829
+ if (!this.isConnected) {
830
+ throw new Error("Not connected");
831
+ }
832
+ if (!this.isStreaming) {
833
+ throw new Error("No active Live session. Call audioStart() first.");
834
+ }
835
+ this.logger.info("Sending tool response:", id);
836
+ this.connection.send({
837
+ action: "toolResponse",
838
+ payload: { id, response }
839
+ });
840
+ }
813
841
  // ==================== Event System ====================
814
842
  /**
815
843
  * Add event listener
@@ -975,6 +1003,18 @@ var LiveSpeechClient = class {
975
1003
  this.emit("turnComplete", turnCompleteEvent);
976
1004
  break;
977
1005
  }
1006
+ case "toolCall": {
1007
+ const toolCallEvent = {
1008
+ type: "toolCall",
1009
+ id: message.id,
1010
+ name: message.name,
1011
+ args: message.args ?? {},
1012
+ timestamp: message.timestamp
1013
+ };
1014
+ this.logger.info("Tool call received:", toolCallEvent.name);
1015
+ this.emit("toolCall", toolCallEvent);
1016
+ break;
1017
+ }
978
1018
  case "error":
979
1019
  this.handleError(message.code, message.message);
980
1020
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@drawdream/livespeech",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "description": "Real-time speech-to-speech AI conversation SDK",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",