@drawdream/livespeech 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -382
- package/dist/index.d.mts +61 -6
- package/dist/index.d.ts +61 -6
- package/dist/index.js +38 -0
- package/dist/index.mjs +38 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,7 +10,7 @@ A TypeScript/JavaScript SDK for real-time speech-to-speech AI conversations.
|
|
|
10
10
|
- 🎙️ **Real-time Voice Conversations** - Natural, low-latency voice interactions
|
|
11
11
|
- 🌍 **Multi-language Support** - Korean, English, Japanese, Chinese, and more
|
|
12
12
|
- 🔊 **Streaming Audio** - Send and receive audio in real-time
|
|
13
|
-
-
|
|
13
|
+
- ⏹️ **Barge-in Support** - Interrupt AI mid-speech by talking or programmatically
|
|
14
14
|
- 🔄 **Auto-reconnection** - Automatic recovery from network issues
|
|
15
15
|
- 🌐 **Browser & Node.js** - Works in both environments
|
|
16
16
|
|
|
@@ -18,13 +18,9 @@ A TypeScript/JavaScript SDK for real-time speech-to-speech AI conversations.
|
|
|
18
18
|
|
|
19
19
|
```bash
|
|
20
20
|
npm install @drawdream/livespeech
|
|
21
|
-
# or
|
|
22
|
-
yarn add @drawdream/livespeech
|
|
23
|
-
# or
|
|
24
|
-
pnpm add @drawdream/livespeech
|
|
25
21
|
```
|
|
26
22
|
|
|
27
|
-
## Quick Start
|
|
23
|
+
## Quick Start (5 minutes)
|
|
28
24
|
|
|
29
25
|
```typescript
|
|
30
26
|
import { LiveSpeechClient } from '@drawdream/livespeech';
|
|
@@ -34,31 +30,28 @@ const client = new LiveSpeechClient({
|
|
|
34
30
|
apiKey: 'your-api-key',
|
|
35
31
|
});
|
|
36
32
|
|
|
37
|
-
//
|
|
38
|
-
client.
|
|
39
|
-
|
|
33
|
+
// Handle only 4 essential events!
|
|
34
|
+
client.setAudioHandler((audioData) => {
|
|
35
|
+
audioPlayer.queue(audioData); // PCM16 @ 24kHz
|
|
40
36
|
});
|
|
41
37
|
|
|
42
|
-
client.
|
|
43
|
-
|
|
38
|
+
client.on('interrupted', () => {
|
|
39
|
+
audioPlayer.clear(); // CRITICAL: Clear buffer on interrupt!
|
|
44
40
|
});
|
|
45
41
|
|
|
46
|
-
client.
|
|
47
|
-
|
|
42
|
+
client.on('turnComplete', () => {
|
|
43
|
+
console.log('AI finished');
|
|
48
44
|
});
|
|
49
45
|
|
|
50
46
|
client.setErrorHandler((error) => {
|
|
51
47
|
console.error('Error:', error.message);
|
|
52
48
|
});
|
|
53
49
|
|
|
54
|
-
// Connect and start
|
|
50
|
+
// Connect and start
|
|
55
51
|
await client.connect();
|
|
56
|
-
await client.startSession({
|
|
57
|
-
prePrompt: 'You are a helpful assistant.',
|
|
58
|
-
language: 'ko-KR',
|
|
59
|
-
});
|
|
52
|
+
await client.startSession({ prePrompt: 'You are a helpful assistant.' });
|
|
60
53
|
|
|
61
|
-
//
|
|
54
|
+
// Send audio
|
|
62
55
|
client.audioStart();
|
|
63
56
|
client.sendAudioChunk(pcmData); // PCM16 @ 16kHz
|
|
64
57
|
client.audioEnd();
|
|
@@ -68,380 +61,224 @@ await client.endSession();
|
|
|
68
61
|
client.disconnect();
|
|
69
62
|
```
|
|
70
63
|
|
|
71
|
-
|
|
64
|
+
---
|
|
72
65
|
|
|
73
|
-
|
|
74
|
-
connect() → startSession() → audioStart() → sendAudioChunk()* → audioEnd() → endSession()
|
|
75
|
-
↓
|
|
76
|
-
sendSystemMessage() (optional, during live session)
|
|
77
|
-
sendToolResponse() (when toolCall received)
|
|
78
|
-
```
|
|
66
|
+
# Core API
|
|
79
67
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
|
85
|
-
|
|
86
|
-
| `
|
|
87
|
-
| `sendToolResponse(id, result)` | Send function result back to AI (after toolCall) |
|
|
88
|
-
| `updateUserId(userId)` | Migrate guest session to user account |
|
|
89
|
-
| `audioEnd()` | End streaming, triggers AI response |
|
|
90
|
-
| `endSession()` | End conversation |
|
|
68
|
+
Everything you need for basic voice conversations.
|
|
69
|
+
|
|
70
|
+
## Methods
|
|
71
|
+
|
|
72
|
+
| Method | Description |
|
|
73
|
+
|--------|-------------|
|
|
74
|
+
| `connect()` | Establish connection |
|
|
91
75
|
| `disconnect()` | Close connection |
|
|
76
|
+
| `startSession(config)` | Start conversation with system prompt |
|
|
77
|
+
| `endSession()` | End conversation |
|
|
78
|
+
| `sendAudioChunk(data)` | Send PCM16 audio (16kHz) |
|
|
79
|
+
|
|
80
|
+
## Events
|
|
81
|
+
|
|
82
|
+
| Event | Description | Action Required |
|
|
83
|
+
|-------|-------------|-----------------|
|
|
84
|
+
| `audio` | AI's audio output | Play audio (PCM16 @ 24kHz) |
|
|
85
|
+
| `turnComplete` | AI finished speaking | Ready for next input |
|
|
86
|
+
| `interrupted` | User barged in | **Clear audio buffer!** |
|
|
87
|
+
| `error` | Error occurred | Handle/log error |
|
|
88
|
+
|
|
89
|
+
### ⚠️ Critical: Handle `interrupted`
|
|
90
|
+
|
|
91
|
+
When the user speaks while AI is responding, **you must clear your audio buffer**:
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
client.on('interrupted', () => {
|
|
95
|
+
audioPlayer.clear(); // Stop buffered audio immediately
|
|
96
|
+
audioPlayer.stop();
|
|
97
|
+
});
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Without this, 2-3 seconds of buffered audio continues playing after the user interrupts.
|
|
101
|
+
|
|
102
|
+
## Audio Format
|
|
103
|
+
|
|
104
|
+
| Direction | Format | Sample Rate |
|
|
105
|
+
|-----------|--------|-------------|
|
|
106
|
+
| Input (mic) | PCM16 | 16,000 Hz |
|
|
107
|
+
| Output (AI) | PCM16 | 24,000 Hz |
|
|
92
108
|
|
|
93
109
|
## Configuration
|
|
94
110
|
|
|
95
111
|
```typescript
|
|
96
112
|
const client = new LiveSpeechClient({
|
|
97
|
-
region: 'ap-northeast-2', // Required
|
|
98
|
-
apiKey: 'your-api-key', // Required
|
|
99
|
-
userId: 'user-123', // Optional: Enable conversation memory
|
|
100
|
-
autoReconnect: true, // Auto-reconnect on disconnect
|
|
101
|
-
maxReconnectAttempts: 5, // Maximum reconnection attempts
|
|
102
|
-
debug: false, // Enable debug logging
|
|
113
|
+
region: 'ap-northeast-2', // Required
|
|
114
|
+
apiKey: 'your-api-key', // Required
|
|
103
115
|
});
|
|
104
116
|
|
|
105
117
|
await client.startSession({
|
|
106
118
|
prePrompt: 'You are a helpful assistant.',
|
|
107
|
-
language: 'ko-KR', //
|
|
108
|
-
pipelineMode: 'live', // 'live' (default) or 'composed'
|
|
109
|
-
aiSpeaksFirst: false, // AI speaks first (live mode only)
|
|
110
|
-
allowHarmCategory: false, // Disable safety filtering (use with caution)
|
|
111
|
-
tools: [{ name: 'func', description: 'desc', parameters: {...} }], // Function calling
|
|
119
|
+
language: 'ko-KR', // Optional: ko-KR, en-US, ja-JP, etc.
|
|
112
120
|
});
|
|
113
121
|
```
|
|
114
122
|
|
|
115
|
-
|
|
123
|
+
---
|
|
116
124
|
|
|
117
|
-
|
|
118
|
-
|--------|------|---------|-------------|
|
|
119
|
-
| `prePrompt` | `string` | - | System prompt for the AI assistant |
|
|
120
|
-
| `language` | `string` | `'en-US'` | Language code (e.g., `ko-KR`, `ja-JP`) |
|
|
121
|
-
| `pipelineMode` | `'live' \| 'composed'` | `'live'` | Audio processing mode |
|
|
122
|
-
| `aiSpeaksFirst` | `boolean` | `false` | AI initiates conversation (live mode only) |
|
|
123
|
-
| `allowHarmCategory` | `boolean` | `false` | Disable content safety filtering |
|
|
124
|
-
| `tools` | `Tool[]` | `undefined` | Function definitions for AI to call |
|
|
125
|
+
# Advanced API
|
|
125
126
|
|
|
126
|
-
|
|
127
|
+
Optional features for power users.
|
|
127
128
|
|
|
128
|
-
|
|
129
|
-
|------|---------|-------------|
|
|
130
|
-
| `live` | Lower (~300ms) | Direct audio-to-audio via Live API |
|
|
131
|
-
| `composed` | Higher (~1-2s) | Separate STT → LLM → TTS pipeline |
|
|
129
|
+
## Additional Methods
|
|
132
130
|
|
|
133
|
-
|
|
131
|
+
| Method | Description |
|
|
132
|
+
|--------|-------------|
|
|
133
|
+
| `audioStart()` / `audioEnd()` | Manual audio stream control |
|
|
134
|
+
| `interrupt()` | Explicitly stop AI response (for Stop button) |
|
|
135
|
+
| `sendSystemMessage(msg)` | Inject context during conversation |
|
|
136
|
+
| `sendToolResponse(id, result)` | Reply to function calls |
|
|
137
|
+
| `updateUserId(userId)` | Migrate guest to authenticated user |
|
|
134
138
|
|
|
135
|
-
|
|
139
|
+
## Additional Events
|
|
136
140
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
141
|
+
| Event | Description |
|
|
142
|
+
|-------|-------------|
|
|
143
|
+
| `connected` / `disconnected` | Connection lifecycle |
|
|
144
|
+
| `sessionStarted` / `sessionEnded` | Session lifecycle |
|
|
145
|
+
| `ready` | Session ready for audio |
|
|
146
|
+
| `userTranscript` | User's speech transcribed |
|
|
147
|
+
| `response` | AI's response text |
|
|
148
|
+
| `toolCall` | AI wants to call a function |
|
|
149
|
+
| `userIdUpdated` | Guest-to-user migration complete |
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Explicit Interrupt (Stop Button)
|
|
154
|
+
|
|
155
|
+
For UI "Stop" buttons or programmatic control:
|
|
142
156
|
|
|
143
|
-
|
|
157
|
+
```typescript
|
|
158
|
+
// User clicks Stop button
|
|
159
|
+
client.interrupt();
|
|
144
160
|
```
|
|
145
161
|
|
|
146
|
-
|
|
162
|
+
Note: Voice barge-in works automatically via Gemini's VAD. This method is for explicit control.
|
|
163
|
+
|
|
164
|
+
---
|
|
147
165
|
|
|
148
|
-
|
|
166
|
+
## System Messages
|
|
149
167
|
|
|
150
|
-
|
|
168
|
+
Inject text context during live sessions (game events, app state, etc.):
|
|
151
169
|
|
|
152
170
|
```typescript
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
171
|
+
// AI responds immediately
|
|
172
|
+
client.sendSystemMessage("User completed level 5. Congratulate them!");
|
|
173
|
+
|
|
174
|
+
// Context only, no response
|
|
175
|
+
client.sendSystemMessage({ text: "User is browsing", triggerResponse: false });
|
|
156
176
|
```
|
|
157
177
|
|
|
158
|
-
>
|
|
178
|
+
> Requires active live session (`audioStart()` called). Max 500 characters.
|
|
179
|
+
|
|
180
|
+
---
|
|
159
181
|
|
|
160
182
|
## Function Calling (Tool Use)
|
|
161
183
|
|
|
162
|
-
|
|
184
|
+
Let AI call functions in your app:
|
|
163
185
|
|
|
164
|
-
### Define Tools
|
|
186
|
+
### 1. Define Tools
|
|
165
187
|
|
|
166
188
|
```typescript
|
|
167
|
-
const tools = [
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
name: 'get_price',
|
|
175
|
-
description: 'Gets product price by ID',
|
|
176
|
-
parameters: {
|
|
177
|
-
type: 'OBJECT',
|
|
178
|
-
properties: {
|
|
179
|
-
productId: { type: 'string', description: 'Product ID' }
|
|
180
|
-
},
|
|
181
|
-
required: ['productId']
|
|
182
|
-
}
|
|
189
|
+
const tools = [{
|
|
190
|
+
name: 'get_price',
|
|
191
|
+
description: 'Gets product price by ID',
|
|
192
|
+
parameters: {
|
|
193
|
+
type: 'OBJECT',
|
|
194
|
+
properties: { productId: { type: 'string' } },
|
|
195
|
+
required: ['productId']
|
|
183
196
|
}
|
|
184
|
-
];
|
|
197
|
+
}];
|
|
185
198
|
|
|
186
199
|
await client.startSession({
|
|
187
|
-
prePrompt: 'You are
|
|
200
|
+
prePrompt: 'You are helpful.',
|
|
188
201
|
tools,
|
|
189
202
|
});
|
|
190
203
|
```
|
|
191
204
|
|
|
192
|
-
### Handle
|
|
205
|
+
### 2. Handle toolCall Events
|
|
193
206
|
|
|
194
207
|
```typescript
|
|
195
208
|
client.on('toolCall', (event) => {
|
|
196
|
-
console.log('AI wants to call:', event.name);
|
|
197
|
-
console.log('With arguments:', event.args);
|
|
198
|
-
|
|
199
|
-
if (event.name === 'open_login') {
|
|
200
|
-
showLoginModal();
|
|
201
|
-
client.sendToolResponse(event.id, { success: true });
|
|
202
|
-
}
|
|
203
|
-
|
|
204
209
|
if (event.name === 'get_price') {
|
|
205
|
-
const price =
|
|
206
|
-
client.sendToolResponse(event.id, { price
|
|
210
|
+
const price = lookupPrice(event.args.productId);
|
|
211
|
+
client.sendToolResponse(event.id, { price });
|
|
207
212
|
}
|
|
208
213
|
});
|
|
209
214
|
```
|
|
210
215
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
```typescript
|
|
214
|
-
interface Tool {
|
|
215
|
-
name: string; // Function name
|
|
216
|
-
description: string; // When AI should use this
|
|
217
|
-
parameters?: {
|
|
218
|
-
type: 'OBJECT';
|
|
219
|
-
properties: Record<string, unknown>;
|
|
220
|
-
required?: string[];
|
|
221
|
-
};
|
|
222
|
-
}
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
> ⚠️ **Note**: Function calling only works with `pipelineMode: 'live'`
|
|
226
|
-
|
|
227
|
-
## System Messages
|
|
228
|
-
|
|
229
|
-
During an active live session, you can inject text messages to the AI using `sendSystemMessage()`. This is useful for:
|
|
230
|
-
- Game events ("User completed level 5, congratulate them!")
|
|
231
|
-
- App state changes ("User opened the cart with 3 items")
|
|
232
|
-
- Timer/engagement triggers ("User has been quiet, engage them")
|
|
233
|
-
- External data updates ("Weather changed to rainy")
|
|
234
|
-
|
|
235
|
-
### Usage
|
|
236
|
-
|
|
237
|
-
```typescript
|
|
238
|
-
// Simple usage - AI responds immediately
|
|
239
|
-
client.sendSystemMessage("User just completed level 5. Congratulate them!");
|
|
240
|
-
|
|
241
|
-
// With options - context only, no immediate response
|
|
242
|
-
client.sendSystemMessage({
|
|
243
|
-
text: "User is browsing the cart",
|
|
244
|
-
triggerResponse: false
|
|
245
|
-
});
|
|
246
|
-
```
|
|
247
|
-
|
|
248
|
-
### Parameters
|
|
249
|
-
|
|
250
|
-
| Parameter | Type | Required | Default | Description |
|
|
251
|
-
|-----------|------|----------|---------|-------------|
|
|
252
|
-
| `text` | `string` | Yes | - | Message text (max 500 chars) |
|
|
253
|
-
| `triggerResponse` | `boolean` | No | `true` | AI responds immediately if `true` |
|
|
254
|
-
|
|
255
|
-
> ⚠️ **Note**: Requires an active live session (`audioStart()` must have been called). Only works with `pipelineMode: 'live'`.
|
|
216
|
+
---
|
|
256
217
|
|
|
257
218
|
## Conversation Memory
|
|
258
219
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
- **Entity Memory**: AI remembers facts shared in previous sessions (names, preferences, relationships)
|
|
262
|
-
- **Session Summaries**: Recent conversation summaries are available to the AI
|
|
263
|
-
- **Cross-Session**: Memory persists across sessions for the same `userId`
|
|
220
|
+
Enable persistent memory across sessions:
|
|
264
221
|
|
|
265
222
|
```typescript
|
|
266
|
-
// With memory (authenticated user)
|
|
267
223
|
const client = new LiveSpeechClient({
|
|
268
224
|
region: 'ap-northeast-2',
|
|
269
225
|
apiKey: 'your-api-key',
|
|
270
|
-
userId: 'user-123', // Enables
|
|
271
|
-
});
|
|
272
|
-
|
|
273
|
-
// Without memory (guest)
|
|
274
|
-
const client = new LiveSpeechClient({
|
|
275
|
-
region: 'ap-northeast-2',
|
|
276
|
-
apiKey: 'your-api-key',
|
|
277
|
-
// No userId = guest mode, no persistent memory
|
|
226
|
+
userId: 'user-123', // Enables memory
|
|
278
227
|
});
|
|
279
228
|
```
|
|
280
229
|
|
|
281
|
-
| Mode | Memory
|
|
282
|
-
|
|
283
|
-
| With `userId` | Permanent
|
|
284
|
-
| Without `userId` | Session only
|
|
230
|
+
| Mode | Memory |
|
|
231
|
+
|------|--------|
|
|
232
|
+
| With `userId` | Permanent (entities, summaries) |
|
|
233
|
+
| Without `userId` | Session only (guest) |
|
|
285
234
|
|
|
286
235
|
### Guest-to-User Migration
|
|
287
236
|
|
|
288
|
-
When a guest user logs in during a session, you can migrate their conversation history to their user account:
|
|
289
|
-
|
|
290
237
|
```typescript
|
|
291
|
-
// User logs in
|
|
292
|
-
client.on('userIdUpdated', (event) => {
|
|
293
|
-
console.log(`Migrated ${event.migratedMessages} messages to user ${event.userId}`);
|
|
294
|
-
});
|
|
295
|
-
|
|
296
|
-
// After authentication
|
|
238
|
+
// User logs in during session
|
|
297
239
|
await client.updateUserId('authenticated-user-123');
|
|
298
|
-
```
|
|
299
|
-
|
|
300
|
-
This enables:
|
|
301
|
-
- Entity extraction on guest conversation history
|
|
302
|
-
- Conversation continuity across sessions
|
|
303
|
-
- Personalization based on past interactions
|
|
304
|
-
|
|
305
|
-
## Events
|
|
306
|
-
|
|
307
|
-
| Event | Description | Key Properties |
|
|
308
|
-
|-------|-------------|----------------|
|
|
309
|
-
| `connected` | Connection established | `connectionId` |
|
|
310
|
-
| `disconnected` | Connection closed | `reason`, `code` |
|
|
311
|
-
| `sessionStarted` | Session created | `sessionId` |
|
|
312
|
-
| `ready` | Ready for audio input | `timestamp` |
|
|
313
|
-
| `userTranscript` | Your speech transcribed | `text` |
|
|
314
|
-
| `response` | AI's response text | `text`, `isFinal` |
|
|
315
|
-
| `audio` | AI's audio output | `data`, `sampleRate` |
|
|
316
|
-
| `turnComplete` | AI finished speaking | `timestamp` |
|
|
317
|
-
| `toolCall` | AI wants to call a function | `id`, `name`, `args` |
|
|
318
|
-
| `userIdUpdated` | Guest migrated to user account | `userId`, `migratedMessages` |
|
|
319
|
-
| `error` | Error occurred | `code`, `message` |
|
|
320
|
-
|
|
321
|
-
### Simple Handlers
|
|
322
|
-
|
|
323
|
-
```typescript
|
|
324
|
-
// Your speech transcription
|
|
325
|
-
client.setUserTranscriptHandler((text) => {
|
|
326
|
-
console.log('You said:', text);
|
|
327
|
-
});
|
|
328
|
-
|
|
329
|
-
// AI's text response
|
|
330
|
-
client.setResponseHandler((text, isFinal) => {
|
|
331
|
-
console.log('AI:', text, isFinal ? '(done)' : '...');
|
|
332
|
-
});
|
|
333
|
-
|
|
334
|
-
// AI's audio output
|
|
335
|
-
client.setAudioHandler((data: Uint8Array) => {
|
|
336
|
-
// data: PCM16 audio
|
|
337
|
-
// Sample rate: 24000 Hz
|
|
338
|
-
playAudio(data);
|
|
339
|
-
});
|
|
340
|
-
|
|
341
|
-
// Error handling
|
|
342
|
-
client.setErrorHandler((error) => {
|
|
343
|
-
console.error(`Error [${error.code}]: ${error.message}`);
|
|
344
|
-
});
|
|
345
|
-
|
|
346
|
-
// Tool calls (function calling)
|
|
347
|
-
client.on('toolCall', (event) => {
|
|
348
|
-
// Execute function and send result
|
|
349
|
-
const result = executeFunction(event.name, event.args);
|
|
350
|
-
client.sendToolResponse(event.id, result);
|
|
351
|
-
});
|
|
352
240
|
|
|
353
|
-
//
|
|
241
|
+
// Listen for confirmation
|
|
354
242
|
client.on('userIdUpdated', (event) => {
|
|
355
|
-
console.log(`
|
|
243
|
+
console.log(`Migrated ${event.migratedMessages} messages`);
|
|
356
244
|
});
|
|
357
245
|
```
|
|
358
246
|
|
|
359
|
-
|
|
247
|
+
---
|
|
360
248
|
|
|
361
|
-
|
|
362
|
-
client.on('connected', (event) => {
|
|
363
|
-
console.log('Connected:', event.connectionId);
|
|
364
|
-
});
|
|
249
|
+
## AI Speaks First
|
|
365
250
|
|
|
366
|
-
|
|
367
|
-
console.log('Ready for audio');
|
|
368
|
-
});
|
|
369
|
-
|
|
370
|
-
client.on('userTranscript', (event) => {
|
|
371
|
-
console.log('You:', event.text);
|
|
372
|
-
});
|
|
373
|
-
|
|
374
|
-
client.on('response', (event) => {
|
|
375
|
-
console.log('AI:', event.text, event.isFinal);
|
|
376
|
-
});
|
|
377
|
-
|
|
378
|
-
client.on('audio', (event) => {
|
|
379
|
-
// event.data: Uint8Array (PCM16)
|
|
380
|
-
// event.sampleRate: 24000
|
|
381
|
-
playAudio(event.data);
|
|
382
|
-
});
|
|
383
|
-
|
|
384
|
-
client.on('turnComplete', () => {
|
|
385
|
-
console.log('AI finished speaking');
|
|
386
|
-
});
|
|
387
|
-
|
|
388
|
-
client.on('error', (event) => {
|
|
389
|
-
console.error('Error:', event.code, event.message);
|
|
390
|
-
});
|
|
251
|
+
AI initiates the conversation:
|
|
391
252
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
const result = handleToolCall(event.name, event.args);
|
|
397
|
-
client.sendToolResponse(event.id, result);
|
|
253
|
+
```typescript
|
|
254
|
+
await client.startSession({
|
|
255
|
+
prePrompt: 'Greet the customer warmly.',
|
|
256
|
+
aiSpeaksFirst: true,
|
|
398
257
|
});
|
|
399
258
|
|
|
400
|
-
client.
|
|
401
|
-
// event.userId: string - the new user ID
|
|
402
|
-
// event.migratedMessages: number - count of migrated messages
|
|
403
|
-
console.log(`Migrated ${event.migratedMessages} messages to ${event.userId}`);
|
|
404
|
-
});
|
|
259
|
+
client.audioStart(); // AI speaks immediately
|
|
405
260
|
```
|
|
406
261
|
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
### Input (Your Microphone)
|
|
262
|
+
---
|
|
410
263
|
|
|
411
|
-
|
|
412
|
-
|----------|-------|
|
|
413
|
-
| Format | PCM16 (16-bit signed, little-endian) |
|
|
414
|
-
| Sample Rate | 16,000 Hz |
|
|
415
|
-
| Channels | 1 (Mono) |
|
|
416
|
-
| Chunk Size | ~3200 bytes (100ms) |
|
|
264
|
+
## Session Options
|
|
417
265
|
|
|
418
|
-
|
|
266
|
+
| Option | Default | Description |
|
|
267
|
+
|--------|---------|-------------|
|
|
268
|
+
| `prePrompt` | - | System prompt |
|
|
269
|
+
| `language` | `'en-US'` | Language code |
|
|
270
|
+
| `pipelineMode` | `'live'` | `'live'` (~300ms) or `'composed'` (~1-2s) |
|
|
271
|
+
| `aiSpeaksFirst` | `false` | AI initiates (live mode only) |
|
|
272
|
+
| `allowHarmCategory` | `false` | Disable safety filters |
|
|
273
|
+
| `tools` | `[]` | Function definitions |
|
|
419
274
|
|
|
420
|
-
|
|
421
|
-
|----------|-------|
|
|
422
|
-
| Format | PCM16 (16-bit signed, little-endian) |
|
|
423
|
-
| Sample Rate | 24,000 Hz |
|
|
424
|
-
| Channels | 1 (Mono) |
|
|
275
|
+
---
|
|
425
276
|
|
|
426
277
|
## Browser Example
|
|
427
278
|
|
|
428
279
|
```typescript
|
|
429
280
|
import { LiveSpeechClient, float32ToInt16, int16ToUint8 } from '@drawdream/livespeech';
|
|
430
281
|
|
|
431
|
-
const client = new LiveSpeechClient({
|
|
432
|
-
region: 'ap-northeast-2',
|
|
433
|
-
apiKey: 'your-api-key',
|
|
434
|
-
});
|
|
435
|
-
|
|
436
|
-
// Handlers
|
|
437
|
-
client.setUserTranscriptHandler((text) => console.log('You:', text));
|
|
438
|
-
client.setResponseHandler((text) => console.log('AI:', text));
|
|
439
|
-
client.setAudioHandler((data) => playAudioChunk(data));
|
|
440
|
-
|
|
441
|
-
// Connect
|
|
442
|
-
await client.connect();
|
|
443
|
-
await client.startSession({ prePrompt: 'You are a helpful assistant.' });
|
|
444
|
-
|
|
445
282
|
// Capture microphone
|
|
446
283
|
const stream = await navigator.mediaDevices.getUserMedia({
|
|
447
284
|
audio: { sampleRate: 16000, channelCount: 1 }
|
|
@@ -460,60 +297,30 @@ processor.onaudioprocess = (e) => {
|
|
|
460
297
|
|
|
461
298
|
source.connect(processor);
|
|
462
299
|
processor.connect(audioContext.destination);
|
|
463
|
-
|
|
464
|
-
// Start streaming
|
|
465
|
-
client.audioStart();
|
|
466
|
-
|
|
467
|
-
// Stop later
|
|
468
|
-
client.audioEnd();
|
|
469
|
-
stream.getTracks().forEach(track => track.stop());
|
|
470
300
|
```
|
|
471
301
|
|
|
302
|
+
---
|
|
303
|
+
|
|
472
304
|
## Audio Utilities
|
|
473
305
|
|
|
474
306
|
```typescript
|
|
475
|
-
import {
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
wrapPcmInWav, // Create WAV file
|
|
481
|
-
AudioEncoder, // Base64 encoding/decoding
|
|
482
|
-
} from '@drawdream/livespeech';
|
|
483
|
-
|
|
484
|
-
// Convert Web Audio to PCM16 for sending
|
|
485
|
-
const float32 = audioBuffer.getChannelData(0);
|
|
486
|
-
const int16 = float32ToInt16(float32);
|
|
487
|
-
const pcmBytes = int16ToUint8(int16);
|
|
488
|
-
client.sendAudioChunk(pcmBytes);
|
|
489
|
-
|
|
490
|
-
// Convert received PCM16 to Web Audio
|
|
491
|
-
const receivedInt16 = uint8ToInt16(audioEvent.data);
|
|
492
|
-
const float32Data = int16ToFloat32(receivedInt16);
|
|
307
|
+
import { float32ToInt16, int16ToUint8, wrapPcmInWav } from '@drawdream/livespeech';
|
|
308
|
+
|
|
309
|
+
const int16 = float32ToInt16(float32Data);
|
|
310
|
+
const bytes = int16ToUint8(int16);
|
|
311
|
+
const wav = wrapPcmInWav(bytes, 16000, 1, 16);
|
|
493
312
|
```
|
|
494
313
|
|
|
314
|
+
---
|
|
315
|
+
|
|
495
316
|
## Error Handling
|
|
496
317
|
|
|
497
318
|
```typescript
|
|
498
319
|
client.on('error', (event) => {
|
|
499
320
|
switch (event.code) {
|
|
500
|
-
case 'authentication_failed':
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
case 'connection_timeout':
|
|
504
|
-
console.error('Connection timed out');
|
|
505
|
-
break;
|
|
506
|
-
case 'rate_limit':
|
|
507
|
-
console.error('Rate limit exceeded');
|
|
508
|
-
break;
|
|
509
|
-
default:
|
|
510
|
-
console.error(`Error: ${event.message}`);
|
|
511
|
-
}
|
|
512
|
-
});
|
|
513
|
-
|
|
514
|
-
client.on('disconnected', (event) => {
|
|
515
|
-
if (event.reason === 'error') {
|
|
516
|
-
console.log('Will auto-reconnect...');
|
|
321
|
+
case 'authentication_failed': console.error('Invalid API key'); break;
|
|
322
|
+
case 'connection_timeout': console.error('Timed out'); break;
|
|
323
|
+
default: console.error(`Error: ${event.message}`);
|
|
517
324
|
}
|
|
518
325
|
});
|
|
519
326
|
|
|
@@ -522,44 +329,13 @@ client.on('reconnecting', (event) => {
|
|
|
522
329
|
});
|
|
523
330
|
```
|
|
524
331
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
| Property | Type | Description |
|
|
528
|
-
|----------|------|-------------|
|
|
529
|
-
| `isConnected` | `boolean` | Connection status |
|
|
530
|
-
| `hasActiveSession` | `boolean` | Session status |
|
|
531
|
-
| `isAudioStreaming` | `boolean` | Streaming status |
|
|
532
|
-
| `connectionId` | `string \| null` | Current connection ID |
|
|
533
|
-
| `currentSessionId` | `string \| null` | Current session ID |
|
|
332
|
+
---
|
|
534
333
|
|
|
535
334
|
## Regions
|
|
536
335
|
|
|
537
|
-
| Region | Code |
|
|
538
|
-
|
|
539
|
-
|
|
|
540
|
-
|
|
541
|
-
## TypeScript Types
|
|
542
|
-
|
|
543
|
-
```typescript
|
|
544
|
-
import type {
|
|
545
|
-
LiveSpeechConfig,
|
|
546
|
-
SessionConfig,
|
|
547
|
-
LiveSpeechEvent,
|
|
548
|
-
ConnectedEvent,
|
|
549
|
-
DisconnectedEvent,
|
|
550
|
-
SessionStartedEvent,
|
|
551
|
-
ReadyEvent,
|
|
552
|
-
UserTranscriptEvent,
|
|
553
|
-
ResponseEvent,
|
|
554
|
-
AudioEvent,
|
|
555
|
-
TurnCompleteEvent,
|
|
556
|
-
ToolCallEvent,
|
|
557
|
-
UserIdUpdatedEvent,
|
|
558
|
-
ErrorEvent,
|
|
559
|
-
ErrorCode,
|
|
560
|
-
Tool,
|
|
561
|
-
} from '@drawdream/livespeech';
|
|
562
|
-
```
|
|
336
|
+
| Region | Code |
|
|
337
|
+
|--------|------|
|
|
338
|
+
| Seoul (Korea) | `ap-northeast-2` |
|
|
563
339
|
|
|
564
340
|
## License
|
|
565
341
|
|
package/dist/index.d.mts
CHANGED
|
@@ -201,7 +201,7 @@ interface ResolvedConfig {
|
|
|
201
201
|
/**
|
|
202
202
|
* Event types emitted by the LiveSpeech client
|
|
203
203
|
*/
|
|
204
|
-
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'error';
|
|
204
|
+
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error';
|
|
205
205
|
/**
|
|
206
206
|
* Event payload for 'connected' event
|
|
207
207
|
*/
|
|
@@ -357,10 +357,30 @@ interface UserIdUpdatedEvent {
|
|
|
357
357
|
migratedMessages: number;
|
|
358
358
|
timestamp: string;
|
|
359
359
|
}
|
|
360
|
+
/**
|
|
361
|
+
* Event payload for 'interrupted' event (barge-in)
|
|
362
|
+
* Indicates the AI response was interrupted because the user started speaking.
|
|
363
|
+
*
|
|
364
|
+
* **Critical**: When you receive this event, immediately clear your audio playback
|
|
365
|
+
* buffer to stop the AI audio from continuing to play. This enables natural
|
|
366
|
+
* barge-in behavior like a real phone conversation.
|
|
367
|
+
*
|
|
368
|
+
* @example
|
|
369
|
+
* client.on('interrupted', (event) => {
|
|
370
|
+
* // Stop playing AI audio immediately
|
|
371
|
+
* audioPlayer.clearBuffer();
|
|
372
|
+
* audioPlayer.stop();
|
|
373
|
+
* console.log('AI interrupted - ready for user input');
|
|
374
|
+
* });
|
|
375
|
+
*/
|
|
376
|
+
interface InterruptedEvent {
|
|
377
|
+
type: 'interrupted';
|
|
378
|
+
timestamp: string;
|
|
379
|
+
}
|
|
360
380
|
/**
|
|
361
381
|
* Union type of all event payloads
|
|
362
382
|
*/
|
|
363
|
-
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | ErrorEvent;
|
|
383
|
+
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | InterruptedEvent | ErrorEvent;
|
|
364
384
|
/**
|
|
365
385
|
* Simplified event handlers for common use cases
|
|
366
386
|
*/
|
|
@@ -372,11 +392,11 @@ type ErrorHandler = (error: ErrorEvent) => void;
|
|
|
372
392
|
/**
|
|
373
393
|
* WebSocket message types sent from client to server
|
|
374
394
|
*/
|
|
375
|
-
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'updateUserId' | 'ping';
|
|
395
|
+
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'updateUserId' | 'interrupt' | 'ping';
|
|
376
396
|
/**
|
|
377
397
|
* WebSocket message types received from server
|
|
378
398
|
*/
|
|
379
|
-
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'error' | 'pong';
|
|
399
|
+
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error' | 'pong';
|
|
380
400
|
/**
|
|
381
401
|
* Base interface for client messages
|
|
382
402
|
*/
|
|
@@ -466,10 +486,16 @@ interface UpdateUserIdMessage extends BaseClientMessage {
|
|
|
466
486
|
/** The authenticated user's unique identifier */
|
|
467
487
|
userId: string;
|
|
468
488
|
}
|
|
489
|
+
/**
|
|
490
|
+
* Interrupt message - explicitly stop AI response (for Stop button)
|
|
491
|
+
*/
|
|
492
|
+
interface InterruptMessage extends BaseClientMessage {
|
|
493
|
+
action: 'interrupt';
|
|
494
|
+
}
|
|
469
495
|
/**
|
|
470
496
|
* Union type of all client messages
|
|
471
497
|
*/
|
|
472
|
-
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | UpdateUserIdMessage | PingMessage;
|
|
498
|
+
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | UpdateUserIdMessage | InterruptMessage | PingMessage;
|
|
473
499
|
/**
|
|
474
500
|
* Base interface for server messages
|
|
475
501
|
*/
|
|
@@ -567,10 +593,18 @@ interface ServerUserIdUpdatedMessage extends BaseServerMessage {
|
|
|
567
593
|
/** Number of messages migrated from guest to user partition */
|
|
568
594
|
migratedMessages: number;
|
|
569
595
|
}
|
|
596
|
+
/**
|
|
597
|
+
* Interrupted message from server (barge-in)
|
|
598
|
+
* Indicates the AI response was interrupted because the user started speaking.
|
|
599
|
+
* Clients should immediately clear their audio playback buffer when receiving this.
|
|
600
|
+
*/
|
|
601
|
+
interface ServerInterruptedMessage extends BaseServerMessage {
|
|
602
|
+
type: 'interrupted';
|
|
603
|
+
}
|
|
570
604
|
/**
|
|
571
605
|
* Union type of all server messages
|
|
572
606
|
*/
|
|
573
|
-
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerErrorMessage | ServerPongMessage;
|
|
607
|
+
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerInterruptedMessage | ServerErrorMessage | ServerPongMessage;
|
|
574
608
|
|
|
575
609
|
/**
|
|
576
610
|
* Connection state
|
|
@@ -593,6 +627,7 @@ type LiveSpeechEventMap = {
|
|
|
593
627
|
turnComplete: TurnCompleteEvent;
|
|
594
628
|
toolCall: ToolCallEvent;
|
|
595
629
|
userIdUpdated: UserIdUpdatedEvent;
|
|
630
|
+
interrupted: InterruptedEvent;
|
|
596
631
|
error: ErrorEvent;
|
|
597
632
|
};
|
|
598
633
|
/**
|
|
@@ -710,6 +745,26 @@ declare class LiveSpeechClient {
|
|
|
710
745
|
* });
|
|
711
746
|
*/
|
|
712
747
|
sendToolResponse(id: string, response?: unknown): void;
|
|
748
|
+
/**
|
|
749
|
+
* Explicitly interrupt the current AI response
|
|
750
|
+
*
|
|
751
|
+
* Use this method for:
|
|
752
|
+
* - UI "Stop" button functionality
|
|
753
|
+
* - Programmatic control to stop AI mid-response
|
|
754
|
+
*
|
|
755
|
+
* Note: In most cases, simply speaking will trigger automatic
|
|
756
|
+
* interruption via Gemini's voice activity detection (VAD).
|
|
757
|
+
* This method is for explicit programmatic control.
|
|
758
|
+
*
|
|
759
|
+
* @example
|
|
760
|
+
* // User clicks "Stop" button
|
|
761
|
+
* client.interrupt();
|
|
762
|
+
*
|
|
763
|
+
* @example
|
|
764
|
+
* // Stop AI after a certain time
|
|
765
|
+
* setTimeout(() => client.interrupt(), 10000);
|
|
766
|
+
*/
|
|
767
|
+
interrupt(): void;
|
|
713
768
|
/**
|
|
714
769
|
* Update the user ID for the current connection (guest-to-user migration)
|
|
715
770
|
*
|
package/dist/index.d.ts
CHANGED
|
@@ -201,7 +201,7 @@ interface ResolvedConfig {
|
|
|
201
201
|
/**
|
|
202
202
|
* Event types emitted by the LiveSpeech client
|
|
203
203
|
*/
|
|
204
|
-
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'error';
|
|
204
|
+
type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error';
|
|
205
205
|
/**
|
|
206
206
|
* Event payload for 'connected' event
|
|
207
207
|
*/
|
|
@@ -357,10 +357,30 @@ interface UserIdUpdatedEvent {
|
|
|
357
357
|
migratedMessages: number;
|
|
358
358
|
timestamp: string;
|
|
359
359
|
}
|
|
360
|
+
/**
|
|
361
|
+
* Event payload for 'interrupted' event (barge-in)
|
|
362
|
+
* Indicates the AI response was interrupted because the user started speaking.
|
|
363
|
+
*
|
|
364
|
+
* **Critical**: When you receive this event, immediately clear your audio playback
|
|
365
|
+
* buffer to stop the AI audio from continuing to play. This enables natural
|
|
366
|
+
* barge-in behavior like a real phone conversation.
|
|
367
|
+
*
|
|
368
|
+
* @example
|
|
369
|
+
* client.on('interrupted', (event) => {
|
|
370
|
+
* // Stop playing AI audio immediately
|
|
371
|
+
* audioPlayer.clearBuffer();
|
|
372
|
+
* audioPlayer.stop();
|
|
373
|
+
* console.log('AI interrupted - ready for user input');
|
|
374
|
+
* });
|
|
375
|
+
*/
|
|
376
|
+
interface InterruptedEvent {
|
|
377
|
+
type: 'interrupted';
|
|
378
|
+
timestamp: string;
|
|
379
|
+
}
|
|
360
380
|
/**
|
|
361
381
|
* Union type of all event payloads
|
|
362
382
|
*/
|
|
363
|
-
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | ErrorEvent;
|
|
383
|
+
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | InterruptedEvent | ErrorEvent;
|
|
364
384
|
/**
|
|
365
385
|
* Simplified event handlers for common use cases
|
|
366
386
|
*/
|
|
@@ -372,11 +392,11 @@ type ErrorHandler = (error: ErrorEvent) => void;
|
|
|
372
392
|
/**
|
|
373
393
|
* WebSocket message types sent from client to server
|
|
374
394
|
*/
|
|
375
|
-
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'updateUserId' | 'ping';
|
|
395
|
+
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'systemMessage' | 'toolResponse' | 'updateUserId' | 'interrupt' | 'ping';
|
|
376
396
|
/**
|
|
377
397
|
* WebSocket message types received from server
|
|
378
398
|
*/
|
|
379
|
-
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'error' | 'pong';
|
|
399
|
+
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error' | 'pong';
|
|
380
400
|
/**
|
|
381
401
|
* Base interface for client messages
|
|
382
402
|
*/
|
|
@@ -466,10 +486,16 @@ interface UpdateUserIdMessage extends BaseClientMessage {
|
|
|
466
486
|
/** The authenticated user's unique identifier */
|
|
467
487
|
userId: string;
|
|
468
488
|
}
|
|
489
|
+
/**
|
|
490
|
+
* Interrupt message - explicitly stop AI response (for Stop button)
|
|
491
|
+
*/
|
|
492
|
+
interface InterruptMessage extends BaseClientMessage {
|
|
493
|
+
action: 'interrupt';
|
|
494
|
+
}
|
|
469
495
|
/**
|
|
470
496
|
* Union type of all client messages
|
|
471
497
|
*/
|
|
472
|
-
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | UpdateUserIdMessage | PingMessage;
|
|
498
|
+
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | SystemMessageMessage | ToolResponseMessage | UpdateUserIdMessage | InterruptMessage | PingMessage;
|
|
473
499
|
/**
|
|
474
500
|
* Base interface for server messages
|
|
475
501
|
*/
|
|
@@ -567,10 +593,18 @@ interface ServerUserIdUpdatedMessage extends BaseServerMessage {
|
|
|
567
593
|
/** Number of messages migrated from guest to user partition */
|
|
568
594
|
migratedMessages: number;
|
|
569
595
|
}
|
|
596
|
+
/**
|
|
597
|
+
* Interrupted message from server (barge-in)
|
|
598
|
+
* Indicates the AI response was interrupted because the user started speaking.
|
|
599
|
+
* Clients should immediately clear their audio playback buffer when receiving this.
|
|
600
|
+
*/
|
|
601
|
+
interface ServerInterruptedMessage extends BaseServerMessage {
|
|
602
|
+
type: 'interrupted';
|
|
603
|
+
}
|
|
570
604
|
/**
|
|
571
605
|
* Union type of all server messages
|
|
572
606
|
*/
|
|
573
|
-
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerErrorMessage | ServerPongMessage;
|
|
607
|
+
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerInterruptedMessage | ServerErrorMessage | ServerPongMessage;
|
|
574
608
|
|
|
575
609
|
/**
|
|
576
610
|
* Connection state
|
|
@@ -593,6 +627,7 @@ type LiveSpeechEventMap = {
|
|
|
593
627
|
turnComplete: TurnCompleteEvent;
|
|
594
628
|
toolCall: ToolCallEvent;
|
|
595
629
|
userIdUpdated: UserIdUpdatedEvent;
|
|
630
|
+
interrupted: InterruptedEvent;
|
|
596
631
|
error: ErrorEvent;
|
|
597
632
|
};
|
|
598
633
|
/**
|
|
@@ -710,6 +745,26 @@ declare class LiveSpeechClient {
|
|
|
710
745
|
* });
|
|
711
746
|
*/
|
|
712
747
|
sendToolResponse(id: string, response?: unknown): void;
|
|
748
|
+
/**
|
|
749
|
+
* Explicitly interrupt the current AI response
|
|
750
|
+
*
|
|
751
|
+
* Use this method for:
|
|
752
|
+
* - UI "Stop" button functionality
|
|
753
|
+
* - Programmatic control to stop AI mid-response
|
|
754
|
+
*
|
|
755
|
+
* Note: In most cases, simply speaking will trigger automatic
|
|
756
|
+
* interruption via Gemini's voice activity detection (VAD).
|
|
757
|
+
* This method is for explicit programmatic control.
|
|
758
|
+
*
|
|
759
|
+
* @example
|
|
760
|
+
* // User clicks "Stop" button
|
|
761
|
+
* client.interrupt();
|
|
762
|
+
*
|
|
763
|
+
* @example
|
|
764
|
+
* // Stop AI after a certain time
|
|
765
|
+
* setTimeout(() => client.interrupt(), 10000);
|
|
766
|
+
*/
|
|
767
|
+
interrupt(): void;
|
|
713
768
|
/**
|
|
714
769
|
* Update the user ID for the current connection (guest-to-user migration)
|
|
715
770
|
*
|
package/dist/index.js
CHANGED
|
@@ -877,6 +877,35 @@ var LiveSpeechClient = class {
|
|
|
877
877
|
payload: { id, response }
|
|
878
878
|
});
|
|
879
879
|
}
|
|
880
|
+
/**
|
|
881
|
+
* Explicitly interrupt the current AI response
|
|
882
|
+
*
|
|
883
|
+
* Use this method for:
|
|
884
|
+
* - UI "Stop" button functionality
|
|
885
|
+
* - Programmatic control to stop AI mid-response
|
|
886
|
+
*
|
|
887
|
+
* Note: In most cases, simply speaking will trigger automatic
|
|
888
|
+
* interruption via Gemini's voice activity detection (VAD).
|
|
889
|
+
* This method is for explicit programmatic control.
|
|
890
|
+
*
|
|
891
|
+
* @example
|
|
892
|
+
* // User clicks "Stop" button
|
|
893
|
+
* client.interrupt();
|
|
894
|
+
*
|
|
895
|
+
* @example
|
|
896
|
+
* // Stop AI after a certain time
|
|
897
|
+
* setTimeout(() => client.interrupt(), 10000);
|
|
898
|
+
*/
|
|
899
|
+
interrupt() {
|
|
900
|
+
if (!this.isConnected) {
|
|
901
|
+
throw new Error("Not connected");
|
|
902
|
+
}
|
|
903
|
+
if (!this.isStreaming) {
|
|
904
|
+
throw new Error("No active Live session. Call audioStart() first.");
|
|
905
|
+
}
|
|
906
|
+
this.logger.info("Sending explicit interrupt");
|
|
907
|
+
this.connection.send({ action: "interrupt" });
|
|
908
|
+
}
|
|
880
909
|
/**
|
|
881
910
|
* Update the user ID for the current connection (guest-to-user migration)
|
|
882
911
|
*
|
|
@@ -1119,6 +1148,15 @@ var LiveSpeechClient = class {
|
|
|
1119
1148
|
this.emit("userIdUpdated", userIdUpdatedEvent);
|
|
1120
1149
|
break;
|
|
1121
1150
|
}
|
|
1151
|
+
case "interrupted": {
|
|
1152
|
+
const interruptedEvent = {
|
|
1153
|
+
type: "interrupted",
|
|
1154
|
+
timestamp: message.timestamp
|
|
1155
|
+
};
|
|
1156
|
+
this.logger.info("AI response interrupted (barge-in)");
|
|
1157
|
+
this.emit("interrupted", interruptedEvent);
|
|
1158
|
+
break;
|
|
1159
|
+
}
|
|
1122
1160
|
case "error":
|
|
1123
1161
|
this.handleError(message.code, message.message);
|
|
1124
1162
|
break;
|
package/dist/index.mjs
CHANGED
|
@@ -838,6 +838,35 @@ var LiveSpeechClient = class {
|
|
|
838
838
|
payload: { id, response }
|
|
839
839
|
});
|
|
840
840
|
}
|
|
841
|
+
/**
|
|
842
|
+
* Explicitly interrupt the current AI response
|
|
843
|
+
*
|
|
844
|
+
* Use this method for:
|
|
845
|
+
* - UI "Stop" button functionality
|
|
846
|
+
* - Programmatic control to stop AI mid-response
|
|
847
|
+
*
|
|
848
|
+
* Note: In most cases, simply speaking will trigger automatic
|
|
849
|
+
* interruption via Gemini's voice activity detection (VAD).
|
|
850
|
+
* This method is for explicit programmatic control.
|
|
851
|
+
*
|
|
852
|
+
* @example
|
|
853
|
+
* // User clicks "Stop" button
|
|
854
|
+
* client.interrupt();
|
|
855
|
+
*
|
|
856
|
+
* @example
|
|
857
|
+
* // Stop AI after a certain time
|
|
858
|
+
* setTimeout(() => client.interrupt(), 10000);
|
|
859
|
+
*/
|
|
860
|
+
interrupt() {
|
|
861
|
+
if (!this.isConnected) {
|
|
862
|
+
throw new Error("Not connected");
|
|
863
|
+
}
|
|
864
|
+
if (!this.isStreaming) {
|
|
865
|
+
throw new Error("No active Live session. Call audioStart() first.");
|
|
866
|
+
}
|
|
867
|
+
this.logger.info("Sending explicit interrupt");
|
|
868
|
+
this.connection.send({ action: "interrupt" });
|
|
869
|
+
}
|
|
841
870
|
/**
|
|
842
871
|
* Update the user ID for the current connection (guest-to-user migration)
|
|
843
872
|
*
|
|
@@ -1080,6 +1109,15 @@ var LiveSpeechClient = class {
|
|
|
1080
1109
|
this.emit("userIdUpdated", userIdUpdatedEvent);
|
|
1081
1110
|
break;
|
|
1082
1111
|
}
|
|
1112
|
+
case "interrupted": {
|
|
1113
|
+
const interruptedEvent = {
|
|
1114
|
+
type: "interrupted",
|
|
1115
|
+
timestamp: message.timestamp
|
|
1116
|
+
};
|
|
1117
|
+
this.logger.info("AI response interrupted (barge-in)");
|
|
1118
|
+
this.emit("interrupted", interruptedEvent);
|
|
1119
|
+
break;
|
|
1120
|
+
}
|
|
1083
1121
|
case "error":
|
|
1084
1122
|
this.handleError(message.code, message.message);
|
|
1085
1123
|
break;
|