@drawdream/livespeech 0.1.10 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -377
- package/dist/index.d.mts +139 -9
- package/dist/index.d.ts +139 -9
- package/dist/index.js +65 -1
- package/dist/index.mjs +65 -1
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -10,7 +10,7 @@ A TypeScript/JavaScript SDK for real-time speech-to-speech AI conversations.
|
|
|
10
10
|
- đī¸ **Real-time Voice Conversations** - Natural, low-latency voice interactions
|
|
11
11
|
- đ **Multi-language Support** - Korean, English, Japanese, Chinese, and more
|
|
12
12
|
- đ **Streaming Audio** - Send and receive audio in real-time
|
|
13
|
-
-
|
|
13
|
+
- âšī¸ **Barge-in Support** - Interrupt AI mid-speech by talking or programmatically
|
|
14
14
|
- đ **Auto-reconnection** - Automatic recovery from network issues
|
|
15
15
|
- đ **Browser & Node.js** - Works in both environments
|
|
16
16
|
|
|
@@ -18,13 +18,9 @@ A TypeScript/JavaScript SDK for real-time speech-to-speech AI conversations.
|
|
|
18
18
|
|
|
19
19
|
```bash
|
|
20
20
|
npm install @drawdream/livespeech
|
|
21
|
-
# or
|
|
22
|
-
yarn add @drawdream/livespeech
|
|
23
|
-
# or
|
|
24
|
-
pnpm add @drawdream/livespeech
|
|
25
21
|
```
|
|
26
22
|
|
|
27
|
-
## Quick Start
|
|
23
|
+
## Quick Start (5 minutes)
|
|
28
24
|
|
|
29
25
|
```typescript
|
|
30
26
|
import { LiveSpeechClient } from '@drawdream/livespeech';
|
|
@@ -34,31 +30,28 @@ const client = new LiveSpeechClient({
|
|
|
34
30
|
apiKey: 'your-api-key',
|
|
35
31
|
});
|
|
36
32
|
|
|
37
|
-
//
|
|
38
|
-
client.
|
|
39
|
-
|
|
33
|
+
// Handle only 4 essential events!
|
|
34
|
+
client.setAudioHandler((audioData) => {
|
|
35
|
+
audioPlayer.queue(audioData); // PCM16 → use event.sampleRate (24kHz Live, 16kHz Composed)
|
|
40
36
|
});
|
|
41
37
|
|
|
42
|
-
client.
|
|
43
|
-
|
|
38
|
+
client.on('interrupted', () => {
|
|
39
|
+
audioPlayer.clear(); // CRITICAL: Clear buffer on interrupt!
|
|
44
40
|
});
|
|
45
41
|
|
|
46
|
-
client.
|
|
47
|
-
|
|
42
|
+
client.on('turnComplete', () => {
|
|
43
|
+
console.log('AI finished');
|
|
48
44
|
});
|
|
49
45
|
|
|
50
46
|
client.setErrorHandler((error) => {
|
|
51
47
|
console.error('Error:', error.message);
|
|
52
48
|
});
|
|
53
49
|
|
|
54
|
-
// Connect and start
|
|
50
|
+
// Connect and start
|
|
55
51
|
await client.connect();
|
|
56
|
-
await client.startSession({
|
|
57
|
-
prePrompt: 'You are a helpful assistant.',
|
|
58
|
-
language: 'ko-KR',
|
|
59
|
-
});
|
|
52
|
+
await client.startSession({ prePrompt: 'You are a helpful assistant.' });
|
|
60
53
|
|
|
61
|
-
//
|
|
54
|
+
// Send audio
|
|
62
55
|
client.audioStart();
|
|
63
56
|
client.sendAudioChunk(pcmData); // PCM16 @ 16kHz
|
|
64
57
|
client.audioEnd();
|
|
@@ -68,380 +61,267 @@ await client.endSession();
|
|
|
68
61
|
client.disconnect();
|
|
69
62
|
```
|
|
70
63
|
|
|
71
|
-
|
|
64
|
+
---
|
|
72
65
|
|
|
73
|
-
|
|
74
|
-
connect() → startSession() → audioStart() → sendAudioChunk()* → audioEnd() → endSession()
|
|
75
|
-
↓
|
|
76
|
-
sendSystemMessage() (optional, during live session)
|
|
77
|
-
sendToolResponse() (when toolCall received)
|
|
78
|
-
```
|
|
66
|
+
# Core API
|
|
79
67
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
|
85
|
-
|
|
86
|
-
| `
|
|
87
|
-
| `sendToolResponse(id, result)` | Send function result back to AI (after toolCall) |
|
|
88
|
-
| `updateUserId(userId)` | Migrate guest session to user account |
|
|
89
|
-
| `audioEnd()` | End streaming, triggers AI response |
|
|
90
|
-
| `endSession()` | End conversation |
|
|
68
|
+
Everything you need for basic voice conversations.
|
|
69
|
+
|
|
70
|
+
## Methods
|
|
71
|
+
|
|
72
|
+
| Method | Description |
|
|
73
|
+
|--------|-------------|
|
|
74
|
+
| `connect()` | Establish connection |
|
|
91
75
|
| `disconnect()` | Close connection |
|
|
76
|
+
| `startSession(config)` | Start conversation with system prompt |
|
|
77
|
+
| `endSession()` | End conversation |
|
|
78
|
+
| `sendAudioChunk(data)` | Send PCM16 audio (16kHz) |
|
|
79
|
+
|
|
80
|
+
## Events
|
|
81
|
+
|
|
82
|
+
| Event | Description | Action Required |
|
|
83
|
+
|-------|-------------|-----------------|
|
|
84
|
+
| `audio` | AI's audio output | Play audio (PCM16 → check `sampleRate`) |
|
|
85
|
+
| `turnComplete` | AI finished speaking | Ready for next input |
|
|
86
|
+
| `interrupted` | User barged in | **Clear audio buffer!** |
|
|
87
|
+
| `error` | Error occurred | Handle/log error |
|
|
88
|
+
|
|
89
|
+
### ⚠️ Critical: Handle `interrupted`
|
|
90
|
+
|
|
91
|
+
When the user speaks while AI is responding, **you must clear your audio buffer**:
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
client.on('interrupted', () => {
|
|
95
|
+
audioPlayer.clear(); // Stop buffered audio immediately
|
|
96
|
+
audioPlayer.stop();
|
|
97
|
+
});
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Without this, 2-3 seconds of buffered audio continues playing after the user interrupts.
|
|
101
|
+
|
|
102
|
+
## Audio Format
|
|
103
|
+
|
|
104
|
+
| Direction | Format | Sample Rate |
|
|
105
|
+
|-----------|--------|-------------|
|
|
106
|
+
| Input (mic) | PCM16 | 16,000 Hz |
|
|
107
|
+
| Output (AI) – Live mode | PCM16 | 24,000 Hz |
|
|
108
|
+
| Output (AI) – Composed mode | PCM16 | 16,000 Hz |
|
|
109
|
+
|
|
110
|
+
> **Important:** The `audio` event includes a `sampleRate` field. Always use it to configure your audio decoder rather than hardcoding a rate.
|
|
92
111
|
|
|
93
112
|
## Configuration
|
|
94
113
|
|
|
95
114
|
```typescript
|
|
96
115
|
const client = new LiveSpeechClient({
|
|
97
|
-
region: 'ap-northeast-2', // Required
|
|
98
|
-
apiKey: 'your-api-key', // Required
|
|
99
|
-
userId: 'user-123', // Optional: Enable conversation memory
|
|
100
|
-
autoReconnect: true, // Auto-reconnect on disconnect
|
|
101
|
-
maxReconnectAttempts: 5, // Maximum reconnection attempts
|
|
102
|
-
debug: false, // Enable debug logging
|
|
116
|
+
region: 'ap-northeast-2', // Required
|
|
117
|
+
apiKey: 'your-api-key', // Required
|
|
103
118
|
});
|
|
104
119
|
|
|
105
120
|
await client.startSession({
|
|
106
121
|
prePrompt: 'You are a helpful assistant.',
|
|
107
|
-
language: 'ko-KR', //
|
|
108
|
-
pipelineMode: 'live', // 'live' (default) or 'composed'
|
|
109
|
-
aiSpeaksFirst: false, // AI speaks first (live mode only)
|
|
110
|
-
allowHarmCategory: false, // Disable safety filtering (use with caution)
|
|
111
|
-
tools: [{ name: 'func', description: 'desc', parameters: {...} }], // Function calling
|
|
122
|
+
language: 'ko-KR', // Optional: ko-KR, en-US, ja-JP, etc.
|
|
112
123
|
});
|
|
113
124
|
```
|
|
114
125
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
| Option | Type | Default | Description |
|
|
118
|
-
|--------|------|---------|-------------|
|
|
119
|
-
| `prePrompt` | `string` | - | System prompt for the AI assistant |
|
|
120
|
-
| `language` | `string` | `'en-US'` | Language code (e.g., `ko-KR`, `ja-JP`) |
|
|
121
|
-
| `pipelineMode` | `'live' \| 'composed'` | `'live'` | Audio processing mode |
|
|
122
|
-
| `aiSpeaksFirst` | `boolean` | `false` | AI initiates conversation (live mode only) |
|
|
123
|
-
| `allowHarmCategory` | `boolean` | `false` | Disable content safety filtering |
|
|
124
|
-
| `tools` | `Tool[]` | `undefined` | Function definitions for AI to call |
|
|
125
|
-
|
|
126
|
-
### Pipeline Modes
|
|
127
|
-
|
|
128
|
-
| Mode | Latency | Description |
|
|
129
|
-
|------|---------|-------------|
|
|
130
|
-
| `live` | Lower (~300ms) | Direct audio-to-audio via Live API |
|
|
131
|
-
| `composed` | Higher (~1-2s) | Separate STT → LLM → TTS pipeline |
|
|
126
|
+
---
|
|
132
127
|
|
|
133
|
-
|
|
128
|
+
# Composed Mode
|
|
134
129
|
|
|
135
|
-
|
|
130
|
+
Use composed mode for higher accuracy with slightly more latency. It runs a separate STT → LLM → TTS pipeline instead of direct audio-to-audio.
|
|
136
131
|
|
|
137
132
|
```typescript
|
|
138
133
|
await client.startSession({
|
|
139
|
-
prePrompt: 'You are a
|
|
140
|
-
|
|
134
|
+
prePrompt: 'You are a helpful assistant.',
|
|
135
|
+
pipelineMode: 'composed',
|
|
136
|
+
language: 'ko-KR',
|
|
141
137
|
});
|
|
142
138
|
|
|
143
|
-
client.audioStart();
|
|
139
|
+
client.audioStart();
|
|
140
|
+
// Send/receive audio the same way as live mode
|
|
144
141
|
```
|
|
145
142
|
|
|
146
|
-
|
|
143
|
+
### Live vs Composed
|
|
147
144
|
|
|
148
|
-
|
|
145
|
+
| | Live | Composed |
|
|
146
|
+
|---|---|---|
|
|
147
|
+
| **Latency** | ~300ms | ~1-2s |
|
|
148
|
+
| **Pipeline** | Direct audio-to-audio (Gemini Live) | STT → LLM → TTS |
|
|
149
|
+
| **Accuracy** | Good | Higher |
|
|
150
|
+
| **`aiSpeaksFirst`** | ✅ Supported | ❌ Not supported |
|
|
151
|
+
| **`tools` (function calling)** | ✅ Supported | ❌ Not supported |
|
|
152
|
+
| **Output sample rate** | 24,000 Hz | 16,000 Hz |
|
|
153
|
+
| **Barge-in** | Automatic (Gemini VAD) | Automatic |
|
|
149
154
|
|
|
150
|
-
|
|
155
|
+
> **Note:** All other SDK methods and events work identically in both modes. The only code change is adding `pipelineMode: 'composed'` to your session config.
|
|
151
156
|
|
|
152
|
-
|
|
153
|
-
await client.startSession({
|
|
154
|
-
allowHarmCategory: true, // ⚠️ Disables all safety filters
|
|
155
|
-
});
|
|
156
|
-
```
|
|
157
|
+
---
|
|
157
158
|
|
|
158
|
-
|
|
159
|
+
# Advanced API
|
|
159
160
|
|
|
160
|
-
|
|
161
|
+
Optional features for power users.
|
|
161
162
|
|
|
162
|
-
|
|
163
|
+
## Additional Methods
|
|
163
164
|
|
|
164
|
-
|
|
165
|
+
| Method | Description |
|
|
166
|
+
|--------|-------------|
|
|
167
|
+
| `audioStart()` / `audioEnd()` | Manual audio stream control |
|
|
168
|
+
| `interrupt()` | Explicitly stop AI response (for Stop button) |
|
|
169
|
+
| `sendSystemMessage(msg)` | Inject context during conversation |
|
|
170
|
+
| `sendToolResponse(id, result)` | Reply to function calls |
|
|
171
|
+
| `updateUserId(userId)` | Migrate guest to authenticated user |
|
|
165
172
|
|
|
166
|
-
|
|
167
|
-
const tools = [
|
|
168
|
-
{
|
|
169
|
-
name: 'open_login',
|
|
170
|
-
description: 'Opens Google Login popup when user wants to sign in',
|
|
171
|
-
parameters: { type: 'OBJECT', properties: {}, required: [] }
|
|
172
|
-
},
|
|
173
|
-
{
|
|
174
|
-
name: 'get_price',
|
|
175
|
-
description: 'Gets product price by ID',
|
|
176
|
-
parameters: {
|
|
177
|
-
type: 'OBJECT',
|
|
178
|
-
properties: {
|
|
179
|
-
productId: { type: 'string', description: 'Product ID' }
|
|
180
|
-
},
|
|
181
|
-
required: ['productId']
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
];
|
|
173
|
+
## Additional Events
|
|
185
174
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
175
|
+
| Event | Description |
|
|
176
|
+
|-------|-------------|
|
|
177
|
+
| `connected` / `disconnected` | Connection lifecycle |
|
|
178
|
+
| `sessionStarted` / `sessionEnded` | Session lifecycle |
|
|
179
|
+
| `ready` | Session ready for audio |
|
|
180
|
+
| `userTranscript` | User's speech transcribed |
|
|
181
|
+
| `response` | AI's response text |
|
|
182
|
+
| `toolCall` | AI wants to call a function |
|
|
183
|
+
| `reconnecting` | Auto-reconnection attempt |
|
|
184
|
+
| `userIdUpdated` | Guest-to-user migration complete |
|
|
185
|
+
| `sessionWarning` | Session nearing duration limit |
|
|
186
|
+
| `sessionGoodbye` | Session about to end |
|
|
191
187
|
|
|
192
|
-
|
|
188
|
+
---
|
|
193
189
|
|
|
194
|
-
|
|
195
|
-
client.on('toolCall', (event) => {
|
|
196
|
-
console.log('AI wants to call:', event.name);
|
|
197
|
-
console.log('With arguments:', event.args);
|
|
198
|
-
|
|
199
|
-
if (event.name === 'open_login') {
|
|
200
|
-
showLoginModal();
|
|
201
|
-
client.sendToolResponse(event.id, { success: true });
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
if (event.name === 'get_price') {
|
|
205
|
-
const price = getProductPrice(event.args.productId);
|
|
206
|
-
client.sendToolResponse(event.id, { price, currency: 'USD' });
|
|
207
|
-
}
|
|
208
|
-
});
|
|
209
|
-
```
|
|
190
|
+
## Explicit Interrupt (Stop Button)
|
|
210
191
|
|
|
211
|
-
|
|
192
|
+
For UI "Stop" buttons or programmatic control:
|
|
212
193
|
|
|
213
194
|
```typescript
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
description: string; // When AI should use this
|
|
217
|
-
parameters?: {
|
|
218
|
-
type: 'OBJECT';
|
|
219
|
-
properties: Record<string, unknown>;
|
|
220
|
-
required?: string[];
|
|
221
|
-
};
|
|
222
|
-
}
|
|
195
|
+
// User clicks Stop button
|
|
196
|
+
client.interrupt();
|
|
223
197
|
```
|
|
224
198
|
|
|
225
|
-
|
|
199
|
+
Note: Voice barge-in works automatically via Gemini's VAD. This method is for explicit control.
|
|
226
200
|
|
|
227
|
-
|
|
201
|
+
---
|
|
228
202
|
|
|
229
|
-
|
|
230
|
-
- Game events ("User completed level 5, congratulate them!")
|
|
231
|
-
- App state changes ("User opened the cart with 3 items")
|
|
232
|
-
- Timer/engagement triggers ("User has been quiet, engage them")
|
|
233
|
-
- External data updates ("Weather changed to rainy")
|
|
203
|
+
## System Messages
|
|
234
204
|
|
|
235
|
-
|
|
205
|
+
Inject text context during live sessions (game events, app state, etc.):
|
|
236
206
|
|
|
237
207
|
```typescript
|
|
238
|
-
//
|
|
239
|
-
client.sendSystemMessage("User
|
|
208
|
+
// AI responds immediately
|
|
209
|
+
client.sendSystemMessage("User completed level 5. Congratulate them!");
|
|
240
210
|
|
|
241
|
-
//
|
|
242
|
-
client.sendSystemMessage({
|
|
243
|
-
text: "User is browsing the cart",
|
|
244
|
-
triggerResponse: false
|
|
245
|
-
});
|
|
211
|
+
// Context only, no response
|
|
212
|
+
client.sendSystemMessage({ text: "User is browsing", triggerResponse: false });
|
|
246
213
|
```
|
|
247
214
|
|
|
248
|
-
|
|
215
|
+
> Requires active live session (`audioStart()` called). Max 500 characters.
|
|
249
216
|
|
|
250
|
-
|
|
251
|
-
|-----------|------|----------|---------|-------------|
|
|
252
|
-
| `text` | `string` | Yes | - | Message text (max 500 chars) |
|
|
253
|
-
| `triggerResponse` | `boolean` | No | `true` | AI responds immediately if `true` |
|
|
217
|
+
---
|
|
254
218
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
## Conversation Memory
|
|
219
|
+
## Function Calling (Tool Use)
|
|
258
220
|
|
|
259
|
-
|
|
221
|
+
Let AI call functions in your app:
|
|
260
222
|
|
|
261
|
-
|
|
262
|
-
- **Session Summaries**: Recent conversation summaries are available to the AI
|
|
263
|
-
- **Cross-Session**: Memory persists across sessions for the same `userId`
|
|
223
|
+
### 1. Define Tools
|
|
264
224
|
|
|
265
225
|
```typescript
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
}
|
|
226
|
+
const tools = [{
|
|
227
|
+
name: 'get_price',
|
|
228
|
+
description: 'Gets product price by ID',
|
|
229
|
+
parameters: {
|
|
230
|
+
type: 'OBJECT',
|
|
231
|
+
properties: { productId: { type: 'string' } },
|
|
232
|
+
required: ['productId']
|
|
233
|
+
}
|
|
234
|
+
}];
|
|
272
235
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
apiKey: 'your-api-key',
|
|
277
|
-
// No userId = guest mode, no persistent memory
|
|
236
|
+
await client.startSession({
|
|
237
|
+
prePrompt: 'You are helpful.',
|
|
238
|
+
tools,
|
|
278
239
|
});
|
|
279
240
|
```
|
|
280
241
|
|
|
281
|
-
|
|
282
|
-
|------|-------------------|----------|
|
|
283
|
-
| With `userId` | Permanent | Authenticated users |
|
|
284
|
-
| Without `userId` | Session only | Guests, anonymous users |
|
|
285
|
-
|
|
286
|
-
### Guest-to-User Migration
|
|
287
|
-
|
|
288
|
-
When a guest user logs in during a session, you can migrate their conversation history to their user account:
|
|
242
|
+
### 2. Handle toolCall Events
|
|
289
243
|
|
|
290
244
|
```typescript
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
245
|
+
client.on('toolCall', (event) => {
|
|
246
|
+
if (event.name === 'get_price') {
|
|
247
|
+
const price = lookupPrice(event.args.productId);
|
|
248
|
+
client.sendToolResponse(event.id, { price });
|
|
249
|
+
}
|
|
294
250
|
});
|
|
295
|
-
|
|
296
|
-
// After authentication
|
|
297
|
-
await client.updateUserId('authenticated-user-123');
|
|
298
251
|
```
|
|
299
252
|
|
|
300
|
-
|
|
301
|
-
- Entity extraction on guest conversation history
|
|
302
|
-
- Conversation continuity across sessions
|
|
303
|
-
- Personalization based on past interactions
|
|
253
|
+
---
|
|
304
254
|
|
|
305
|
-
##
|
|
255
|
+
## Conversation Memory
|
|
306
256
|
|
|
307
|
-
|
|
308
|
-
|-------|-------------|----------------|
|
|
309
|
-
| `connected` | Connection established | `connectionId` |
|
|
310
|
-
| `disconnected` | Connection closed | `reason`, `code` |
|
|
311
|
-
| `sessionStarted` | Session created | `sessionId` |
|
|
312
|
-
| `ready` | Ready for audio input | `timestamp` |
|
|
313
|
-
| `userTranscript` | Your speech transcribed | `text` |
|
|
314
|
-
| `response` | AI's response text | `text`, `isFinal` |
|
|
315
|
-
| `audio` | AI's audio output | `data`, `sampleRate` |
|
|
316
|
-
| `turnComplete` | AI finished speaking | `timestamp` |
|
|
317
|
-
| `toolCall` | AI wants to call a function | `id`, `name`, `args` |
|
|
318
|
-
| `userIdUpdated` | Guest migrated to user account | `userId`, `migratedMessages` |
|
|
319
|
-
| `error` | Error occurred | `code`, `message` |
|
|
320
|
-
|
|
321
|
-
### Simple Handlers
|
|
257
|
+
Enable persistent memory across sessions:
|
|
322
258
|
|
|
323
259
|
```typescript
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
// AI's text response
|
|
330
|
-
client.setResponseHandler((text, isFinal) => {
|
|
331
|
-
console.log('AI:', text, isFinal ? '(done)' : '...');
|
|
260
|
+
const client = new LiveSpeechClient({
|
|
261
|
+
region: 'ap-northeast-2',
|
|
262
|
+
apiKey: 'your-api-key',
|
|
263
|
+
userId: 'user-123', // Enables memory
|
|
332
264
|
});
|
|
265
|
+
```
|
|
333
266
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
playAudio(data);
|
|
339
|
-
});
|
|
267
|
+
| Mode | Memory |
|
|
268
|
+
|------|--------|
|
|
269
|
+
| With `userId` | Permanent (entities, summaries) |
|
|
270
|
+
| Without `userId` | Session only (guest) |
|
|
340
271
|
|
|
341
|
-
|
|
342
|
-
client.setErrorHandler((error) => {
|
|
343
|
-
console.error(`Error [${error.code}]: ${error.message}`);
|
|
344
|
-
});
|
|
272
|
+
### Guest-to-User Migration
|
|
345
273
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
const result = executeFunction(event.name, event.args);
|
|
350
|
-
client.sendToolResponse(event.id, result);
|
|
351
|
-
});
|
|
274
|
+
```typescript
|
|
275
|
+
// User logs in during session
|
|
276
|
+
await client.updateUserId('authenticated-user-123');
|
|
352
277
|
|
|
353
|
-
//
|
|
278
|
+
// Listen for confirmation
|
|
354
279
|
client.on('userIdUpdated', (event) => {
|
|
355
|
-
console.log(`
|
|
280
|
+
console.log(`Migrated ${event.migratedMessages} messages`);
|
|
356
281
|
});
|
|
357
282
|
```
|
|
358
283
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
```typescript
|
|
362
|
-
client.on('connected', (event) => {
|
|
363
|
-
console.log('Connected:', event.connectionId);
|
|
364
|
-
});
|
|
365
|
-
|
|
366
|
-
client.on('ready', () => {
|
|
367
|
-
console.log('Ready for audio');
|
|
368
|
-
});
|
|
284
|
+
---
|
|
369
285
|
|
|
370
|
-
|
|
371
|
-
console.log('You:', event.text);
|
|
372
|
-
});
|
|
373
|
-
|
|
374
|
-
client.on('response', (event) => {
|
|
375
|
-
console.log('AI:', event.text, event.isFinal);
|
|
376
|
-
});
|
|
286
|
+
## AI Speaks First
|
|
377
287
|
|
|
378
|
-
|
|
379
|
-
// event.data: Uint8Array (PCM16)
|
|
380
|
-
// event.sampleRate: 24000
|
|
381
|
-
playAudio(event.data);
|
|
382
|
-
});
|
|
383
|
-
|
|
384
|
-
client.on('turnComplete', () => {
|
|
385
|
-
console.log('AI finished speaking');
|
|
386
|
-
});
|
|
288
|
+
AI initiates the conversation:
|
|
387
289
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
client.on('toolCall', (event) => {
|
|
393
|
-
// event.id: string - use with sendToolResponse
|
|
394
|
-
// event.name: string - function name
|
|
395
|
-
// event.args: object - function arguments
|
|
396
|
-
const result = handleToolCall(event.name, event.args);
|
|
397
|
-
client.sendToolResponse(event.id, result);
|
|
290
|
+
```typescript
|
|
291
|
+
await client.startSession({
|
|
292
|
+
prePrompt: 'Greet the customer warmly.',
|
|
293
|
+
aiSpeaksFirst: true,
|
|
398
294
|
});
|
|
399
295
|
|
|
400
|
-
client.
|
|
401
|
-
// event.userId: string - the new user ID
|
|
402
|
-
// event.migratedMessages: number - count of migrated messages
|
|
403
|
-
console.log(`Migrated ${event.migratedMessages} messages to ${event.userId}`);
|
|
404
|
-
});
|
|
296
|
+
client.audioStart(); // AI speaks immediately
|
|
405
297
|
```
|
|
406
298
|
|
|
407
|
-
|
|
299
|
+
---
|
|
408
300
|
|
|
409
|
-
|
|
301
|
+
## Session Options
|
|
410
302
|
|
|
411
|
-
|
|
|
412
|
-
|
|
413
|
-
|
|
|
414
|
-
|
|
|
415
|
-
|
|
|
416
|
-
|
|
|
303
|
+
| Option | Default | Description |
|
|
304
|
+
|--------|---------|-------------|
|
|
305
|
+
| `prePrompt` | - | System prompt |
|
|
306
|
+
| `language` | `'en-US'` | Language code |
|
|
307
|
+
| `pipelineMode` | `'live'` | `'live'` (~300ms) or `'composed'` (~1-2s) |
|
|
308
|
+
| `aiSpeaksFirst` | `false` | AI initiates (live mode only) |
|
|
309
|
+
| `allowHarmCategory` | `false` | Disable safety filters |
|
|
310
|
+
| `tools` | `[]` | Function definitions |
|
|
311
|
+
| `sessionDuration` | - | Enables session duration limits when provided |
|
|
417
312
|
|
|
418
|
-
|
|
313
|
+
**Notes**
|
|
314
|
+
- Duration checks are **disabled by default**. They activate only when `sessionDuration` is provided.
|
|
315
|
+
- If only `sessionDuration.maxSeconds` is provided, `enableWarning`/`enableGoodbye` default to `false` in the SDK.
|
|
316
|
+
- Server limits take precedence in production.
|
|
419
317
|
|
|
420
|
-
|
|
421
|
-
|----------|-------|
|
|
422
|
-
| Format | PCM16 (16-bit signed, little-endian) |
|
|
423
|
-
| Sample Rate | 24,000 Hz |
|
|
424
|
-
| Channels | 1 (Mono) |
|
|
318
|
+
---
|
|
425
319
|
|
|
426
320
|
## Browser Example
|
|
427
321
|
|
|
428
322
|
```typescript
|
|
429
323
|
import { LiveSpeechClient, float32ToInt16, int16ToUint8 } from '@drawdream/livespeech';
|
|
430
324
|
|
|
431
|
-
const client = new LiveSpeechClient({
|
|
432
|
-
region: 'ap-northeast-2',
|
|
433
|
-
apiKey: 'your-api-key',
|
|
434
|
-
});
|
|
435
|
-
|
|
436
|
-
// Handlers
|
|
437
|
-
client.setUserTranscriptHandler((text) => console.log('You:', text));
|
|
438
|
-
client.setResponseHandler((text) => console.log('AI:', text));
|
|
439
|
-
client.setAudioHandler((data) => playAudioChunk(data));
|
|
440
|
-
|
|
441
|
-
// Connect
|
|
442
|
-
await client.connect();
|
|
443
|
-
await client.startSession({ prePrompt: 'You are a helpful assistant.' });
|
|
444
|
-
|
|
445
325
|
// Capture microphone
|
|
446
326
|
const stream = await navigator.mediaDevices.getUserMedia({
|
|
447
327
|
audio: { sampleRate: 16000, channelCount: 1 }
|
|
@@ -460,60 +340,30 @@ processor.onaudioprocess = (e) => {
|
|
|
460
340
|
|
|
461
341
|
source.connect(processor);
|
|
462
342
|
processor.connect(audioContext.destination);
|
|
463
|
-
|
|
464
|
-
// Start streaming
|
|
465
|
-
client.audioStart();
|
|
466
|
-
|
|
467
|
-
// Stop later
|
|
468
|
-
client.audioEnd();
|
|
469
|
-
stream.getTracks().forEach(track => track.stop());
|
|
470
343
|
```
|
|
471
344
|
|
|
345
|
+
---
|
|
346
|
+
|
|
472
347
|
## Audio Utilities
|
|
473
348
|
|
|
474
349
|
```typescript
|
|
475
|
-
import {
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
wrapPcmInWav, // Create WAV file
|
|
481
|
-
AudioEncoder, // Base64 encoding/decoding
|
|
482
|
-
} from '@drawdream/livespeech';
|
|
483
|
-
|
|
484
|
-
// Convert Web Audio to PCM16 for sending
|
|
485
|
-
const float32 = audioBuffer.getChannelData(0);
|
|
486
|
-
const int16 = float32ToInt16(float32);
|
|
487
|
-
const pcmBytes = int16ToUint8(int16);
|
|
488
|
-
client.sendAudioChunk(pcmBytes);
|
|
489
|
-
|
|
490
|
-
// Convert received PCM16 to Web Audio
|
|
491
|
-
const receivedInt16 = uint8ToInt16(audioEvent.data);
|
|
492
|
-
const float32Data = int16ToFloat32(receivedInt16);
|
|
350
|
+
import { float32ToInt16, int16ToUint8, wrapPcmInWav } from '@drawdream/livespeech';
|
|
351
|
+
|
|
352
|
+
const int16 = float32ToInt16(float32Data);
|
|
353
|
+
const bytes = int16ToUint8(int16);
|
|
354
|
+
const wav = wrapPcmInWav(bytes, { sampleRate: 16000, channels: 1, bitDepth: 16 });
|
|
493
355
|
```
|
|
494
356
|
|
|
357
|
+
---
|
|
358
|
+
|
|
495
359
|
## Error Handling
|
|
496
360
|
|
|
497
361
|
```typescript
|
|
498
362
|
client.on('error', (event) => {
|
|
499
363
|
switch (event.code) {
|
|
500
|
-
case 'authentication_failed':
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
case 'connection_timeout':
|
|
504
|
-
console.error('Connection timed out');
|
|
505
|
-
break;
|
|
506
|
-
case 'rate_limit':
|
|
507
|
-
console.error('Rate limit exceeded');
|
|
508
|
-
break;
|
|
509
|
-
default:
|
|
510
|
-
console.error(`Error: ${event.message}`);
|
|
511
|
-
}
|
|
512
|
-
});
|
|
513
|
-
|
|
514
|
-
client.on('disconnected', (event) => {
|
|
515
|
-
if (event.reason === 'error') {
|
|
516
|
-
console.log('Will auto-reconnect...');
|
|
364
|
+
case 'authentication_failed': console.error('Invalid API key'); break;
|
|
365
|
+
case 'connection_timeout': console.error('Timed out'); break;
|
|
366
|
+
default: console.error(`Error: ${event.message}`);
|
|
517
367
|
}
|
|
518
368
|
});
|
|
519
369
|
|
|
@@ -522,44 +372,13 @@ client.on('reconnecting', (event) => {
|
|
|
522
372
|
});
|
|
523
373
|
```
|
|
524
374
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
| Property | Type | Description |
|
|
528
|
-
|----------|------|-------------|
|
|
529
|
-
| `isConnected` | `boolean` | Connection status |
|
|
530
|
-
| `hasActiveSession` | `boolean` | Session status |
|
|
531
|
-
| `isAudioStreaming` | `boolean` | Streaming status |
|
|
532
|
-
| `connectionId` | `string \| null` | Current connection ID |
|
|
533
|
-
| `currentSessionId` | `string \| null` | Current session ID |
|
|
375
|
+
---
|
|
534
376
|
|
|
535
377
|
## Regions
|
|
536
378
|
|
|
537
|
-
| Region | Code |
|
|
538
|
-
|
|
539
|
-
|
|
|
540
|
-
|
|
541
|
-
## TypeScript Types
|
|
542
|
-
|
|
543
|
-
```typescript
|
|
544
|
-
import type {
|
|
545
|
-
LiveSpeechConfig,
|
|
546
|
-
SessionConfig,
|
|
547
|
-
LiveSpeechEvent,
|
|
548
|
-
ConnectedEvent,
|
|
549
|
-
DisconnectedEvent,
|
|
550
|
-
SessionStartedEvent,
|
|
551
|
-
ReadyEvent,
|
|
552
|
-
UserTranscriptEvent,
|
|
553
|
-
ResponseEvent,
|
|
554
|
-
AudioEvent,
|
|
555
|
-
TurnCompleteEvent,
|
|
556
|
-
ToolCallEvent,
|
|
557
|
-
UserIdUpdatedEvent,
|
|
558
|
-
ErrorEvent,
|
|
559
|
-
ErrorCode,
|
|
560
|
-
Tool,
|
|
561
|
-
} from '@drawdream/livespeech';
|
|
562
|
-
```
|
|
379
|
+
| Region | Code |
|
|
380
|
+
|--------|------|
|
|
381
|
+
| Seoul (Korea) | `ap-northeast-2` |
|
|
563
382
|
|
|
564
383
|
## License
|
|
565
384
|
|