@drawdream/livespeech 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -17
- package/dist/index.d.mts +108 -87
- package/dist/index.d.ts +108 -87
- package/dist/index.js +91 -74
- package/dist/index.mjs +91 -74
- package/package.json +1 -1
package/README.md
CHANGED

@@ -26,12 +26,12 @@ const client = new LiveSpeechClient({
 });

 // Handle events
-client.
-console.log(`
+client.setUserTranscriptHandler((text) => {
+  console.log(`You said: ${text}`);
 });

-client.
-console.log(`AI
+client.setTranscriptHandler((text, isFinal) => {
+  console.log(`AI Transcript: ${text}`);
 });

 client.setAudioHandler((audioData) => {

@@ -42,10 +42,44 @@ client.setAudioHandler((audioData) => {
 await client.connect();
 await client.startSession({
   prePrompt: 'You are a helpful assistant.',
+  // pipelineMode: 'live' is the default
+});
+
+// Start streaming and send audio
+client.audioStart();
+client.sendAudioChunk(audioBuffer);
+```
+
+## Pipeline Modes
+
+The SDK supports two pipeline modes for audio processing:
+
+### Live Mode (Default)
+
+Uses Gemini 2.5 Flash Live API for end-to-end audio conversation. This provides:
+- **Lower latency** - Direct audio-to-audio processing
+- **Natural conversation** - Built-in voice activity detection and turn-taking
+- **Real-time transcription** - Both user and AI speech are transcribed
+
+```typescript
+await client.startSession({
+  prePrompt: 'You are a helpful assistant.',
+  pipelineMode: 'live', // Default, can be omitted
 });
+```
+
+### Composed Mode

-
-
+Uses separate STT + LLM + TTS services for more customization:
+- **More control** - Separate services for each step
+- **Custom voices** - Use different TTS voices
+- **Text responses** - Access to intermediate text responses
+
+```typescript
+await client.startSession({
+  prePrompt: 'You are a helpful assistant.',
+  pipelineMode: 'composed',
+});
 ```

 ## API Reference

@@ -87,8 +121,9 @@ The SDK provides built-in region support, so you don't need to remember endpoint

 ```typescript
 // Simple handlers
-client.
-client.
+client.setUserTranscriptHandler((text) => {}); // User's speech transcription
+client.setTranscriptHandler((text, isFinal) => {}); // AI's speech transcription (live mode)
+client.setResponseHandler((text, isFinal) => {}); // AI text response (composed mode)
 client.setAudioHandler((audioData) => {});
 client.setErrorHandler((error) => {});

@@ -97,24 +132,23 @@ client.on('connected', (event) => {});
 client.on('disconnected', (event) => {});
 client.on('sessionStarted', (event) => {});
 client.on('sessionEnded', (event) => {});
-client.on('
-client.on('
+client.on('userTranscript', (event) => {}); // User's speech transcription
+client.on('transcript', (event) => {}); // AI's speech transcription
+client.on('response', (event) => {}); // AI text response
 client.on('audio', (event) => {});
 client.on('error', (event) => {});
 client.on('reconnecting', (event) => {});
+client.on('ready', (event) => {}); // Gemini Live ready (live mode)
+client.on('turnComplete', (event) => {}); // AI finished speaking (live mode)
 ```

 ### SessionConfig

 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
-| `prePrompt` | `string` |
-| `
-| `
-| `inputFormat` | `AudioFormat` | `'pcm16'` | Input audio format |
-| `outputFormat` | `AudioFormat` | `'pcm16'` | Output audio format |
-| `sampleRate` | `number` | `16000` | Sample rate in Hz |
-| `metadata` | `Record<string,string>` | `{}` | Custom metadata |
+| `prePrompt` | `string` | - | System prompt for the AI |
+| `language` | `string` | `'en-US'` | Language code for speech (e.g., "ko-KR") |
+| `pipelineMode` | `'live' \| 'composed'` | `'live'` | Audio processing mode |

 ## Audio Utilities
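For orientation, the new streaming flow reads end to end like this. This is a minimal sketch assembled from the diffed README and the typings below; the constructor options and the `playPcm16`/`micPcm16Chunks` helpers are illustrative assumptions, while the method names and signatures come from this diff:

```typescript
import { LiveSpeechClient } from '@drawdream/livespeech';

declare function playPcm16(data: Uint8Array): void;           // hypothetical playback helper
declare function micPcm16Chunks(): AsyncIterable<Uint8Array>; // hypothetical PCM16 microphone source

// Constructor options are assumed; see LiveSpeechConfig in the typings.
const client = new LiveSpeechClient({ apiKey: 'YOUR_API_KEY' });

client.setUserTranscriptHandler((text) => console.log(`You said: ${text}`));
client.setResponseHandler((text, isFinal) => console.log(`AI: ${text}${isFinal ? ' [final]' : ''}`));
client.setAudioHandler((audioData) => playPcm16(audioData));
client.setErrorHandler((error) => console.error(error.code, error.message));

await client.connect();
await client.startSession({
  prePrompt: 'You are a helpful assistant.',
  language: 'en-US',
  pipelineMode: 'live', // or 'composed' for the separate STT + LLM + TTS pipeline
});

// New in this diff: explicit audio streaming lifecycle.
client.audioStart();
for await (const chunk of micPcm16Chunks()) {
  client.sendAudioChunk(chunk);
}
client.audioEnd();

await client.endSession();
```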
package/dist/index.d.mts
CHANGED

@@ -21,6 +21,12 @@ declare function getEndpointForRegion(region: Region): string;
  */
 declare function isValidRegion(value: string): value is Region;

+/**
+ * Pipeline mode for audio processing
+ * - 'live': Uses Gemini Live API for end-to-end audio conversation (default)
+ * - 'composed': Uses separate STT + LLM + TTS services
+ */
+type PipelineMode = 'live' | 'composed';
 /**
  * Configuration options for the LiveSpeech client
  *

@@ -74,41 +80,20 @@ interface SessionConfig {
     /**
      * System prompt for the AI assistant
      */
-    prePrompt
-    /**
-     * Voice ID for text-to-speech output
-     * @default 'en-US-Standard-A'
-     */
-    voiceId?: string;
-    /**
-     * Language code for speech recognition
-     * @default 'en-US'
-     */
-    languageCode?: string;
-    /**
-     * Audio encoding format for input
-     * @default 'pcm16'
-     */
-    inputFormat?: AudioFormat;
-    /**
-     * Audio encoding format for output
-     * @default 'pcm16'
-     */
-    outputFormat?: AudioFormat;
+    prePrompt?: string;
     /**
-     *
-     * @default
+     * Language code for speech recognition (e.g., "en-US", "ko-KR")
+     * @default "en-US"
      */
-
+    language?: string;
     /**
-     *
+     * Pipeline mode for audio processing
+     * - 'live': Uses Gemini Live API for end-to-end audio conversation (default, lower latency)
+     * - 'composed': Uses separate STT + LLM + TTS services (more customizable)
+     * @default "live"
      */
-
+    pipelineMode?: PipelineMode;
 }
-/**
- * Supported audio formats
- */
-type AudioFormat = 'pcm16' | 'opus' | 'wav';
 /**
  * Internal resolved configuration with defaults applied
  */

@@ -125,7 +110,7 @@ interface ResolvedConfig {
 /**
  * Event types emitted by the LiveSpeech client
  */
-type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | '
+type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error';
 /**
  * Event payload for 'connected' event
  */

@@ -164,13 +149,19 @@ interface SessionEndedEvent {
     timestamp: string;
 }
 /**
- * Event payload for '
+ * Event payload for 'ready' event
+ */
+interface ReadyEvent {
+    type: 'ready';
+    timestamp: string;
+}
+/**
+ * Event payload for 'userTranscript' event
+ * User's speech transcription
  */
-interface
-    type: '
+interface UserTranscriptEvent {
+    type: 'userTranscript';
     text: string;
-    isFinal: boolean;
-    confidence?: number;
     timestamp: string;
 }
 /**

@@ -205,7 +196,7 @@ interface ErrorEvent {
 /**
  * Error codes
  */
-type ErrorCode = 'connection_failed' | 'connection_timeout' | 'authentication_failed' | 'session_error' | 'audio_error' | 'stt_error' | 'llm_error' | 'tts_error' | 'rate_limit' | 'internal_error' | 'invalid_message';
+type ErrorCode = 'connection_failed' | 'connection_timeout' | 'authentication_failed' | 'session_error' | 'audio_error' | 'streaming_error' | 'stt_error' | 'llm_error' | 'tts_error' | 'rate_limit' | 'internal_error' | 'invalid_message';
 /**
  * Event payload for 'reconnecting' event
  */

@@ -216,14 +207,22 @@ interface ReconnectingEvent {
     delay: number;
     timestamp: string;
 }
+/**
+ * Event payload for 'turnComplete' event (both modes)
+ * Indicates the AI has finished its response turn
+ */
+interface TurnCompleteEvent {
+    type: 'turnComplete';
+    timestamp: string;
+}
 /**
  * Union type of all event payloads
  */
-type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent |
+type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ErrorEvent;
 /**
  * Simplified event handlers for common use cases
  */
-type
+type UserTranscriptHandler = (text: string) => void;
 type ResponseHandler = (text: string, isFinal: boolean) => void;
 type AudioHandler = (data: Uint8Array) => void;
 type ErrorHandler = (error: ErrorEvent) => void;

@@ -231,30 +230,25 @@ type ErrorHandler = (error: ErrorEvent) => void;
 /**
  * WebSocket message types sent from client to server
  */
-type ClientMessageType = 'startSession' | 'endSession' | '
+type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'ping';
 /**
  * WebSocket message types received from server
  */
-type ServerMessageType = '
+type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error' | 'pong';
 /**
  * Base interface for client messages
  */
 interface BaseClientMessage {
     action: ClientMessageType;
-    requestId?: string;
 }
 /**
  * Start session message
  */
 interface StartSessionMessage extends BaseClientMessage {
     action: 'startSession';
-    prePrompt
-
-
-    inputFormat?: string;
-    outputFormat?: string;
-    sampleRate?: number;
-    metadata?: Record<string, string>;
+    prePrompt?: string;
+    language?: string;
+    pipelineMode?: 'live' | 'composed';
 }
 /**
  * End session message

@@ -263,14 +257,23 @@ interface EndSessionMessage extends BaseClientMessage {
     action: 'endSession';
 }
 /**
- * Audio
+ * Audio start message - begin streaming session
+ */
+interface AudioStartMessage extends BaseClientMessage {
+    action: 'audioStart';
+}
+/**
+ * Audio chunk message - send audio data
  */
-interface
-    action: '
+interface AudioChunkMessage extends BaseClientMessage {
+    action: 'audioChunk';
     data: string;
-
-
-
+}
+/**
+ * Audio end message - end streaming session
+ */
+interface AudioEndMessage extends BaseClientMessage {
+    action: 'audioEnd';
 }
 /**
  * Ping message for keep-alive

@@ -281,22 +284,14 @@ interface PingMessage extends BaseClientMessage {
 /**
  * Union type of all client messages
  */
-type ClientMessage = StartSessionMessage | EndSessionMessage |
+type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | PingMessage;
 /**
  * Base interface for server messages
  */
 interface BaseServerMessage {
     type: ServerMessageType;
-    requestId?: string;
     timestamp: string;
 }
-/**
- * Connected message from server
- */
-interface ServerConnectedMessage extends BaseServerMessage {
-    type: 'connected';
-    connectionId: string;
-}
 /**
  * Session started message from server
  */

@@ -312,13 +307,11 @@ interface ServerSessionEndedMessage extends BaseServerMessage {
     sessionId: string;
 }
 /**
- *
+ * User transcript message from server (user's speech transcription)
  */
-interface
-    type: '
+interface ServerUserTranscriptMessage extends BaseServerMessage {
+    type: 'userTranscript';
     text: string;
-    isFinal: boolean;
-    confidence?: number;
 }
 /**
  * Response message from server

@@ -344,7 +337,6 @@ interface ServerErrorMessage extends BaseServerMessage {
     type: 'error';
     code: string;
     message: string;
-    details?: unknown;
 }
 /**
  * Pong message from server

@@ -352,10 +344,24 @@ interface ServerErrorMessage extends BaseServerMessage {
 interface ServerPongMessage extends BaseServerMessage {
     type: 'pong';
 }
+/**
+ * Turn complete message from server
+ * Indicates the AI has finished its response turn
+ */
+interface ServerTurnCompleteMessage extends BaseServerMessage {
+    type: 'turnComplete';
+}
+/**
+ * Ready message from server
+ * Indicates the Gemini Live session is ready for audio input
+ */
+interface ServerReadyMessage extends BaseServerMessage {
+    type: 'ready';
+}
 /**
  * Union type of all server messages
  */
-type ServerMessage =
+type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerErrorMessage | ServerPongMessage;

 /**
  * Connection state
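Read together, the client and server message types above describe a small JSON-over-WebSocket protocol. A sketch of the client-to-server half under these typings — the raw `WebSocket` handle is an assumption (the SDK's `LiveSpeechClient` wraps this internally), and `encodeAudioToBase64` comes from the package exports with its exact signature assumed:

```typescript
import { encodeAudioToBase64, type ClientMessage } from '@drawdream/livespeech';

// Hypothetical low-level sender; shown only to make the wire shape concrete.
function send(ws: WebSocket, message: ClientMessage): void {
  ws.send(JSON.stringify(message));
}

declare const ws: WebSocket;
declare const pcm16: Uint8Array;

send(ws, { action: 'startSession', prePrompt: 'You are a helpful assistant.', pipelineMode: 'live' });
send(ws, { action: 'audioStart' });
send(ws, { action: 'audioChunk', data: encodeAudioToBase64(pcm16) }); // AudioChunkMessage.data is a base64 string
send(ws, { action: 'audioEnd' });
send(ws, { action: 'endSession' });
```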
@@ -368,13 +374,15 @@ type ConnectionState = 'disconnected' | 'connecting' | 'connected' | 'reconnecti
 type LiveSpeechEventMap = {
     connected: ConnectedEvent;
     disconnected: DisconnectedEvent;
+    reconnecting: ReconnectingEvent;
     sessionStarted: SessionStartedEvent;
     sessionEnded: SessionEndedEvent;
-
+    ready: ReadyEvent;
+    userTranscript: UserTranscriptEvent;
     response: ResponseEvent;
     audio: AudioEvent;
+    turnComplete: TurnCompleteEvent;
     error: ErrorEvent;
-    reconnecting: ReconnectingEvent;
 };
 /**
  * LiveSpeech client for real-time speech-to-speech AI conversations

@@ -385,9 +393,9 @@ declare class LiveSpeechClient {
     private readonly audioEncoder;
     private readonly logger;
     private sessionId;
-    private
+    private isStreaming;
     private readonly eventListeners;
-    private
+    private userTranscriptHandler;
     private responseHandler;
     private audioHandler;
     private errorHandler;

@@ -412,6 +420,10 @@ declare class LiveSpeechClient {
      * Check if session is active
      */
     get hasActiveSession(): boolean;
+    /**
+     * Check if audio streaming is active
+     */
+    get isAudioStreaming(): boolean;
     /**
      * Connect to the server
      */

@@ -423,18 +435,23 @@ declare class LiveSpeechClient {
     /**
      * Start a new session
      */
-    startSession(config
+    startSession(config?: SessionConfig): Promise<string>;
     /**
      * End the current session
      */
     endSession(): Promise<void>;
     /**
-     *
+     * Start audio streaming session
+     */
+    audioStart(): void;
+    /**
+     * Send audio chunk (PCM16 base64 encoded)
+     */
+    sendAudioChunk(data: Uint8Array): void;
+    /**
+     * End audio streaming session
      */
-
-    format?: AudioFormat;
-    isFinal?: boolean;
-    }): void;
+    audioEnd(): void;
     /**
      * Add event listener
      */

@@ -444,17 +461,17 @@ declare class LiveSpeechClient {
      */
     off<K extends keyof LiveSpeechEventMap>(event: K, listener: (event: LiveSpeechEventMap[K]) => void): void;
     /**
-     * Set
-     */
-    setTranscriptHandler(handler: TranscriptHandler): void;
-    /**
-     * Set response handler (simplified)
+     * Set response handler
      */
     setResponseHandler(handler: ResponseHandler): void;
     /**
      * Set audio handler (simplified)
      */
     setAudioHandler(handler: AudioHandler): void;
+    /**
+     * Set user transcript handler
+     */
+    setUserTranscriptHandler(handler: UserTranscriptHandler): void;
     /**
      * Set error handler (simplified)
      */

@@ -467,6 +484,10 @@ declare class LiveSpeechClient {
     private handleMessage;
 }

+/**
+ * Audio format type
+ */
+type AudioFormat = 'pcm16' | 'opus' | 'wav';
 /**
  * Audio encoder options
  */

@@ -552,4 +573,4 @@ declare class AudioEncoder {
     wrapWav(data: Uint8Array): Uint8Array;
 }

-export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type
+export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
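Two practical consequences of these typings: `on()` is now exhaustively typed through the reworked `LiveSpeechEventMap`, and the exported sample-format helpers cover the Float32-to-PCM16 conversion a Web Audio capture path needs before `sendAudioChunk`. A sketch, assuming the helper signatures implied by their names (`float32ToInt16`, `int16ToUint8`) and a hypothetical capture callback:

```typescript
import { LiveSpeechClient, float32ToInt16, int16ToUint8 } from '@drawdream/livespeech';

declare const client: LiveSpeechClient;

// Each payload is narrowed by event key via LiveSpeechEventMap.
client.on('userTranscript', (event) => console.log('user:', event.text));
client.on('turnComplete', (event) => console.log('AI turn finished at', event.timestamp));
client.on('ready', () => client.audioStart()); // start streaming once Gemini Live is ready

// Hypothetical AudioWorklet callback delivering Float32 samples.
function onMicSamples(samples: Float32Array): void {
  const pcm16 = int16ToUint8(float32ToInt16(samples)); // assumed: Float32Array -> Int16Array -> Uint8Array
  client.sendAudioChunk(pcm16);
}
```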