@drawdream/livespeech 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +87 -72
- package/dist/index.d.ts +87 -72
- package/dist/index.js +74 -55
- package/dist/index.mjs +74 -55
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -74,41 +74,8 @@ interface SessionConfig {
|
|
|
74
74
|
/**
|
|
75
75
|
* System prompt for the AI assistant
|
|
76
76
|
*/
|
|
77
|
-
prePrompt
|
|
78
|
-
/**
|
|
79
|
-
* Voice ID for text-to-speech output
|
|
80
|
-
* @default 'en-US-Standard-A'
|
|
81
|
-
*/
|
|
82
|
-
voiceId?: string;
|
|
83
|
-
/**
|
|
84
|
-
* Language code for speech recognition
|
|
85
|
-
* @default 'en-US'
|
|
86
|
-
*/
|
|
87
|
-
languageCode?: string;
|
|
88
|
-
/**
|
|
89
|
-
* Audio encoding format for input
|
|
90
|
-
* @default 'pcm16'
|
|
91
|
-
*/
|
|
92
|
-
inputFormat?: AudioFormat;
|
|
93
|
-
/**
|
|
94
|
-
* Audio encoding format for output
|
|
95
|
-
* @default 'pcm16'
|
|
96
|
-
*/
|
|
97
|
-
outputFormat?: AudioFormat;
|
|
98
|
-
/**
|
|
99
|
-
* Sample rate for audio in Hz
|
|
100
|
-
* @default 16000
|
|
101
|
-
*/
|
|
102
|
-
sampleRate?: number;
|
|
103
|
-
/**
|
|
104
|
-
* Custom metadata to attach to the session
|
|
105
|
-
*/
|
|
106
|
-
metadata?: Record<string, string>;
|
|
77
|
+
prePrompt?: string;
|
|
107
78
|
}
|
|
108
|
-
/**
|
|
109
|
-
* Supported audio formats
|
|
110
|
-
*/
|
|
111
|
-
type AudioFormat = 'pcm16' | 'opus' | 'wav';
|
|
112
79
|
/**
|
|
113
80
|
* Internal resolved configuration with defaults applied
|
|
114
81
|
*/
|
|
@@ -125,7 +92,7 @@ interface ResolvedConfig {
|
|
|
125
92
|
/**
|
|
126
93
|
* Event types emitted by the LiveSpeech client
|
|
127
94
|
*/
|
|
128
|
-
type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | 'transcript' | 'response' | 'audio' | 'error' | 'reconnecting';
|
|
95
|
+
type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'reconnecting';
|
|
129
96
|
/**
|
|
130
97
|
* Event payload for 'connected' event
|
|
131
98
|
*/
|
|
@@ -163,6 +130,27 @@ interface SessionEndedEvent {
|
|
|
163
130
|
sessionId: string;
|
|
164
131
|
timestamp: string;
|
|
165
132
|
}
|
|
133
|
+
/**
|
|
134
|
+
* Event payload for 'streamingStarted' event - acknowledgment of audioStart
|
|
135
|
+
*/
|
|
136
|
+
interface StreamingStartedEvent {
|
|
137
|
+
type: 'streamingStarted';
|
|
138
|
+
timestamp: string;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Event payload for 'speechStart' event - VAD detected speech begin
|
|
142
|
+
*/
|
|
143
|
+
interface SpeechStartEvent {
|
|
144
|
+
type: 'speechStart';
|
|
145
|
+
timestamp: string;
|
|
146
|
+
}
|
|
147
|
+
/**
|
|
148
|
+
* Event payload for 'speechEnd' event - VAD detected speech end
|
|
149
|
+
*/
|
|
150
|
+
interface SpeechEndEvent {
|
|
151
|
+
type: 'speechEnd';
|
|
152
|
+
timestamp: string;
|
|
153
|
+
}
|
|
166
154
|
/**
|
|
167
155
|
* Event payload for 'transcript' event
|
|
168
156
|
*/
|
|
@@ -205,7 +193,7 @@ interface ErrorEvent {
|
|
|
205
193
|
/**
|
|
206
194
|
* Error codes
|
|
207
195
|
*/
|
|
208
|
-
type ErrorCode = 'connection_failed' | 'connection_timeout' | 'authentication_failed' | 'session_error' | 'audio_error' | 'stt_error' | 'llm_error' | 'tts_error' | 'rate_limit' | 'internal_error' | 'invalid_message';
|
|
196
|
+
type ErrorCode = 'connection_failed' | 'connection_timeout' | 'authentication_failed' | 'session_error' | 'audio_error' | 'streaming_error' | 'stt_error' | 'llm_error' | 'tts_error' | 'rate_limit' | 'internal_error' | 'invalid_message';
|
|
209
197
|
/**
|
|
210
198
|
* Event payload for 'reconnecting' event
|
|
211
199
|
*/
|
|
@@ -219,7 +207,7 @@ interface ReconnectingEvent {
|
|
|
219
207
|
/**
|
|
220
208
|
* Union type of all event payloads
|
|
221
209
|
*/
|
|
222
|
-
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent | TranscriptEvent | ResponseEvent | AudioEvent | ErrorEvent | ReconnectingEvent;
|
|
210
|
+
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent | StreamingStartedEvent | SpeechStartEvent | SpeechEndEvent | TranscriptEvent | ResponseEvent | AudioEvent | ErrorEvent | ReconnectingEvent;
|
|
223
211
|
/**
|
|
224
212
|
* Simplified event handlers for common use cases
|
|
225
213
|
*/
|
|
@@ -231,30 +219,23 @@ type ErrorHandler = (error: ErrorEvent) => void;
|
|
|
231
219
|
/**
|
|
232
220
|
* WebSocket message types sent from client to server
|
|
233
221
|
*/
|
|
234
|
-
type ClientMessageType = 'startSession' | 'endSession' | '
|
|
222
|
+
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'ping';
|
|
235
223
|
/**
|
|
236
224
|
* WebSocket message types received from server
|
|
237
225
|
*/
|
|
238
|
-
type ServerMessageType = '
|
|
226
|
+
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'pong';
|
|
239
227
|
/**
|
|
240
228
|
* Base interface for client messages
|
|
241
229
|
*/
|
|
242
230
|
interface BaseClientMessage {
|
|
243
231
|
action: ClientMessageType;
|
|
244
|
-
requestId?: string;
|
|
245
232
|
}
|
|
246
233
|
/**
|
|
247
234
|
* Start session message
|
|
248
235
|
*/
|
|
249
236
|
interface StartSessionMessage extends BaseClientMessage {
|
|
250
237
|
action: 'startSession';
|
|
251
|
-
prePrompt
|
|
252
|
-
voiceId?: string;
|
|
253
|
-
languageCode?: string;
|
|
254
|
-
inputFormat?: string;
|
|
255
|
-
outputFormat?: string;
|
|
256
|
-
sampleRate?: number;
|
|
257
|
-
metadata?: Record<string, string>;
|
|
238
|
+
prePrompt?: string;
|
|
258
239
|
}
|
|
259
240
|
/**
|
|
260
241
|
* End session message
|
|
@@ -263,14 +244,23 @@ interface EndSessionMessage extends BaseClientMessage {
|
|
|
263
244
|
action: 'endSession';
|
|
264
245
|
}
|
|
265
246
|
/**
|
|
266
|
-
* Audio
|
|
247
|
+
* Audio start message - begin streaming session
|
|
267
248
|
*/
|
|
268
|
-
interface
|
|
269
|
-
action: '
|
|
249
|
+
interface AudioStartMessage extends BaseClientMessage {
|
|
250
|
+
action: 'audioStart';
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Audio chunk message - send audio data
|
|
254
|
+
*/
|
|
255
|
+
interface AudioChunkMessage extends BaseClientMessage {
|
|
256
|
+
action: 'audioChunk';
|
|
270
257
|
data: string;
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
258
|
+
}
|
|
259
|
+
/**
|
|
260
|
+
* Audio end message - end streaming session
|
|
261
|
+
*/
|
|
262
|
+
interface AudioEndMessage extends BaseClientMessage {
|
|
263
|
+
action: 'audioEnd';
|
|
274
264
|
}
|
|
275
265
|
/**
|
|
276
266
|
* Ping message for keep-alive
|
|
@@ -281,22 +271,14 @@ interface PingMessage extends BaseClientMessage {
|
|
|
281
271
|
/**
|
|
282
272
|
* Union type of all client messages
|
|
283
273
|
*/
|
|
284
|
-
type ClientMessage = StartSessionMessage | EndSessionMessage |
|
|
274
|
+
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | PingMessage;
|
|
285
275
|
/**
|
|
286
276
|
* Base interface for server messages
|
|
287
277
|
*/
|
|
288
278
|
interface BaseServerMessage {
|
|
289
279
|
type: ServerMessageType;
|
|
290
|
-
requestId?: string;
|
|
291
280
|
timestamp: string;
|
|
292
281
|
}
|
|
293
|
-
/**
|
|
294
|
-
* Connected message from server
|
|
295
|
-
*/
|
|
296
|
-
interface ServerConnectedMessage extends BaseServerMessage {
|
|
297
|
-
type: 'connected';
|
|
298
|
-
connectionId: string;
|
|
299
|
-
}
|
|
300
282
|
/**
|
|
301
283
|
* Session started message from server
|
|
302
284
|
*/
|
|
@@ -311,6 +293,24 @@ interface ServerSessionEndedMessage extends BaseServerMessage {
|
|
|
311
293
|
type: 'sessionEnded';
|
|
312
294
|
sessionId: string;
|
|
313
295
|
}
|
|
296
|
+
/**
|
|
297
|
+
* Streaming started message - acknowledgment of audioStart
|
|
298
|
+
*/
|
|
299
|
+
interface ServerStreamingStartedMessage extends BaseServerMessage {
|
|
300
|
+
type: 'streamingStarted';
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* Speech start message - VAD detected speech begin
|
|
304
|
+
*/
|
|
305
|
+
interface ServerSpeechStartMessage extends BaseServerMessage {
|
|
306
|
+
type: 'speechStart';
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Speech end message - VAD detected speech end
|
|
310
|
+
*/
|
|
311
|
+
interface ServerSpeechEndMessage extends BaseServerMessage {
|
|
312
|
+
type: 'speechEnd';
|
|
313
|
+
}
|
|
314
314
|
/**
|
|
315
315
|
* Transcript message from server
|
|
316
316
|
*/
|
|
@@ -344,7 +344,6 @@ interface ServerErrorMessage extends BaseServerMessage {
|
|
|
344
344
|
type: 'error';
|
|
345
345
|
code: string;
|
|
346
346
|
message: string;
|
|
347
|
-
details?: unknown;
|
|
348
347
|
}
|
|
349
348
|
/**
|
|
350
349
|
* Pong message from server
|
|
@@ -355,7 +354,7 @@ interface ServerPongMessage extends BaseServerMessage {
|
|
|
355
354
|
/**
|
|
356
355
|
* Union type of all server messages
|
|
357
356
|
*/
|
|
358
|
-
type ServerMessage =
|
|
357
|
+
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerStreamingStartedMessage | ServerSpeechStartMessage | ServerSpeechEndMessage | ServerTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerErrorMessage | ServerPongMessage;
|
|
359
358
|
|
|
360
359
|
/**
|
|
361
360
|
* Connection state
|
|
@@ -370,6 +369,9 @@ type LiveSpeechEventMap = {
|
|
|
370
369
|
disconnected: DisconnectedEvent;
|
|
371
370
|
sessionStarted: SessionStartedEvent;
|
|
372
371
|
sessionEnded: SessionEndedEvent;
|
|
372
|
+
streamingStarted: StreamingStartedEvent;
|
|
373
|
+
speechStart: SpeechStartEvent;
|
|
374
|
+
speechEnd: SpeechEndEvent;
|
|
373
375
|
transcript: TranscriptEvent;
|
|
374
376
|
response: ResponseEvent;
|
|
375
377
|
audio: AudioEvent;
|
|
@@ -385,7 +387,7 @@ declare class LiveSpeechClient {
|
|
|
385
387
|
private readonly audioEncoder;
|
|
386
388
|
private readonly logger;
|
|
387
389
|
private sessionId;
|
|
388
|
-
private
|
|
390
|
+
private isStreaming;
|
|
389
391
|
private readonly eventListeners;
|
|
390
392
|
private transcriptHandler;
|
|
391
393
|
private responseHandler;
|
|
@@ -412,6 +414,10 @@ declare class LiveSpeechClient {
|
|
|
412
414
|
* Check if session is active
|
|
413
415
|
*/
|
|
414
416
|
get hasActiveSession(): boolean;
|
|
417
|
+
/**
|
|
418
|
+
* Check if audio streaming is active
|
|
419
|
+
*/
|
|
420
|
+
get isAudioStreaming(): boolean;
|
|
415
421
|
/**
|
|
416
422
|
* Connect to the server
|
|
417
423
|
*/
|
|
@@ -423,18 +429,23 @@ declare class LiveSpeechClient {
|
|
|
423
429
|
/**
|
|
424
430
|
* Start a new session
|
|
425
431
|
*/
|
|
426
|
-
startSession(config
|
|
432
|
+
startSession(config?: SessionConfig): Promise<string>;
|
|
427
433
|
/**
|
|
428
434
|
* End the current session
|
|
429
435
|
*/
|
|
430
436
|
endSession(): Promise<void>;
|
|
431
437
|
/**
|
|
432
|
-
*
|
|
438
|
+
* Start audio streaming session
|
|
433
439
|
*/
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
440
|
+
audioStart(): void;
|
|
441
|
+
/**
|
|
442
|
+
* Send audio chunk (PCM16 base64 encoded)
|
|
443
|
+
*/
|
|
444
|
+
sendAudioChunk(data: Uint8Array): void;
|
|
445
|
+
/**
|
|
446
|
+
* End audio streaming session
|
|
447
|
+
*/
|
|
448
|
+
audioEnd(): void;
|
|
438
449
|
/**
|
|
439
450
|
* Add event listener
|
|
440
451
|
*/
|
|
@@ -467,6 +478,10 @@ declare class LiveSpeechClient {
|
|
|
467
478
|
private handleMessage;
|
|
468
479
|
}
|
|
469
480
|
|
|
481
|
+
/**
|
|
482
|
+
* Audio format type
|
|
483
|
+
*/
|
|
484
|
+
type AudioFormat = 'pcm16' | 'opus' | 'wav';
|
|
470
485
|
/**
|
|
471
486
|
* Audio encoder options
|
|
472
487
|
*/
|
|
@@ -552,4 +567,4 @@ declare class AudioEncoder {
|
|
|
552
567
|
wrapWav(data: Uint8Array): Uint8Array;
|
|
553
568
|
}
|
|
554
569
|
|
|
555
|
-
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type
|
|
570
|
+
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type SpeechEndEvent, type SpeechStartEvent, type StreamingStartedEvent, type TranscriptEvent, type TranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
|
package/dist/index.d.ts
CHANGED
|
@@ -74,41 +74,8 @@ interface SessionConfig {
|
|
|
74
74
|
/**
|
|
75
75
|
* System prompt for the AI assistant
|
|
76
76
|
*/
|
|
77
|
-
prePrompt
|
|
78
|
-
/**
|
|
79
|
-
* Voice ID for text-to-speech output
|
|
80
|
-
* @default 'en-US-Standard-A'
|
|
81
|
-
*/
|
|
82
|
-
voiceId?: string;
|
|
83
|
-
/**
|
|
84
|
-
* Language code for speech recognition
|
|
85
|
-
* @default 'en-US'
|
|
86
|
-
*/
|
|
87
|
-
languageCode?: string;
|
|
88
|
-
/**
|
|
89
|
-
* Audio encoding format for input
|
|
90
|
-
* @default 'pcm16'
|
|
91
|
-
*/
|
|
92
|
-
inputFormat?: AudioFormat;
|
|
93
|
-
/**
|
|
94
|
-
* Audio encoding format for output
|
|
95
|
-
* @default 'pcm16'
|
|
96
|
-
*/
|
|
97
|
-
outputFormat?: AudioFormat;
|
|
98
|
-
/**
|
|
99
|
-
* Sample rate for audio in Hz
|
|
100
|
-
* @default 16000
|
|
101
|
-
*/
|
|
102
|
-
sampleRate?: number;
|
|
103
|
-
/**
|
|
104
|
-
* Custom metadata to attach to the session
|
|
105
|
-
*/
|
|
106
|
-
metadata?: Record<string, string>;
|
|
77
|
+
prePrompt?: string;
|
|
107
78
|
}
|
|
108
|
-
/**
|
|
109
|
-
* Supported audio formats
|
|
110
|
-
*/
|
|
111
|
-
type AudioFormat = 'pcm16' | 'opus' | 'wav';
|
|
112
79
|
/**
|
|
113
80
|
* Internal resolved configuration with defaults applied
|
|
114
81
|
*/
|
|
@@ -125,7 +92,7 @@ interface ResolvedConfig {
|
|
|
125
92
|
/**
|
|
126
93
|
* Event types emitted by the LiveSpeech client
|
|
127
94
|
*/
|
|
128
|
-
type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | 'transcript' | 'response' | 'audio' | 'error' | 'reconnecting';
|
|
95
|
+
type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'reconnecting';
|
|
129
96
|
/**
|
|
130
97
|
* Event payload for 'connected' event
|
|
131
98
|
*/
|
|
@@ -163,6 +130,27 @@ interface SessionEndedEvent {
|
|
|
163
130
|
sessionId: string;
|
|
164
131
|
timestamp: string;
|
|
165
132
|
}
|
|
133
|
+
/**
|
|
134
|
+
* Event payload for 'streamingStarted' event - acknowledgment of audioStart
|
|
135
|
+
*/
|
|
136
|
+
interface StreamingStartedEvent {
|
|
137
|
+
type: 'streamingStarted';
|
|
138
|
+
timestamp: string;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Event payload for 'speechStart' event - VAD detected speech begin
|
|
142
|
+
*/
|
|
143
|
+
interface SpeechStartEvent {
|
|
144
|
+
type: 'speechStart';
|
|
145
|
+
timestamp: string;
|
|
146
|
+
}
|
|
147
|
+
/**
|
|
148
|
+
* Event payload for 'speechEnd' event - VAD detected speech end
|
|
149
|
+
*/
|
|
150
|
+
interface SpeechEndEvent {
|
|
151
|
+
type: 'speechEnd';
|
|
152
|
+
timestamp: string;
|
|
153
|
+
}
|
|
166
154
|
/**
|
|
167
155
|
* Event payload for 'transcript' event
|
|
168
156
|
*/
|
|
@@ -205,7 +193,7 @@ interface ErrorEvent {
|
|
|
205
193
|
/**
|
|
206
194
|
* Error codes
|
|
207
195
|
*/
|
|
208
|
-
type ErrorCode = 'connection_failed' | 'connection_timeout' | 'authentication_failed' | 'session_error' | 'audio_error' | 'stt_error' | 'llm_error' | 'tts_error' | 'rate_limit' | 'internal_error' | 'invalid_message';
|
|
196
|
+
type ErrorCode = 'connection_failed' | 'connection_timeout' | 'authentication_failed' | 'session_error' | 'audio_error' | 'streaming_error' | 'stt_error' | 'llm_error' | 'tts_error' | 'rate_limit' | 'internal_error' | 'invalid_message';
|
|
209
197
|
/**
|
|
210
198
|
* Event payload for 'reconnecting' event
|
|
211
199
|
*/
|
|
@@ -219,7 +207,7 @@ interface ReconnectingEvent {
|
|
|
219
207
|
/**
|
|
220
208
|
* Union type of all event payloads
|
|
221
209
|
*/
|
|
222
|
-
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent | TranscriptEvent | ResponseEvent | AudioEvent | ErrorEvent | ReconnectingEvent;
|
|
210
|
+
type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent | StreamingStartedEvent | SpeechStartEvent | SpeechEndEvent | TranscriptEvent | ResponseEvent | AudioEvent | ErrorEvent | ReconnectingEvent;
|
|
223
211
|
/**
|
|
224
212
|
* Simplified event handlers for common use cases
|
|
225
213
|
*/
|
|
@@ -231,30 +219,23 @@ type ErrorHandler = (error: ErrorEvent) => void;
|
|
|
231
219
|
/**
|
|
232
220
|
* WebSocket message types sent from client to server
|
|
233
221
|
*/
|
|
234
|
-
type ClientMessageType = 'startSession' | 'endSession' | '
|
|
222
|
+
type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioChunk' | 'audioEnd' | 'ping';
|
|
235
223
|
/**
|
|
236
224
|
* WebSocket message types received from server
|
|
237
225
|
*/
|
|
238
|
-
type ServerMessageType = '
|
|
226
|
+
type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'pong';
|
|
239
227
|
/**
|
|
240
228
|
* Base interface for client messages
|
|
241
229
|
*/
|
|
242
230
|
interface BaseClientMessage {
|
|
243
231
|
action: ClientMessageType;
|
|
244
|
-
requestId?: string;
|
|
245
232
|
}
|
|
246
233
|
/**
|
|
247
234
|
* Start session message
|
|
248
235
|
*/
|
|
249
236
|
interface StartSessionMessage extends BaseClientMessage {
|
|
250
237
|
action: 'startSession';
|
|
251
|
-
prePrompt
|
|
252
|
-
voiceId?: string;
|
|
253
|
-
languageCode?: string;
|
|
254
|
-
inputFormat?: string;
|
|
255
|
-
outputFormat?: string;
|
|
256
|
-
sampleRate?: number;
|
|
257
|
-
metadata?: Record<string, string>;
|
|
238
|
+
prePrompt?: string;
|
|
258
239
|
}
|
|
259
240
|
/**
|
|
260
241
|
* End session message
|
|
@@ -263,14 +244,23 @@ interface EndSessionMessage extends BaseClientMessage {
|
|
|
263
244
|
action: 'endSession';
|
|
264
245
|
}
|
|
265
246
|
/**
|
|
266
|
-
* Audio
|
|
247
|
+
* Audio start message - begin streaming session
|
|
267
248
|
*/
|
|
268
|
-
interface
|
|
269
|
-
action: '
|
|
249
|
+
interface AudioStartMessage extends BaseClientMessage {
|
|
250
|
+
action: 'audioStart';
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Audio chunk message - send audio data
|
|
254
|
+
*/
|
|
255
|
+
interface AudioChunkMessage extends BaseClientMessage {
|
|
256
|
+
action: 'audioChunk';
|
|
270
257
|
data: string;
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
258
|
+
}
|
|
259
|
+
/**
|
|
260
|
+
* Audio end message - end streaming session
|
|
261
|
+
*/
|
|
262
|
+
interface AudioEndMessage extends BaseClientMessage {
|
|
263
|
+
action: 'audioEnd';
|
|
274
264
|
}
|
|
275
265
|
/**
|
|
276
266
|
* Ping message for keep-alive
|
|
@@ -281,22 +271,14 @@ interface PingMessage extends BaseClientMessage {
|
|
|
281
271
|
/**
|
|
282
272
|
* Union type of all client messages
|
|
283
273
|
*/
|
|
284
|
-
type ClientMessage = StartSessionMessage | EndSessionMessage |
|
|
274
|
+
type ClientMessage = StartSessionMessage | EndSessionMessage | AudioStartMessage | AudioChunkMessage | AudioEndMessage | PingMessage;
|
|
285
275
|
/**
|
|
286
276
|
* Base interface for server messages
|
|
287
277
|
*/
|
|
288
278
|
interface BaseServerMessage {
|
|
289
279
|
type: ServerMessageType;
|
|
290
|
-
requestId?: string;
|
|
291
280
|
timestamp: string;
|
|
292
281
|
}
|
|
293
|
-
/**
|
|
294
|
-
* Connected message from server
|
|
295
|
-
*/
|
|
296
|
-
interface ServerConnectedMessage extends BaseServerMessage {
|
|
297
|
-
type: 'connected';
|
|
298
|
-
connectionId: string;
|
|
299
|
-
}
|
|
300
282
|
/**
|
|
301
283
|
* Session started message from server
|
|
302
284
|
*/
|
|
@@ -311,6 +293,24 @@ interface ServerSessionEndedMessage extends BaseServerMessage {
|
|
|
311
293
|
type: 'sessionEnded';
|
|
312
294
|
sessionId: string;
|
|
313
295
|
}
|
|
296
|
+
/**
|
|
297
|
+
* Streaming started message - acknowledgment of audioStart
|
|
298
|
+
*/
|
|
299
|
+
interface ServerStreamingStartedMessage extends BaseServerMessage {
|
|
300
|
+
type: 'streamingStarted';
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* Speech start message - VAD detected speech begin
|
|
304
|
+
*/
|
|
305
|
+
interface ServerSpeechStartMessage extends BaseServerMessage {
|
|
306
|
+
type: 'speechStart';
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Speech end message - VAD detected speech end
|
|
310
|
+
*/
|
|
311
|
+
interface ServerSpeechEndMessage extends BaseServerMessage {
|
|
312
|
+
type: 'speechEnd';
|
|
313
|
+
}
|
|
314
314
|
/**
|
|
315
315
|
* Transcript message from server
|
|
316
316
|
*/
|
|
@@ -344,7 +344,6 @@ interface ServerErrorMessage extends BaseServerMessage {
|
|
|
344
344
|
type: 'error';
|
|
345
345
|
code: string;
|
|
346
346
|
message: string;
|
|
347
|
-
details?: unknown;
|
|
348
347
|
}
|
|
349
348
|
/**
|
|
350
349
|
* Pong message from server
|
|
@@ -355,7 +354,7 @@ interface ServerPongMessage extends BaseServerMessage {
|
|
|
355
354
|
/**
|
|
356
355
|
* Union type of all server messages
|
|
357
356
|
*/
|
|
358
|
-
type ServerMessage =
|
|
357
|
+
type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerStreamingStartedMessage | ServerSpeechStartMessage | ServerSpeechEndMessage | ServerTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerErrorMessage | ServerPongMessage;
|
|
359
358
|
|
|
360
359
|
/**
|
|
361
360
|
* Connection state
|
|
@@ -370,6 +369,9 @@ type LiveSpeechEventMap = {
|
|
|
370
369
|
disconnected: DisconnectedEvent;
|
|
371
370
|
sessionStarted: SessionStartedEvent;
|
|
372
371
|
sessionEnded: SessionEndedEvent;
|
|
372
|
+
streamingStarted: StreamingStartedEvent;
|
|
373
|
+
speechStart: SpeechStartEvent;
|
|
374
|
+
speechEnd: SpeechEndEvent;
|
|
373
375
|
transcript: TranscriptEvent;
|
|
374
376
|
response: ResponseEvent;
|
|
375
377
|
audio: AudioEvent;
|
|
@@ -385,7 +387,7 @@ declare class LiveSpeechClient {
|
|
|
385
387
|
private readonly audioEncoder;
|
|
386
388
|
private readonly logger;
|
|
387
389
|
private sessionId;
|
|
388
|
-
private
|
|
390
|
+
private isStreaming;
|
|
389
391
|
private readonly eventListeners;
|
|
390
392
|
private transcriptHandler;
|
|
391
393
|
private responseHandler;
|
|
@@ -412,6 +414,10 @@ declare class LiveSpeechClient {
|
|
|
412
414
|
* Check if session is active
|
|
413
415
|
*/
|
|
414
416
|
get hasActiveSession(): boolean;
|
|
417
|
+
/**
|
|
418
|
+
* Check if audio streaming is active
|
|
419
|
+
*/
|
|
420
|
+
get isAudioStreaming(): boolean;
|
|
415
421
|
/**
|
|
416
422
|
* Connect to the server
|
|
417
423
|
*/
|
|
@@ -423,18 +429,23 @@ declare class LiveSpeechClient {
|
|
|
423
429
|
/**
|
|
424
430
|
* Start a new session
|
|
425
431
|
*/
|
|
426
|
-
startSession(config
|
|
432
|
+
startSession(config?: SessionConfig): Promise<string>;
|
|
427
433
|
/**
|
|
428
434
|
* End the current session
|
|
429
435
|
*/
|
|
430
436
|
endSession(): Promise<void>;
|
|
431
437
|
/**
|
|
432
|
-
*
|
|
438
|
+
* Start audio streaming session
|
|
433
439
|
*/
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
440
|
+
audioStart(): void;
|
|
441
|
+
/**
|
|
442
|
+
* Send audio chunk (PCM16 base64 encoded)
|
|
443
|
+
*/
|
|
444
|
+
sendAudioChunk(data: Uint8Array): void;
|
|
445
|
+
/**
|
|
446
|
+
* End audio streaming session
|
|
447
|
+
*/
|
|
448
|
+
audioEnd(): void;
|
|
438
449
|
/**
|
|
439
450
|
* Add event listener
|
|
440
451
|
*/
|
|
@@ -467,6 +478,10 @@ declare class LiveSpeechClient {
|
|
|
467
478
|
private handleMessage;
|
|
468
479
|
}
|
|
469
480
|
|
|
481
|
+
/**
|
|
482
|
+
* Audio format type
|
|
483
|
+
*/
|
|
484
|
+
type AudioFormat = 'pcm16' | 'opus' | 'wav';
|
|
470
485
|
/**
|
|
471
486
|
* Audio encoder options
|
|
472
487
|
*/
|
|
@@ -552,4 +567,4 @@ declare class AudioEncoder {
|
|
|
552
567
|
wrapWav(data: Uint8Array): Uint8Array;
|
|
553
568
|
}
|
|
554
569
|
|
|
555
|
-
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type
|
|
570
|
+
export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type SpeechEndEvent, type SpeechStartEvent, type StreamingStartedEvent, type TranscriptEvent, type TranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
|
package/dist/index.js
CHANGED
|
@@ -332,22 +332,13 @@ var WebSocketConnection = class {
|
|
|
332
332
|
/**
|
|
333
333
|
* Handle incoming message
|
|
334
334
|
*/
|
|
335
|
-
handleMessage(data
|
|
335
|
+
handleMessage(data) {
|
|
336
336
|
const message = parseServerMessage(data);
|
|
337
337
|
if (!message) {
|
|
338
338
|
this.logger.warn("Invalid message received:", data);
|
|
339
339
|
return;
|
|
340
340
|
}
|
|
341
341
|
this.logger.debug("Received message:", message.type);
|
|
342
|
-
if (message.type === "connected") {
|
|
343
|
-
this.connectionId = message.connectionId;
|
|
344
|
-
this.state = "connected";
|
|
345
|
-
this.retryController.reset();
|
|
346
|
-
this.startPingInterval();
|
|
347
|
-
this.events.onOpen?.(message.connectionId);
|
|
348
|
-
onFirstConnect?.();
|
|
349
|
-
return;
|
|
350
|
-
}
|
|
351
342
|
if (message.type === "pong") {
|
|
352
343
|
this.logger.debug("Pong received");
|
|
353
344
|
return;
|
|
@@ -613,20 +604,13 @@ var CONFIG_DEFAULTS = {
|
|
|
613
604
|
reconnectDelay: 1e3,
|
|
614
605
|
debug: false
|
|
615
606
|
};
|
|
616
|
-
var SESSION_DEFAULTS = {
|
|
617
|
-
voiceId: "en-US-Standard-A",
|
|
618
|
-
languageCode: "en-US",
|
|
619
|
-
inputFormat: "pcm16",
|
|
620
|
-
outputFormat: "pcm16",
|
|
621
|
-
sampleRate: 16e3
|
|
622
|
-
};
|
|
623
607
|
var LiveSpeechClient = class {
|
|
624
608
|
config;
|
|
625
609
|
connection;
|
|
626
610
|
audioEncoder;
|
|
627
611
|
logger;
|
|
628
612
|
sessionId = null;
|
|
629
|
-
|
|
613
|
+
isStreaming = false;
|
|
630
614
|
// Event listeners using a simple map
|
|
631
615
|
eventListeners = /* @__PURE__ */ new Map();
|
|
632
616
|
// Simplified handlers
|
|
@@ -692,6 +676,12 @@ var LiveSpeechClient = class {
|
|
|
692
676
|
get hasActiveSession() {
|
|
693
677
|
return this.sessionId !== null;
|
|
694
678
|
}
|
|
679
|
+
/**
|
|
680
|
+
* Check if audio streaming is active
|
|
681
|
+
*/
|
|
682
|
+
get isAudioStreaming() {
|
|
683
|
+
return this.isStreaming;
|
|
684
|
+
}
|
|
695
685
|
/**
|
|
696
686
|
* Connect to the server
|
|
697
687
|
*/
|
|
@@ -705,7 +695,7 @@ var LiveSpeechClient = class {
|
|
|
705
695
|
disconnect() {
|
|
706
696
|
this.logger.info("Disconnecting...");
|
|
707
697
|
this.sessionId = null;
|
|
708
|
-
this.
|
|
698
|
+
this.isStreaming = false;
|
|
709
699
|
this.connection.disconnect();
|
|
710
700
|
}
|
|
711
701
|
/**
|
|
@@ -718,16 +708,6 @@ var LiveSpeechClient = class {
|
|
|
718
708
|
if (this.sessionId) {
|
|
719
709
|
throw new Error("Session already active. Call endSession() first.");
|
|
720
710
|
}
|
|
721
|
-
const resolvedConfig = {
|
|
722
|
-
prePrompt: config.prePrompt,
|
|
723
|
-
voiceId: config.voiceId ?? SESSION_DEFAULTS.voiceId,
|
|
724
|
-
languageCode: config.languageCode ?? SESSION_DEFAULTS.languageCode,
|
|
725
|
-
inputFormat: config.inputFormat ?? SESSION_DEFAULTS.inputFormat,
|
|
726
|
-
outputFormat: config.outputFormat ?? SESSION_DEFAULTS.outputFormat,
|
|
727
|
-
sampleRate: config.sampleRate ?? SESSION_DEFAULTS.sampleRate,
|
|
728
|
-
metadata: config.metadata ?? {}
|
|
729
|
-
};
|
|
730
|
-
this.sessionConfig = resolvedConfig;
|
|
731
711
|
this.logger.info("Starting session...");
|
|
732
712
|
return new Promise((resolve, reject) => {
|
|
733
713
|
const onSessionStarted = (event) => {
|
|
@@ -744,16 +724,13 @@ var LiveSpeechClient = class {
|
|
|
744
724
|
};
|
|
745
725
|
this.on("sessionStarted", onSessionStarted);
|
|
746
726
|
this.on("error", onError);
|
|
747
|
-
|
|
748
|
-
action: "startSession"
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
sampleRate: resolvedConfig.sampleRate,
|
|
755
|
-
metadata: resolvedConfig.metadata
|
|
756
|
-
});
|
|
727
|
+
const startMessage = {
|
|
728
|
+
action: "startSession"
|
|
729
|
+
};
|
|
730
|
+
if (config?.prePrompt) {
|
|
731
|
+
startMessage.prePrompt = config.prePrompt;
|
|
732
|
+
}
|
|
733
|
+
this.connection.send(startMessage);
|
|
757
734
|
});
|
|
758
735
|
}
|
|
759
736
|
/**
|
|
@@ -765,6 +742,9 @@ var LiveSpeechClient = class {
|
|
|
765
742
|
return;
|
|
766
743
|
}
|
|
767
744
|
this.logger.info("Ending session...");
|
|
745
|
+
if (this.isStreaming) {
|
|
746
|
+
this.audioEnd();
|
|
747
|
+
}
|
|
768
748
|
return new Promise((resolve) => {
|
|
769
749
|
const onSessionEnded = () => {
|
|
770
750
|
this.off("sessionEnded", onSessionEnded);
|
|
@@ -775,28 +755,49 @@ var LiveSpeechClient = class {
|
|
|
775
755
|
});
|
|
776
756
|
}
|
|
777
757
|
/**
|
|
778
|
-
*
|
|
758
|
+
* Start audio streaming session
|
|
779
759
|
*/
|
|
780
|
-
|
|
760
|
+
audioStart() {
|
|
781
761
|
if (!this.isConnected) {
|
|
782
762
|
throw new Error("Not connected");
|
|
783
763
|
}
|
|
784
764
|
if (!this.sessionId) {
|
|
785
765
|
throw new Error("No active session. Call startSession() first.");
|
|
786
766
|
}
|
|
767
|
+
if (this.isStreaming) {
|
|
768
|
+
throw new Error("Already streaming. Call audioEnd() first.");
|
|
769
|
+
}
|
|
770
|
+
this.logger.info("Starting audio stream...");
|
|
771
|
+
this.connection.send({ action: "audioStart" });
|
|
772
|
+
this.isStreaming = true;
|
|
773
|
+
}
|
|
774
|
+
/**
|
|
775
|
+
* Send audio chunk (PCM16 base64 encoded)
|
|
776
|
+
*/
|
|
777
|
+
sendAudioChunk(data) {
|
|
778
|
+
if (!this.isConnected) {
|
|
779
|
+
throw new Error("Not connected");
|
|
780
|
+
}
|
|
781
|
+
if (!this.isStreaming) {
|
|
782
|
+
throw new Error("Not streaming. Call audioStart() first.");
|
|
783
|
+
}
|
|
787
784
|
const base64Data = this.audioEncoder.encode(data);
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
785
|
+
this.connection.send({
|
|
786
|
+
action: "audioChunk",
|
|
787
|
+
data: base64Data
|
|
788
|
+
});
|
|
789
|
+
}
|
|
790
|
+
/**
|
|
791
|
+
* End audio streaming session
|
|
792
|
+
*/
|
|
793
|
+
audioEnd() {
|
|
794
|
+
if (!this.isStreaming) {
|
|
795
|
+
this.logger.warn("Not streaming");
|
|
796
|
+
return;
|
|
798
797
|
}
|
|
799
|
-
this.
|
|
798
|
+
this.logger.info("Ending audio stream...");
|
|
799
|
+
this.connection.send({ action: "audioEnd" });
|
|
800
|
+
this.isStreaming = false;
|
|
800
801
|
}
|
|
801
802
|
// ==================== Event System ====================
|
|
802
803
|
/**
|
|
@@ -864,7 +865,7 @@ var LiveSpeechClient = class {
|
|
|
864
865
|
}
|
|
865
866
|
handleDisconnected(code, _reason) {
|
|
866
867
|
this.sessionId = null;
|
|
867
|
-
this.
|
|
868
|
+
this.isStreaming = false;
|
|
868
869
|
const event = {
|
|
869
870
|
type: "disconnected",
|
|
870
871
|
reason: code === 1e3 ? "normal" : "error",
|
|
@@ -906,13 +907,31 @@ var LiveSpeechClient = class {
|
|
|
906
907
|
break;
|
|
907
908
|
case "sessionEnded":
|
|
908
909
|
this.sessionId = null;
|
|
909
|
-
this.
|
|
910
|
+
this.isStreaming = false;
|
|
910
911
|
this.emit("sessionEnded", {
|
|
911
912
|
type: "sessionEnded",
|
|
912
913
|
sessionId: message.sessionId,
|
|
913
914
|
timestamp: message.timestamp
|
|
914
915
|
});
|
|
915
916
|
break;
|
|
917
|
+
case "streamingStarted":
|
|
918
|
+
this.emit("streamingStarted", {
|
|
919
|
+
type: "streamingStarted",
|
|
920
|
+
timestamp: message.timestamp
|
|
921
|
+
});
|
|
922
|
+
break;
|
|
923
|
+
case "speechStart":
|
|
924
|
+
this.emit("speechStart", {
|
|
925
|
+
type: "speechStart",
|
|
926
|
+
timestamp: message.timestamp
|
|
927
|
+
});
|
|
928
|
+
break;
|
|
929
|
+
case "speechEnd":
|
|
930
|
+
this.emit("speechEnd", {
|
|
931
|
+
type: "speechEnd",
|
|
932
|
+
timestamp: message.timestamp
|
|
933
|
+
});
|
|
934
|
+
break;
|
|
916
935
|
case "transcript": {
|
|
917
936
|
const transcriptEvent = {
|
|
918
937
|
type: "transcript",
|
|
@@ -952,7 +971,7 @@ var LiveSpeechClient = class {
|
|
|
952
971
|
break;
|
|
953
972
|
}
|
|
954
973
|
case "error":
|
|
955
|
-
this.handleError(message.code, message.message
|
|
974
|
+
this.handleError(message.code, message.message);
|
|
956
975
|
break;
|
|
957
976
|
default:
|
|
958
977
|
this.logger.warn("Unknown message type:", message.type);
|
package/dist/index.mjs
CHANGED
|
@@ -293,22 +293,13 @@ var WebSocketConnection = class {
|
|
|
293
293
|
/**
|
|
294
294
|
* Handle incoming message
|
|
295
295
|
*/
|
|
296
|
-
handleMessage(data
|
|
296
|
+
handleMessage(data) {
|
|
297
297
|
const message = parseServerMessage(data);
|
|
298
298
|
if (!message) {
|
|
299
299
|
this.logger.warn("Invalid message received:", data);
|
|
300
300
|
return;
|
|
301
301
|
}
|
|
302
302
|
this.logger.debug("Received message:", message.type);
|
|
303
|
-
if (message.type === "connected") {
|
|
304
|
-
this.connectionId = message.connectionId;
|
|
305
|
-
this.state = "connected";
|
|
306
|
-
this.retryController.reset();
|
|
307
|
-
this.startPingInterval();
|
|
308
|
-
this.events.onOpen?.(message.connectionId);
|
|
309
|
-
onFirstConnect?.();
|
|
310
|
-
return;
|
|
311
|
-
}
|
|
312
303
|
if (message.type === "pong") {
|
|
313
304
|
this.logger.debug("Pong received");
|
|
314
305
|
return;
|
|
@@ -574,20 +565,13 @@ var CONFIG_DEFAULTS = {
|
|
|
574
565
|
reconnectDelay: 1e3,
|
|
575
566
|
debug: false
|
|
576
567
|
};
|
|
577
|
-
var SESSION_DEFAULTS = {
|
|
578
|
-
voiceId: "en-US-Standard-A",
|
|
579
|
-
languageCode: "en-US",
|
|
580
|
-
inputFormat: "pcm16",
|
|
581
|
-
outputFormat: "pcm16",
|
|
582
|
-
sampleRate: 16e3
|
|
583
|
-
};
|
|
584
568
|
var LiveSpeechClient = class {
|
|
585
569
|
config;
|
|
586
570
|
connection;
|
|
587
571
|
audioEncoder;
|
|
588
572
|
logger;
|
|
589
573
|
sessionId = null;
|
|
590
|
-
|
|
574
|
+
isStreaming = false;
|
|
591
575
|
// Event listeners using a simple map
|
|
592
576
|
eventListeners = /* @__PURE__ */ new Map();
|
|
593
577
|
// Simplified handlers
|
|
@@ -653,6 +637,12 @@ var LiveSpeechClient = class {
|
|
|
653
637
|
get hasActiveSession() {
|
|
654
638
|
return this.sessionId !== null;
|
|
655
639
|
}
|
|
640
|
+
/**
|
|
641
|
+
* Check if audio streaming is active
|
|
642
|
+
*/
|
|
643
|
+
get isAudioStreaming() {
|
|
644
|
+
return this.isStreaming;
|
|
645
|
+
}
|
|
656
646
|
/**
|
|
657
647
|
* Connect to the server
|
|
658
648
|
*/
|
|
@@ -666,7 +656,7 @@ var LiveSpeechClient = class {
|
|
|
666
656
|
disconnect() {
|
|
667
657
|
this.logger.info("Disconnecting...");
|
|
668
658
|
this.sessionId = null;
|
|
669
|
-
this.
|
|
659
|
+
this.isStreaming = false;
|
|
670
660
|
this.connection.disconnect();
|
|
671
661
|
}
|
|
672
662
|
/**
|
|
@@ -679,16 +669,6 @@ var LiveSpeechClient = class {
|
|
|
679
669
|
if (this.sessionId) {
|
|
680
670
|
throw new Error("Session already active. Call endSession() first.");
|
|
681
671
|
}
|
|
682
|
-
const resolvedConfig = {
|
|
683
|
-
prePrompt: config.prePrompt,
|
|
684
|
-
voiceId: config.voiceId ?? SESSION_DEFAULTS.voiceId,
|
|
685
|
-
languageCode: config.languageCode ?? SESSION_DEFAULTS.languageCode,
|
|
686
|
-
inputFormat: config.inputFormat ?? SESSION_DEFAULTS.inputFormat,
|
|
687
|
-
outputFormat: config.outputFormat ?? SESSION_DEFAULTS.outputFormat,
|
|
688
|
-
sampleRate: config.sampleRate ?? SESSION_DEFAULTS.sampleRate,
|
|
689
|
-
metadata: config.metadata ?? {}
|
|
690
|
-
};
|
|
691
|
-
this.sessionConfig = resolvedConfig;
|
|
692
672
|
this.logger.info("Starting session...");
|
|
693
673
|
return new Promise((resolve, reject) => {
|
|
694
674
|
const onSessionStarted = (event) => {
|
|
@@ -705,16 +685,13 @@ var LiveSpeechClient = class {
|
|
|
705
685
|
};
|
|
706
686
|
this.on("sessionStarted", onSessionStarted);
|
|
707
687
|
this.on("error", onError);
|
|
708
|
-
|
|
709
|
-
action: "startSession"
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
sampleRate: resolvedConfig.sampleRate,
|
|
716
|
-
metadata: resolvedConfig.metadata
|
|
717
|
-
});
|
|
688
|
+
const startMessage = {
|
|
689
|
+
action: "startSession"
|
|
690
|
+
};
|
|
691
|
+
if (config?.prePrompt) {
|
|
692
|
+
startMessage.prePrompt = config.prePrompt;
|
|
693
|
+
}
|
|
694
|
+
this.connection.send(startMessage);
|
|
718
695
|
});
|
|
719
696
|
}
|
|
720
697
|
/**
|
|
@@ -726,6 +703,9 @@ var LiveSpeechClient = class {
|
|
|
726
703
|
return;
|
|
727
704
|
}
|
|
728
705
|
this.logger.info("Ending session...");
|
|
706
|
+
if (this.isStreaming) {
|
|
707
|
+
this.audioEnd();
|
|
708
|
+
}
|
|
729
709
|
return new Promise((resolve) => {
|
|
730
710
|
const onSessionEnded = () => {
|
|
731
711
|
this.off("sessionEnded", onSessionEnded);
|
|
@@ -736,28 +716,49 @@ var LiveSpeechClient = class {
|
|
|
736
716
|
});
|
|
737
717
|
}
|
|
738
718
|
/**
|
|
739
|
-
*
|
|
719
|
+
* Start audio streaming session
|
|
740
720
|
*/
|
|
741
|
-
|
|
721
|
+
audioStart() {
|
|
742
722
|
if (!this.isConnected) {
|
|
743
723
|
throw new Error("Not connected");
|
|
744
724
|
}
|
|
745
725
|
if (!this.sessionId) {
|
|
746
726
|
throw new Error("No active session. Call startSession() first.");
|
|
747
727
|
}
|
|
728
|
+
if (this.isStreaming) {
|
|
729
|
+
throw new Error("Already streaming. Call audioEnd() first.");
|
|
730
|
+
}
|
|
731
|
+
this.logger.info("Starting audio stream...");
|
|
732
|
+
this.connection.send({ action: "audioStart" });
|
|
733
|
+
this.isStreaming = true;
|
|
734
|
+
}
|
|
735
|
+
/**
|
|
736
|
+
* Send audio chunk (PCM16 base64 encoded)
|
|
737
|
+
*/
|
|
738
|
+
sendAudioChunk(data) {
|
|
739
|
+
if (!this.isConnected) {
|
|
740
|
+
throw new Error("Not connected");
|
|
741
|
+
}
|
|
742
|
+
if (!this.isStreaming) {
|
|
743
|
+
throw new Error("Not streaming. Call audioStart() first.");
|
|
744
|
+
}
|
|
748
745
|
const base64Data = this.audioEncoder.encode(data);
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
746
|
+
this.connection.send({
|
|
747
|
+
action: "audioChunk",
|
|
748
|
+
data: base64Data
|
|
749
|
+
});
|
|
750
|
+
}
|
|
751
|
+
/**
|
|
752
|
+
* End audio streaming session
|
|
753
|
+
*/
|
|
754
|
+
audioEnd() {
|
|
755
|
+
if (!this.isStreaming) {
|
|
756
|
+
this.logger.warn("Not streaming");
|
|
757
|
+
return;
|
|
759
758
|
}
|
|
760
|
-
this.
|
|
759
|
+
this.logger.info("Ending audio stream...");
|
|
760
|
+
this.connection.send({ action: "audioEnd" });
|
|
761
|
+
this.isStreaming = false;
|
|
761
762
|
}
|
|
762
763
|
// ==================== Event System ====================
|
|
763
764
|
/**
|
|
@@ -825,7 +826,7 @@ var LiveSpeechClient = class {
|
|
|
825
826
|
}
|
|
826
827
|
handleDisconnected(code, _reason) {
|
|
827
828
|
this.sessionId = null;
|
|
828
|
-
this.
|
|
829
|
+
this.isStreaming = false;
|
|
829
830
|
const event = {
|
|
830
831
|
type: "disconnected",
|
|
831
832
|
reason: code === 1e3 ? "normal" : "error",
|
|
@@ -867,13 +868,31 @@ var LiveSpeechClient = class {
|
|
|
867
868
|
break;
|
|
868
869
|
case "sessionEnded":
|
|
869
870
|
this.sessionId = null;
|
|
870
|
-
this.
|
|
871
|
+
this.isStreaming = false;
|
|
871
872
|
this.emit("sessionEnded", {
|
|
872
873
|
type: "sessionEnded",
|
|
873
874
|
sessionId: message.sessionId,
|
|
874
875
|
timestamp: message.timestamp
|
|
875
876
|
});
|
|
876
877
|
break;
|
|
878
|
+
case "streamingStarted":
|
|
879
|
+
this.emit("streamingStarted", {
|
|
880
|
+
type: "streamingStarted",
|
|
881
|
+
timestamp: message.timestamp
|
|
882
|
+
});
|
|
883
|
+
break;
|
|
884
|
+
case "speechStart":
|
|
885
|
+
this.emit("speechStart", {
|
|
886
|
+
type: "speechStart",
|
|
887
|
+
timestamp: message.timestamp
|
|
888
|
+
});
|
|
889
|
+
break;
|
|
890
|
+
case "speechEnd":
|
|
891
|
+
this.emit("speechEnd", {
|
|
892
|
+
type: "speechEnd",
|
|
893
|
+
timestamp: message.timestamp
|
|
894
|
+
});
|
|
895
|
+
break;
|
|
877
896
|
case "transcript": {
|
|
878
897
|
const transcriptEvent = {
|
|
879
898
|
type: "transcript",
|
|
@@ -913,7 +932,7 @@ var LiveSpeechClient = class {
|
|
|
913
932
|
break;
|
|
914
933
|
}
|
|
915
934
|
case "error":
|
|
916
|
-
this.handleError(message.code, message.message
|
|
935
|
+
this.handleError(message.code, message.message);
|
|
917
936
|
break;
|
|
918
937
|
default:
|
|
919
938
|
this.logger.warn("Unknown message type:", message.type);
|