@drawdream/livespeech 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,555 @@
+ /**
+  * Available regions for LiveSpeech SDK
+  */
+ declare const Region: {
+     /** Asia Pacific (Seoul) */
+     readonly AP_NORTHEAST_2: "ap-northeast-2";
+     /** US West (Oregon) - Coming soon */
+     readonly US_WEST_2: "us-west-2";
+ };
+ type Region = (typeof Region)[keyof typeof Region];
+ /**
+  * Get the WebSocket endpoint URL for a given region
+  * @param region The region identifier
+  * @returns The WebSocket endpoint URL
+  */
+ declare function getEndpointForRegion(region: Region): string;
+ /**
+  * Check if a string is a valid region
+  * @param value The string to check
+  * @returns True if the string is a valid region
+  */
+ declare function isValidRegion(value: string): value is Region;
+
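A minimal sketch of how the region helpers above can be combined when validating caller-supplied configuration; the function name and input source are illustrative, not part of the package:

    import { isValidRegion, getEndpointForRegion } from '@drawdream/livespeech';

    // `rawRegion` stands in for a value read from user or environment configuration.
    function resolveEndpoint(rawRegion: string): string {
        if (!isValidRegion(rawRegion)) {
            throw new Error(`Unsupported region: ${rawRegion}`);
        }
        // The type guard narrows `rawRegion` to Region, so this call type-checks.
        return getEndpointForRegion(rawRegion);
    }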
+ /**
+  * Configuration options for the LiveSpeech client
+  *
+  * @example
+  * const client = new LiveSpeechClient({
+  *     region: 'ap-northeast-2',
+  *     apiKey: 'your-api-key',
+  * });
+  */
+ interface LiveSpeechConfig {
+     /**
+      * Region for the LiveSpeech service
+      * @example 'ap-northeast-2'
+      */
+     region: Region;
+     /**
+      * API key for authentication
+      */
+     apiKey: string;
+     /**
+      * Connection timeout in milliseconds
+      * @default 30000
+      */
+     connectionTimeout?: number;
+     /**
+      * Enable automatic reconnection on disconnect
+      * @default true
+      */
+     autoReconnect?: boolean;
+     /**
+      * Maximum number of reconnection attempts
+      * @default 5
+      */
+     maxReconnectAttempts?: number;
+     /**
+      * Base delay between reconnection attempts in milliseconds
+      * Uses exponential backoff
+      * @default 1000
+      */
+     reconnectDelay?: number;
+     /**
+      * Enable debug logging
+      * @default false
+      */
+     debug?: boolean;
+ }
+ /**
+  * Session configuration options
+  */
+ interface SessionConfig {
+     /**
+      * System prompt for the AI assistant
+      */
+     prePrompt: string;
+     /**
+      * Voice ID for text-to-speech output
+      * @default 'en-US-Standard-A'
+      */
+     voiceId?: string;
+     /**
+      * Language code for speech recognition
+      * @default 'en-US'
+      */
+     languageCode?: string;
+     /**
+      * Audio encoding format for input
+      * @default 'pcm16'
+      */
+     inputFormat?: AudioFormat;
+     /**
+      * Audio encoding format for output
+      * @default 'pcm16'
+      */
+     outputFormat?: AudioFormat;
+     /**
+      * Sample rate for audio in Hz
+      * @default 16000
+      */
+     sampleRate?: number;
+     /**
+      * Custom metadata to attach to the session
+      */
+     metadata?: Record<string, string>;
+ }
+ /**
+  * Supported audio formats
+  */
+ type AudioFormat = 'pcm16' | 'opus' | 'wav';
+ /**
+  * Internal resolved configuration with defaults applied
+  */
+ interface ResolvedConfig {
+     endpoint: string;
+     apiKey: string;
+     connectionTimeout: number;
+     autoReconnect: boolean;
+     maxReconnectAttempts: number;
+     reconnectDelay: number;
+     debug: boolean;
+ }
+
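An illustrative SessionConfig that relies on the documented defaults above; the prompt text and metadata keys are placeholders:

    import type { SessionConfig } from '@drawdream/livespeech';

    const session: SessionConfig = {
        // Only prePrompt is required; voiceId, languageCode, the audio formats
        // and sampleRate fall back to the documented defaults.
        prePrompt: 'You are a concise, friendly voice assistant.',
        metadata: { conversationId: 'demo-1' },
    };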
+ /**
+  * Event types emitted by the LiveSpeech client
+  */
+ type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | 'transcript' | 'response' | 'audio' | 'error' | 'reconnecting';
+ /**
+  * Event payload for 'connected' event
+  */
+ interface ConnectedEvent {
+     type: 'connected';
+     connectionId: string;
+     timestamp: string;
+ }
+ /**
+  * Event payload for 'disconnected' event
+  */
+ interface DisconnectedEvent {
+     type: 'disconnected';
+     reason: DisconnectReason;
+     code?: number;
+     timestamp: string;
+ }
+ /**
+  * Disconnect reason codes
+  */
+ type DisconnectReason = 'normal' | 'error' | 'timeout' | 'server_close' | 'reconnect_failed';
+ /**
+  * Event payload for 'sessionStarted' event
+  */
+ interface SessionStartedEvent {
+     type: 'sessionStarted';
+     sessionId: string;
+     timestamp: string;
+ }
+ /**
+  * Event payload for 'sessionEnded' event
+  */
+ interface SessionEndedEvent {
+     type: 'sessionEnded';
+     sessionId: string;
+     timestamp: string;
+ }
+ /**
+  * Event payload for 'transcript' event
+  */
+ interface TranscriptEvent {
+     type: 'transcript';
+     text: string;
+     isFinal: boolean;
+     confidence?: number;
+     timestamp: string;
+ }
+ /**
+  * Event payload for 'response' event
+  */
+ interface ResponseEvent {
+     type: 'response';
+     text: string;
+     isFinal: boolean;
+     timestamp: string;
+ }
+ /**
+  * Event payload for 'audio' event
+  */
+ interface AudioEvent {
+     type: 'audio';
+     data: Uint8Array;
+     format: string;
+     sampleRate: number;
+     timestamp: string;
+ }
+ /**
+  * Event payload for 'error' event
+  */
+ interface ErrorEvent {
+     type: 'error';
+     code: ErrorCode;
+     message: string;
+     details?: unknown;
+     timestamp: string;
+ }
+ /**
+  * Error codes
+  */
+ type ErrorCode = 'connection_failed' | 'connection_timeout' | 'authentication_failed' | 'session_error' | 'audio_error' | 'stt_error' | 'llm_error' | 'tts_error' | 'rate_limit' | 'internal_error' | 'invalid_message';
+ /**
+  * Event payload for 'reconnecting' event
+  */
+ interface ReconnectingEvent {
+     type: 'reconnecting';
+     attempt: number;
+     maxAttempts: number;
+     delay: number;
+     timestamp: string;
+ }
+ /**
+  * Union type of all event payloads
+  */
+ type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent | TranscriptEvent | ResponseEvent | AudioEvent | ErrorEvent | ReconnectingEvent;
+ /**
+  * Simplified event handlers for common use cases
+  */
+ type TranscriptHandler = (text: string, isFinal: boolean) => void;
+ type ResponseHandler = (text: string, isFinal: boolean) => void;
+ type AudioHandler = (data: Uint8Array) => void;
+ type ErrorHandler = (error: ErrorEvent) => void;
+
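Because every payload carries a literal `type` field, the LiveSpeechEvent union narrows in an ordinary switch. A small illustrative dispatcher (the logging is just an example):

    import type { LiveSpeechEvent } from '@drawdream/livespeech';

    function logEvent(event: LiveSpeechEvent): void {
        switch (event.type) {
            case 'transcript':
                // Interim results arrive with isFinal === false.
                console.log(`[user] ${event.text}${event.isFinal ? '' : ' …'}`);
                break;
            case 'audio':
                console.log(`received ${event.data.byteLength} bytes of ${event.format} audio`);
                break;
            case 'error':
                console.error(`${event.code}: ${event.message}`);
                break;
        }
    }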
+ /**
+  * WebSocket message types sent from client to server
+  */
+ type ClientMessageType = 'startSession' | 'endSession' | 'audio' | 'ping';
+ /**
+  * WebSocket message types received from server
+  */
+ type ServerMessageType = 'connected' | 'sessionStarted' | 'sessionEnded' | 'transcript' | 'response' | 'audio' | 'error' | 'pong';
+ /**
+  * Base interface for client messages
+  */
+ interface BaseClientMessage {
+     action: ClientMessageType;
+     requestId?: string;
+ }
+ /**
+  * Start session message
+  */
+ interface StartSessionMessage extends BaseClientMessage {
+     action: 'startSession';
+     prePrompt: string;
+     voiceId?: string;
+     languageCode?: string;
+     inputFormat?: string;
+     outputFormat?: string;
+     sampleRate?: number;
+     metadata?: Record<string, string>;
+ }
+ /**
+  * End session message
+  */
+ interface EndSessionMessage extends BaseClientMessage {
+     action: 'endSession';
+ }
+ /**
+  * Audio data message
+  */
+ interface AudioMessage extends BaseClientMessage {
+     action: 'audio';
+     data: string;
+     format?: string;
+     sampleRate?: number;
+     isFinal?: boolean;
+ }
+ /**
+  * Ping message for keep-alive
+  */
+ interface PingMessage extends BaseClientMessage {
+     action: 'ping';
+ }
+ /**
+  * Union type of all client messages
+  */
+ type ClientMessage = StartSessionMessage | EndSessionMessage | AudioMessage | PingMessage;
+ /**
+  * Base interface for server messages
+  */
+ interface BaseServerMessage {
+     type: ServerMessageType;
+     requestId?: string;
+     timestamp: string;
+ }
+ /**
+  * Connected message from server
+  */
+ interface ServerConnectedMessage extends BaseServerMessage {
+     type: 'connected';
+     connectionId: string;
+ }
+ /**
+  * Session started message from server
+  */
+ interface ServerSessionStartedMessage extends BaseServerMessage {
+     type: 'sessionStarted';
+     sessionId: string;
+ }
+ /**
+  * Session ended message from server
+  */
+ interface ServerSessionEndedMessage extends BaseServerMessage {
+     type: 'sessionEnded';
+     sessionId: string;
+ }
+ /**
+  * Transcript message from server
+  */
+ interface ServerTranscriptMessage extends BaseServerMessage {
+     type: 'transcript';
+     text: string;
+     isFinal: boolean;
+     confidence?: number;
+ }
+ /**
+  * Response message from server
+  */
+ interface ServerResponseMessage extends BaseServerMessage {
+     type: 'response';
+     text: string;
+     isFinal: boolean;
+ }
+ /**
+  * Audio message from server
+  */
+ interface ServerAudioMessage extends BaseServerMessage {
+     type: 'audio';
+     data: string;
+     format: string;
+     sampleRate: number;
+ }
+ /**
+  * Error message from server
+  */
+ interface ServerErrorMessage extends BaseServerMessage {
+     type: 'error';
+     code: string;
+     message: string;
+     details?: unknown;
+ }
+ /**
+  * Pong message from server
+  */
+ interface ServerPongMessage extends BaseServerMessage {
+     type: 'pong';
+ }
+ /**
+  * Union type of all server messages
+  */
+ type ServerMessage = ServerConnectedMessage | ServerSessionStartedMessage | ServerSessionEndedMessage | ServerTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerErrorMessage | ServerPongMessage;
+
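These interfaces describe the frames exchanged over the WebSocket, which LiveSpeechClient normally builds internally. A sketch of a startSession frame, assuming the frames are JSON-serialized; all field values here are placeholders:

    import type { StartSessionMessage } from '@drawdream/livespeech';

    const startFrame: StartSessionMessage = {
        action: 'startSession',
        requestId: 'req-001',          // optional correlation id
        prePrompt: 'You are a helpful voice agent.',
        voiceId: 'en-US-Standard-A',
        languageCode: 'en-US',
        inputFormat: 'pcm16',
        outputFormat: 'pcm16',
        sampleRate: 16000,
    };
    // e.g. webSocket.send(JSON.stringify(startFrame)); — handled by the client in normal use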
+ /**
+  * Connection state
+  */
+ type ConnectionState = 'disconnected' | 'connecting' | 'connected' | 'reconnecting';
+
+ /**
+  * Event types for the client
+  */
+ type LiveSpeechEventMap = {
+     connected: ConnectedEvent;
+     disconnected: DisconnectedEvent;
+     sessionStarted: SessionStartedEvent;
+     sessionEnded: SessionEndedEvent;
+     transcript: TranscriptEvent;
+     response: ResponseEvent;
+     audio: AudioEvent;
+     error: ErrorEvent;
+     reconnecting: ReconnectingEvent;
+ };
+ /**
+  * LiveSpeech client for real-time speech-to-speech AI conversations
+  */
+ declare class LiveSpeechClient {
+     private readonly config;
+     private readonly connection;
+     private readonly audioEncoder;
+     private readonly logger;
+     private sessionId;
+     private sessionConfig;
+     private readonly eventListeners;
+     private transcriptHandler;
+     private responseHandler;
+     private audioHandler;
+     private errorHandler;
+     constructor(config: LiveSpeechConfig);
+     /**
+      * Get current connection state
+      */
+     get connectionState(): ConnectionState;
+     /**
+      * Get connection ID
+      */
+     get connectionId(): string | null;
+     /**
+      * Get current session ID
+      */
+     get currentSessionId(): string | null;
+     /**
+      * Check if connected
+      */
+     get isConnected(): boolean;
+     /**
+      * Check if session is active
+      */
+     get hasActiveSession(): boolean;
+     /**
+      * Connect to the server
+      */
+     connect(): Promise<void>;
+     /**
+      * Disconnect from the server
+      */
+     disconnect(): void;
+     /**
+      * Start a new session
+      */
+     startSession(config: SessionConfig): Promise<string>;
+     /**
+      * End the current session
+      */
+     endSession(): Promise<void>;
+     /**
+      * Send audio data
+      */
+     sendAudio(data: Uint8Array, options?: {
+         format?: AudioFormat;
+         isFinal?: boolean;
+     }): void;
+     /**
+      * Add event listener
+      */
+     on<K extends keyof LiveSpeechEventMap>(event: K, listener: (event: LiveSpeechEventMap[K]) => void): void;
+     /**
+      * Remove event listener
+      */
+     off<K extends keyof LiveSpeechEventMap>(event: K, listener: (event: LiveSpeechEventMap[K]) => void): void;
+     /**
+      * Set transcript handler (simplified)
+      */
+     setTranscriptHandler(handler: TranscriptHandler): void;
+     /**
+      * Set response handler (simplified)
+      */
+     setResponseHandler(handler: ResponseHandler): void;
+     /**
+      * Set audio handler (simplified)
+      */
+     setAudioHandler(handler: AudioHandler): void;
+     /**
+      * Set error handler (simplified)
+      */
+     setErrorHandler(handler: ErrorHandler): void;
+     private emit;
+     private handleConnected;
+     private handleDisconnected;
+     private handleReconnecting;
+     private handleError;
+     private handleMessage;
+ }
+
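A hedged end-to-end sketch of the client surface declared above; the API key source, prompt text, and the microphone capture feeding `pcmChunk` are assumptions outside the package:

    import { LiveSpeechClient } from '@drawdream/livespeech';

    async function runConversation(apiKey: string, pcmChunk: Uint8Array): Promise<void> {
        const client = new LiveSpeechClient({
            region: 'ap-northeast-2',
            apiKey,                              // caller-supplied credential
        });

        client.on('reconnecting', (e) => console.log(`reconnect ${e.attempt}/${e.maxAttempts}`));
        client.setTranscriptHandler((text, isFinal) => { if (isFinal) console.log('user:', text); });
        client.setResponseHandler((text, isFinal) => { if (isFinal) console.log('assistant:', text); });
        client.setErrorHandler((err) => console.error(err.code, err.message));

        await client.connect();
        const sessionId = await client.startSession({ prePrompt: 'You are a helpful assistant.' });
        console.log('session started:', sessionId);

        // Stream captured PCM16 audio; in a real app this runs once per microphone chunk.
        client.sendAudio(pcmChunk, { format: 'pcm16' });

        await client.endSession();
        client.disconnect();
    }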
+ /**
+  * Audio encoder options
+  */
+ interface AudioEncoderOptions {
+     format: AudioFormat;
+     sampleRate: number;
+     channels?: number;
+     bitDepth?: number;
+ }
+ /**
+  * Encode audio data to base64 for transmission
+  * Works in both Node.js and browser environments
+  */
+ declare function encodeAudioToBase64(data: Uint8Array): string;
+ /**
+  * Decode base64 audio data
+  * Works in both Node.js and browser environments
+  */
+ declare function decodeBase64ToAudio(base64: string): Uint8Array;
+ /**
+  * Convert Float32Array audio samples to Int16 PCM
+  */
+ declare function float32ToInt16(float32Array: Float32Array): Int16Array;
+ /**
+  * Convert Int16 PCM to Float32Array audio samples
+  */
+ declare function int16ToFloat32(int16Array: Int16Array): Float32Array;
+ /**
+  * Convert Int16Array to Uint8Array (little-endian)
+  */
+ declare function int16ToUint8(int16Array: Int16Array): Uint8Array;
+ /**
+  * Convert Uint8Array to Int16Array (little-endian)
+  */
+ declare function uint8ToInt16(uint8Array: Uint8Array): Int16Array;
+ /**
+  * Create a WAV header for PCM audio data
+  */
+ declare function createWavHeader(dataLength: number, sampleRate: number, channels: number, bitDepth: number): Uint8Array;
+ /**
+  * Wrap PCM data in a WAV container
+  */
+ declare function wrapPcmInWav(pcmData: Uint8Array, options?: Partial<AudioEncoderOptions>): Uint8Array;
+ /**
+  * Extract PCM data from a WAV file
+  */
+ declare function extractPcmFromWav(wavData: Uint8Array): {
+     pcmData: Uint8Array;
+     sampleRate: number;
+     channels: number;
+     bitDepth: number;
+ };
+ /**
+  * Audio encoder class for managing audio format conversions
+  */
+ declare class AudioEncoder {
+     private readonly options;
+     constructor(options?: Partial<AudioEncoderOptions>);
+     /**
+      * Get current options
+      */
+     get format(): AudioFormat;
+     get sampleRate(): number;
+     /**
+      * Encode audio data for transmission
+      */
+     encode(data: Uint8Array): string;
+     /**
+      * Decode received audio data
+      */
+     decode(base64: string): Uint8Array;
+     /**
+      * Convert Float32 samples to transmission-ready format
+      */
+     fromFloat32(samples: Float32Array): Uint8Array;
+     /**
+      * Convert received data to Float32 samples
+      */
+     toFloat32(data: Uint8Array): Float32Array;
+     /**
+      * Wrap data in WAV format if needed
+      */
+     wrapWav(data: Uint8Array): Uint8Array;
+ }
+
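A short sketch of how the PCM helpers above fit together, assuming capture yields Float32 samples at 16 kHz mono (the sample buffer here is a stand-in for real microphone data):

    import { float32ToInt16, int16ToUint8, wrapPcmInWav } from '@drawdream/livespeech';

    // `samples` stands in for a chunk from getUserMedia / an AudioWorklet.
    const samples = new Float32Array(16000);                 // one second of silence at 16 kHz
    const pcmBytes = int16ToUint8(float32ToInt16(samples));  // PCM16, little-endian bytes
    // client.sendAudio(pcmBytes, { format: 'pcm16' });

    // The same PCM can also be wrapped in a WAV container, e.g. to save a debug recording.
    const wav = wrapPcmInWav(pcmBytes, { sampleRate: 16000, channels: 1, bitDepth: 16 });
    console.log('WAV size:', wav.byteLength);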
+ export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioFormat, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TranscriptEvent, type TranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };