@drawdream/livespeech 0.1.1 → 0.1.3

package/README.md CHANGED
@@ -5,6 +5,15 @@

  A TypeScript/JavaScript SDK for real-time speech-to-speech AI conversations.

+ ## Features
+
+ - 🎙️ **Real-time Voice Conversations** - Natural, low-latency voice interactions
+ - 🌐 **Multi-language Support** - Korean, English, Japanese, Chinese, and more
+ - 🔊 **Streaming Audio** - Send and receive audio in real-time
+ - 📝 **Live Transcription** - Get transcriptions of both user and AI speech
+ - 🔄 **Auto-reconnection** - Automatic recovery from network issues
+ - 🌐 **Browser & Node.js** - Works in both environments
+
  ## Installation

  ```bash
@@ -18,137 +27,306 @@ pnpm add @drawdream/livespeech
  ## Quick Start

  ```typescript
- import { LiveSpeechClient, Region } from '@drawdream/livespeech';
+ import { LiveSpeechClient } from '@drawdream/livespeech';

  const client = new LiveSpeechClient({
- region: 'ap-northeast-2', // or Region.AP_NORTHEAST_2
+ region: 'ap-northeast-2',
  apiKey: 'your-api-key',
  });

- // Handle events
- client.setTranscriptHandler((text, isFinal) => {
- console.log(`Transcript: ${text} (final: ${isFinal})`);
+ // Set up event handlers
+ client.setUserTranscriptHandler((text) => {
+ console.log('You:', text);
  });

  client.setResponseHandler((text, isFinal) => {
- console.log(`AI Response: ${text}`);
+ console.log('AI:', text);
  });

  client.setAudioHandler((audioData) => {
- // Play audio through speakers
+ playAudio(audioData); // PCM16 @ 24kHz
+ });
+
+ client.setErrorHandler((error) => {
+ console.error('Error:', error.message);
  });

- // Connect and start session
+ // Connect and start conversation
  await client.connect();
  await client.startSession({
  prePrompt: 'You are a helpful assistant.',
+ language: 'ko-KR',
  });

- // Send audio
- client.sendAudio(audioBuffer);
+ // Stream audio
+ client.audioStart();
+ client.sendAudioChunk(pcmData); // PCM16 @ 16kHz
+ client.audioEnd();
+
+ // Cleanup
+ await client.endSession();
+ client.disconnect();
  ```

- ## API Reference
+ ## Audio Flow

- ### Regions
+ ```
+ connect() → startSession() → audioStart() → sendAudioChunk()* → audioEnd() → endSession()
+ ```

- The SDK provides built-in region support, so you don't need to remember endpoint URLs:
+ | Step | Description |
+ |------|-------------|
+ | `connect()` | Establish WebSocket connection |
+ | `startSession(config)` | Start conversation with optional system prompt |
+ | `audioStart()` | Begin audio streaming |
+ | `sendAudioChunk(data)` | Send PCM16 audio (call multiple times) |
+ | `audioEnd()` | End streaming, triggers AI response |
+ | `endSession()` | End conversation |
+ | `disconnect()` | Close connection |
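Put together, the steps in this table translate almost line-for-line into code. A minimal sketch of one full turn follows; it is not taken from the package README, assumes a prepared `pcm16Audio` buffer of 16 kHz mono PCM16, and uses only the documented client methods and the `turnComplete` event.

```typescript
import { LiveSpeechClient } from '@drawdream/livespeech';

// Sketch: drive one conversation turn with a pre-recorded PCM16 buffer.
// `pcm16Audio` is assumed to be 16 kHz mono PCM16 (see Audio Format below).
async function runTurn(client: LiveSpeechClient, pcm16Audio: Uint8Array): Promise<void> {
  await client.connect();
  await client.startSession({ prePrompt: 'You are a helpful assistant.', language: 'en-US' });

  // Resolve once the AI finishes its response turn.
  const turnDone = new Promise<void>((resolve) => {
    client.on('turnComplete', () => resolve());
  });

  client.audioStart();
  const CHUNK_BYTES = 3200; // ~100 ms of 16 kHz mono PCM16 per chunk
  for (let offset = 0; offset < pcm16Audio.length; offset += CHUNK_BYTES) {
    client.sendAudioChunk(pcm16Audio.subarray(offset, offset + CHUNK_BYTES));
  }
  client.audioEnd(); // signals end of input, which triggers the AI response

  await turnDone;
  await client.endSession();
  client.disconnect();
}
```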
 
- | Region | Identifier | Location |
- |--------|------------|----------|
- | `ap-northeast-2` | `Region.AP_NORTHEAST_2` | Asia Pacific (Seoul) |
- | `us-west-2` | `Region.US_WEST_2` | US West (Oregon) - Coming soon |
+ ## Configuration
+
+ ```typescript
+ const client = new LiveSpeechClient({
+ region: 'ap-northeast-2', // Required: Seoul region
+ apiKey: 'your-api-key', // Required: Your API key
+ autoReconnect: true, // Auto-reconnect on disconnect
+ maxReconnectAttempts: 5, // Maximum reconnection attempts
+ debug: false, // Enable debug logging
+ });

- ### LiveSpeechClient
+ await client.startSession({
+ prePrompt: 'You are a helpful assistant.',
+ language: 'ko-KR', // Language: ko-KR, en-US, ja-JP, etc.
+ });
+ ```

- #### Constructor Options
+ ## Events

- | Option | Type | Default | Description |
- |--------|------|---------|-------------|
- | `region` | `string` | **required** | Region identifier |
- | `apiKey` | `string` | **required** | API key for authentication |
- | `connectionTimeout` | `number` | `30000` | Connection timeout in ms |
- | `autoReconnect` | `boolean` | `true` | Auto-reconnect on disconnect |
- | `maxReconnectAttempts` | `number` | `5` | Max reconnection attempts |
- | `reconnectDelay` | `number` | `1000` | Base reconnection delay in ms |
- | `debug` | `boolean` | `false` | Enable debug logging |
+ | Event | Description | Key Properties |
+ |-------|-------------|----------------|
+ | `connected` | Connection established | `connectionId` |
+ | `disconnected` | Connection closed | `reason`, `code` |
+ | `sessionStarted` | Session created | `sessionId` |
+ | `ready` | Ready for audio input | `timestamp` |
+ | `userTranscript` | Your speech transcribed | `text` |
+ | `response` | AI's response text | `text`, `isFinal` |
+ | `audio` | AI's audio output | `data`, `sampleRate` |
+ | `turnComplete` | AI finished speaking | `timestamp` |
+ | `error` | Error occurred | `code`, `message` |

- #### Methods
+ ### Simple Handlers

- | Method | Description |
- |--------|-------------|
- | `connect()` | Connect to the server |
- | `disconnect()` | Disconnect from the server |
- | `startSession(config)` | Start a conversation session |
- | `endSession()` | End the current session |
- | `sendAudio(data, options?)` | Send audio data to be transcribed |
+ ```typescript
+ // Your speech transcription
+ client.setUserTranscriptHandler((text) => {
+ console.log('You said:', text);
+ });

- #### Event Handlers
+ // AI's text response
+ client.setResponseHandler((text, isFinal) => {
+ console.log('AI:', text, isFinal ? '(done)' : '...');
+ });
+
+ // AI's audio output
+ client.setAudioHandler((data: Uint8Array) => {
+ // data: PCM16 audio
+ // Sample rate: 24000 Hz
+ playAudio(data);
+ });
+
+ // Error handling
+ client.setErrorHandler((error) => {
+ console.error(`Error [${error.code}]: ${error.message}`);
+ });
+ ```
+
+ ### Full Event API

  ```typescript
- // Simple handlers
- client.setTranscriptHandler((text, isFinal) => {});
- client.setResponseHandler((text, isFinal) => {});
- client.setAudioHandler((audioData) => {});
- client.setErrorHandler((error) => {});
-
- // Full event API
- client.on('connected', (event) => {});
- client.on('disconnected', (event) => {});
- client.on('sessionStarted', (event) => {});
- client.on('sessionEnded', (event) => {});
- client.on('transcript', (event) => {});
- client.on('response', (event) => {});
- client.on('audio', (event) => {});
- client.on('error', (event) => {});
- client.on('reconnecting', (event) => {});
+ client.on('connected', (event) => {
+ console.log('Connected:', event.connectionId);
+ });
+
+ client.on('ready', () => {
+ console.log('Ready for audio');
+ });
+
+ client.on('userTranscript', (event) => {
+ console.log('You:', event.text);
+ });
+
+ client.on('response', (event) => {
+ console.log('AI:', event.text, event.isFinal);
+ });
+
+ client.on('audio', (event) => {
+ // event.data: Uint8Array (PCM16)
+ // event.sampleRate: 24000
+ playAudio(event.data);
+ });
+
+ client.on('turnComplete', () => {
+ console.log('AI finished speaking');
+ });
+
+ client.on('error', (event) => {
+ console.error('Error:', event.code, event.message);
+ });
  ```

- ### SessionConfig
+ ## Audio Format

- | Option | Type | Default | Description |
- |--------|------|---------|-------------|
- | `prePrompt` | `string` | **required** | System prompt for the AI |
- | `voiceId` | `string` | `'en-US-Standard-A'` | TTS voice ID |
- | `languageCode` | `string` | `'en-US'` | Language for STT |
- | `inputFormat` | `AudioFormat` | `'pcm16'` | Input audio format |
- | `outputFormat` | `AudioFormat` | `'pcm16'` | Output audio format |
- | `sampleRate` | `number` | `16000` | Sample rate in Hz |
- | `metadata` | `Record<string,string>` | `{}` | Custom metadata |
+ ### Input (Your Microphone)

- ## Audio Utilities
+ | Property | Value |
+ |----------|-------|
+ | Format | PCM16 (16-bit signed, little-endian) |
+ | Sample Rate | 16,000 Hz |
+ | Channels | 1 (Mono) |
+ | Chunk Size | ~3200 bytes (100ms) |
+
+ ### Output (AI Response)
+
+ | Property | Value |
+ |----------|-------|
+ | Format | PCM16 (16-bit signed, little-endian) |
+ | Sample Rate | 24,000 Hz |
+ | Channels | 1 (Mono) |
+
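The chunk size in the input table above is just the PCM16 arithmetic spelled out: 16,000 samples/s × 2 bytes/sample × 0.1 s = 3,200 bytes. A tiny illustrative helper (not part of the SDK) makes the relationship explicit:

```typescript
// Bytes in one chunk of mono PCM16 audio: sampleRate × 2 bytes/sample × duration.
function pcm16ChunkBytes(sampleRateHz: number, chunkMs: number): number {
  return Math.round(sampleRateHz * 2 * (chunkMs / 1000));
}

console.log(pcm16ChunkBytes(16000, 100)); // 3200 (matches the input table)
```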
+ ## Browser Example
+
+ ```typescript
+ import { LiveSpeechClient, float32ToInt16, int16ToUint8 } from '@drawdream/livespeech';
+
+ const client = new LiveSpeechClient({
+ region: 'ap-northeast-2',
+ apiKey: 'your-api-key',
+ });
+
+ // Handlers
+ client.setUserTranscriptHandler((text) => console.log('You:', text));
+ client.setResponseHandler((text) => console.log('AI:', text));
+ client.setAudioHandler((data) => playAudioChunk(data));
+
+ // Connect
+ await client.connect();
+ await client.startSession({ prePrompt: 'You are a helpful assistant.' });
+
+ // Capture microphone
+ const stream = await navigator.mediaDevices.getUserMedia({
+ audio: { sampleRate: 16000, channelCount: 1 }
+ });
+
+ const audioContext = new AudioContext({ sampleRate: 16000 });
+ const source = audioContext.createMediaStreamSource(stream);
+ const processor = audioContext.createScriptProcessor(4096, 1, 1);
+
+ processor.onaudioprocess = (e) => {
+ const float32 = e.inputBuffer.getChannelData(0);
+ const int16 = float32ToInt16(float32);
+ const pcm = int16ToUint8(int16);
+ client.sendAudioChunk(pcm);
+ };
+
+ source.connect(processor);
+ processor.connect(audioContext.destination);

- The SDK includes audio encoding/decoding utilities:
+ // Start streaming
+ client.audioStart();
+
+ // Stop later
+ client.audioEnd();
+ stream.getTracks().forEach(track => track.stop());
+ ```
+
+ ## Audio Utilities

  ```typescript
  import {
- encodeAudioToBase64,
- decodeBase64ToAudio,
- float32ToInt16,
- int16ToFloat32,
- wrapPcmInWav,
+ float32ToInt16, // Web Audio Float32 → PCM16
+ int16ToFloat32, // PCM16 → Float32
+ int16ToUint8, // Int16Array → Uint8Array
+ uint8ToInt16, // Uint8Array → Int16Array
+ wrapPcmInWav, // Create WAV file
+ AudioEncoder, // Base64 encoding/decoding
  } from '@drawdream/livespeech';

- // Convert Float32 audio samples to PCM16
- const pcmData = float32ToInt16(float32Samples);
+ // Convert Web Audio to PCM16 for sending
+ const float32 = audioBuffer.getChannelData(0);
+ const int16 = float32ToInt16(float32);
+ const pcmBytes = int16ToUint8(int16);
+ client.sendAudioChunk(pcmBytes);

- // Create WAV file from PCM data
- const wavFile = wrapPcmInWav(pcmData, { sampleRate: 16000 });
+ // Convert received PCM16 to Web Audio
+ const receivedInt16 = uint8ToInt16(audioEvent.data);
+ const float32Data = int16ToFloat32(receivedInt16);
  ```
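The utilities snippet above ends with Float32 samples; to actually hear them in a browser they can be wrapped in an `AudioBuffer` at the documented 24 kHz output rate. Here is a sketch of the `playAudio` helper the earlier examples leave to the integrator, under the assumption that `int16ToFloat32` returns a `Float32Array`; a production player would also queue chunks so they start back-to-back rather than immediately:

```typescript
import { int16ToFloat32, uint8ToInt16 } from '@drawdream/livespeech';

const playbackCtx = new AudioContext();

// Play one PCM16 chunk from the `audio` event at the 24 kHz output rate.
function playAudio(data: Uint8Array): void {
  const float32 = int16ToFloat32(uint8ToInt16(data));
  const buffer = playbackCtx.createBuffer(1, float32.length, 24000); // mono @ 24 kHz
  buffer.copyToChannel(float32, 0);

  const source = playbackCtx.createBufferSource();
  source.buffer = buffer;
  source.connect(playbackCtx.destination);
  source.start(); // starts immediately; real code would schedule chunks sequentially
}
```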
 
- ## Browser Usage
+ ## Error Handling

- The SDK works in both Node.js and browser environments:
+ ```typescript
+ client.on('error', (event) => {
+ switch (event.code) {
+ case 'authentication_failed':
+ console.error('Invalid API key');
+ break;
+ case 'connection_timeout':
+ console.error('Connection timed out');
+ break;
+ case 'rate_limit':
+ console.error('Rate limit exceeded');
+ break;
+ default:
+ console.error(`Error: ${event.message}`);
+ }
+ });

- ```html
- <script type="module">
- import { LiveSpeechClient } from '@drawdream/livespeech';
+ client.on('disconnected', (event) => {
+ if (event.reason === 'error') {
+ console.log('Will auto-reconnect...');
+ }
+ });

- // Use the Web Audio API to capture microphone
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
- const audioContext = new AudioContext({ sampleRate: 16000 });
- // ... process audio and send to client
- </script>
+ client.on('reconnecting', (event) => {
+ console.log(`Reconnecting ${event.attempt}/${event.maxAttempts}`);
+ });
+ ```
+
+ ## Client Properties
+
+ | Property | Type | Description |
+ |----------|------|-------------|
+ | `isConnected` | `boolean` | Connection status |
+ | `hasActiveSession` | `boolean` | Session status |
+ | `isAudioStreaming` | `boolean` | Streaming status |
+ | `connectionId` | `string \| null` | Current connection ID |
+ | `currentSessionId` | `string \| null` | Current session ID |
+
+ ## Regions
+
+ | Region | Code | Location |
+ |--------|------|----------|
+ | Asia Pacific (Seoul) | `ap-northeast-2` | Korea |
+
+ ## TypeScript Types
+
+ ```typescript
+ import type {
+ LiveSpeechConfig,
+ SessionConfig,
+ LiveSpeechEvent,
+ ConnectedEvent,
+ DisconnectedEvent,
+ SessionStartedEvent,
+ ReadyEvent,
+ UserTranscriptEvent,
+ ResponseEvent,
+ AudioEvent,
+ TurnCompleteEvent,
+ ErrorEvent,
+ ErrorCode,
+ } from '@drawdream/livespeech';
  ```

  ## License
package/dist/index.d.mts CHANGED
@@ -21,6 +21,12 @@ declare function getEndpointForRegion(region: Region): string;
  */
  declare function isValidRegion(value: string): value is Region;

+ /**
+ * Pipeline mode for audio processing
+ * - 'live': Direct audio-to-audio conversation (default, lower latency)
+ * - 'composed': Uses separate STT + LLM + TTS services (more customizable)
+ */
+ type PipelineMode = 'live' | 'composed';
  /**
  * Configuration options for the LiveSpeech client
  *
@@ -75,6 +81,25 @@ interface SessionConfig {
  * System prompt for the AI assistant
  */
  prePrompt?: string;
+ /**
+ * Language code for speech recognition (e.g., "en-US", "ko-KR")
+ * @default "en-US"
+ */
+ language?: string;
+ /**
+ * Pipeline mode for audio processing
+ * - 'live': Direct audio-to-audio conversation (default, lower latency)
+ * - 'composed': Uses separate STT + LLM + TTS services (more customizable)
+ * @default "live"
+ */
+ pipelineMode?: PipelineMode;
+ /**
+ * Enable AI to speak first before user input (live mode only)
+ * When enabled, the AI will initiate the conversation based on the prePrompt.
+ * Make sure your prePrompt includes instructions for how the AI should greet the user.
+ * @default false
+ */
+ aiSpeaksFirst?: boolean;
  }
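As a usage illustration (not taken from the package), the three new session options compose like this; per the doc comments above, `aiSpeaksFirst` only applies in 'live' mode and relies on the prePrompt to describe the greeting:

```typescript
import { LiveSpeechClient } from '@drawdream/livespeech';

// Hypothetical session setup exercising the options added in 0.1.3.
async function startGreetingSession(client: LiveSpeechClient): Promise<void> {
  await client.startSession({
    prePrompt: 'You are a helpful assistant. Greet the user warmly before they speak.',
    language: 'ko-KR',    // speech recognition language
    pipelineMode: 'live', // direct audio-to-audio (the default)
    aiSpeaksFirst: true,  // AI initiates, guided by the prePrompt
  });
}
```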
  /**
  * Internal resolved configuration with defaults applied
@@ -92,7 +117,7 @@ interface ResolvedConfig {
  /**
  * Event types emitted by the LiveSpeech client
  */
- type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'reconnecting';
+ type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error';
  /**
  * Event payload for 'connected' event
  */
@@ -131,34 +156,19 @@ interface SessionEndedEvent {
  timestamp: string;
  }
  /**
- * Event payload for 'streamingStarted' event - acknowledgment of audioStart
- */
- interface StreamingStartedEvent {
- type: 'streamingStarted';
- timestamp: string;
- }
- /**
- * Event payload for 'speechStart' event - VAD detected speech begin
- */
- interface SpeechStartEvent {
- type: 'speechStart';
- timestamp: string;
- }
- /**
- * Event payload for 'speechEnd' event - VAD detected speech end
+ * Event payload for 'ready' event
  */
- interface SpeechEndEvent {
- type: 'speechEnd';
+ interface ReadyEvent {
+ type: 'ready';
  timestamp: string;
  }
  /**
- * Event payload for 'transcript' event
+ * Event payload for 'userTranscript' event
+ * User's speech transcription
  */
- interface TranscriptEvent {
- type: 'transcript';
+ interface UserTranscriptEvent {
+ type: 'userTranscript';
  text: string;
- isFinal: boolean;
- confidence?: number;
  timestamp: string;
  }
  /**
@@ -204,14 +214,22 @@ interface ReconnectingEvent {
  delay: number;
  timestamp: string;
  }
+ /**
+ * Event payload for 'turnComplete' event (both modes)
+ * Indicates the AI has finished its response turn
+ */
+ interface TurnCompleteEvent {
+ type: 'turnComplete';
+ timestamp: string;
+ }
  /**
  * Union type of all event payloads
  */
- type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent | StreamingStartedEvent | SpeechStartEvent | SpeechEndEvent | TranscriptEvent | ResponseEvent | AudioEvent | ErrorEvent | ReconnectingEvent;
+ type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ErrorEvent;
  /**
  * Simplified event handlers for common use cases
  */
- type TranscriptHandler = (text: string, isFinal: boolean) => void;
+ type UserTranscriptHandler = (text: string) => void;
  type ResponseHandler = (text: string, isFinal: boolean) => void;
  type AudioHandler = (data: Uint8Array) => void;
  type ErrorHandler = (error: ErrorEvent) => void;
@@ -223,7 +241,7 @@ type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioCh
  /**
  * WebSocket message types received from server
  */
- type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'pong';
+ type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error' | 'pong';
  /**
  * Base interface for client messages
  */
@@ -236,6 +254,8 @@ interface BaseClientMessage {
  interface StartSessionMessage extends BaseClientMessage {
  action: 'startSession';
  prePrompt?: string;
+ language?: string;
+ pipelineMode?: 'live' | 'composed';
  }
  /**
  * End session message
@@ -294,31 +314,11 @@ interface ServerSessionEndedMessage extends BaseServerMessage {
  sessionId: string;
  }
  /**
- * Streaming started message - acknowledgment of audioStart
+ * User transcript message from server (user's speech transcription)
  */
- interface ServerStreamingStartedMessage extends BaseServerMessage {
- type: 'streamingStarted';
- }
- /**
- * Speech start message - VAD detected speech begin
- */
- interface ServerSpeechStartMessage extends BaseServerMessage {
- type: 'speechStart';
- }
- /**
- * Speech end message - VAD detected speech end
- */
- interface ServerSpeechEndMessage extends BaseServerMessage {
- type: 'speechEnd';
- }
- /**
- * Transcript message from server
- */
- interface ServerTranscriptMessage extends BaseServerMessage {
- type: 'transcript';
+ interface ServerUserTranscriptMessage extends BaseServerMessage {
+ type: 'userTranscript';
  text: string;
- isFinal: boolean;
- confidence?: number;
  }
  /**
  * Response message from server
@@ -351,10 +351,24 @@ interface ServerErrorMessage extends BaseServerMessage {
  interface ServerPongMessage extends BaseServerMessage {
  type: 'pong';
  }
+ /**
+ * Turn complete message from server
+ * Indicates the AI has finished its response turn
+ */
+ interface ServerTurnCompleteMessage extends BaseServerMessage {
+ type: 'turnComplete';
+ }
+ /**
+ * Ready message from server
+ * Indicates the session is ready for audio input
+ */
+ interface ServerReadyMessage extends BaseServerMessage {
+ type: 'ready';
+ }
  /**
  * Union type of all server messages
  */
- type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerStreamingStartedMessage | ServerSpeechStartMessage | ServerSpeechEndMessage | ServerTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerErrorMessage | ServerPongMessage;
+ type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerErrorMessage | ServerPongMessage;

  /**
  * Connection state
@@ -367,16 +381,15 @@ type ConnectionState = 'disconnected' | 'connecting' | 'connected' | 'reconnecti
  type LiveSpeechEventMap = {
  connected: ConnectedEvent;
  disconnected: DisconnectedEvent;
+ reconnecting: ReconnectingEvent;
  sessionStarted: SessionStartedEvent;
  sessionEnded: SessionEndedEvent;
- streamingStarted: StreamingStartedEvent;
- speechStart: SpeechStartEvent;
- speechEnd: SpeechEndEvent;
- transcript: TranscriptEvent;
+ ready: ReadyEvent;
+ userTranscript: UserTranscriptEvent;
  response: ResponseEvent;
  audio: AudioEvent;
+ turnComplete: TurnCompleteEvent;
  error: ErrorEvent;
- reconnecting: ReconnectingEvent;
  };
  /**
  * LiveSpeech client for real-time speech-to-speech AI conversations
@@ -389,7 +402,7 @@ declare class LiveSpeechClient {
  private sessionId;
  private isStreaming;
  private readonly eventListeners;
- private transcriptHandler;
+ private userTranscriptHandler;
  private responseHandler;
  private audioHandler;
  private errorHandler;
@@ -455,17 +468,17 @@ declare class LiveSpeechClient {
  */
  off<K extends keyof LiveSpeechEventMap>(event: K, listener: (event: LiveSpeechEventMap[K]) => void): void;
  /**
- * Set transcript handler (simplified)
- */
- setTranscriptHandler(handler: TranscriptHandler): void;
- /**
- * Set response handler (simplified)
+ * Set response handler
  */
  setResponseHandler(handler: ResponseHandler): void;
  /**
  * Set audio handler (simplified)
  */
  setAudioHandler(handler: AudioHandler): void;
+ /**
+ * Set user transcript handler
+ */
+ setUserTranscriptHandler(handler: UserTranscriptHandler): void;
  /**
  * Set error handler (simplified)
  */
@@ -567,4 +580,4 @@ declare class AudioEncoder {
  wrapWav(data: Uint8Array): Uint8Array;
  }

- export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type SpeechEndEvent, type SpeechStartEvent, type StreamingStartedEvent, type TranscriptEvent, type TranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
+ export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
package/dist/index.d.ts CHANGED
@@ -21,6 +21,12 @@ declare function getEndpointForRegion(region: Region): string;
  */
  declare function isValidRegion(value: string): value is Region;

+ /**
+ * Pipeline mode for audio processing
+ * - 'live': Direct audio-to-audio conversation (default, lower latency)
+ * - 'composed': Uses separate STT + LLM + TTS services (more customizable)
+ */
+ type PipelineMode = 'live' | 'composed';
  /**
  * Configuration options for the LiveSpeech client
  *
@@ -75,6 +81,25 @@ interface SessionConfig {
  * System prompt for the AI assistant
  */
  prePrompt?: string;
+ /**
+ * Language code for speech recognition (e.g., "en-US", "ko-KR")
+ * @default "en-US"
+ */
+ language?: string;
+ /**
+ * Pipeline mode for audio processing
+ * - 'live': Direct audio-to-audio conversation (default, lower latency)
+ * - 'composed': Uses separate STT + LLM + TTS services (more customizable)
+ * @default "live"
+ */
+ pipelineMode?: PipelineMode;
+ /**
+ * Enable AI to speak first before user input (live mode only)
+ * When enabled, the AI will initiate the conversation based on the prePrompt.
+ * Make sure your prePrompt includes instructions for how the AI should greet the user.
+ * @default false
+ */
+ aiSpeaksFirst?: boolean;
  }
  /**
  * Internal resolved configuration with defaults applied
@@ -92,7 +117,7 @@ interface ResolvedConfig {
  /**
  * Event types emitted by the LiveSpeech client
  */
- type LiveSpeechEventType = 'connected' | 'disconnected' | 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'reconnecting';
+ type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error';
  /**
  * Event payload for 'connected' event
  */
@@ -131,34 +156,19 @@ interface SessionEndedEvent {
  timestamp: string;
  }
  /**
- * Event payload for 'streamingStarted' event - acknowledgment of audioStart
- */
- interface StreamingStartedEvent {
- type: 'streamingStarted';
- timestamp: string;
- }
- /**
- * Event payload for 'speechStart' event - VAD detected speech begin
- */
- interface SpeechStartEvent {
- type: 'speechStart';
- timestamp: string;
- }
- /**
- * Event payload for 'speechEnd' event - VAD detected speech end
+ * Event payload for 'ready' event
  */
- interface SpeechEndEvent {
- type: 'speechEnd';
+ interface ReadyEvent {
+ type: 'ready';
  timestamp: string;
  }
  /**
- * Event payload for 'transcript' event
+ * Event payload for 'userTranscript' event
+ * User's speech transcription
  */
- interface TranscriptEvent {
- type: 'transcript';
+ interface UserTranscriptEvent {
+ type: 'userTranscript';
  text: string;
- isFinal: boolean;
- confidence?: number;
  timestamp: string;
  }
  /**
@@ -204,14 +214,22 @@ interface ReconnectingEvent {
  delay: number;
  timestamp: string;
  }
+ /**
+ * Event payload for 'turnComplete' event (both modes)
+ * Indicates the AI has finished its response turn
+ */
+ interface TurnCompleteEvent {
+ type: 'turnComplete';
+ timestamp: string;
+ }
  /**
  * Union type of all event payloads
  */
- type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | SessionStartedEvent | SessionEndedEvent | StreamingStartedEvent | SpeechStartEvent | SpeechEndEvent | TranscriptEvent | ResponseEvent | AudioEvent | ErrorEvent | ReconnectingEvent;
+ type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ErrorEvent;
  /**
  * Simplified event handlers for common use cases
  */
- type TranscriptHandler = (text: string, isFinal: boolean) => void;
+ type UserTranscriptHandler = (text: string) => void;
  type ResponseHandler = (text: string, isFinal: boolean) => void;
  type AudioHandler = (data: Uint8Array) => void;
  type ErrorHandler = (error: ErrorEvent) => void;
@@ -223,7 +241,7 @@ type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioCh
  /**
  * WebSocket message types received from server
  */
- type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'streamingStarted' | 'speechStart' | 'speechEnd' | 'transcript' | 'response' | 'audio' | 'error' | 'pong';
+ type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'error' | 'pong';
  /**
  * Base interface for client messages
  */
@@ -236,6 +254,8 @@ interface BaseClientMessage {
  interface StartSessionMessage extends BaseClientMessage {
  action: 'startSession';
  prePrompt?: string;
+ language?: string;
+ pipelineMode?: 'live' | 'composed';
  }
  /**
  * End session message
@@ -294,31 +314,11 @@ interface ServerSessionEndedMessage extends BaseServerMessage {
  sessionId: string;
  }
  /**
- * Streaming started message - acknowledgment of audioStart
+ * User transcript message from server (user's speech transcription)
  */
- interface ServerStreamingStartedMessage extends BaseServerMessage {
- type: 'streamingStarted';
- }
- /**
- * Speech start message - VAD detected speech begin
- */
- interface ServerSpeechStartMessage extends BaseServerMessage {
- type: 'speechStart';
- }
- /**
- * Speech end message - VAD detected speech end
- */
- interface ServerSpeechEndMessage extends BaseServerMessage {
- type: 'speechEnd';
- }
- /**
- * Transcript message from server
- */
- interface ServerTranscriptMessage extends BaseServerMessage {
- type: 'transcript';
+ interface ServerUserTranscriptMessage extends BaseServerMessage {
+ type: 'userTranscript';
  text: string;
- isFinal: boolean;
- confidence?: number;
  }
  /**
  * Response message from server
@@ -351,10 +351,24 @@ interface ServerErrorMessage extends BaseServerMessage {
  interface ServerPongMessage extends BaseServerMessage {
  type: 'pong';
  }
+ /**
+ * Turn complete message from server
+ * Indicates the AI has finished its response turn
+ */
+ interface ServerTurnCompleteMessage extends BaseServerMessage {
+ type: 'turnComplete';
+ }
+ /**
+ * Ready message from server
+ * Indicates the session is ready for audio input
+ */
+ interface ServerReadyMessage extends BaseServerMessage {
+ type: 'ready';
+ }
  /**
  * Union type of all server messages
  */
- type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerStreamingStartedMessage | ServerSpeechStartMessage | ServerSpeechEndMessage | ServerTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerErrorMessage | ServerPongMessage;
+ type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerErrorMessage | ServerPongMessage;

  /**
  * Connection state
@@ -367,16 +381,15 @@ type ConnectionState = 'disconnected' | 'connecting' | 'connected' | 'reconnecti
  type LiveSpeechEventMap = {
  connected: ConnectedEvent;
  disconnected: DisconnectedEvent;
+ reconnecting: ReconnectingEvent;
  sessionStarted: SessionStartedEvent;
  sessionEnded: SessionEndedEvent;
- streamingStarted: StreamingStartedEvent;
- speechStart: SpeechStartEvent;
- speechEnd: SpeechEndEvent;
- transcript: TranscriptEvent;
+ ready: ReadyEvent;
+ userTranscript: UserTranscriptEvent;
  response: ResponseEvent;
  audio: AudioEvent;
+ turnComplete: TurnCompleteEvent;
  error: ErrorEvent;
- reconnecting: ReconnectingEvent;
  };
  /**
  * LiveSpeech client for real-time speech-to-speech AI conversations
@@ -389,7 +402,7 @@ declare class LiveSpeechClient {
  private sessionId;
  private isStreaming;
  private readonly eventListeners;
- private transcriptHandler;
+ private userTranscriptHandler;
  private responseHandler;
  private audioHandler;
  private errorHandler;
@@ -455,17 +468,17 @@ declare class LiveSpeechClient {
  */
  off<K extends keyof LiveSpeechEventMap>(event: K, listener: (event: LiveSpeechEventMap[K]) => void): void;
  /**
- * Set transcript handler (simplified)
- */
- setTranscriptHandler(handler: TranscriptHandler): void;
- /**
- * Set response handler (simplified)
+ * Set response handler
  */
  setResponseHandler(handler: ResponseHandler): void;
  /**
  * Set audio handler (simplified)
  */
  setAudioHandler(handler: AudioHandler): void;
+ /**
+ * Set user transcript handler
+ */
+ setUserTranscriptHandler(handler: UserTranscriptHandler): void;
  /**
  * Set error handler (simplified)
  */
@@ -567,4 +580,4 @@ declare class AudioEncoder {
  wrapWav(data: Uint8Array): Uint8Array;
  }

- export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type SpeechEndEvent, type SpeechStartEvent, type StreamingStartedEvent, type TranscriptEvent, type TranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
+ export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
package/dist/index.js CHANGED
@@ -46,7 +46,7 @@ var Region = {
  };
  var REGION_ENDPOINTS = {
  "ap-northeast-2": "wss://talk.drawdream.co.kr",
- "us-west-2": "wss://talk..drawdream.ca"
+ "us-west-2": "wss://talk.drawdream.ca"
  // Coming soon
  };
  function getEndpointForRegion(region) {
@@ -614,7 +614,7 @@ var LiveSpeechClient = class {
  // Event listeners using a simple map
  eventListeners = /* @__PURE__ */ new Map();
  // Simplified handlers
- transcriptHandler = null;
+ userTranscriptHandler = null;
  responseHandler = null;
  audioHandler = null;
  errorHandler = null;
@@ -730,6 +730,13 @@ var LiveSpeechClient = class {
  if (config?.prePrompt) {
  startMessage.prePrompt = config.prePrompt;
  }
+ if (config?.language) {
+ startMessage.language = config.language;
+ }
+ startMessage.pipelineMode = config?.pipelineMode ?? "live";
+ if (config?.aiSpeaksFirst) {
+ startMessage.aiSpeaksFirst = config.aiSpeaksFirst;
+ }
  this.connection.send(startMessage);
  });
  }
@@ -819,13 +826,7 @@ var LiveSpeechClient = class {
  }
  }
  /**
- * Set transcript handler (simplified)
- */
- setTranscriptHandler(handler) {
- this.transcriptHandler = handler;
- }
- /**
- * Set response handler (simplified)
+ * Set response handler
  */
  setResponseHandler(handler) {
  this.responseHandler = handler;
@@ -836,6 +837,12 @@ var LiveSpeechClient = class {
  setAudioHandler(handler) {
  this.audioHandler = handler;
  }
+ /**
+ * Set user transcript handler
+ */
+ setUserTranscriptHandler(handler) {
+ this.userTranscriptHandler = handler;
+ }
  /**
  * Set error handler (simplified)
  */
@@ -914,36 +921,12 @@ var LiveSpeechClient = class {
  timestamp: message.timestamp
  });
  break;
- case "streamingStarted":
- this.emit("streamingStarted", {
- type: "streamingStarted",
- timestamp: message.timestamp
- });
- break;
- case "speechStart":
- this.emit("speechStart", {
- type: "speechStart",
- timestamp: message.timestamp
- });
- break;
- case "speechEnd":
- this.emit("speechEnd", {
- type: "speechEnd",
- timestamp: message.timestamp
- });
- break;
- case "transcript": {
- const transcriptEvent = {
- type: "transcript",
- text: message.text,
- isFinal: message.isFinal,
+ case "ready": {
+ const readyEvent = {
+ type: "ready",
  timestamp: message.timestamp
  };
- if (message.confidence !== void 0) {
- transcriptEvent.confidence = message.confidence;
- }
- this.emit("transcript", transcriptEvent);
- this.transcriptHandler?.(message.text, message.isFinal);
+ this.emit("ready", readyEvent);
  break;
  }
  case "response": {
@@ -970,6 +953,24 @@ var LiveSpeechClient = class {
  this.audioHandler?.(audioData);
  break;
  }
+ case "userTranscript": {
+ const userTranscriptEvent = {
+ type: "userTranscript",
+ text: message.text,
+ timestamp: message.timestamp
+ };
+ this.emit("userTranscript", userTranscriptEvent);
+ this.userTranscriptHandler?.(message.text);
+ break;
+ }
+ case "turnComplete": {
+ const turnCompleteEvent = {
+ type: "turnComplete",
+ timestamp: message.timestamp
+ };
+ this.emit("turnComplete", turnCompleteEvent);
+ break;
+ }
  case "error":
  this.handleError(message.code, message.message);
  break;
package/dist/index.mjs CHANGED
@@ -7,7 +7,7 @@ var Region = {
  };
  var REGION_ENDPOINTS = {
  "ap-northeast-2": "wss://talk.drawdream.co.kr",
- "us-west-2": "wss://talk..drawdream.ca"
+ "us-west-2": "wss://talk.drawdream.ca"
  // Coming soon
  };
  function getEndpointForRegion(region) {
@@ -575,7 +575,7 @@ var LiveSpeechClient = class {
  // Event listeners using a simple map
  eventListeners = /* @__PURE__ */ new Map();
  // Simplified handlers
- transcriptHandler = null;
+ userTranscriptHandler = null;
  responseHandler = null;
  audioHandler = null;
  errorHandler = null;
@@ -691,6 +691,13 @@ var LiveSpeechClient = class {
  if (config?.prePrompt) {
  startMessage.prePrompt = config.prePrompt;
  }
+ if (config?.language) {
+ startMessage.language = config.language;
+ }
+ startMessage.pipelineMode = config?.pipelineMode ?? "live";
+ if (config?.aiSpeaksFirst) {
+ startMessage.aiSpeaksFirst = config.aiSpeaksFirst;
+ }
  this.connection.send(startMessage);
  });
  }
@@ -780,13 +787,7 @@ var LiveSpeechClient = class {
  }
  }
  /**
- * Set transcript handler (simplified)
- */
- setTranscriptHandler(handler) {
- this.transcriptHandler = handler;
- }
- /**
- * Set response handler (simplified)
+ * Set response handler
  */
  setResponseHandler(handler) {
  this.responseHandler = handler;
@@ -797,6 +798,12 @@ var LiveSpeechClient = class {
  setAudioHandler(handler) {
  this.audioHandler = handler;
  }
+ /**
+ * Set user transcript handler
+ */
+ setUserTranscriptHandler(handler) {
+ this.userTranscriptHandler = handler;
+ }
  /**
  * Set error handler (simplified)
  */
@@ -875,36 +882,12 @@ var LiveSpeechClient = class {
  timestamp: message.timestamp
  });
  break;
- case "streamingStarted":
- this.emit("streamingStarted", {
- type: "streamingStarted",
- timestamp: message.timestamp
- });
- break;
- case "speechStart":
- this.emit("speechStart", {
- type: "speechStart",
- timestamp: message.timestamp
- });
- break;
- case "speechEnd":
- this.emit("speechEnd", {
- type: "speechEnd",
- timestamp: message.timestamp
- });
- break;
- case "transcript": {
- const transcriptEvent = {
- type: "transcript",
- text: message.text,
- isFinal: message.isFinal,
+ case "ready": {
+ const readyEvent = {
+ type: "ready",
  timestamp: message.timestamp
  };
- if (message.confidence !== void 0) {
- transcriptEvent.confidence = message.confidence;
- }
- this.emit("transcript", transcriptEvent);
- this.transcriptHandler?.(message.text, message.isFinal);
+ this.emit("ready", readyEvent);
  break;
  }
  case "response": {
@@ -931,6 +914,24 @@ var LiveSpeechClient = class {
  this.audioHandler?.(audioData);
  break;
  }
+ case "userTranscript": {
+ const userTranscriptEvent = {
+ type: "userTranscript",
+ text: message.text,
+ timestamp: message.timestamp
+ };
+ this.emit("userTranscript", userTranscriptEvent);
+ this.userTranscriptHandler?.(message.text);
+ break;
+ }
+ case "turnComplete": {
+ const turnCompleteEvent = {
+ type: "turnComplete",
+ timestamp: message.timestamp
+ };
+ this.emit("turnComplete", turnCompleteEvent);
+ break;
+ }
  case "error":
  this.handleError(message.code, message.message);
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@drawdream/livespeech",
- "version": "0.1.1",
+ "version": "0.1.3",
  "description": "Real-time speech-to-speech AI conversation SDK",
  "main": "dist/index.js",
  "module": "dist/index.mjs",