@drawdream/livespeech 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,7 +32,7 @@ const client = new LiveSpeechClient({
32
32
 
33
33
  // Handle only 4 essential events!
34
34
  client.setAudioHandler((audioData) => {
35
- audioPlayer.queue(audioData); // PCM16 @ 24kHz
35
 + audioPlayer.queue(audioData); // PCM16 — use event.sampleRate (24kHz Live, 16kHz Composed)
36
36
  });
37
37
 
38
38
  client.on('interrupted', () => {
@@ -81,7 +81,7 @@ Everything you need for basic voice conversations.
81
81
 
82
82
  | Event | Description | Action Required |
83
83
  |-------|-------------|-----------------|
84
- | `audio` | AI's audio output | Play audio (PCM16 @ 24kHz) |
84
 + | `audio` | AI's audio output | Play audio (PCM16; check `sampleRate`) |
85
85
  | `turnComplete` | AI finished speaking | Ready for next input |
86
86
  | `interrupted` | User barged in | **Clear audio buffer!** |
87
87
  | `error` | Error occurred | Handle/log error |
@@ -104,7 +104,10 @@ Without this, 2-3 seconds of buffered audio continues playing after the user int
104
104
  | Direction | Format | Sample Rate |
105
105
  |-----------|--------|-------------|
106
106
  | Input (mic) | PCM16 | 16,000 Hz |
107
- | Output (AI) | PCM16 | 24,000 Hz |
107
+ | Output (AI) — Live mode | PCM16 | 24,000 Hz |
108
+ | Output (AI) — Composed mode | PCM16 | 16,000 Hz |
109
+
110
+ > **Important:** The `audio` event includes a `sampleRate` field. Always use it to configure your audio decoder rather than hardcoding a rate.
108
111
 
109
112
  ## Configuration
110
113
 
@@ -122,6 +125,37 @@ await client.startSession({
122
125
 
123
126
  ---
124
127
 
128
+ # Composed Mode
129
+
130
+ Use composed mode for higher accuracy with slightly more latency. It runs a separate STT → LLM → TTS pipeline instead of direct audio-to-audio.
131
+
132
+ ```typescript
133
+ await client.startSession({
134
+ prePrompt: 'You are a helpful assistant.',
135
+ pipelineMode: 'composed',
136
+ language: 'ko-KR',
137
+ });
138
+
139
+ client.audioStart();
140
+ // Send/receive audio the same way as live mode
141
+ ```
142
+
143
+ ### Live vs Composed
144
+
145
+ | | Live | Composed |
146
+ |---|---|---|
147
+ | **Latency** | ~300ms | ~1-2s |
148
+ | **Pipeline** | Direct audio-to-audio (Gemini Live) | STT → LLM → TTS |
149
+ | **Accuracy** | Good | Higher |
150
+ | **`aiSpeaksFirst`** | ✅ Supported | ❌ Not supported |
151
+ | **`tools` (function calling)** | ✅ Supported | ❌ Not supported |
152
+ | **Output sample rate** | 24,000 Hz | 16,000 Hz |
153
+ | **Barge-in** | Automatic (Gemini VAD) | Automatic |
154
+
155
+ > **Note:** All other SDK methods and events work identically in both modes. The only code change is adding `pipelineMode: 'composed'` to your session config.
156
+
157
+ ---
158
+
125
159
  # Advanced API
126
160
 
127
161
  Optional features for power users.
@@ -146,7 +180,10 @@ Optional features for power users.
146
180
  | `userTranscript` | User's speech transcribed |
147
181
  | `response` | AI's response text |
148
182
  | `toolCall` | AI wants to call a function |
183
+ | `reconnecting` | Auto-reconnection attempt |
149
184
  | `userIdUpdated` | Guest-to-user migration complete |
185
+ | `sessionWarning` | Session nearing duration limit |
186
+ | `sessionGoodbye` | Session about to end |
150
187
 
151
188
  ---
152
189
 
@@ -271,6 +308,12 @@ client.audioStart(); // AI speaks immediately
271
308
  | `aiSpeaksFirst` | `false` | AI initiates (live mode only) |
272
309
  | `allowHarmCategory` | `false` | Disable safety filters |
273
310
  | `tools` | `[]` | Function definitions |
311
+ | `sessionDuration` | - | Enables session duration limits when provided |
312
+
313
+ **Notes**
314
+ - Duration checks are **disabled by default**. They activate only when `sessionDuration` is provided.
315
+ - If only `sessionDuration.maxSeconds` is provided, `enableWarning`/`enableGoodbye` default to `false` in the SDK.
316
+ - Server limits take precedence in production.
274
317
 
275
318
  ---
276
319
 
@@ -308,7 +351,7 @@ import { float32ToInt16, int16ToUint8, wrapPcmInWav } from '@drawdream/livespeec
308
351
 
309
352
  const int16 = float32ToInt16(float32Data);
310
353
  const bytes = int16ToUint8(int16);
311
- const wav = wrapPcmInWav(bytes, 16000, 1, 16);
354
+ const wav = wrapPcmInWav(bytes, { sampleRate: 16000, channels: 1, bitDepth: 16 });
312
355
  ```
313
356
 
314
357
  ---
package/dist/index.d.mts CHANGED
@@ -39,7 +39,6 @@ type PipelineMode = 'live' | 'composed';
39
39
  interface LiveSpeechConfig {
40
40
  /**
41
41
  * Region for the LiveSpeech service
42
- * @example 'ap-northeast-2'
43
42
  */
44
43
  region: Region;
45
44
  /**
@@ -134,6 +133,23 @@ interface Tool {
134
133
  */
135
134
  parameters?: FunctionParameters;
136
135
  }
136
+ /**
137
 + * Session duration limit configuration
138
+ */
139
+ interface SessionDurationConfig {
140
+ /**
141
+ * Max session duration in seconds (required)
142
+ */
143
+ maxSeconds: number;
144
+ /**
145
+ * Enable session warning events/messages (default: false)
146
+ */
147
+ enableWarning?: boolean;
148
+ /**
149
+ * Enable session goodbye events/messages (default: false)
150
+ */
151
+ enableGoodbye?: boolean;
152
+ }
137
153
  /**
138
154
  * Session configuration options
139
155
  */
@@ -143,10 +159,29 @@ interface SessionConfig {
143
159
  */
144
160
  prePrompt?: string;
145
161
  /**
146
- * Language code for speech recognition (e.g., "en-US", "ko-KR")
162
+ * Language code (e.g., "en-US", "ko-KR").
163
+ *
164
+ * - **Composed mode:** Used for STT speech recognition language AND as fallback
165
+ * for TTS voice selection (if `outputLanguage` is not set).
166
+ * - **Live mode:** Used for TTS voice selection only. Gemini auto-detects
167
+ * the input language from the audio stream.
168
+ *
147
169
  * @default "en-US"
148
170
  */
149
171
  language?: string;
172
+ /**
173
+ * Output language for TTS voice selection (Composed mode only).
174
+ *
175
+ * Use this when the AI output language differs from the input language
176
+ * (e.g., input is "ko-KR" but AI responds in English via prePrompt translation).
177
+ * If not set, defaults to `language`.
178
+ *
179
+ * **Note:** This field is only used in Composed mode. In Live mode, voice
180
+ * selection is driven by `language` since Gemini handles the full pipeline natively.
181
+ *
182
+ * @example "en-US"
183
+ */
184
+ outputLanguage?: string;
150
185
  /**
151
186
  * Pipeline mode for audio processing
152
187
  * - 'live': Direct audio-to-audio conversation (default, lower latency)
@@ -183,6 +218,10 @@ interface SessionConfig {
183
218
  * }]
184
219
  */
185
220
  tools?: Tool[];
221
+ /**
222
+ * Session duration configuration (enables duration limits when set)
223
+ */
224
+ sessionDuration?: SessionDurationConfig;
186
225
  }
187
226
  /**
188
227
  * Internal resolved configuration with defaults applied
@@ -201,7 +240,7 @@ interface ResolvedConfig {
201
240
  /**
202
241
  * Event types emitted by the LiveSpeech client
203
242
  */
204
- type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error';
243
+ type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'sessionWarning' | 'sessionGoodbye' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error';
205
244
  /**
206
245
  * Event payload for 'connected' event
207
246
  */
@@ -239,6 +278,22 @@ interface SessionEndedEvent {
239
278
  sessionId: string;
240
279
  timestamp: string;
241
280
  }
281
+ /**
282
+ * Event payload for 'sessionWarning' event
283
+ */
284
+ interface SessionWarningEvent {
285
+ type: 'sessionWarning';
286
+ remainingSeconds: number;
287
+ timestamp: string;
288
+ }
289
+ /**
290
+ * Event payload for 'sessionGoodbye' event
291
+ */
292
+ interface SessionGoodbyeEvent {
293
+ type: 'sessionGoodbye';
294
+ remainingSeconds: number;
295
+ timestamp: string;
296
+ }
242
297
  /**
243
298
  * Event payload for 'ready' event
244
299
  */
@@ -380,7 +435,7 @@ interface InterruptedEvent {
380
435
  /**
381
436
  * Union type of all event payloads
382
437
  */
383
- type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | InterruptedEvent | ErrorEvent;
438
+ type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | SessionWarningEvent | SessionGoodbyeEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | InterruptedEvent | ErrorEvent;
384
439
  /**
385
440
  * Simplified event handlers for common use cases
386
441
  */
@@ -396,7 +451,7 @@ type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioCh
396
451
  /**
397
452
  * WebSocket message types received from server
398
453
  */
399
- type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error' | 'pong';
454
+ type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'session_warning' | 'session_goodbye' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error' | 'pong';
400
455
  /**
401
456
  * Base interface for client messages
402
457
  */
@@ -410,10 +465,14 @@ interface StartSessionMessage extends BaseClientMessage {
410
465
  action: 'startSession';
411
466
  prePrompt?: string;
412
467
  language?: string;
468
+ outputLanguage?: string;
413
469
  pipelineMode?: 'live' | 'composed';
414
470
  aiSpeaksFirst?: boolean;
415
471
  allowHarmCategory?: boolean;
416
472
  tools?: Tool[];
473
+ sessionMaxDurationSeconds?: number;
474
+ enableSessionWarning?: boolean;
475
+ enableSessionGoodbye?: boolean;
417
476
  }
418
477
  /**
419
478
  * End session message
@@ -517,6 +576,20 @@ interface ServerSessionEndedMessage extends BaseServerMessage {
517
576
  type: 'sessionEnded';
518
577
  sessionId: string;
519
578
  }
579
+ /**
580
+ * Session warning message from server
581
+ */
582
+ interface ServerSessionWarningMessage extends BaseServerMessage {
583
+ type: 'session_warning';
584
+ remainingSeconds: number;
585
+ }
586
+ /**
587
+ * Session goodbye message from server
588
+ */
589
+ interface ServerSessionGoodbyeMessage extends BaseServerMessage {
590
+ type: 'session_goodbye';
591
+ remainingSeconds: number;
592
+ }
520
593
  /**
521
594
  * User transcript message from server (user's speech transcription)
522
595
  */
@@ -604,7 +677,7 @@ interface ServerInterruptedMessage extends BaseServerMessage {
604
677
  /**
605
678
  * Union type of all server messages
606
679
  */
607
- type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerInterruptedMessage | ServerErrorMessage | ServerPongMessage;
680
+ type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerSessionWarningMessage | ServerSessionGoodbyeMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerInterruptedMessage | ServerErrorMessage | ServerPongMessage;
608
681
 
609
682
  /**
610
683
  * Connection state
@@ -620,6 +693,8 @@ type LiveSpeechEventMap = {
620
693
  reconnecting: ReconnectingEvent;
621
694
  sessionStarted: SessionStartedEvent;
622
695
  sessionEnded: SessionEndedEvent;
696
+ sessionWarning: SessionWarningEvent;
697
+ sessionGoodbye: SessionGoodbyeEvent;
623
698
  ready: ReadyEvent;
624
699
  userTranscript: UserTranscriptEvent;
625
700
  response: ResponseEvent;
@@ -912,4 +987,4 @@ declare class AudioEncoder {
912
987
  wrapWav(data: Uint8Array): Uint8Array;
913
988
  }
914
989
 
915
- export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
990
+ export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionDurationConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
package/dist/index.d.ts CHANGED
@@ -39,7 +39,6 @@ type PipelineMode = 'live' | 'composed';
39
39
  interface LiveSpeechConfig {
40
40
  /**
41
41
  * Region for the LiveSpeech service
42
- * @example 'ap-northeast-2'
43
42
  */
44
43
  region: Region;
45
44
  /**
@@ -134,6 +133,23 @@ interface Tool {
134
133
  */
135
134
  parameters?: FunctionParameters;
136
135
  }
136
+ /**
137
 + * Session duration limit configuration
138
+ */
139
+ interface SessionDurationConfig {
140
+ /**
141
+ * Max session duration in seconds (required)
142
+ */
143
+ maxSeconds: number;
144
+ /**
145
+ * Enable session warning events/messages (default: false)
146
+ */
147
+ enableWarning?: boolean;
148
+ /**
149
+ * Enable session goodbye events/messages (default: false)
150
+ */
151
+ enableGoodbye?: boolean;
152
+ }
137
153
  /**
138
154
  * Session configuration options
139
155
  */
@@ -143,10 +159,29 @@ interface SessionConfig {
143
159
  */
144
160
  prePrompt?: string;
145
161
  /**
146
- * Language code for speech recognition (e.g., "en-US", "ko-KR")
162
+ * Language code (e.g., "en-US", "ko-KR").
163
+ *
164
+ * - **Composed mode:** Used for STT speech recognition language AND as fallback
165
+ * for TTS voice selection (if `outputLanguage` is not set).
166
+ * - **Live mode:** Used for TTS voice selection only. Gemini auto-detects
167
+ * the input language from the audio stream.
168
+ *
147
169
  * @default "en-US"
148
170
  */
149
171
  language?: string;
172
+ /**
173
+ * Output language for TTS voice selection (Composed mode only).
174
+ *
175
+ * Use this when the AI output language differs from the input language
176
+ * (e.g., input is "ko-KR" but AI responds in English via prePrompt translation).
177
+ * If not set, defaults to `language`.
178
+ *
179
+ * **Note:** This field is only used in Composed mode. In Live mode, voice
180
+ * selection is driven by `language` since Gemini handles the full pipeline natively.
181
+ *
182
+ * @example "en-US"
183
+ */
184
+ outputLanguage?: string;
150
185
  /**
151
186
  * Pipeline mode for audio processing
152
187
  * - 'live': Direct audio-to-audio conversation (default, lower latency)
@@ -183,6 +218,10 @@ interface SessionConfig {
183
218
  * }]
184
219
  */
185
220
  tools?: Tool[];
221
+ /**
222
+ * Session duration configuration (enables duration limits when set)
223
+ */
224
+ sessionDuration?: SessionDurationConfig;
186
225
  }
187
226
  /**
188
227
  * Internal resolved configuration with defaults applied
@@ -201,7 +240,7 @@ interface ResolvedConfig {
201
240
  /**
202
241
  * Event types emitted by the LiveSpeech client
203
242
  */
204
- type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error';
243
+ type LiveSpeechEventType = 'connected' | 'disconnected' | 'reconnecting' | 'sessionStarted' | 'sessionEnded' | 'sessionWarning' | 'sessionGoodbye' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error';
205
244
  /**
206
245
  * Event payload for 'connected' event
207
246
  */
@@ -239,6 +278,22 @@ interface SessionEndedEvent {
239
278
  sessionId: string;
240
279
  timestamp: string;
241
280
  }
281
+ /**
282
+ * Event payload for 'sessionWarning' event
283
+ */
284
+ interface SessionWarningEvent {
285
+ type: 'sessionWarning';
286
+ remainingSeconds: number;
287
+ timestamp: string;
288
+ }
289
+ /**
290
+ * Event payload for 'sessionGoodbye' event
291
+ */
292
+ interface SessionGoodbyeEvent {
293
+ type: 'sessionGoodbye';
294
+ remainingSeconds: number;
295
+ timestamp: string;
296
+ }
242
297
  /**
243
298
  * Event payload for 'ready' event
244
299
  */
@@ -380,7 +435,7 @@ interface InterruptedEvent {
380
435
  /**
381
436
  * Union type of all event payloads
382
437
  */
383
- type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | InterruptedEvent | ErrorEvent;
438
+ type LiveSpeechEvent = ConnectedEvent | DisconnectedEvent | ReconnectingEvent | SessionStartedEvent | SessionEndedEvent | SessionWarningEvent | SessionGoodbyeEvent | ReadyEvent | UserTranscriptEvent | ResponseEvent | AudioEvent | TurnCompleteEvent | ToolCallEvent | UserIdUpdatedEvent | InterruptedEvent | ErrorEvent;
384
439
  /**
385
440
  * Simplified event handlers for common use cases
386
441
  */
@@ -396,7 +451,7 @@ type ClientMessageType = 'startSession' | 'endSession' | 'audioStart' | 'audioCh
396
451
  /**
397
452
  * WebSocket message types received from server
398
453
  */
399
- type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error' | 'pong';
454
+ type ServerMessageType = 'sessionStarted' | 'sessionEnded' | 'session_warning' | 'session_goodbye' | 'ready' | 'userTranscript' | 'response' | 'audio' | 'turnComplete' | 'toolCall' | 'userIdUpdated' | 'interrupted' | 'error' | 'pong';
400
455
  /**
401
456
  * Base interface for client messages
402
457
  */
@@ -410,10 +465,14 @@ interface StartSessionMessage extends BaseClientMessage {
410
465
  action: 'startSession';
411
466
  prePrompt?: string;
412
467
  language?: string;
468
+ outputLanguage?: string;
413
469
  pipelineMode?: 'live' | 'composed';
414
470
  aiSpeaksFirst?: boolean;
415
471
  allowHarmCategory?: boolean;
416
472
  tools?: Tool[];
473
+ sessionMaxDurationSeconds?: number;
474
+ enableSessionWarning?: boolean;
475
+ enableSessionGoodbye?: boolean;
417
476
  }
418
477
  /**
419
478
  * End session message
@@ -517,6 +576,20 @@ interface ServerSessionEndedMessage extends BaseServerMessage {
517
576
  type: 'sessionEnded';
518
577
  sessionId: string;
519
578
  }
579
+ /**
580
+ * Session warning message from server
581
+ */
582
+ interface ServerSessionWarningMessage extends BaseServerMessage {
583
+ type: 'session_warning';
584
+ remainingSeconds: number;
585
+ }
586
+ /**
587
+ * Session goodbye message from server
588
+ */
589
+ interface ServerSessionGoodbyeMessage extends BaseServerMessage {
590
+ type: 'session_goodbye';
591
+ remainingSeconds: number;
592
+ }
520
593
  /**
521
594
  * User transcript message from server (user's speech transcription)
522
595
  */
@@ -604,7 +677,7 @@ interface ServerInterruptedMessage extends BaseServerMessage {
604
677
  /**
605
678
  * Union type of all server messages
606
679
  */
607
- type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerInterruptedMessage | ServerErrorMessage | ServerPongMessage;
680
+ type ServerMessage = ServerSessionStartedMessage | ServerSessionEndedMessage | ServerSessionWarningMessage | ServerSessionGoodbyeMessage | ServerReadyMessage | ServerUserTranscriptMessage | ServerResponseMessage | ServerAudioMessage | ServerTurnCompleteMessage | ServerToolCallMessage | ServerUserIdUpdatedMessage | ServerInterruptedMessage | ServerErrorMessage | ServerPongMessage;
608
681
 
609
682
  /**
610
683
  * Connection state
@@ -620,6 +693,8 @@ type LiveSpeechEventMap = {
620
693
  reconnecting: ReconnectingEvent;
621
694
  sessionStarted: SessionStartedEvent;
622
695
  sessionEnded: SessionEndedEvent;
696
+ sessionWarning: SessionWarningEvent;
697
+ sessionGoodbye: SessionGoodbyeEvent;
623
698
  ready: ReadyEvent;
624
699
  userTranscript: UserTranscriptEvent;
625
700
  response: ResponseEvent;
@@ -912,4 +987,4 @@ declare class AudioEncoder {
912
987
  wrapWav(data: Uint8Array): Uint8Array;
913
988
  }
914
989
 
915
- export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
990
+ export { AudioEncoder, type AudioEncoderOptions, type AudioEvent, type AudioHandler, type ClientMessage, type ClientMessageType, type ConnectedEvent, type ConnectionState, type DisconnectReason, type DisconnectedEvent, type ErrorCode, type ErrorEvent, type ErrorHandler, type FunctionParameters, LiveSpeechClient, type LiveSpeechConfig, type LiveSpeechEvent, type LiveSpeechEventMap, type LiveSpeechEventType, type PipelineMode, type ReadyEvent, type ReconnectingEvent, Region, Region as RegionType, type ResolvedConfig, type ResponseEvent, type ResponseHandler, type ServerMessage, type ServerMessageType, type SessionConfig, type SessionDurationConfig, type SessionEndedEvent, type SessionStartedEvent, type Tool, type ToolCallEvent, type TurnCompleteEvent, type UserTranscriptEvent, type UserTranscriptHandler, createWavHeader, decodeBase64ToAudio, encodeAudioToBase64, extractPcmFromWav, float32ToInt16, getEndpointForRegion, int16ToFloat32, int16ToUint8, isValidRegion, uint8ToInt16, wrapPcmInWav };
package/dist/index.js CHANGED
@@ -728,14 +728,22 @@ var LiveSpeechClient = class {
728
728
  };
729
729
  this.on("sessionStarted", onSessionStarted);
730
730
  this.on("error", onError);
731
+ const sessionDuration = config?.sessionDuration;
732
+ const hasSessionDuration = typeof sessionDuration?.maxSeconds === "number";
731
733
  this.connection.send({
732
734
  action: "startSession",
733
735
  ...config?.prePrompt && { prePrompt: config.prePrompt },
734
736
  ...config?.language && { language: config.language },
737
+ ...config?.outputLanguage && { outputLanguage: config.outputLanguage },
735
738
  pipelineMode: config?.pipelineMode ?? "live",
736
739
  ...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
737
740
  allowHarmCategory: config?.allowHarmCategory ?? false,
738
- ...config?.tools && config.tools.length > 0 && { tools: config.tools }
741
+ ...config?.tools && config.tools.length > 0 && { tools: config.tools },
742
+ ...hasSessionDuration && {
743
+ sessionMaxDurationSeconds: sessionDuration.maxSeconds,
744
+ enableSessionWarning: sessionDuration.enableWarning ?? false,
745
+ enableSessionGoodbye: sessionDuration.enableGoodbye ?? false
746
+ }
739
747
  });
740
748
  });
741
749
  }
@@ -1075,6 +1083,24 @@ var LiveSpeechClient = class {
1075
1083
  timestamp: message.timestamp
1076
1084
  });
1077
1085
  break;
1086
+ case "session_warning": {
1087
+ const warningEvent = {
1088
+ type: "sessionWarning",
1089
+ remainingSeconds: message.remainingSeconds ?? 0,
1090
+ timestamp: message.timestamp
1091
+ };
1092
+ this.emit("sessionWarning", warningEvent);
1093
+ break;
1094
+ }
1095
+ case "session_goodbye": {
1096
+ const goodbyeEvent = {
1097
+ type: "sessionGoodbye",
1098
+ remainingSeconds: message.remainingSeconds ?? 0,
1099
+ timestamp: message.timestamp
1100
+ };
1101
+ this.emit("sessionGoodbye", goodbyeEvent);
1102
+ break;
1103
+ }
1078
1104
  case "ready": {
1079
1105
  const readyEvent = {
1080
1106
  type: "ready",
package/dist/index.mjs CHANGED
@@ -689,14 +689,22 @@ var LiveSpeechClient = class {
689
689
  };
690
690
  this.on("sessionStarted", onSessionStarted);
691
691
  this.on("error", onError);
692
+ const sessionDuration = config?.sessionDuration;
693
+ const hasSessionDuration = typeof sessionDuration?.maxSeconds === "number";
692
694
  this.connection.send({
693
695
  action: "startSession",
694
696
  ...config?.prePrompt && { prePrompt: config.prePrompt },
695
697
  ...config?.language && { language: config.language },
698
+ ...config?.outputLanguage && { outputLanguage: config.outputLanguage },
696
699
  pipelineMode: config?.pipelineMode ?? "live",
697
700
  ...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
698
701
  allowHarmCategory: config?.allowHarmCategory ?? false,
699
- ...config?.tools && config.tools.length > 0 && { tools: config.tools }
702
+ ...config?.tools && config.tools.length > 0 && { tools: config.tools },
703
+ ...hasSessionDuration && {
704
+ sessionMaxDurationSeconds: sessionDuration.maxSeconds,
705
+ enableSessionWarning: sessionDuration.enableWarning ?? false,
706
+ enableSessionGoodbye: sessionDuration.enableGoodbye ?? false
707
+ }
700
708
  });
701
709
  });
702
710
  }
@@ -1036,6 +1044,24 @@ var LiveSpeechClient = class {
1036
1044
  timestamp: message.timestamp
1037
1045
  });
1038
1046
  break;
1047
+ case "session_warning": {
1048
+ const warningEvent = {
1049
+ type: "sessionWarning",
1050
+ remainingSeconds: message.remainingSeconds ?? 0,
1051
+ timestamp: message.timestamp
1052
+ };
1053
+ this.emit("sessionWarning", warningEvent);
1054
+ break;
1055
+ }
1056
+ case "session_goodbye": {
1057
+ const goodbyeEvent = {
1058
+ type: "sessionGoodbye",
1059
+ remainingSeconds: message.remainingSeconds ?? 0,
1060
+ timestamp: message.timestamp
1061
+ };
1062
+ this.emit("sessionGoodbye", goodbyeEvent);
1063
+ break;
1064
+ }
1039
1065
  case "ready": {
1040
1066
  const readyEvent = {
1041
1067
  type: "ready",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@drawdream/livespeech",
3
- "version": "0.1.12",
3
+ "version": "0.1.14",
4
4
  "description": "Real-time speech-to-speech AI conversation SDK",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -56,12 +56,12 @@
56
56
  "devDependencies": {
57
57
  "@types/node": "^20.10.0",
58
58
  "@types/ws": "^8.5.10",
59
- "@typescript-eslint/eslint-plugin": "^6.13.0",
60
- "@typescript-eslint/parser": "^6.13.0",
61
- "eslint": "^8.55.0",
59
+ "@typescript-eslint/eslint-plugin": "^7.18.0",
60
+ "@typescript-eslint/parser": "^7.18.0",
61
+ "eslint": "^8.56.0",
62
62
  "tsup": "^8.0.1",
63
63
  "typescript": "^5.3.0",
64
- "vitest": "^1.0.0"
64
+ "vitest": "^4.0.0"
65
65
  },
66
66
  "peerDependencies": {
67
67
  "typescript": ">=5.0.0"