@mastra/voice-google-gemini-live 0.0.0-add-libsql-changeset-20250910154739

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2786 @@
1
+ import { randomUUID } from 'crypto';
2
+ import { MastraVoice } from '@mastra/core/voice';
3
+ import { WebSocket } from 'ws';
4
+ import { PassThrough } from 'stream';
5
+ import { EventEmitter } from 'events';
6
+ import { GoogleAuth } from 'google-auth-library';
7
+
8
// src/index.ts
// Default audio settings used by AudioStreamManager (validateAudioFormat
// checks input against inputSampleRate/channels; duration math assumes
// 16-bit samples).
var DEFAULT_AUDIO_CONFIG = {
  inputSampleRate: 16e3,
  // Hz — rate expected for audio sent to Gemini
  outputSampleRate: 24e3,
  // Hz — rate for audio received back
  encoding: "pcm16",
  // 16-bit signed PCM
  channels: 1
  // mono only
};
15
// Coordinates PCM16 audio exchange with the Gemini Live API:
// - per-response PassThrough "speaker" output streams, capped by count and age
// - Int16Array <-> base64 conversion and chunk validation/splitting
// - optional throttled sending of realtime chunks via an injected sender
// - buffered collection of a whole input stream for transcription requests
var AudioStreamManager = class {
  // responseId -> PassThrough stream (tagged with `id` and `created` below)
  speakerStreams = /* @__PURE__ */ new Map();
  // responseId whose stream receives the next incoming audio chunk
  currentResponseId;
  MAX_CONCURRENT_STREAMS = 10;
  STREAM_TIMEOUT_MS = 3e4;
  // 30 seconds
  debug;
  audioConfig;
  maxChunkSize = 32768;
  // 32KB max chunk size per Gemini limits
  minSendInterval = 0;
  // No throttling - let the stream control the pace
  lastSendTime = 0;
  // chunks queued while throttled; with minSendInterval = 0 the throttle
  // branch in sendAudioChunk never triggers, so this stays empty
  pendingChunks = [];
  pendingTimer;
  // callback injected via setSender() that delivers messages to Gemini
  sendToGemini;
  // Audio buffer management constants
  MAX_BUFFER_SIZE = 50 * 1024 * 1024;
  // 50MB maximum buffer size
  MAX_AUDIO_DURATION = 300;
  // 5 minutes maximum audio duration
  constructor(audioConfig, debug = false) {
    this.audioConfig = audioConfig;
    this.debug = debug;
  }
  /**
   * Provide a sender callback that will be used to deliver messages to Gemini.
   * Until this is set, processChunk() logs and drops chunks.
   */
  setSender(sender) {
    this.sendToGemini = sender;
  }
  /**
   * Get a copy of the default audio configuration.
   */
  static getDefaultAudioConfig() {
    return { ...DEFAULT_AUDIO_CONFIG };
  }
  /**
   * Create a merged audio configuration; custom fields override defaults.
   */
  static createAudioConfig(customConfig) {
    return {
      ...DEFAULT_AUDIO_CONFIG,
      ...customConfig
    };
  }
  /**
   * Get the current response ID for the next audio chunk
   */
  getCurrentResponseId() {
    return this.currentResponseId;
  }
  /**
   * Set the current response ID for the next audio chunk
   */
  setCurrentResponseId(responseId) {
    this.currentResponseId = responseId;
  }
  /**
   * Get the speaker stream for the current response ID, or null when there
   * is no current response or no stream registered for it.
   */
  getCurrentSpeakerStream() {
    const currentResponseId = this.getCurrentResponseId();
    if (!currentResponseId) {
      return null;
    }
    const currentStream = this.speakerStreams.get(currentResponseId);
    return currentStream ? currentStream : null;
  }
  /**
   * Register a speaker stream for a response, tagging it with its id and
   * creation time, then enforce the concurrent-stream cap.
   */
  addSpeakerStream(responseId, stream) {
    const streamWithMetadata = Object.assign(stream, {
      id: responseId,
      created: Date.now()
    });
    this.speakerStreams.set(responseId, streamWithMetadata);
    this.log(`Added speaker stream for response: ${responseId}`);
    this.enforceStreamLimits();
  }
  /**
   * Remove a specific speaker stream: end() it gracefully, then destroy()
   * 1s later if it has not finished on its own.
   */
  removeSpeakerStream(responseId) {
    const stream = this.speakerStreams.get(responseId);
    if (stream && !stream.destroyed) {
      stream.end();
      setTimeout(() => {
        if (!stream.destroyed) {
          stream.destroy();
          this.log(`Force destroyed stream for response: ${responseId}`);
        }
      }, 1e3);
    }
    this.speakerStreams.delete(responseId);
    this.log(`Removed speaker stream for response: ${responseId}`);
  }
  /**
   * Clean up all speaker streams and clear the current response ID.
   * Per-stream errors are logged and the entry dropped; an outer failure
   * falls back to clearing the whole map.
   */
  cleanupSpeakerStreams() {
    try {
      if (this.speakerStreams.size === 0) {
        return;
      }
      this.log(`Cleaning up ${this.speakerStreams.size} speaker streams`);
      for (const [responseId, stream] of this.speakerStreams.entries()) {
        try {
          if (!stream.destroyed) {
            stream.end();
            setTimeout(() => {
              if (!stream.destroyed) {
                stream.destroy();
                this.log(`Force destroyed stream for response: ${responseId}`);
              }
            }, 1e3);
          }
          this.speakerStreams.delete(responseId);
          this.log(`Cleaned up speaker stream for response: ${responseId}`);
        } catch (streamError) {
          this.log(`Error cleaning up stream ${responseId}:`, streamError);
          this.speakerStreams.delete(responseId);
        }
      }
      this.currentResponseId = void 0;
      this.log("All speaker streams cleaned up");
    } catch (error) {
      this.log("Error during speaker stream cleanup:", error);
      this.speakerStreams.clear();
      this.currentResponseId = void 0;
    }
  }
  /**
   * Clean up streams older than STREAM_TIMEOUT_MS to prevent memory leaks.
   */
  cleanupStaleStreams() {
    try {
      const now = Date.now();
      const staleCutoff = now - this.STREAM_TIMEOUT_MS;
      const staleStreams = [];
      for (const [responseId, stream] of this.speakerStreams.entries()) {
        const created = stream.created || 0;
        if (created < staleCutoff) {
          staleStreams.push(responseId);
        }
      }
      if (staleStreams.length > 0) {
        this.log(`Cleaning up ${staleStreams.length} stale streams`);
        for (const responseId of staleStreams) {
          const stream = this.speakerStreams.get(responseId);
          if (stream && !stream.destroyed) {
            stream.end();
          }
          this.speakerStreams.delete(responseId);
        }
      }
    } catch (error) {
      this.log("Error cleaning up stale streams:", error);
    }
  }
  /**
   * Enforce MAX_CONCURRENT_STREAMS by ending and removing the oldest
   * streams (by `created` timestamp) when the cap is exceeded.
   */
  enforceStreamLimits() {
    try {
      if (this.speakerStreams.size <= this.MAX_CONCURRENT_STREAMS) {
        return;
      }
      this.log(
        `Stream limit exceeded (${this.speakerStreams.size}/${this.MAX_CONCURRENT_STREAMS}), cleaning up oldest streams`
      );
      const sortedStreams = Array.from(this.speakerStreams.entries()).sort(
        ([, a], [, b]) => (a.created || 0) - (b.created || 0)
      );
      const streamsToRemove = sortedStreams.slice(0, this.speakerStreams.size - this.MAX_CONCURRENT_STREAMS);
      for (const [responseId, stream] of streamsToRemove) {
        if (!stream.destroyed) {
          stream.end();
        }
        this.speakerStreams.delete(responseId);
        this.log(`Removed old stream for response: ${responseId}`);
      }
    } catch (error) {
      this.log("Error enforcing stream limits:", error);
    }
  }
  /**
   * Get information about current streams for debugging
   */
  getStreamInfo() {
    const streamDetails = Array.from(this.speakerStreams.entries()).map(([responseId, stream]) => ({
      responseId,
      created: stream.created || 0,
      destroyed: stream.destroyed
    }));
    return {
      totalStreams: this.speakerStreams.size,
      currentResponseId: this.currentResponseId,
      streamDetails
    };
  }
  /**
   * Convert Int16Array audio data to a base64 string for WebSocket
   * transmission. Samples are written explicitly little-endian via
   * DataView, so the result is platform-independent.
   */
  int16ArrayToBase64(int16Array) {
    const buffer = new ArrayBuffer(int16Array.length * 2);
    const view = new DataView(buffer);
    for (let i = 0; i < int16Array.length; i++) {
      view.setInt16(i * 2, int16Array[i], true);
    }
    const nodeBuffer = Buffer.from(buffer);
    return nodeBuffer.toString("base64");
  }
  /**
   * Convert a base64 string to Int16Array audio data.
   * The Int16Array is a view over the decoded Buffer, not a copy.
   */
  base64ToInt16Array(base64Audio) {
    try {
      const buffer = Buffer.from(base64Audio, "base64");
      if (buffer.length % 2 !== 0) {
        throw new Error("Invalid audio data: buffer length must be even for 16-bit audio");
      }
      return new Int16Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 2);
    } catch (error) {
      throw new Error(
        `Failed to decode base64 audio data: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  /**
   * Validate and convert audio data to the required format for Gemini Live API
   * Gemini Live expects 16kHz PCM16 for input
   */
  validateAndConvertAudioInput(audioData) {
    if (Buffer.isBuffer(audioData)) {
      if (audioData.length % 2 !== 0) {
        throw new Error("Audio buffer length must be even for 16-bit audio");
      }
      return new Int16Array(audioData.buffer, audioData.byteOffset, audioData.byteLength / 2);
    }
    if (audioData instanceof Int16Array) {
      return audioData;
    }
    throw new Error("Unsupported audio data format. Expected Buffer or Int16Array");
  }
  /**
   * Process an audio chunk for streaming: accept Int16Array, Buffer or
   * Uint8Array, validate even byte length, and return base64.
   */
  processAudioChunk(chunk) {
    let int16Array;
    if (chunk instanceof Int16Array) {
      int16Array = chunk;
    } else if (Buffer.isBuffer(chunk)) {
      if (chunk.length % 2 !== 0) {
        throw new Error("Audio chunk length must be even for 16-bit audio");
      }
      int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / 2);
    } else if (chunk instanceof Uint8Array) {
      if (chunk.length % 2 !== 0) {
        throw new Error("Audio chunk length must be even for 16-bit audio");
      }
      int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / 2);
    } else {
      throw new Error("Unsupported audio chunk format");
    }
    return this.int16ArrayToBase64(int16Array);
  }
  /**
   * Validate audio format for Gemini Live API requirements:
   * a sample-rate mismatch only logs a warning, a channel mismatch throws.
   */
  validateAudioFormat(sampleRate, channels) {
    if (sampleRate && sampleRate !== this.audioConfig.inputSampleRate) {
      this.log(
        `Warning: Audio sample rate ${sampleRate}Hz does not match expected ${this.audioConfig.inputSampleRate}Hz`
      );
    }
    if (channels && channels !== this.audioConfig.channels) {
      throw new Error(`Unsupported channel count: ${channels}. Gemini Live API requires mono audio (1 channel)`);
    }
  }
  /**
   * Create an audio message for the Gemini Live API.
   * "input" produces a client_content turn (camelCase keys, turnComplete);
   * anything else produces a realtime_input chunk (snake_case keys) —
   * presumably matching the two wire formats the API accepts; confirm
   * against the Live API reference when changing either shape.
   */
  createAudioMessage(audioData, messageType = "realtime") {
    if (messageType === "input") {
      return {
        client_content: {
          turns: [
            {
              role: "user",
              parts: [
                {
                  inlineData: {
                    mimeType: "audio/pcm",
                    data: audioData
                  }
                }
              ]
            }
          ],
          turnComplete: true
        }
      };
    } else {
      return {
        realtime_input: {
          media_chunks: [
            {
              mime_type: "audio/pcm",
              data: audioData
            }
          ]
        }
      };
    }
  }
  /**
   * Get a speaker stream by response ID
   */
  getSpeakerStream(responseId) {
    return this.speakerStreams.get(responseId);
  }
  /**
   * Create and register a new PassThrough speaker stream for a response ID.
   */
  createSpeakerStream(responseId) {
    const stream = new PassThrough();
    stream.id = responseId;
    stream.created = Date.now();
    this.addSpeakerStream(responseId, stream);
    return stream;
  }
  /**
   * Get the number of active streams
   */
  getActiveStreamCount() {
    return this.speakerStreams.size;
  }
  /**
   * Check if a specific response ID has an active stream
   */
  hasStream(responseId) {
    return this.speakerStreams.has(responseId);
  }
  /**
   * Get all active response IDs
   */
  getActiveResponseIds() {
    return Array.from(this.speakerStreams.keys());
  }
  /**
   * Reset the manager state (useful for testing or reconnection)
   */
  reset() {
    this.cleanupSpeakerStreams();
    this.currentResponseId = void 0;
    this.log("AudioStreamManager reset");
  }
  /**
   * Validate a single chunk: non-empty, within maxChunkSize, even length.
   */
  validateAudioChunk(chunk) {
    if (chunk.length === 0) {
      throw new Error("Audio chunk cannot be empty");
    }
    if (chunk.length > this.maxChunkSize) {
      throw new Error(`Audio chunk size ${chunk.length} exceeds maximum allowed size ${this.maxChunkSize}`);
    }
    if (chunk.length % 2 !== 0) {
      throw new Error("Audio chunk length must be even for 16-bit audio");
    }
  }
  /**
   * Send an audio chunk, validating first. If sent too soon after the
   * previous chunk (minSendInterval), queue it and schedule a flush;
   * otherwise send now and drain the queue.
   */
  sendAudioChunk(chunk) {
    try {
      this.validateAudioChunk(chunk);
      const now = Date.now();
      if (now - this.lastSendTime < this.minSendInterval) {
        this.pendingChunks.push({ chunk, timestamp: now });
        const delay = this.minSendInterval - (now - this.lastSendTime);
        if (!this.pendingTimer) {
          this.pendingTimer = setTimeout(
            () => {
              this.pendingTimer = void 0;
              this.processPendingChunks();
            },
            Math.max(0, delay)
          );
        }
        return;
      }
      this.processChunk(chunk);
      this.processPendingChunks();
    } catch (error) {
      this.log("Error sending audio chunk:", error);
      throw error;
    }
  }
  /**
   * Pump an entire readable stream through sendAudioChunk, splitting
   * oversized chunks. Resolves on "end", rejects on "error" or on any
   * validation/send failure (listeners removed in every case).
   */
  async handleAudioStream(stream) {
    return new Promise((resolve, reject) => {
      const cleanup = () => {
        stream.removeAllListeners();
      };
      stream.on("data", (chunk) => {
        try {
          if (chunk.length > this.maxChunkSize) {
            const chunks = this.splitAudioChunk(chunk);
            for (const subChunk of chunks) {
              this.validateAudioChunk(subChunk);
              this.sendAudioChunk(subChunk);
            }
          } else {
            this.validateAudioChunk(chunk);
            this.sendAudioChunk(chunk);
          }
        } catch (error) {
          cleanup();
          reject(error);
        }
      });
      stream.on("end", () => {
        cleanup();
        resolve();
      });
      stream.on("error", (error) => {
        cleanup();
        reject(error);
      });
    });
  }
  /**
   * Split a large chunk into subarray views of at most maxChunkSize bytes.
   */
  splitAudioChunk(chunk) {
    const chunks = [];
    let offset = 0;
    while (offset < chunk.length) {
      const size = Math.min(this.maxChunkSize, chunk.length - offset);
      chunks.push(chunk.subarray(offset, offset + size));
      offset += size;
    }
    return chunks;
  }
  /**
   * Calculate audio duration (seconds) from byte length, assuming 16-bit
   * samples (2 bytes each) at the given or configured input sample rate.
   */
  calculateAudioDuration(bufferLength, sampleRate) {
    const effectiveSampleRate = sampleRate || this.audioConfig.inputSampleRate;
    return bufferLength / (effectiveSampleRate * 2);
  }
  /**
   * Validate a complete buffer: non-empty, within MAX_BUFFER_SIZE, even
   * length, and within MAX_AUDIO_DURATION.
   */
  validateAudioBuffer(buffer) {
    if (buffer.length === 0) {
      throw new Error("Audio buffer cannot be empty");
    }
    if (buffer.length > this.MAX_BUFFER_SIZE) {
      throw new Error(
        `Audio buffer size ${buffer.length} exceeds maximum allowed size ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`
      );
    }
    if (buffer.length % 2 !== 0) {
      throw new Error("Audio buffer length must be even for 16-bit audio");
    }
    const duration = this.calculateAudioDuration(buffer.length);
    if (duration > this.MAX_AUDIO_DURATION) {
      throw new Error(
        `Audio duration ${duration.toFixed(2)}s exceeds maximum allowed duration ${this.MAX_AUDIO_DURATION}s`
      );
    }
  }
  /**
   * Process an audio buffer for transcription: validate even length,
   * compute duration, and return base64 plus size metadata.
   */
  processAudioBufferForTranscription(audioBuffer) {
    if (audioBuffer.length % 2 !== 0) {
      throw new Error("Invalid audio data: buffer length must be even for 16-bit audio");
    }
    const duration = this.calculateAudioDuration(audioBuffer.length);
    const base64Audio = audioBuffer.toString("base64");
    return {
      base64Audio,
      duration,
      size: audioBuffer.length
    };
  }
  /**
   * Concatenate collected chunks (after a total-size check) and prepare
   * the combined buffer for transcription.
   */
  processAudioChunksForTranscription(chunks, totalBufferSize) {
    if (totalBufferSize > this.MAX_BUFFER_SIZE) {
      throw new Error(`Audio data exceeds maximum size of ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`);
    }
    const audioBuffer = Buffer.concat(chunks);
    const result = this.processAudioBufferForTranscription(audioBuffer);
    return {
      audioBuffer,
      ...result
    };
  }
  /**
   * Validate that every chunk is a Buffer and the running total stays
   * within MAX_BUFFER_SIZE; returns total size and validity/error info.
   */
  validateAudioChunks(chunks) {
    let totalSize = 0;
    for (const chunk of chunks) {
      if (!Buffer.isBuffer(chunk)) {
        return { totalSize: 0, isValid: false, error: "Invalid chunk format" };
      }
      totalSize += chunk.length;
      if (totalSize > this.MAX_BUFFER_SIZE) {
        return {
          totalSize,
          isValid: false,
          error: `Total size ${totalSize} exceeds maximum allowed size ${this.MAX_BUFFER_SIZE}`
        };
      }
    }
    return { totalSize, isValid: true };
  }
  /**
   * Get audio buffer limits and configuration
   */
  getAudioBufferLimits() {
    return {
      maxBufferSize: this.MAX_BUFFER_SIZE,
      maxAudioDuration: this.MAX_AUDIO_DURATION,
      maxChunkSize: this.maxChunkSize
    };
  }
  /**
   * Get audio configuration
   */
  getAudioConfig() {
    return this.audioConfig;
  }
  /**
   * Log message if debug is enabled
   */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[AudioStreamManager] ${message}`, ...args);
    }
  }
  /**
   * Complete transcription workflow: collect the stream's chunks (bounded
   * by MAX_BUFFER_SIZE), then hand the combined base64 audio to
   * `sendAndAwaitTranscript` and resolve with the trimmed transcript.
   * Rejects on stream error, oversize data, or after `timeoutMs` with no
   * result. Note: the timeout path rejects without setting isResolved,
   * but the later resolve/reject on an already-settled promise is a no-op.
   * NOTE(review): the `onError` parameter is accepted but never used here.
   */
  async handleAudioTranscription(audioStream, sendAndAwaitTranscript, onError, timeoutMs = 3e4) {
    return new Promise((resolve, reject) => {
      const chunks = [];
      let isCleanedUp = false;
      let totalBufferSize = 0;
      let isResolved = false;
      const timeout = setTimeout(() => {
        if (!isResolved) {
          cleanup();
          reject(new Error(`Transcription timeout - no response received within ${timeoutMs / 1e3} seconds`));
        }
      }, timeoutMs);
      const onStreamData = (chunk) => {
        try {
          const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
          totalBufferSize += buffer.length;
          if (totalBufferSize > this.MAX_BUFFER_SIZE) {
            cleanup();
            reject(new Error(`Audio data exceeds maximum size of ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`));
            return;
          }
          chunks.push(buffer);
        } catch (error) {
          cleanup();
          reject(
            new Error(`Failed to process audio chunk: ${error instanceof Error ? error.message : "Unknown error"}`)
          );
        }
      };
      const onStreamError = (error) => {
        cleanup();
        reject(new Error(`Audio stream error: ${error.message}`));
      };
      const onStreamEnd = async () => {
        try {
          audioStream.removeListener("data", onStreamData);
          audioStream.removeListener("error", onStreamError);
          const result = this.processAudioChunksForTranscription(chunks, totalBufferSize);
          this.log("Processing audio for transcription:", {
            chunks: chunks.length,
            totalSize: result.size,
            duration: result.duration
          });
          try {
            const transcript = await sendAndAwaitTranscript(result.base64Audio);
            if (!isResolved) {
              isResolved = true;
              cleanup();
              resolve(transcript.trim());
            }
          } catch (error) {
            if (!isResolved) {
              isResolved = true;
              cleanup();
              reject(
                new Error(
                  `Failed to obtain transcription: ${error instanceof Error ? error.message : "Unknown error"}`
                )
              );
            }
          }
        } catch (error) {
          cleanup();
          reject(
            new Error(`Failed to process audio stream: ${error instanceof Error ? error.message : "Unknown error"}`)
          );
        }
      };
      const cleanup = () => {
        if (isCleanedUp) return;
        isCleanedUp = true;
        clearTimeout(timeout);
        audioStream.removeListener("data", onStreamData);
        audioStream.removeListener("error", onStreamError);
        audioStream.removeListener("end", onStreamEnd);
        chunks.length = 0;
      };
      audioStream.on("data", onStreamData);
      audioStream.on("error", onStreamError);
      audioStream.on("end", onStreamEnd);
    });
  }
  // Convert one chunk to base64, wrap it as a realtime_input message, and
  // deliver it via the injected sender (dropped with a log if none is set).
  processChunk(chunk) {
    const base64Audio = this.processAudioChunk(chunk);
    const message = this.createAudioMessage(base64Audio, "realtime");
    if (this.sendToGemini) {
      this.sendToGemini("realtime_input", message);
    } else {
      this.log("No sender configured for AudioStreamManager; dropping audio chunk");
    }
    this.lastSendTime = Date.now();
    this.log(`Sent audio chunk of size: ${chunk.length} bytes`);
  }
  // Drain queued chunks, sending each once minSendInterval has elapsed
  // since the previous send; reschedules itself when still throttled.
  processPendingChunks() {
    while (this.pendingChunks.length > 0) {
      const nextChunk = this.pendingChunks[0];
      const now = Date.now();
      if (nextChunk && now - this.lastSendTime >= this.minSendInterval) {
        this.pendingChunks.shift();
        this.processChunk(nextChunk.chunk);
      } else {
        const delay = this.minSendInterval - (now - this.lastSendTime);
        if (!this.pendingTimer) {
          this.pendingTimer = setTimeout(
            () => {
              this.pendingTimer = void 0;
              this.processPendingChunks();
            },
            Math.max(0, delay)
          );
        }
        break;
      }
    }
  }
};
688
+
689
+ // src/utils/errors.ts
690
// Error type for the Gemini Live voice provider: carries a machine-readable
// code, optional structured details, and the creation timestamp.
var GeminiLiveError = class extends Error {
  code;
  details;
  timestamp;
  constructor(code, message, details) {
    super(message);
    this.name = "GeminiLiveError";
    Object.assign(this, { code, details, timestamp: Date.now() });
  }
  /**
   * Shape the error as a plain payload suitable for event emission.
   */
  toEventData() {
    const { message, code, details, timestamp } = this;
    return { message, code, details, timestamp };
  }
};
710
+
711
+ // src/managers/ConnectionManager.ts
712
// Owns the WebSocket used to talk to the Gemini Live API: state queries,
// open/close lifecycle, and guarded sends.
var ConnectionManager = class {
  ws;
  eventEmitter;
  debug;
  timeoutMs;
  constructor(config) {
    this.eventEmitter = new EventEmitter();
    this.debug = config.debug;
    // `||` deliberately treats timeoutMs: 0 as "unset" -> 30s default
    this.timeoutMs = config.timeoutMs || 3e4;
  }
  /**
   * Set the WebSocket instance
   */
  setWebSocket(ws) {
    this.ws = ws;
  }
  /**
   * Get the current WebSocket instance
   */
  getWebSocket() {
    return this.ws;
  }
  /**
   * Check if WebSocket is connected
   */
  isConnected() {
    return this.ws?.readyState === WebSocket.OPEN;
  }
  /**
   * Check if WebSocket is connecting
   */
  isConnecting() {
    return this.ws?.readyState === WebSocket.CONNECTING;
  }
  /**
   * Check if WebSocket is closed
   */
  isClosed() {
    return this.ws?.readyState === WebSocket.CLOSED;
  }
  /**
   * Wait for the WebSocket to reach the OPEN state.
   *
   * Resolves immediately if already open; otherwise resolves on "open",
   * rejects on "error"/"close", and rejects with CONNECTION_FAILED after
   * `timeoutMs`. Bug fix: the timeout timer is now cleared on every
   * outcome — previously it was never cleared, so after a successful open
   * it still fired (detaching the listeners again) and kept the Node
   * event loop alive for up to `timeoutMs`.
   */
  async waitForOpen() {
    return new Promise((resolve, reject) => {
      if (!this.ws) {
        reject(new Error("WebSocket not initialized"));
        return;
      }
      if (this.ws.readyState === WebSocket.OPEN) {
        resolve();
        return;
      }
      let timeoutTimer;
      // Detach all listeners and cancel the timeout so exactly one
      // outcome settles the promise and nothing lingers afterwards.
      const cleanup = () => {
        if (timeoutTimer) {
          clearTimeout(timeoutTimer);
          timeoutTimer = void 0;
        }
        this.ws?.removeListener("open", onOpen);
        this.ws?.removeListener("error", onError);
        this.ws?.removeListener("close", onClose);
      };
      const onOpen = () => {
        cleanup();
        resolve();
      };
      const onError = (error) => {
        cleanup();
        reject(new Error(`WebSocket connection failed: ${error.message}`));
      };
      const onClose = () => {
        cleanup();
        reject(new Error("WebSocket connection closed before opening"));
      };
      this.ws.once("open", onOpen);
      this.ws.once("error", onError);
      this.ws.once("close", onClose);
      timeoutTimer = setTimeout(() => {
        cleanup();
        reject(new GeminiLiveError("connection_failed" /* CONNECTION_FAILED */, "WebSocket connection timeout"));
      }, this.timeoutMs);
    });
  }
  /**
   * Send data through the WebSocket.
   * @throws GeminiLiveError (CONNECTION_NOT_ESTABLISHED) when the socket
   *         is missing or not open — same checks as validateWebSocketState,
   *         which is now reused instead of duplicating the guard.
   */
  send(data) {
    this.validateWebSocketState();
    this.ws.send(data);
  }
  /**
   * Close the WebSocket connection and drop the reference.
   */
  close() {
    if (this.ws) {
      this.ws.close();
      this.ws = void 0;
    }
  }
  /**
   * Get the connection state as a string label.
   * NOTE(review): WebSocket.CLOSING falls through to "disconnected";
   * confirm whether callers would rather see "closed" for that state
   * before changing it.
   */
  getConnectionState() {
    if (!this.ws) return "disconnected";
    switch (this.ws.readyState) {
      case WebSocket.CONNECTING:
        return "connecting";
      case WebSocket.OPEN:
        return "connected";
      case WebSocket.CLOSED:
        return "closed";
      default:
        return "disconnected";
    }
  }
  /**
   * Validate WebSocket state for operations.
   * @throws GeminiLiveError (CONNECTION_NOT_ESTABLISHED) when the socket
   *         is missing or not open.
   */
  validateWebSocketState() {
    if (!this.ws) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket not initialized");
    }
    if (this.ws.readyState !== WebSocket.OPEN) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket is not open");
    }
  }
  /**
   * Log message if debug is enabled
   */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[ConnectionManager] ${message}`, ...args);
    }
  }
};
848
+
849
+ // src/managers/ContextManager.ts
850
// Keeps a rolling transcript of user/assistant turns with optional lossy
// compression once the history grows past a configurable threshold.
var ContextManager = class {
  contextHistory = [];
  maxEntries;
  maxContentLength;
  compressionThreshold;
  compressionEnabled;
  constructor(config = {}) {
    this.maxEntries = config.maxEntries || 100;
    this.maxContentLength = config.maxContentLength || 1e4;
    this.compressionThreshold = config.compressionThreshold || 50;
    this.compressionEnabled = config.compressionEnabled ?? false;
  }
  /**
   * Append a turn, truncating oversized content to maxContentLength + "...",
   * then trim (or compress, when enabled) once the history exceeds maxEntries.
   */
  addEntry(role, content) {
    const trimmed = content.length > this.maxContentLength ? content.substring(0, this.maxContentLength) + "..." : content;
    this.contextHistory.push({ role, content: trimmed, timestamp: Date.now() });
    if (this.contextHistory.length <= this.maxEntries) return;
    if (this.compressionEnabled) {
      this.compressContext();
    } else {
      this.contextHistory = this.contextHistory.slice(-this.maxEntries);
    }
  }
  /**
   * Snapshot copy of the full history.
   */
  getContextHistory() {
    return this.contextHistory.slice();
  }
  /**
   * History reduced to plain role/content pairs.
   */
  getContextArray() {
    return this.contextHistory.map(({ role, content }) => ({ role, content }));
  }
  /**
   * Drop all recorded turns.
   */
  clearContext() {
    this.contextHistory = [];
  }
  /**
   * Number of turns currently held.
   */
  getContextSize() {
    return this.contextHistory.length;
  }
  /**
   * Collapse the middle of the history into a single placeholder entry,
   * keeping the first and last third-of-threshold turns. No-op unless
   * compression is enabled and the history exceeds the threshold.
   */
  compressContext() {
    if (!this.compressionEnabled || this.contextHistory.length <= this.compressionThreshold) {
      return;
    }
    const keepCount = Math.floor(this.compressionThreshold / 3);
    const head = this.contextHistory.slice(0, keepCount);
    const tail = this.contextHistory.slice(-keepCount);
    const middle = this.contextHistory.slice(keepCount, -keepCount);
    this.contextHistory = middle.length > 0 ? [
      ...head,
      {
        role: "assistant",
        content: `[Compressed ${middle.length} previous messages]`,
        timestamp: Date.now()
      },
      ...tail
    ] : [...head, ...tail];
  }
  /**
   * Toggle compression behaviour at runtime.
   */
  setCompressionEnabled(enabled) {
    this.compressionEnabled = enabled;
  }
  /**
   * Aggregate stats over the history in a single pass: per-role counts and
   * the oldest/newest timestamps (nulls when the history is empty).
   */
  getContextSummary() {
    if (this.contextHistory.length === 0) {
      return {
        totalEntries: 0,
        userEntries: 0,
        assistantEntries: 0,
        oldestTimestamp: null,
        newestTimestamp: null
      };
    }
    let userEntries = 0;
    let assistantEntries = 0;
    let oldestTimestamp = Infinity;
    let newestTimestamp = -Infinity;
    for (const { role, timestamp } of this.contextHistory) {
      if (role === "user") userEntries += 1;
      else if (role === "assistant") assistantEntries += 1;
      if (timestamp < oldestTimestamp) oldestTimestamp = timestamp;
      if (timestamp > newestTimestamp) newestTimestamp = timestamp;
    }
    return {
      totalEntries: this.contextHistory.length,
      userEntries,
      assistantEntries,
      oldestTimestamp,
      newestTimestamp
    };
  }
  /**
   * Case-insensitive substring search, optionally restricted to one role.
   */
  searchContext(query, role) {
    const needle = query.toLowerCase();
    return this.contextHistory.filter(
      (entry) => (!role || entry.role === role) && entry.content.toLowerCase().includes(needle)
    );
  }
  /**
   * The most recent `count` turns.
   */
  getRecentEntries(count) {
    return this.contextHistory.slice(-count);
  }
  /**
   * All turns recorded for the given role.
   */
  getEntriesByRole(role) {
    return this.contextHistory.filter((entry) => entry.role === role);
  }
};
987
var AuthManager = class {
  // GoogleAuth client, created lazily by initializeVertexAI() (Vertex AI mode only).
  authClient;
  // Cached OAuth access token for Vertex AI requests.
  accessToken;
  // Absolute epoch-ms timestamp after which `accessToken` must be refreshed.
  // Unset until a token has actually been fetched.
  tokenExpirationTime;
  // Cache lifetime (ms) applied to each freshly fetched access token.
  tokenTtlMs;
  // Authentication configuration supplied by the caller.
  config;
  /**
   * @param config Either `apiKey` (Live API) or `vertexAI: true` plus `project`
   *   (with optional service-account settings). `tokenExpirationTime`, when
   *   given, is the OAuth token cache TTL in milliseconds (default 50 minutes).
   */
  constructor(config) {
    this.config = config;
    // Keep the configured TTL (a duration) separate from the computed expiry
    // timestamp. The previous code stored the duration in `tokenExpirationTime`
    // and then overwrote it with an absolute timestamp inside getAccessToken(),
    // so a caller-provided TTL was silently ignored on every refresh.
    this.tokenTtlMs = config.tokenExpirationTime ?? 50 * 60 * 1e3;
  }
  /**
   * Initialize authentication based on configuration.
   * No-op for API-key auth; sets up GoogleAuth for Vertex AI.
   * @throws GeminiLiveError when neither apiKey nor vertexAI is configured
   */
  async initialize() {
    if (this.config.vertexAI) {
      await this.initializeVertexAI();
    } else if (this.config.apiKey) {
      return;
    } else {
      throw new GeminiLiveError(
        "api_key_missing" /* API_KEY_MISSING */,
        "Either API key or Vertex AI configuration is required"
      );
    }
  }
  /**
   * Initialize Vertex AI authentication (GoogleAuth with cloud-platform scope).
   * @throws GeminiLiveError when the project id is missing or client creation fails
   */
  async initializeVertexAI() {
    if (!this.config.project) {
      throw new GeminiLiveError(
        "project_id_missing" /* PROJECT_ID_MISSING */,
        "Google Cloud project ID is required when using Vertex AI"
      );
    }
    const authOptions = {
      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
      projectId: this.config.project
    };
    if (this.config.serviceAccountKeyFile) {
      authOptions.keyFilename = this.config.serviceAccountKeyFile;
      this.log("Using service account key file for authentication:", this.config.serviceAccountKeyFile);
    }
    if (this.config.serviceAccountEmail) {
      // Impersonate the given service account instead of the default identity.
      authOptions.clientOptions = { subject: this.config.serviceAccountEmail };
      this.log("Using service account impersonation:", this.config.serviceAccountEmail);
    }
    try {
      this.authClient = new GoogleAuth(authOptions);
    } catch (error) {
      throw new GeminiLiveError(
        "authentication_failed" /* AUTHENTICATION_FAILED */,
        `Failed to initialize Vertex AI authentication: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  /**
   * Get an access token for Vertex AI, reusing the cached token while it is
   * still inside its TTL window.
   * @throws GeminiLiveError when not configured for Vertex AI, the client is
   *   uninitialized, or the token fetch fails
   */
  async getAccessToken() {
    if (!this.config.vertexAI) {
      throw new GeminiLiveError("authentication_failed" /* AUTHENTICATION_FAILED */, "Vertex AI authentication not configured");
    }
    if (!this.authClient) {
      throw new GeminiLiveError("authentication_failed" /* AUTHENTICATION_FAILED */, "Authentication client not initialized");
    }
    if (this.accessToken && this.tokenExpirationTime && Date.now() < this.tokenExpirationTime) {
      return this.accessToken;
    }
    try {
      const client = await this.authClient.getClient();
      const token = await client.getAccessToken();
      if (!token.token) {
        throw new Error("No access token received");
      }
      this.accessToken = token.token;
      // Honor the configured TTL instead of a hard-coded 50 minutes.
      this.tokenExpirationTime = Date.now() + this.tokenTtlMs;
      this.log("Successfully obtained new access token");
      return this.accessToken;
    } catch (error) {
      throw new GeminiLiveError(
        "authentication_failed" /* AUTHENTICATION_FAILED */,
        `Failed to get access token: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  /** Get the API key, or undefined when Vertex AI auth is in use. */
  getApiKey() {
    if (this.config.vertexAI) {
      return void 0;
    }
    return this.config.apiKey;
  }
  /** Check if Vertex AI authentication is in use. */
  isUsingVertexAI() {
    return this.config.vertexAI === true;
  }
  /** Check if a usable authentication method is configured. */
  isConfigured() {
    return !!(this.config.apiKey || this.config.vertexAI && this.config.project);
  }
  /** Check whether a cached Vertex AI token exists and is still unexpired. */
  hasValidToken() {
    if (!this.config.vertexAI) return false;
    return !!(this.accessToken && this.tokenExpirationTime && Date.now() < this.tokenExpirationTime);
  }
  /** Drop any cached token so the next request re-authenticates. */
  clearCache() {
    this.accessToken = void 0;
    this.tokenExpirationTime = void 0;
  }
  /** Get a shallow copy of the authentication configuration. */
  getConfig() {
    return { ...this.config };
  }
  /** Log a message when debug mode is enabled. */
  log(message, ...args) {
    if (this.config.debug) {
      console.log(`[AuthManager] ${message}`, ...args);
    }
  }
};
1122
var EventManager = class {
  eventEmitter;
  debug;
  eventCounts = {};
  /**
   * @param config.debug When true, all event activity is traced to the console.
   */
  constructor(config) {
    this.eventEmitter = new EventEmitter();
    this.debug = config.debug;
  }
  /**
   * Emit an event with a payload, recording the emission in the counters.
   * @returns true when at least one listener received the event
   */
  emit(event, data) {
    this.incrementEventCount(event);
    const delivered = this.eventEmitter.emit(event, data);
    // log() is itself a no-op unless debug is enabled.
    this.log(`Emitted event: ${event}`, data);
    return delivered;
  }
  /** Subscribe a listener to an event. */
  on(event, callback) {
    this.eventEmitter.on(event, callback);
    this.log(`Added listener for event: ${event}`);
  }
  /** Unsubscribe a listener from an event. */
  off(event, callback) {
    this.eventEmitter.off(event, callback);
    this.log(`Removed listener for event: ${event}`);
  }
  /** Subscribe a listener that fires at most once. */
  once(event, callback) {
    this.eventEmitter.once(event, callback);
    this.log(`Added one-time listener for event: ${event}`);
  }
  /** Drop every listener for one event, or for all events when omitted. */
  removeAllListeners(event) {
    this.eventEmitter.removeAllListeners(event);
    this.log(`Removed all listeners${event ? ` for event: ${event}` : ""}`);
  }
  /** Number of listeners currently attached to an event. */
  getListenerCount(event) {
    return this.eventEmitter.listenerCount(event);
  }
  /** Map of event name to attached listener count. */
  getEventListenerInfo() {
    return Object.fromEntries(
      this.eventEmitter.eventNames().map((name) => {
        const key = typeof name === "string" ? name : name.toString();
        return [key, this.eventEmitter.listenerCount(name)];
      })
    );
  }
  /** Copy of the per-event emission counters. */
  getEventCounts() {
    return Object.assign({}, this.eventCounts);
  }
  /** Reset all emission counters to zero. */
  resetEventCounts() {
    this.eventCounts = {};
  }
  /** Remove every listener and reset the counters. */
  cleanup() {
    this.eventEmitter.removeAllListeners();
    this.resetEventCounts();
    this.log("Cleaned up all event listeners");
  }
  /** Access the underlying Node EventEmitter. */
  getEventEmitter() {
    return this.eventEmitter;
  }
  /** Bump the emission counter for one event. */
  incrementEventCount(event) {
    const seen = this.eventCounts[event] || 0;
    this.eventCounts[event] = seen + 1;
  }
  /** Console logger, active only in debug mode. */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[EventManager] ${message}`, ...args);
    }
  }
};
1238
+
1239
// src/index.ts
// Defaults applied when the caller does not specify a model or prebuilt voice.
var DEFAULT_MODEL = "gemini-2.0-flash-exp";
var DEFAULT_VOICE = "Puck";
1242
+ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
1243
+ ws;
1244
+ eventManager;
1245
+ state = "disconnected";
1246
+ sessionHandle;
1247
+ debug;
1248
+ audioConfig;
1249
+ queue = [];
1250
+ // Managers
1251
+ connectionManager;
1252
+ contextManager;
1253
+ authManager;
1254
+ // Audio chunk concatenation - optimized stream management
1255
+ audioStreamManager;
1256
+ // Session management properties
1257
+ sessionId;
1258
+ sessionStartTime;
1259
+ isResuming = false;
1260
+ sessionDurationTimeout;
1261
+ // Tool integration properties
1262
+ tools;
1263
+ runtimeContext;
1264
+ // Store the configuration options
1265
+ options;
1266
+ /**
1267
+ * Normalize configuration to ensure proper VoiceConfig format
1268
+ * Handles backward compatibility with direct GeminiLiveVoiceConfig
1269
+ * @private
1270
+ */
1271
+ static normalizeConfig(config) {
1272
+ if ("realtimeConfig" in config || "speechModel" in config || "listeningModel" in config) {
1273
+ return config;
1274
+ }
1275
+ const geminiConfig = config;
1276
+ return {
1277
+ speechModel: {
1278
+ name: geminiConfig.model || DEFAULT_MODEL,
1279
+ apiKey: geminiConfig.apiKey
1280
+ },
1281
+ speaker: geminiConfig.speaker || DEFAULT_VOICE,
1282
+ realtimeConfig: {
1283
+ model: geminiConfig.model || DEFAULT_MODEL,
1284
+ apiKey: geminiConfig.apiKey,
1285
+ options: geminiConfig
1286
+ }
1287
+ };
1288
+ }
1289
  /**
   * Creates a new GeminiLiveVoice instance.
   *
   * Normalizes the incoming config, validates that some form of auth is
   * present, then wires up the audio/event/connection/context/auth managers.
   *
   * @param config Configuration options (bare GeminiLiveVoiceConfig or a full
   *   VoiceConfig with `realtimeConfig`)
   * @throws GeminiLiveError when no API key is given and Vertex AI is not
   *   enabled, or when Vertex AI is enabled without a project id
   */
  constructor(config = {}) {
    const normalizedConfig = _GeminiLiveVoice.normalizeConfig(config);
    super(normalizedConfig);
    // Runtime options live under realtimeConfig.options after normalization.
    this.options = normalizedConfig.realtimeConfig?.options || {};
    const apiKey = this.options.apiKey;
    // NOTE(review): the message mentions GOOGLE_API_KEY, but this block does
    // not read process.env itself — presumably resolved upstream; confirm.
    if (!apiKey && !this.options.vertexAI) {
      throw new GeminiLiveError(
        "api_key_missing" /* API_KEY_MISSING */,
        "Google API key is required. Set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
      );
    }
    this.debug = this.options.debug || false;
    // Caller-supplied audio settings override the defaults key-by-key.
    this.audioConfig = {
      ...AudioStreamManager.getDefaultAudioConfig(),
      ...this.options.audioConfig
    };
    this.audioStreamManager = new AudioStreamManager(this.audioConfig, this.debug);
    // Outbound audio messages are funneled through this instance's sendEvent.
    this.audioStreamManager.setSender((type, message) => this.sendEvent(type, message));
    this.eventManager = new EventManager({ debug: this.debug });
    this.connectionManager = new ConnectionManager({ debug: this.debug, timeoutMs: 3e4 });
    this.contextManager = new ContextManager({
      maxEntries: 100,
      compressionThreshold: 50,
      compressionEnabled: this.options.sessionConfig?.contextCompression ?? false
    });
    this.authManager = new AuthManager({
      apiKey: this.options.apiKey,
      vertexAI: this.options.vertexAI,
      project: this.options.project,
      serviceAccountKeyFile: this.options.serviceAccountKeyFile,
      serviceAccountEmail: this.options.serviceAccountEmail,
      debug: this.debug,
      tokenExpirationTime: this.options.tokenExpirationTime
    });
    // NOTE(review): this validation runs after the managers are constructed;
    // it could run first, but the throw still prevents a usable instance.
    if (this.options.vertexAI && !this.options.project) {
      throw new GeminiLiveError(
        "project_id_missing" /* PROJECT_ID_MISSING */,
        "Google Cloud project ID is required when using Vertex AI. Set GOOGLE_CLOUD_PROJECT environment variable or pass project to constructor"
      );
    }
  }
1335
+ /**
1336
+ * Register an event listener
1337
+ * @param event Event name (e.g., 'speaking', 'writing', 'error', 'speaker')
1338
+ * @param callback Callback function that receives event data
1339
+ *
1340
+ * @example
1341
+ * ```typescript
1342
+ * // Listen for audio responses
1343
+ * voice.on('speaking', ({ audio, audioData, sampleRate }) => {
1344
+ * console.log('Received audio chunk:', audioData.length);
1345
+ * });
1346
+ *
1347
+ * // Listen for text responses and transcriptions
1348
+ * voice.on('writing', ({ text, role }) => {
1349
+ * console.log(`${role}: ${text}`);
1350
+ * });
1351
+ *
1352
+ * // Listen for audio streams (for concatenated playback)
1353
+ * voice.on('speaker', (audioStream) => {
1354
+ * audioStream.pipe(playbackDevice);
1355
+ * });
1356
+ *
1357
+ * // Handle errors
1358
+ * voice.on('error', ({ message, code, details }) => {
1359
+ * console.error('Voice error:', message);
1360
+ * });
1361
+ * ```
1362
+ */
1363
+ on(event, callback) {
1364
+ try {
1365
+ this.eventManager.on(event, callback);
1366
+ this.log(`Event listener registered for: ${event}`);
1367
+ } catch (error) {
1368
+ this.log(`Failed to register event listener for ${event}:`, error);
1369
+ throw error;
1370
+ }
1371
+ }
1372
+ /**
1373
+ * Remove an event listener
1374
+ * @param event Event name
1375
+ * @param callback Callback function to remove
1376
+ */
1377
+ off(event, callback) {
1378
+ try {
1379
+ this.eventManager.off(event, callback);
1380
+ this.log(`Event listener removed for: ${event}`);
1381
+ } catch (error) {
1382
+ this.log(`Failed to remove event listener for ${event}:`, error);
1383
+ }
1384
+ }
1385
+ /**
1386
+ * Register a one-time event listener that automatically removes itself after the first emission
1387
+ * @param event Event name
1388
+ * @param callback Callback function that receives event data
1389
+ */
1390
+ once(event, callback) {
1391
+ try {
1392
+ this.eventManager.once(event, callback);
1393
+ this.log(`One-time event listener registered for: ${event}`);
1394
+ } catch (error) {
1395
+ this.log(`Failed to register one-time event listener for ${event}:`, error);
1396
+ throw error;
1397
+ }
1398
+ }
1399
+ /**
1400
+ * Emit an event to listeners with improved error handling
1401
+ * @private
1402
+ */
1403
+ emit(event, data) {
1404
+ try {
1405
+ const listenerCount = this.eventManager.getListenerCount(event);
1406
+ if (listenerCount === 0 && this.debug) {
1407
+ this.log(`No listeners for event: ${String(event)}`);
1408
+ }
1409
+ const result = this.eventManager.emit(event, data);
1410
+ if (this.debug && listenerCount > 0) {
1411
+ this.log(`Emitted event: ${String(event)} to ${listenerCount} listeners`);
1412
+ }
1413
+ return result;
1414
+ } catch (error) {
1415
+ this.log(`Error emitting event ${String(event)}:`, error);
1416
+ if (event !== "error") {
1417
+ try {
1418
+ this.eventManager.getEventEmitter().emit("error", {
1419
+ message: `Failed to emit event: ${String(event)}`,
1420
+ code: "event_emission_error",
1421
+ details: error
1422
+ });
1423
+ } catch (nestedError) {
1424
+ this.log("Critical: Failed to emit error event:", nestedError);
1425
+ }
1426
+ }
1427
+ return false;
1428
+ }
1429
+ }
1430
+ /**
1431
+ * Clean up event listeners to prevent memory leaks
1432
+ * @private
1433
+ */
1434
+ cleanupEventListeners() {
1435
+ try {
1436
+ const events = this.eventManager.getEventEmitter().eventNames();
1437
+ if (this.debug && events.length > 0) {
1438
+ this.log(
1439
+ "Cleaning up event listeners:",
1440
+ events.map((event) => `${String(event)}: ${this.eventManager.getListenerCount(String(event))}`).join(", ")
1441
+ );
1442
+ }
1443
+ this.eventManager.cleanup();
1444
+ this.log("Event listeners cleaned up");
1445
+ } catch (error) {
1446
+ this.log("Error cleaning up event listeners:", error);
1447
+ }
1448
+ }
1449
+ /**
1450
+ * Get current event listener information for debugging
1451
+ * @returns Object with event names and listener counts
1452
+ */
1453
+ getEventListenerInfo() {
1454
+ try {
1455
+ return this.eventManager.getEventListenerInfo();
1456
+ } catch (error) {
1457
+ this.log("Error getting event listener info:", error);
1458
+ return {};
1459
+ }
1460
+ }
1461
+ /**
1462
+ * Create and emit a standardized error
1463
+ * @private
1464
+ */
1465
+ createAndEmitError(code, message, details) {
1466
+ const error = new GeminiLiveError(code, message, details);
1467
+ this.log(`Error [${code}]: ${message}`, details);
1468
+ this.emit("error", error.toEventData());
1469
+ return error;
1470
+ }
1471
+ /**
1472
+ * Handle connection state validation with standardized errors
1473
+ * @private
1474
+ */
1475
+ validateConnectionState() {
1476
+ if (this.state !== "connected") {
1477
+ throw this.createAndEmitError(
1478
+ "not_connected" /* NOT_CONNECTED */,
1479
+ "Not connected to Gemini Live API. Call connect() first.",
1480
+ { currentState: this.state }
1481
+ );
1482
+ }
1483
+ }
1484
+ /**
1485
+ * Handle WebSocket state validation with standardized errors
1486
+ * @private
1487
+ */
1488
+ validateWebSocketState() {
1489
+ if (!this.connectionManager.isConnected()) {
1490
+ throw this.createAndEmitError("websocket_error" /* WEBSOCKET_ERROR */, "WebSocket is not open", {
1491
+ wsExists: !!this.connectionManager.getWebSocket(),
1492
+ readyState: this.connectionManager.getWebSocket()?.readyState,
1493
+ expectedState: WebSocket.OPEN
1494
+ });
1495
+ }
1496
+ }
1497
  /**
   * Establish connection to the Gemini Live API.
   *
   * Chooses the endpoint and auth headers (Vertex AI OAuth vs. Live API key),
   * opens the WebSocket, then either resumes a prior session or sends the
   * initial configuration, and finally waits for session creation.
   *
   * @param runtimeContext Optional runtime context stored for tool execution
   * @throws Rethrows any connection/auth failure after resetting state
   */
  async connect({ runtimeContext } = {}) {
    return this.traced(async () => {
      if (this.state === "connected") {
        this.log("Already connected to Gemini Live API");
        return;
      }
      this.runtimeContext = runtimeContext;
      this.emit("session", { state: "connecting" });
      try {
        let wsUrl;
        let headers = {};
        if (this.options.vertexAI) {
          // Vertex AI path: regional endpoint + OAuth bearer token.
          wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
          await this.authManager.initialize();
          const accessToken = await this.authManager.getAccessToken();
          headers = { headers: { Authorization: `Bearer ${accessToken}` } };
          this.log("Using Vertex AI authentication with OAuth token");
        } else {
          // Live API path: public endpoint + API key header.
          wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
          headers = {
            headers: {
              "x-goog-api-key": this.options.apiKey || "",
              "Content-Type": "application/json"
            }
          };
          this.log("Using Live API authentication with API key");
        }
        this.log("Connecting to:", wsUrl);
        this.ws = new WebSocket(wsUrl, void 0, headers);
        this.connectionManager.setWebSocket(this.ws);
        // Listeners must be attached before waiting so no message is missed.
        this.setupEventListeners();
        await this.connectionManager.waitForOpen();
        if (this.isResuming && this.sessionHandle) {
          // Resuming keeps the previous sessionId/startTime.
          await this.sendSessionResumption();
        } else {
          this.sendInitialConfig();
          this.sessionStartTime = Date.now();
          this.sessionId = randomUUID();
        }
        await this.waitForSessionCreated();
        this.state = "connected";
        this.emit("session", {
          state: "connected",
          config: {
            sessionId: this.sessionId,
            isResuming: this.isResuming,
            toolCount: Object.keys(this.tools || {}).length
          }
        });
        this.log("Successfully connected to Gemini Live API", {
          sessionId: this.sessionId,
          isResuming: this.isResuming,
          toolCount: Object.keys(this.tools || {}).length
        });
        // Optional watchdog that ends the session after maxDuration.
        if (this.options.sessionConfig?.maxDuration) {
          this.startSessionDurationMonitor();
        }
      } catch (error) {
        this.state = "disconnected";
        this.log("Connection failed", error);
        throw error;
      }
    }, "gemini-live.connect")();
  }
1564
  /**
   * Disconnect from the Gemini Live API.
   *
   * Teardown order matters: the session handle is captured (for resumption)
   * before the socket closes, and the 'disconnected' event is emitted before
   * listeners are removed so subscribers still observe it.
   */
  async disconnect() {
    if (this.state === "disconnected") {
      this.log("Already disconnected");
      return;
    }
    this.emit("session", { state: "disconnecting" });
    if (this.sessionDurationTimeout) {
      clearTimeout(this.sessionDurationTimeout);
      this.sessionDurationTimeout = void 0;
    }
    // Preserve the id as a resumption handle before tearing the session down.
    if (this.options.sessionConfig?.enableResumption && this.sessionId) {
      this.sessionHandle = this.sessionId;
      this.log("Session handle saved for resumption", { handle: this.sessionHandle });
    }
    if (this.ws) {
      this.connectionManager.close();
      this.ws = void 0;
    }
    this.audioStreamManager.cleanupSpeakerStreams();
    this.authManager.clearCache();
    this.state = "disconnected";
    this.isResuming = false;
    this.emit("session", { state: "disconnected" });
    // Listener cleanup must come after the final emit above.
    this.cleanupEventListeners();
    this.log("Disconnected from Gemini Live API", {
      sessionId: this.sessionId,
      sessionDuration: this.sessionStartTime ? Date.now() - this.sessionStartTime : void 0
    });
  }
1596
  /**
   * Send text to be converted to speech.
   *
   * Stream input is drained into a UTF-8 string first. Any per-turn runtime
   * options (speaker/language/modalities) are applied via a session.update
   * message before the text itself is sent as client_content.
   *
   * @param input Text string or a readable stream of text chunks
   * @param options Optional per-turn speaker/languageCode/responseModalities
   * @throws GeminiLiveError when not connected, input is empty, or send fails
   */
  async speak(input, options) {
    return this.traced(async () => {
      this.validateConnectionState();
      if (typeof input !== "string") {
        // Drain the stream into a single UTF-8 string.
        const chunks = [];
        for await (const chunk of input) {
          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
        }
        input = Buffer.concat(chunks).toString("utf-8");
      }
      // NOTE(review): empty text is reported with the INVALID_AUDIO_FORMAT
      // code even though this is a text input — confirm intended error code.
      if (input.trim().length === 0) {
        throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
      }
      this.addToContext("user", input);
      const textMessage = {
        client_content: {
          turns: [
            {
              role: "user",
              parts: [
                {
                  text: input
                }
              ]
            }
          ],
          turnComplete: true
        }
      };
      // Per-turn overrides are best-effort: failures are logged, not thrown.
      if (options && (options.speaker || options.languageCode || options.responseModalities)) {
        const updateMessage = {
          type: "session.update",
          session: {
            generation_config: {
              ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
              speech_config: {
                ...options.languageCode ? { language_code: options.languageCode } : {},
                ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
              }
            }
          }
        };
        try {
          this.sendEvent("session.update", updateMessage);
          this.log("Applied per-turn runtime options", options);
        } catch (error) {
          this.log("Failed to apply per-turn runtime options", error);
        }
      }
      try {
        this.sendEvent("client_content", textMessage);
        this.log("Text message sent", { text: input });
      } catch (error) {
        this.log("Failed to send text message", error);
        throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
      }
    }, "gemini-live.speak")();
  }
1657
+ /**
1658
+ * Send audio stream for processing
1659
+ */
1660
+ async send(audioData) {
1661
+ return this.traced(async () => {
1662
+ this.validateConnectionState();
1663
+ if ("readable" in audioData && typeof audioData.on === "function") {
1664
+ const stream = audioData;
1665
+ stream.on("data", (chunk) => {
1666
+ try {
1667
+ const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
1668
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1669
+ this.sendEvent("realtime_input", message);
1670
+ } catch (error) {
1671
+ this.log("Failed to process audio chunk", error);
1672
+ this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
1673
+ }
1674
+ });
1675
+ stream.on("error", (error) => {
1676
+ this.log("Audio stream error", error);
1677
+ this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
1678
+ });
1679
+ stream.on("end", () => {
1680
+ this.log("Audio stream ended");
1681
+ });
1682
+ } else {
1683
+ const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
1684
+ const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
1685
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1686
+ this.sendEvent("realtime_input", message);
1687
+ }
1688
+ }, "gemini-live.send")();
1689
+ }
1690
  /**
   * Process speech from an audio stream (traditional STT interface).
   *
   * Accumulates user-role 'writing' events into a transcript while the audio
   * is streamed for transcription; resolves on turnComplete.
   *
   * @param audioStream Readable stream of audio to transcribe
   * @param _options Currently unused
   * @returns The trimmed transcription text
   */
  async listen(audioStream, _options) {
    return this.traced(async () => {
      this.validateConnectionState();
      let transcriptionText = "";
      // Collect only user-side text (the transcription of the sent audio).
      const onWriting = (data) => {
        if (data.role === "user") {
          transcriptionText += data.text;
          this.log("Received transcription text:", { text: data.text, total: transcriptionText });
        }
      };
      // NOTE(review): these two handlers throw from inside emitter callbacks;
      // such throws do not reject the surrounding promise — verify intended.
      const onError = (error) => {
        throw new Error(`Transcription failed: ${error.message}`);
      };
      const onSession = (data) => {
        if (data.state === "disconnected") {
          throw new Error("Session disconnected during transcription");
        }
      };
      this.on("writing", onWriting);
      this.on("error", onError);
      this.on("session", onSession);
      try {
        const result = await this.audioStreamManager.handleAudioTranscription(
          audioStream,
          (base64Audio) => {
            // Resolve with the accumulated transcript when the turn completes;
            // reject on the first error event.
            return new Promise((resolve, reject) => {
              try {
                const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
                const cleanup = () => {
                  this.off("turnComplete", onTurnComplete);
                  this.off("error", onErr);
                };
                const onTurnComplete = () => {
                  cleanup();
                  resolve(transcriptionText.trim());
                };
                const onErr = (e) => {
                  cleanup();
                  reject(new Error(e.message));
                };
                this.on("turnComplete", onTurnComplete);
                this.on("error", onErr);
                this.sendEvent("client_content", message);
                this.log("Sent audio for transcription");
              } catch (err) {
                reject(err);
              }
            });
          },
          (error) => {
            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
          }
        );
        return result;
      } finally {
        // Always detach the accumulation listeners, success or failure.
        this.off("writing", onWriting);
        this.off("error", onError);
        this.off("session", onSession);
      }
    }, "gemini-live.listen")();
  }
1754
+ /**
1755
+ * Get available speakers/voices
1756
+ */
1757
+ async getSpeakers() {
1758
+ return this.traced(async () => {
1759
+ return [
1760
+ { voiceId: "Puck", description: "Conversational, friendly" },
1761
+ { voiceId: "Charon", description: "Deep, authoritative" },
1762
+ { voiceId: "Kore", description: "Neutral, professional" },
1763
+ { voiceId: "Fenrir", description: "Warm, approachable" }
1764
+ ];
1765
+ }, "gemini-live.getSpeakers")();
1766
+ }
1767
+ /**
1768
+ * Resume a previous session using a session handle
1769
+ */
1770
+ async resumeSession(handle, context) {
1771
+ if (this.state === "connected") {
1772
+ throw new Error("Cannot resume session while already connected. Disconnect first.");
1773
+ }
1774
+ this.log("Attempting to resume session", { handle });
1775
+ this.sessionHandle = handle;
1776
+ this.isResuming = true;
1777
+ if (context && context.length > 0) {
1778
+ this.contextManager.clearContext();
1779
+ for (const item of context) {
1780
+ this.contextManager.addEntry(item.role, item.content);
1781
+ }
1782
+ }
1783
+ try {
1784
+ await this.connect();
1785
+ this.log("Session resumed successfully", { handle, contextItems: context?.length || 0 });
1786
+ } catch (error) {
1787
+ this.isResuming = false;
1788
+ this.sessionHandle = void 0;
1789
+ throw new Error(`Failed to resume session: ${error instanceof Error ? error.message : "Unknown error"}`);
1790
+ }
1791
+ }
1792
  /**
   * Update session configuration during an active session.
   * Allows dynamic updates to voice, instructions, tools, and other settings.
   *
   * Builds a single session.update message from the supported fields, sends
   * it, and resolves when the server acknowledges with 'session.updated'
   * (or rejects on error/10s timeout). Model and auth settings cannot be
   * changed mid-session and are ignored with a warning.
   *
   * @param config Partial configuration to update
   * @throws Error if not connected or the update fails / times out
   *
   * @example
   * ```typescript
   * await voice.updateSessionConfig({ speaker: 'Charon' });
   * await voice.updateSessionConfig({ instructions: 'You are now a helpful coding assistant' });
   * await voice.updateSessionConfig({ tools: [{ name: 'new_tool', ... }] });
   * ```
   */
  async updateSessionConfig(config) {
    this.validateConnectionState();
    this.validateWebSocketState();
    return new Promise((resolve, reject) => {
      // Fields that cannot change mid-session are warned about, not applied.
      if (config.model) {
        this.log("Warning: Model cannot be changed during an active session. Ignoring model update.");
      }
      if (config.vertexAI !== void 0 || config.project !== void 0 || config.location !== void 0) {
        this.log("Warning: Authentication settings cannot be changed during an active session.");
      }
      const updateMessage = {
        type: "session.update",
        session: {}
      };
      let hasUpdates = false;
      if (config.speaker) {
        hasUpdates = true;
        updateMessage.session.generation_config = {
          ...updateMessage.session.generation_config,
          speech_config: {
            voice_config: {
              prebuilt_voice_config: {
                voice_name: config.speaker
              }
            }
          }
        };
        // Track the new voice locally as well.
        this.speaker = config.speaker;
        this.log("Updating speaker to:", config.speaker);
      }
      if (config.instructions !== void 0) {
        hasUpdates = true;
        updateMessage.session.system_instruction = {
          parts: [{ text: config.instructions }]
        };
        this.log("Updating instructions");
      }
      if (config.tools !== void 0) {
        hasUpdates = true;
        if (config.tools.length > 0) {
          updateMessage.session.tools = config.tools.map((tool) => ({
            function_declarations: [
              {
                name: tool.name,
                description: tool.description,
                parameters: tool.parameters
              }
            ]
          }));
        } else {
          // Explicit empty array clears previously registered tools.
          updateMessage.session.tools = [];
        }
        this.log("Updating tools:", config.tools.length, "tools");
      }
      // NOTE(review): when tools were registered via addTools (this.tools),
      // this branch overwrites any session.tools set from config.tools above,
      // and sets hasUpdates even if allTools ends up empty — confirm intended.
      if (this.tools && Object.keys(this.tools).length > 0) {
        hasUpdates = true;
        const allTools = [];
        for (const [toolName, tool] of Object.entries(this.tools)) {
          try {
            let parameters;
            if ("inputSchema" in tool && tool.inputSchema) {
              // 'safeParse' marks a Zod schema; convert it to JSON Schema.
              if (typeof tool.inputSchema === "object" && "safeParse" in tool.inputSchema) {
                parameters = this.convertZodSchemaToJsonSchema(tool.inputSchema);
              } else {
                parameters = tool.inputSchema;
              }
            } else if ("parameters" in tool && tool.parameters) {
              parameters = tool.parameters;
            } else {
              parameters = { type: "object", properties: {} };
            }
            allTools.push({
              function_declarations: [
                {
                  name: toolName,
                  description: tool.description || `Tool: ${toolName}`,
                  parameters
                }
              ]
            });
          } catch (error) {
            // Skip tools that fail schema conversion; continue with the rest.
            this.log("Failed to process tool for session update", { toolName, error });
          }
        }
        if (allTools.length > 0) {
          updateMessage.session.tools = allTools;
          this.log("Updating tools from addTools method:", allTools.length, "tools");
        }
      }
      if (config.sessionConfig) {
        if (config.sessionConfig.vad) {
          hasUpdates = true;
          updateMessage.session.vad = {
            enabled: config.sessionConfig.vad.enabled ?? true,
            sensitivity: config.sessionConfig.vad.sensitivity ?? 0.5,
            silence_duration_ms: config.sessionConfig.vad.silenceDurationMs ?? 1e3
          };
          this.log("Updating VAD settings:", config.sessionConfig.vad);
        }
        if (config.sessionConfig.interrupts) {
          hasUpdates = true;
          updateMessage.session.interrupts = {
            enabled: config.sessionConfig.interrupts.enabled ?? true,
            allow_user_interruption: config.sessionConfig.interrupts.allowUserInterruption ?? true
          };
          this.log("Updating interrupt settings:", config.sessionConfig.interrupts);
        }
        if (config.sessionConfig.contextCompression !== void 0) {
          hasUpdates = true;
          updateMessage.session.context_compression = config.sessionConfig.contextCompression;
          this.log("Updating context compression:", config.sessionConfig.contextCompression);
          // Mirror the setting into the local context manager immediately.
          this.contextManager.setCompressionEnabled(config.sessionConfig.contextCompression);
        }
      }
      if (!hasUpdates) {
        this.log("No valid configuration updates to send");
        resolve();
        return;
      }
      // Acknowledgement handshake: resolve on 'session.updated', reject on
      // 'error' or after 10 seconds without a response.
      const timeout = setTimeout(() => {
        cleanup();
        reject(new Error("Session configuration update timeout - no response received"));
      }, 1e4);
      const onSessionUpdated = (data) => {
        cleanup();
        this.log("Session configuration updated successfully", data);
        resolve();
      };
      const onError = (error) => {
        cleanup();
        this.log("Session configuration update failed", error);
        reject(new Error(`Failed to update session configuration: ${error.message || "Unknown error"}`));
      };
      const cleanup = () => {
        clearTimeout(timeout);
        this.eventManager.getEventEmitter().removeListener("session.updated", onSessionUpdated);
        this.eventManager.getEventEmitter().removeListener("error", onError);
      };
      this.eventManager.getEventEmitter().once("session.updated", onSessionUpdated);
      this.eventManager.getEventEmitter().once("error", onError);
      try {
        this.sendEvent("session.update", updateMessage);
        this.log("Sent session configuration update", updateMessage);
      } catch (error) {
        cleanup();
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        this.log("Failed to send session configuration update", error);
        reject(new Error(`Failed to send session configuration update: ${errorMessage}`));
      }
    });
  }
1968
+ /**
1969
+ * Get current connection state
1970
+ */
1971
+ getConnectionState() {
1972
+ return this.state;
1973
+ }
1974
+ /**
1975
+ * Check if currently connected
1976
+ */
1977
+ isConnected() {
1978
+ return this.state === "connected";
1979
+ }
1980
+ /**
1981
+ * Get current speaker stream for audio concatenation
1982
+ * This allows external access to the current audio stream being built
1983
+ */
1984
+ getCurrentSpeakerStream() {
1985
+ return this.audioStreamManager.getCurrentSpeakerStream();
1986
+ }
1987
+ /**
1988
+ * Get session handle for resumption
1989
+ */
1990
+ getSessionHandle() {
1991
+ return this.sessionHandle;
1992
+ }
1993
+ /**
1994
+ * Get comprehensive session information
1995
+ */
1996
+ getSessionInfo() {
1997
+ return {
1998
+ id: this.sessionId,
1999
+ handle: this.sessionHandle,
2000
+ startTime: this.sessionStartTime ? new Date(this.sessionStartTime) : void 0,
2001
+ duration: this.sessionStartTime ? Date.now() - this.sessionStartTime : void 0,
2002
+ state: this.state,
2003
+ config: this.options.sessionConfig,
2004
+ contextSize: this.contextManager.getContextSize()
2005
+ };
2006
+ }
2007
+ /**
2008
+ * Get session context history
2009
+ */
2010
+ getContextHistory() {
2011
+ return this.contextManager.getContextHistory();
2012
+ }
2013
+ /**
2014
+ * Add to context history for session continuity
2015
+ */
2016
+ addToContext(role, content) {
2017
+ this.contextManager.addEntry(role, content);
2018
+ }
2019
+ /**
2020
+ * Clear session context
2021
+ */
2022
+ clearContext() {
2023
+ this.contextManager.clearContext();
2024
+ this.log("Session context cleared");
2025
+ }
2026
+ /**
2027
+ * Enable or disable automatic reconnection
2028
+ */
2029
+ setAutoReconnect(enabled) {
2030
+ if (!this.options.sessionConfig) {
2031
+ this.options.sessionConfig = {};
2032
+ }
2033
+ this.options.sessionConfig.enableResumption = enabled;
2034
+ this.log(`Auto-reconnect ${enabled ? "enabled" : "disabled"}`);
2035
+ }
2036
+ /**
2037
+ * Send session resumption message
2038
+ * @private
2039
+ */
2040
+ async sendSessionResumption() {
2041
+ if (!this.sessionHandle) {
2042
+ throw new Error("No session handle available for resumption");
2043
+ }
2044
+ const context = this.contextManager.getContextArray();
2045
+ const resumeMessage = {
2046
+ session_resume: {
2047
+ handle: this.sessionHandle,
2048
+ ...context.length > 0 && {
2049
+ context
2050
+ }
2051
+ }
2052
+ };
2053
+ try {
2054
+ if (this.ws?.readyState !== WebSocket.OPEN) {
2055
+ throw new Error("WebSocket not ready for session resumption");
2056
+ }
2057
+ this.sendEvent("session_resume", resumeMessage);
2058
+ this.log("Session resumption message sent", { handle: this.sessionHandle });
2059
+ } catch (error) {
2060
+ this.log("Failed to send session resumption", error);
2061
+ throw new Error(`Failed to send session resumption: ${error instanceof Error ? error.message : "Unknown error"}`);
2062
+ }
2063
+ }
2064
+ /**
2065
+ * Start monitoring session duration
2066
+ * @private
2067
+ */
2068
+ startSessionDurationMonitor() {
2069
+ if (!this.options.sessionConfig?.maxDuration) {
2070
+ return;
2071
+ }
2072
+ const durationMs = this.parseDuration(this.options.sessionConfig.maxDuration);
2073
+ if (!durationMs) {
2074
+ this.log("Invalid session duration format", { duration: this.options.sessionConfig.maxDuration });
2075
+ return;
2076
+ }
2077
+ if (this.sessionDurationTimeout) {
2078
+ clearTimeout(this.sessionDurationTimeout);
2079
+ }
2080
+ const warningTime = durationMs - 5 * 60 * 1e3;
2081
+ if (warningTime > 0) {
2082
+ setTimeout(() => {
2083
+ this.emit("sessionExpiring", {
2084
+ expiresIn: 5 * 60 * 1e3,
2085
+ sessionId: this.sessionId
2086
+ });
2087
+ }, warningTime);
2088
+ }
2089
+ this.sessionDurationTimeout = setTimeout(() => {
2090
+ this.log("Session duration limit reached, disconnecting");
2091
+ void this.disconnect();
2092
+ }, durationMs);
2093
+ }
2094
+ /**
2095
+ * Parse duration string to milliseconds
2096
+ * @private
2097
+ */
2098
+ parseDuration(duration) {
2099
+ const match = duration.match(/^(\d+)([hms])$/);
2100
+ if (!match) return null;
2101
+ const value = parseInt(match[1], 10);
2102
+ const unit = match[2];
2103
+ switch (unit) {
2104
+ case "h":
2105
+ return value * 60 * 60 * 1e3;
2106
+ case "m":
2107
+ return value * 60 * 1e3;
2108
+ case "s":
2109
+ return value * 1e3;
2110
+ default:
2111
+ return null;
2112
+ }
2113
+ }
2114
+ /**
2115
+ * Compress context history to manage memory
2116
+ * @private
2117
+ */
2118
+ compressContext() {
2119
+ this.log("compressContext is deprecated; handled by ContextManager");
2120
+ }
2121
+ /**
2122
+ * Setup WebSocket event listeners for Gemini Live API messages
2123
+ * @private
2124
+ */
2125
+ setupEventListeners() {
2126
+ if (!this.ws) {
2127
+ throw new Error("WebSocket not initialized");
2128
+ }
2129
+ this.ws.on("open", () => {
2130
+ this.log("WebSocket connection opened");
2131
+ });
2132
+ this.ws.on("close", (code, reason) => {
2133
+ this.log("WebSocket connection closed", { code, reason: reason.toString() });
2134
+ this.state = "disconnected";
2135
+ this.emit("session", { state: "disconnected" });
2136
+ });
2137
+ this.ws.on("error", (error) => {
2138
+ this.log("WebSocket error", error);
2139
+ this.state = "disconnected";
2140
+ this.emit("session", { state: "disconnected" });
2141
+ this.emit("error", {
2142
+ message: error.message,
2143
+ code: "websocket_error",
2144
+ details: error
2145
+ });
2146
+ });
2147
+ this.ws.on("message", async (message) => {
2148
+ try {
2149
+ const data = JSON.parse(message.toString());
2150
+ await this.handleGeminiMessage(data);
2151
+ } catch (error) {
2152
+ this.log("Failed to parse WebSocket message", error);
2153
+ this.emit("error", {
2154
+ message: "Failed to parse WebSocket message",
2155
+ code: "parse_error",
2156
+ details: error
2157
+ });
2158
+ }
2159
+ });
2160
+ }
2161
+ /**
2162
+ * Handle different types of messages from Gemini Live API
2163
+ * @private
2164
+ */
2165
+ async handleGeminiMessage(data) {
2166
+ this.log("Received message:", JSON.stringify(data, null, 2));
2167
+ if (data.responseId) {
2168
+ this.setCurrentResponseId(data.responseId);
2169
+ this.log("Set current response ID:", data.responseId);
2170
+ }
2171
+ if (data.setup) {
2172
+ this.log("Processing setup message");
2173
+ this.handleSetupComplete(data);
2174
+ } else if (data.setupComplete) {
2175
+ this.log("Processing setupComplete message");
2176
+ this.handleSetupComplete(data);
2177
+ } else if (data.serverContent) {
2178
+ this.log("Processing server content message");
2179
+ this.handleServerContent(data.serverContent);
2180
+ } else if (data.toolCall) {
2181
+ this.log("Processing tool call message");
2182
+ await this.handleToolCall(data);
2183
+ } else if (data.usageMetadata) {
2184
+ this.log("Processing usage metadata message");
2185
+ this.handleUsageUpdate(data);
2186
+ } else if (data.sessionEnd) {
2187
+ this.log("Processing session end message");
2188
+ this.handleSessionEnd(data);
2189
+ } else if (data.error) {
2190
+ this.log("Processing error message");
2191
+ this.handleError(data.error);
2192
+ } else {
2193
+ const messageData = data;
2194
+ if (messageData.type === "setup" || messageData.type === "session.ready" || messageData.type === "ready") {
2195
+ this.log("Processing alternative setup message with type:", messageData.type);
2196
+ this.handleSetupComplete(data);
2197
+ } else if (messageData.sessionHandle) {
2198
+ this.log("Processing session handle message");
2199
+ this.handleSetupComplete(data);
2200
+ } else if (messageData.session || messageData.ready || messageData.status === "ready" || messageData.status === "setup_complete") {
2201
+ this.log("Processing setup completion message with status:", messageData.status);
2202
+ this.handleSetupComplete(data);
2203
+ } else if (messageData.candidates || messageData.promptFeedback) {
2204
+ this.log("Processing BidiGenerateContent response");
2205
+ this.handleSetupComplete(data);
2206
+ } else if (messageData.contents && Array.isArray(messageData.contents)) {
2207
+ this.log("Processing content response");
2208
+ this.handleServerContent({ modelTurn: { parts: messageData.contents.flatMap((c) => c.parts || []) } });
2209
+ this.handleSetupComplete(data);
2210
+ } else if (messageData.candidates && Array.isArray(messageData.candidates)) {
2211
+ this.log("Processing candidates response");
2212
+ this.handleSetupComplete(data);
2213
+ } else {
2214
+ this.log("Unknown message format - no recognized fields found");
2215
+ }
2216
+ }
2217
+ }
2218
+ /**
2219
+ * Handle setup completion message
2220
+ * @private
2221
+ */
2222
+ handleSetupComplete(data) {
2223
+ this.log("Setup completed");
2224
+ const queue = this.queue.splice(0, this.queue.length);
2225
+ if (queue.length > 0) {
2226
+ this.log("Processing queued messages:", queue.length);
2227
+ for (const queuedMessage of queue) {
2228
+ try {
2229
+ this.connectionManager.send(JSON.stringify(queuedMessage));
2230
+ this.log("Sent queued message:", queuedMessage);
2231
+ } catch (err) {
2232
+ this.log("Failed to send queued message, re-queuing:", err);
2233
+ this.queue.unshift(queuedMessage);
2234
+ break;
2235
+ }
2236
+ }
2237
+ }
2238
+ this.eventManager.getEventEmitter().emit("setupComplete", data);
2239
+ }
2240
+ /**
2241
+ * Handle session update confirmation
2242
+ * @private
2243
+ */
2244
+ handleSessionUpdated(data) {
2245
+ this.log("Session updated", data);
2246
+ this.eventManager.getEventEmitter().emit("session.updated", data);
2247
+ this.emit("session", {
2248
+ state: "updated",
2249
+ config: data
2250
+ });
2251
+ }
2252
/**
 * Handle server content (text/audio responses).
 *
 * Emits "writing" for each text part, routes audio parts into a
 * per-response speaker stream (created lazily, announced via "speaker"),
 * emits "speaking" per audio chunk, records the assistant's full text
 * into the session context, and on turnComplete cleans up the speaker
 * streams and emits "turnComplete".
 * @private
 */
handleServerContent(data) {
  if (!data) {
    return;
  }
  // Accumulates all text parts so the whole assistant turn can be added
  // to the session context once at the end.
  let assistantResponse = "";
  if (data.modelTurn?.parts) {
    for (const part of data.modelTurn.parts) {
      if (part.text) {
        assistantResponse += part.text;
        this.emit("writing", {
          text: part.text,
          role: "assistant"
        });
      }
      if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
        try {
          const audioData = part.inlineData.data;
          // Decode the base64 payload into PCM16 samples.
          const int16Array = this.audioStreamManager.base64ToInt16Array(audioData);
          // Chunks are grouped by response id; fall back to a random id
          // when the server did not provide one.
          const responseId = this.getCurrentResponseId() || randomUUID();
          let speakerStream = this.audioStreamManager.getSpeakerStream(responseId);
          if (!speakerStream) {
            // First chunk of this response: make room, create the stream,
            // and wire lifecycle handlers that unregister it again.
            this.audioStreamManager.cleanupStaleStreams();
            this.audioStreamManager.enforceStreamLimits();
            speakerStream = this.audioStreamManager.createSpeakerStream(responseId);
            speakerStream.on("error", (streamError) => {
              this.log(`Speaker stream error for ${responseId}:`, streamError);
              this.audioStreamManager.removeSpeakerStream(responseId);
              this.emit("error", {
                message: "Speaker stream error",
                code: "speaker_stream_error",
                details: { responseId, error: streamError }
              });
            });
            speakerStream.on("end", () => {
              this.log(`Speaker stream ended for response: ${responseId}`);
              this.audioStreamManager.removeSpeakerStream(responseId);
            });
            speakerStream.on("close", () => {
              this.log(`Speaker stream closed for response: ${responseId}`);
              this.audioStreamManager.removeSpeakerStream(responseId);
            });
            this.log("Created new speaker stream for response:", responseId);
            this.emit("speaker", speakerStream);
          }
          // Re-wrap the samples as a Buffer view over the same memory
          // (no copy) and push them into the stream.
          const audioBuffer = Buffer.from(int16Array.buffer, int16Array.byteOffset, int16Array.byteLength);
          speakerStream.write(audioBuffer);
          this.log("Wrote audio chunk to stream:", {
            responseId,
            chunkSize: audioBuffer.length,
            totalStreams: this.audioStreamManager.getActiveStreamCount()
          });
          this.emit("speaking", {
            audio: audioData,
            // Base64 string
            audioData: int16Array,
            sampleRate: this.audioConfig.outputSampleRate
            // Gemini Live outputs at 24kHz
          });
        } catch (error) {
          // Audio decode/stream failure is surfaced but does not abort
          // processing of the remaining parts.
          this.log("Error processing audio data:", error);
          this.emit("error", {
            message: "Failed to process audio data",
            code: "audio_processing_error",
            details: error
          });
        }
      }
    }
  }
  if (assistantResponse.trim()) {
    this.addToContext("assistant", assistantResponse);
  }
  if (data.turnComplete) {
    this.log("Turn completed");
    // End of turn: close out the per-response speaker streams.
    this.audioStreamManager.cleanupSpeakerStreams();
    this.emit("turnComplete", {
      timestamp: Date.now()
    });
  }
}
2336
/**
 * Handle tool call requests from the model.
 *
 * Emits "toolCall", looks the named tool up in this.tools, runs its
 * execute() with the model-supplied args, and reports the outcome back
 * to the API as a tool_result message — including an { error } payload
 * when execution throws, so the model is never left waiting. Missing
 * tools and execution failures are also surfaced via createAndEmitError.
 * @private
 */
async handleToolCall(data) {
  if (!data.toolCall) {
    return;
  }
  const toolName = data.toolCall.name || "";
  const toolArgs = data.toolCall.args || {};
  // The API may omit an id; generate one so the result can still be correlated.
  const toolId = data.toolCall.id || randomUUID();
  this.log("Processing tool call", { toolName, toolArgs, toolId });
  this.emit("toolCall", {
    name: toolName,
    args: toolArgs,
    id: toolId
  });
  const tool = this.tools?.[toolName];
  if (!tool) {
    this.log("Tool not found", { toolName });
    this.createAndEmitError("tool_not_found" /* TOOL_NOT_FOUND */, `Tool "${toolName}" not found`, {
      toolName,
      availableTools: Object.keys(this.tools || {})
    });
    return;
  }
  try {
    let result;
    if (tool.execute) {
      this.log("Executing tool", { toolName, toolArgs });
      result = await tool.execute(
        { context: toolArgs, runtimeContext: this.runtimeContext },
        {
          toolCallId: toolId,
          messages: []
        }
      );
      this.log("Tool executed successfully", { toolName, result });
    } else {
      // Tool exists but is not executable; report that as the result.
      this.log("Tool has no execute function", { toolName });
      result = { error: "Tool has no execute function" };
    }
    const toolResultMessage = {
      tool_result: {
        tool_call_id: toolId,
        result
      }
    };
    this.sendEvent("tool_result", toolResultMessage);
    this.log("Tool result sent", { toolName, toolId, result });
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    this.log("Tool execution failed", { toolName, error: errorMessage });
    // Still answer the model with an error result so the turn can proceed.
    const errorResultMessage = {
      tool_result: {
        tool_call_id: toolId,
        result: { error: errorMessage }
      }
    };
    this.sendEvent("tool_result", errorResultMessage);
    this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
      toolName,
      toolArgs,
      error
    });
  }
}
2403
+ /**
2404
+ * Handle token usage information
2405
+ * @private
2406
+ */
2407
+ handleUsageUpdate(data) {
2408
+ if (data.usageMetadata) {
2409
+ this.emit("usage", {
2410
+ inputTokens: data.usageMetadata.promptTokenCount || 0,
2411
+ outputTokens: data.usageMetadata.responseTokenCount || 0,
2412
+ totalTokens: data.usageMetadata.totalTokenCount || 0,
2413
+ modality: this.determineModality(data)
2414
+ });
2415
+ }
2416
+ }
2417
+ /**
2418
+ * Handle session end
2419
+ * @private
2420
+ */
2421
+ handleSessionEnd(data) {
2422
+ this.log("Session ended", data.sessionEnd?.reason);
2423
+ this.state = "disconnected";
2424
+ this.emit("session", { state: "disconnected" });
2425
+ }
2426
+ /**
2427
+ * Handle errors
2428
+ * @private
2429
+ */
2430
+ handleError(error) {
2431
+ if (!error) {
2432
+ this.log("Received error from Gemini Live API (no error details)");
2433
+ return;
2434
+ }
2435
+ this.log("Received error from Gemini Live API", error);
2436
+ this.emit("error", {
2437
+ message: error.message || "Unknown error",
2438
+ code: error.code || "unknown_error",
2439
+ details: error.details
2440
+ });
2441
+ }
2442
+ /**
2443
+ * Determine the modality from message data
2444
+ * @private
2445
+ */
2446
+ determineModality(data) {
2447
+ if (data.serverContent?.modelTurn?.parts?.some((part) => part.inlineData?.mimeType?.includes("audio"))) {
2448
+ return "audio";
2449
+ }
2450
+ if (data.serverContent?.modelTurn?.parts?.some((part) => part.inlineData?.mimeType?.includes("video"))) {
2451
+ return "video";
2452
+ }
2453
+ return "text";
2454
+ }
2455
/**
 * Send initial configuration to Gemini Live API.
 *
 * Builds the Live API setup message (model name, optional system
 * instructions, and function declarations gathered from BOTH the
 * constructor-provided tool list and tools registered via addTools)
 * and sends it as the first message on the connection.
 * @private
 * @throws when the WebSocket is not connected or the send fails
 */
sendInitialConfig() {
  if (!this.ws || !this.connectionManager.isConnected()) {
    throw new Error("WebSocket not connected");
  }
  const setupMessage = {
    setup: {
      model: `models/${this.options.model}`
    }
  };
  if (this.options.instructions) {
    setupMessage.setup.systemInstruction = {
      parts: [{ text: this.options.instructions }]
    };
  }
  const allTools = [];
  // Tools supplied via constructor options: already in declaration form.
  if (this.options.tools && this.options.tools.length > 0) {
    for (const tool of this.options.tools) {
      allTools.push({
        functionDeclarations: [
          {
            name: tool.name,
            description: tool.description,
            parameters: tool.parameters
          }
        ]
      });
    }
  }
  // Tools registered via addTools(): may carry a Zod inputSchema that
  // must be converted to JSON Schema first.
  if (this.tools && Object.keys(this.tools).length > 0) {
    for (const [toolName, tool] of Object.entries(this.tools)) {
      try {
        let parameters;
        if ("inputSchema" in tool && tool.inputSchema) {
          // "safeParse" marks a Zod schema; otherwise assume raw JSON Schema.
          if (typeof tool.inputSchema === "object" && "safeParse" in tool.inputSchema) {
            parameters = this.convertZodSchemaToJsonSchema(tool.inputSchema);
          } else {
            parameters = tool.inputSchema;
          }
        } else if ("parameters" in tool && tool.parameters) {
          parameters = tool.parameters;
        } else {
          // No schema at all: declare a parameterless tool.
          parameters = { type: "object", properties: {} };
        }
        allTools.push({
          functionDeclarations: [
            {
              name: toolName,
              description: tool.description || `Tool: ${toolName}`,
              parameters
            }
          ]
        });
      } catch (error) {
        // One malformed tool should not prevent the session setup.
        this.log("Failed to process tool", { toolName, error });
      }
    }
  }
  if (allTools.length > 0) {
    setupMessage.setup.tools = allTools;
    this.log("Including tools in setup message", { toolCount: allTools.length });
  }
  this.log("Sending Live API setup message:", setupMessage);
  try {
    this.sendEvent("setup", setupMessage);
  } catch (error) {
    this.log("Failed to send Live API setup message:", error);
    throw new Error(
      `Failed to send Live API setup message: ${error instanceof Error ? error.message : "Unknown error"}`
    );
  }
}
2530
+ /**
2531
+ * Wait for Gemini Live session to be created and ready
2532
+ * @private
2533
+ */
2534
+ waitForSessionCreated() {
2535
+ return new Promise((resolve, reject) => {
2536
+ let isResolved = false;
2537
+ const onSetupComplete = () => {
2538
+ if (!isResolved) {
2539
+ isResolved = true;
2540
+ cleanup();
2541
+ resolve();
2542
+ }
2543
+ };
2544
+ const onError = (errorData) => {
2545
+ if (!isResolved) {
2546
+ isResolved = true;
2547
+ cleanup();
2548
+ reject(new Error(`Session creation failed: ${errorData.message || "Unknown error"}`));
2549
+ }
2550
+ };
2551
+ const onSessionEnd = () => {
2552
+ if (!isResolved) {
2553
+ isResolved = true;
2554
+ cleanup();
2555
+ reject(new Error("Session ended before setup completed"));
2556
+ }
2557
+ };
2558
+ const cleanup = () => {
2559
+ this.eventManager.getEventEmitter().removeListener("setupComplete", onSetupComplete);
2560
+ this.eventManager.getEventEmitter().removeListener("error", onError);
2561
+ this.eventManager.getEventEmitter().removeListener("sessionEnd", onSessionEnd);
2562
+ };
2563
+ this.eventManager.getEventEmitter().once("setupComplete", onSetupComplete);
2564
+ this.eventManager.getEventEmitter().once("error", onError);
2565
+ this.eventManager.getEventEmitter().once("sessionEnd", onSessionEnd);
2566
+ setTimeout(() => {
2567
+ if (!isResolved) {
2568
+ isResolved = true;
2569
+ cleanup();
2570
+ reject(new Error("Session creation timeout"));
2571
+ }
2572
+ }, 3e4);
2573
+ });
2574
+ }
2575
+ /**
2576
+ * Get OAuth access token for Vertex AI authentication
2577
+ * Implements token caching and automatic refresh
2578
+ * @private
2579
+ */
2580
+ async getAccessToken() {
2581
+ if (!this.options.vertexAI) {
2582
+ throw new Error("getAccessToken should only be called for Vertex AI mode");
2583
+ }
2584
+ return this.authManager.getAccessToken();
2585
+ }
2586
+ /**
2587
+ * Get the current response ID from the server message
2588
+ * This is needed to associate audio chunks with their respective responses.
2589
+ * @private
2590
+ */
2591
+ getCurrentResponseId() {
2592
+ return this.audioStreamManager.getCurrentResponseId();
2593
+ }
2594
+ /**
2595
+ * Set the current response ID for the next audio chunk.
2596
+ * This is used to track the response ID for the current turn.
2597
+ * @private
2598
+ */
2599
+ setCurrentResponseId(responseId) {
2600
+ this.audioStreamManager.setCurrentResponseId(responseId);
2601
+ }
2602
+ /**
2603
+ * Send an event to the Gemini Live API with queueing support
2604
+ * @private
2605
+ */
2606
+ sendEvent(type, data) {
2607
+ let message;
2608
+ if (type === "setup" && data.setup) {
2609
+ message = data;
2610
+ } else if (type === "client_content" && data.client_content) {
2611
+ message = data;
2612
+ } else if (type === "realtime_input" && data.realtime_input) {
2613
+ message = data;
2614
+ } else if (type === "session.update" && data.session) {
2615
+ message = data;
2616
+ } else {
2617
+ message = { type, ...data };
2618
+ }
2619
+ if (!this.ws || !this.connectionManager.isConnected()) {
2620
+ this.queue.push(message);
2621
+ this.log("Queued message:", { type, data });
2622
+ } else {
2623
+ this.connectionManager.send(JSON.stringify(message));
2624
+ this.log("Sent message:", { type, data });
2625
+ }
2626
+ }
2627
+ /**
2628
+ * Equip the voice provider with tools
2629
+ * @param tools Object containing tool definitions that can be called by the voice model
2630
+ *
2631
+ * @example
2632
+ * ```typescript
2633
+ * const weatherTool = createTool({
2634
+ * id: "getWeather",
2635
+ * description: "Get the current weather for a location",
2636
+ * inputSchema: z.object({
2637
+ * location: z.string().describe("The city and state, e.g. San Francisco, CA"),
2638
+ * }),
2639
+ * execute: async ({ context }) => {
2640
+ * // Fetch weather data from an API
2641
+ * const response = await fetch(
2642
+ * `https://api.weather.com?location=${encodeURIComponent(context.location)}`,
2643
+ * );
2644
+ * const data = await response.json();
2645
+ * return {
2646
+ * message: `The current temperature in ${context.location} is ${data.temperature}°F with ${data.conditions}.`,
2647
+ * };
2648
+ * },
2649
+ * });
2650
+ *
2651
+ * voice.addTools({
2652
+ * getWeather: weatherTool,
2653
+ * });
2654
+ * ```
2655
+ */
2656
+ addTools(tools) {
2657
+ this.tools = tools;
2658
+ this.log("Tools added to Gemini Live Voice", { toolCount: Object.keys(tools || {}).length });
2659
+ }
2660
+ /**
2661
+ * Get the current tools configured for this voice instance
2662
+ * @returns Object containing the current tools
2663
+ */
2664
+ getTools() {
2665
+ return this.tools;
2666
+ }
2667
+ log(message, ...args) {
2668
+ if (this.debug) {
2669
+ console.log(`[GeminiLiveVoice] ${message}`, ...args);
2670
+ }
2671
+ }
2672
/**
 * Convert Zod schema to JSON Schema for tool parameters.
 *
 * Resolution order matters: an explicit toJSON() serializer wins, then
 * a Zod `_def` is converted, then a plain object without `safeParse` is
 * assumed to already be JSON Schema; otherwise (or on any exception) an
 * empty object schema is returned.
 * @private
 */
convertZodSchemaToJsonSchema(schema) {
  try {
    // Prefer an explicit serializer when the schema provides one.
    if (typeof schema.toJSON === "function") {
      return schema.toJSON();
    }
    // Zod schemas expose their definition under _def.
    if (schema._def) {
      return this.convertZodDefToJsonSchema(schema._def);
    }
    // A plain object without safeParse is assumed to already be JSON Schema.
    if (typeof schema === "object" && !schema.safeParse) {
      return schema;
    }
    return {
      type: "object",
      properties: {},
      description: schema.description || ""
    };
  } catch (error) {
    // Conversion failure degrades to a parameterless schema rather than throwing.
    this.log("Failed to convert Zod schema to JSON schema", { error, schema });
    return {
      type: "object",
      properties: {},
      description: "Schema conversion failed"
    };
  }
}
2701
+ /**
2702
+ * Convert Zod definition to JSON Schema
2703
+ * @private
2704
+ */
2705
+ convertZodDefToJsonSchema(def) {
2706
+ switch (def.typeName) {
2707
+ case "ZodString":
2708
+ return {
2709
+ type: "string",
2710
+ description: def.description || ""
2711
+ };
2712
+ case "ZodNumber":
2713
+ return {
2714
+ type: "number",
2715
+ description: def.description || ""
2716
+ };
2717
+ case "ZodBoolean":
2718
+ return {
2719
+ type: "boolean",
2720
+ description: def.description || ""
2721
+ };
2722
+ case "ZodArray":
2723
+ return {
2724
+ type: "array",
2725
+ items: this.convertZodDefToJsonSchema(def.type._def),
2726
+ description: def.description || ""
2727
+ };
2728
+ case "ZodObject":
2729
+ const properties = {};
2730
+ const required = [];
2731
+ for (const [key, value] of Object.entries(def.shape())) {
2732
+ properties[key] = this.convertZodDefToJsonSchema(value._def);
2733
+ if (value._def.typeName === "ZodOptional") ; else {
2734
+ required.push(key);
2735
+ }
2736
+ }
2737
+ return {
2738
+ type: "object",
2739
+ properties,
2740
+ required: required.length > 0 ? required : void 0,
2741
+ description: def.description || ""
2742
+ };
2743
+ case "ZodOptional":
2744
+ return this.convertZodDefToJsonSchema(def.innerType._def);
2745
+ case "ZodEnum":
2746
+ return {
2747
+ type: "string",
2748
+ enum: def.values,
2749
+ description: def.description || ""
2750
+ };
2751
+ default:
2752
+ return {
2753
+ type: "object",
2754
+ properties: {},
2755
+ description: def.description || ""
2756
+ };
2757
+ }
2758
+ }
2759
+ /**
2760
+ * Close the connection (alias for disconnect)
2761
+ */
2762
+ close() {
2763
+ void this.disconnect();
2764
+ }
2765
+ /**
2766
+ * Trigger voice provider to respond
2767
+ */
2768
+ async answer(_options) {
2769
+ this.validateConnectionState();
2770
+ this.sendEvent("response.create", {});
2771
+ }
2772
+ /**
2773
+ * Equip the voice provider with instructions
2774
+ * @param instructions Instructions to add
2775
+ */
2776
+ addInstructions(instructions) {
2777
+ if (instructions) {
2778
+ this.options.instructions = instructions;
2779
+ this.log("Instructions added:", instructions);
2780
+ }
2781
+ }
2782
+ };
2783
+
2784
+ export { GeminiLiveVoice };
2785
+ //# sourceMappingURL=index.js.map