@mastra/voice-google-gemini-live 0.0.0-add-libsql-changeset-20250910154739

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,2788 @@
1
+ 'use strict';
2
+
3
+ var crypto = require('crypto');
4
+ var voice = require('@mastra/core/voice');
5
+ var ws = require('ws');
6
+ var stream = require('stream');
7
+ var events = require('events');
8
+ var googleAuthLibrary = require('google-auth-library');
9
+
10
+ // src/index.ts
11
// Default PCM audio settings used when the caller supplies no overrides.
var DEFAULT_AUDIO_CONFIG = {
  // 16 kHz input, 24 kHz output, 16-bit mono PCM
  inputSampleRate: 16000,
  outputSampleRate: 24000,
  encoding: "pcm16",
  channels: 1
};
17
// Manages per-response speaker output streams (PassThrough streams keyed by
// response ID) and outbound audio chunking/encoding for the Gemini Live API.
// NOTE(review): this is compiled dist output (src/index.ts is the source).
var AudioStreamManager = class {
  // responseId -> PassThrough stream (augmented with id/created metadata)
  speakerStreams = /* @__PURE__ */ new Map();
  // responseId whose stream receives the next audio chunk
  currentResponseId;
  MAX_CONCURRENT_STREAMS = 10;
  STREAM_TIMEOUT_MS = 3e4;
  // 30 seconds
  debug;
  audioConfig;
  maxChunkSize = 32768;
  // 32KB max chunk size per Gemini limits
  minSendInterval = 0;
  // No throttling - let the stream control the pace
  lastSendTime = 0;
  // chunks queued while throttled, drained by processPendingChunks()
  pendingChunks = [];
  pendingTimer;
  // callback injected via setSender(); if unset, chunks are dropped with a log
  sendToGemini;
  // Audio buffer management constants
  MAX_BUFFER_SIZE = 50 * 1024 * 1024;
  // 50MB maximum buffer size
  MAX_AUDIO_DURATION = 300;
  // 5 minutes maximum audio duration
  /**
   * @param audioConfig - sample-rate/encoding/channel settings (see DEFAULT_AUDIO_CONFIG)
   * @param debug - when true, log() writes diagnostics to the console
   */
  constructor(audioConfig, debug = false) {
    this.audioConfig = audioConfig;
    this.debug = debug;
  }
  /**
   * Provide a sender callback that will be used to deliver messages to Gemini
   */
  setSender(sender) {
    this.sendToGemini = sender;
  }
  /**
   * Get the default audio configuration
   */
  static getDefaultAudioConfig() {
    return { ...DEFAULT_AUDIO_CONFIG };
  }
  /**
   * Create a merged audio configuration with defaults
   */
  static createAudioConfig(customConfig) {
    return {
      ...DEFAULT_AUDIO_CONFIG,
      ...customConfig
    };
  }
  /**
   * Get the current response ID for the next audio chunk
   */
  getCurrentResponseId() {
    return this.currentResponseId;
  }
  /**
   * Set the current response ID for the next audio chunk
   */
  setCurrentResponseId(responseId) {
    this.currentResponseId = responseId;
  }
  /**
   * Get the current speaker stream
   */
  getCurrentSpeakerStream() {
    const currentResponseId = this.getCurrentResponseId();
    if (!currentResponseId) {
      return null;
    }
    const currentStream = this.speakerStreams.get(currentResponseId);
    return currentStream ? currentStream : null;
  }
  /**
   * Add a new speaker stream for a response
   */
  addSpeakerStream(responseId, stream) {
    // Tag the stream in place so stale/limit cleanup can order by age
    const streamWithMetadata = Object.assign(stream, {
      id: responseId,
      created: Date.now()
    });
    this.speakerStreams.set(responseId, streamWithMetadata);
    this.log(`Added speaker stream for response: ${responseId}`);
    this.enforceStreamLimits();
  }
  /**
   * Remove a specific speaker stream
   */
  removeSpeakerStream(responseId) {
    const stream = this.speakerStreams.get(responseId);
    if (stream && !stream.destroyed) {
      stream.end();
      // Grace period: give the stream 1s to flush before force-destroying
      setTimeout(() => {
        if (!stream.destroyed) {
          stream.destroy();
          this.log(`Force destroyed stream for response: ${responseId}`);
        }
      }, 1e3);
    }
    this.speakerStreams.delete(responseId);
    this.log(`Removed speaker stream for response: ${responseId}`);
  }
  /**
   * Clean up all speaker streams
   */
  cleanupSpeakerStreams() {
    try {
      if (this.speakerStreams.size === 0) {
        return;
      }
      this.log(`Cleaning up ${this.speakerStreams.size} speaker streams`);
      for (const [responseId, stream] of this.speakerStreams.entries()) {
        try {
          if (!stream.destroyed) {
            stream.end();
            setTimeout(() => {
              if (!stream.destroyed) {
                stream.destroy();
                this.log(`Force destroyed stream for response: ${responseId}`);
              }
            }, 1e3);
          }
          this.speakerStreams.delete(responseId);
          this.log(`Cleaned up speaker stream for response: ${responseId}`);
        } catch (streamError) {
          // Per-stream failures must not abort cleanup of the remaining streams
          this.log(`Error cleaning up stream ${responseId}:`, streamError);
          this.speakerStreams.delete(responseId);
        }
      }
      this.currentResponseId = void 0;
      this.log("All speaker streams cleaned up");
    } catch (error) {
      // Last resort: drop all references even if orderly shutdown failed
      this.log("Error during speaker stream cleanup:", error);
      this.speakerStreams.clear();
      this.currentResponseId = void 0;
    }
  }
  /**
   * Clean up old/stale streams to prevent memory leaks
   */
  cleanupStaleStreams() {
    try {
      const now = Date.now();
      const staleCutoff = now - this.STREAM_TIMEOUT_MS;
      const staleStreams = [];
      for (const [responseId, stream] of this.speakerStreams.entries()) {
        const created = stream.created || 0;
        if (created < staleCutoff) {
          staleStreams.push(responseId);
        }
      }
      if (staleStreams.length > 0) {
        this.log(`Cleaning up ${staleStreams.length} stale streams`);
        for (const responseId of staleStreams) {
          const stream = this.speakerStreams.get(responseId);
          if (stream && !stream.destroyed) {
            stream.end();
          }
          this.speakerStreams.delete(responseId);
        }
      }
    } catch (error) {
      this.log("Error cleaning up stale streams:", error);
    }
  }
  /**
   * Enforce stream limits to prevent memory exhaustion
   */
  enforceStreamLimits() {
    try {
      if (this.speakerStreams.size <= this.MAX_CONCURRENT_STREAMS) {
        return;
      }
      this.log(
        `Stream limit exceeded (${this.speakerStreams.size}/${this.MAX_CONCURRENT_STREAMS}), cleaning up oldest streams`
      );
      // Evict oldest-first, by the `created` timestamp set in addSpeakerStream
      const sortedStreams = Array.from(this.speakerStreams.entries()).sort(
        ([, a], [, b]) => (a.created || 0) - (b.created || 0)
      );
      const streamsToRemove = sortedStreams.slice(0, this.speakerStreams.size - this.MAX_CONCURRENT_STREAMS);
      for (const [responseId, stream] of streamsToRemove) {
        if (!stream.destroyed) {
          stream.end();
        }
        this.speakerStreams.delete(responseId);
        this.log(`Removed old stream for response: ${responseId}`);
      }
    } catch (error) {
      this.log("Error enforcing stream limits:", error);
    }
  }
  /**
   * Get information about current streams for debugging
   */
  getStreamInfo() {
    const streamDetails = Array.from(this.speakerStreams.entries()).map(([responseId, stream]) => ({
      responseId,
      created: stream.created || 0,
      destroyed: stream.destroyed
    }));
    return {
      totalStreams: this.speakerStreams.size,
      currentResponseId: this.currentResponseId,
      streamDetails
    };
  }
  /**
   * Convert Int16Array audio data to base64 string for WebSocket transmission
   */
  int16ArrayToBase64(int16Array) {
    const buffer = new ArrayBuffer(int16Array.length * 2);
    const view = new DataView(buffer);
    for (let i = 0; i < int16Array.length; i++) {
      // little-endian, matching PCM16 wire format
      view.setInt16(i * 2, int16Array[i], true);
    }
    const nodeBuffer = Buffer.from(buffer);
    return nodeBuffer.toString("base64");
  }
  /**
   * Convert base64 string to Int16Array audio data
   */
  base64ToInt16Array(base64Audio) {
    try {
      const buffer = Buffer.from(base64Audio, "base64");
      if (buffer.length % 2 !== 0) {
        throw new Error("Invalid audio data: buffer length must be even for 16-bit audio");
      }
      // View over the Buffer's memory (no copy); byteOffset handles Buffer pooling
      return new Int16Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 2);
    } catch (error) {
      throw new Error(
        `Failed to decode base64 audio data: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  /**
   * Validate and convert audio data to the required format for Gemini Live API
   * Gemini Live expects 16kHz PCM16 for input
   */
  validateAndConvertAudioInput(audioData) {
    if (Buffer.isBuffer(audioData)) {
      if (audioData.length % 2 !== 0) {
        throw new Error("Audio buffer length must be even for 16-bit audio");
      }
      return new Int16Array(audioData.buffer, audioData.byteOffset, audioData.byteLength / 2);
    }
    if (audioData instanceof Int16Array) {
      return audioData;
    }
    throw new Error("Unsupported audio data format. Expected Buffer or Int16Array");
  }
  /**
   * Process audio chunk for streaming - handles format validation and conversion
   */
  processAudioChunk(chunk) {
    let int16Array;
    if (chunk instanceof Int16Array) {
      int16Array = chunk;
    } else if (Buffer.isBuffer(chunk)) {
      if (chunk.length % 2 !== 0) {
        throw new Error("Audio chunk length must be even for 16-bit audio");
      }
      int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / 2);
    } else if (chunk instanceof Uint8Array) {
      if (chunk.length % 2 !== 0) {
        throw new Error("Audio chunk length must be even for 16-bit audio");
      }
      int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / 2);
    } else {
      throw new Error("Unsupported audio chunk format");
    }
    return this.int16ArrayToBase64(int16Array);
  }
  /**
   * Validate audio format and sample rate for Gemini Live API requirements
   */
  validateAudioFormat(sampleRate, channels) {
    // Sample-rate mismatch is only logged; channel mismatch is fatal
    if (sampleRate && sampleRate !== this.audioConfig.inputSampleRate) {
      this.log(
        `Warning: Audio sample rate ${sampleRate}Hz does not match expected ${this.audioConfig.inputSampleRate}Hz`
      );
    }
    if (channels && channels !== this.audioConfig.channels) {
      throw new Error(`Unsupported channel count: ${channels}. Gemini Live API requires mono audio (1 channel)`);
    }
  }
  /**
   * Create an audio message for the Gemini Live API
   */
  createAudioMessage(audioData, messageType = "realtime") {
    if (messageType === "input") {
      // client_content turn: completes the user's turn with one audio part
      return {
        client_content: {
          turns: [
            {
              role: "user",
              parts: [
                {
                  inlineData: {
                    mimeType: "audio/pcm",
                    data: audioData
                  }
                }
              ]
            }
          ],
          turnComplete: true
        }
      };
    } else {
      // realtime_input: streaming media chunk, no turn semantics
      return {
        realtime_input: {
          media_chunks: [
            {
              mime_type: "audio/pcm",
              data: audioData
            }
          ]
        }
      };
    }
  }
  /**
   * Get a speaker stream by response ID
   */
  getSpeakerStream(responseId) {
    return this.speakerStreams.get(responseId);
  }
  /**
   * Create a new speaker stream for a response ID
   */
  createSpeakerStream(responseId) {
    const stream$1 = new stream.PassThrough();
    stream$1.id = responseId;
    stream$1.created = Date.now();
    this.addSpeakerStream(responseId, stream$1);
    return stream$1;
  }
  /**
   * Get the number of active streams
   */
  getActiveStreamCount() {
    return this.speakerStreams.size;
  }
  /**
   * Check if a specific response ID has an active stream
   */
  hasStream(responseId) {
    return this.speakerStreams.has(responseId);
  }
  /**
   * Get all active response IDs
   */
  getActiveResponseIds() {
    return Array.from(this.speakerStreams.keys());
  }
  /**
   * Reset the manager state (useful for testing or reconnection)
   */
  reset() {
    this.cleanupSpeakerStreams();
    this.currentResponseId = void 0;
    this.log("AudioStreamManager reset");
  }
  /**
   * Validate audio chunk size and format
   */
  validateAudioChunk(chunk) {
    if (chunk.length === 0) {
      throw new Error("Audio chunk cannot be empty");
    }
    if (chunk.length > this.maxChunkSize) {
      throw new Error(`Audio chunk size ${chunk.length} exceeds maximum allowed size ${this.maxChunkSize}`);
    }
    if (chunk.length % 2 !== 0) {
      throw new Error("Audio chunk length must be even for 16-bit audio");
    }
  }
  /**
   * Send audio chunk with throttling and validation
   */
  sendAudioChunk(chunk) {
    try {
      this.validateAudioChunk(chunk);
      const now = Date.now();
      // With minSendInterval = 0 this branch never triggers; kept for
      // configurations that enable throttling.
      if (now - this.lastSendTime < this.minSendInterval) {
        this.pendingChunks.push({ chunk, timestamp: now });
        const delay = this.minSendInterval - (now - this.lastSendTime);
        if (!this.pendingTimer) {
          this.pendingTimer = setTimeout(
            () => {
              this.pendingTimer = void 0;
              this.processPendingChunks();
            },
            Math.max(0, delay)
          );
        }
        return;
      }
      this.processChunk(chunk);
      this.processPendingChunks();
    } catch (error) {
      this.log("Error sending audio chunk:", error);
      throw error;
    }
  }
  /**
   * Handle audio stream processing
   */
  async handleAudioStream(stream) {
    return new Promise((resolve, reject) => {
      const cleanup = () => {
        stream.removeAllListeners();
      };
      stream.on("data", (chunk) => {
        try {
          // Oversized chunks are split to respect the per-message limit
          if (chunk.length > this.maxChunkSize) {
            const chunks = this.splitAudioChunk(chunk);
            for (const subChunk of chunks) {
              this.validateAudioChunk(subChunk);
              this.sendAudioChunk(subChunk);
            }
          } else {
            this.validateAudioChunk(chunk);
            this.sendAudioChunk(chunk);
          }
        } catch (error) {
          cleanup();
          reject(error);
        }
      });
      stream.on("end", () => {
        cleanup();
        resolve();
      });
      stream.on("error", (error) => {
        cleanup();
        reject(error);
      });
    });
  }
  /**
   * Split large audio chunks into smaller ones
   */
  splitAudioChunk(chunk) {
    const chunks = [];
    let offset = 0;
    while (offset < chunk.length) {
      const size = Math.min(this.maxChunkSize, chunk.length - offset);
      // subarray: views into the original buffer, no copying
      chunks.push(chunk.subarray(offset, offset + size));
      offset += size;
    }
    return chunks;
  }
  /**
   * Calculate audio duration from buffer length
   */
  calculateAudioDuration(bufferLength, sampleRate) {
    const effectiveSampleRate = sampleRate || this.audioConfig.inputSampleRate;
    // bytes / (samples-per-second * 2 bytes-per-sample): assumes 16-bit mono PCM
    return bufferLength / (effectiveSampleRate * 2);
  }
  /**
   * Validate audio buffer size and duration
   */
  validateAudioBuffer(buffer) {
    if (buffer.length === 0) {
      throw new Error("Audio buffer cannot be empty");
    }
    if (buffer.length > this.MAX_BUFFER_SIZE) {
      throw new Error(
        `Audio buffer size ${buffer.length} exceeds maximum allowed size ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`
      );
    }
    if (buffer.length % 2 !== 0) {
      throw new Error("Audio buffer length must be even for 16-bit audio");
    }
    const duration = this.calculateAudioDuration(buffer.length);
    if (duration > this.MAX_AUDIO_DURATION) {
      throw new Error(
        `Audio duration ${duration.toFixed(2)}s exceeds maximum allowed duration ${this.MAX_AUDIO_DURATION}s`
      );
    }
  }
  /**
   * Process audio buffer for transcription
   * Combines chunks, validates format, and converts to base64
   */
  processAudioBufferForTranscription(audioBuffer) {
    if (audioBuffer.length % 2 !== 0) {
      throw new Error("Invalid audio data: buffer length must be even for 16-bit audio");
    }
    const duration = this.calculateAudioDuration(audioBuffer.length);
    const base64Audio = audioBuffer.toString("base64");
    return {
      base64Audio,
      duration,
      size: audioBuffer.length
    };
  }
  /**
   * Process audio chunks for transcription with buffer management
   * Handles chunk collection, size validation, and buffer management
   */
  processAudioChunksForTranscription(chunks, totalBufferSize) {
    if (totalBufferSize > this.MAX_BUFFER_SIZE) {
      throw new Error(`Audio data exceeds maximum size of ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`);
    }
    const audioBuffer = Buffer.concat(chunks);
    const result = this.processAudioBufferForTranscription(audioBuffer);
    return {
      audioBuffer,
      ...result
    };
  }
  /**
   * Validate audio chunks and calculate total size
   */
  validateAudioChunks(chunks) {
    let totalSize = 0;
    for (const chunk of chunks) {
      if (!Buffer.isBuffer(chunk)) {
        return { totalSize: 0, isValid: false, error: "Invalid chunk format" };
      }
      totalSize += chunk.length;
      if (totalSize > this.MAX_BUFFER_SIZE) {
        return {
          totalSize,
          isValid: false,
          error: `Total size ${totalSize} exceeds maximum allowed size ${this.MAX_BUFFER_SIZE}`
        };
      }
    }
    return { totalSize, isValid: true };
  }
  /**
   * Get audio buffer limits and configuration
   */
  getAudioBufferLimits() {
    return {
      maxBufferSize: this.MAX_BUFFER_SIZE,
      maxAudioDuration: this.MAX_AUDIO_DURATION,
      maxChunkSize: this.maxChunkSize
    };
  }
  /**
   * Get audio configuration
   */
  getAudioConfig() {
    return this.audioConfig;
  }
  /**
   * Log message if debug is enabled
   */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[AudioStreamManager] ${message}`, ...args);
    }
  }
  /**
   * Handle complete audio transcription workflow
   * Manages stream processing, chunk collection, and transcription
   * NOTE(review): the `onError` parameter is accepted but never used in this
   * body — confirm whether callers rely on it.
   */
  async handleAudioTranscription(audioStream, sendAndAwaitTranscript, onError, timeoutMs = 3e4) {
    return new Promise((resolve, reject) => {
      const chunks = [];
      let isCleanedUp = false;
      let totalBufferSize = 0;
      let isResolved = false;
      const timeout = setTimeout(() => {
        if (!isResolved) {
          cleanup();
          reject(new Error(`Transcription timeout - no response received within ${timeoutMs / 1e3} seconds`));
        }
      }, timeoutMs);
      const onStreamData = (chunk) => {
        try {
          const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
          totalBufferSize += buffer.length;
          if (totalBufferSize > this.MAX_BUFFER_SIZE) {
            cleanup();
            reject(new Error(`Audio data exceeds maximum size of ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`));
            return;
          }
          chunks.push(buffer);
        } catch (error) {
          cleanup();
          reject(
            new Error(`Failed to process audio chunk: ${error instanceof Error ? error.message : "Unknown error"}`)
          );
        }
      };
      const onStreamError = (error) => {
        cleanup();
        reject(new Error(`Audio stream error: ${error.message}`));
      };
      const onStreamEnd = async () => {
        try {
          // Detach data/error handlers before the (async) transcription call;
          // the "end" handler itself is removed by cleanup() once settled.
          audioStream.removeListener("data", onStreamData);
          audioStream.removeListener("error", onStreamError);
          const result = this.processAudioChunksForTranscription(chunks, totalBufferSize);
          this.log("Processing audio for transcription:", {
            chunks: chunks.length,
            totalSize: result.size,
            duration: result.duration
          });
          try {
            const transcript = await sendAndAwaitTranscript(result.base64Audio);
            if (!isResolved) {
              isResolved = true;
              cleanup();
              resolve(transcript.trim());
            }
          } catch (error) {
            if (!isResolved) {
              isResolved = true;
              cleanup();
              reject(
                new Error(
                  `Failed to obtain transcription: ${error instanceof Error ? error.message : "Unknown error"}`
                )
              );
            }
          }
        } catch (error) {
          cleanup();
          reject(
            new Error(`Failed to process audio stream: ${error instanceof Error ? error.message : "Unknown error"}`)
          );
        }
      };
      // Idempotent teardown: clears the timeout, detaches listeners, frees chunks
      const cleanup = () => {
        if (isCleanedUp) return;
        isCleanedUp = true;
        clearTimeout(timeout);
        audioStream.removeListener("data", onStreamData);
        audioStream.removeListener("error", onStreamError);
        audioStream.removeListener("end", onStreamEnd);
        chunks.length = 0;
      };
      audioStream.on("data", onStreamData);
      audioStream.on("error", onStreamError);
      audioStream.on("end", onStreamEnd);
    });
  }
  // Encode one chunk as base64 PCM, wrap it in a realtime_input message, and
  // hand it to the configured sender (drops it with a log if none is set).
  processChunk(chunk) {
    const base64Audio = this.processAudioChunk(chunk);
    const message = this.createAudioMessage(base64Audio, "realtime");
    if (this.sendToGemini) {
      this.sendToGemini("realtime_input", message);
    } else {
      this.log("No sender configured for AudioStreamManager; dropping audio chunk");
    }
    this.lastSendTime = Date.now();
    this.log(`Sent audio chunk of size: ${chunk.length} bytes`);
  }
  // Drain queued chunks in FIFO order, rescheduling via pendingTimer while the
  // throttle window has not elapsed.
  processPendingChunks() {
    while (this.pendingChunks.length > 0) {
      const nextChunk = this.pendingChunks[0];
      const now = Date.now();
      if (nextChunk && now - this.lastSendTime >= this.minSendInterval) {
        this.pendingChunks.shift();
        this.processChunk(nextChunk.chunk);
      } else {
        const delay = this.minSendInterval - (now - this.lastSendTime);
        if (!this.pendingTimer) {
          this.pendingTimer = setTimeout(
            () => {
              this.pendingTimer = void 0;
              this.processPendingChunks();
            },
            Math.max(0, delay)
          );
        }
        break;
      }
    }
  }
};
690
+
691
+ // src/utils/errors.ts
692
// Error type carrying a machine-readable code, optional structured details,
// and a creation timestamp, serializable via toEventData().
var GeminiLiveError = class extends Error {
  // Stable error-code string (e.g. "connection_failed")
  code;
  // Optional extra context supplied by the thrower
  details;
  // Epoch milliseconds at construction time
  timestamp;
  /**
   * @param {string} code - stable error identifier
   * @param {string} message - human-readable description
   * @param {*} [details] - optional extra context
   */
  constructor(code, message, details) {
    super(message);
    this.name = "GeminiLiveError";
    Object.assign(this, { code, details, timestamp: Date.now() });
  }
  /** Serialize into the plain-object shape emitted on error events. */
  toEventData() {
    const { message, code, details, timestamp } = this;
    return { message, code, details, timestamp };
  }
};
712
+
713
+ // src/managers/ConnectionManager.ts
714
// Thin lifecycle wrapper around a ws.WebSocket: tracks the socket instance,
// exposes state queries, and provides open-wait / send / close helpers.
var ConnectionManager = class {
  // Underlying WebSocket (undefined until setWebSocket() is called)
  ws;
  // Local emitter created in the constructor; not used by the methods below
  eventEmitter;
  // When true, log() writes diagnostics to the console
  debug;
  // Milliseconds to wait in waitForOpen() before timing out (default 30s)
  timeoutMs;
  /**
   * @param {{ debug?: boolean, timeoutMs?: number }} config
   */
  constructor(config) {
    this.eventEmitter = new events.EventEmitter();
    this.debug = config.debug;
    this.timeoutMs = config.timeoutMs || 3e4;
  }
  /**
   * Set the WebSocket instance
   */
  setWebSocket(ws) {
    this.ws = ws;
  }
  /**
   * Get the current WebSocket instance
   */
  getWebSocket() {
    return this.ws;
  }
  /**
   * Check if WebSocket is connected
   */
  isConnected() {
    return this.ws?.readyState === ws.WebSocket.OPEN;
  }
  /**
   * Check if WebSocket is connecting
   */
  isConnecting() {
    return this.ws?.readyState === ws.WebSocket.CONNECTING;
  }
  /**
   * Check if WebSocket is closed
   */
  isClosed() {
    return this.ws?.readyState === ws.WebSocket.CLOSED;
  }
  /**
   * Wait for the WebSocket to reach the OPEN state.
   * Resolves immediately if already open; rejects on error, close, or timeout.
   */
  async waitForOpen() {
    return new Promise((resolve, reject) => {
      if (!this.ws) {
        reject(new Error("WebSocket not initialized"));
        return;
      }
      if (this.ws.readyState === ws.WebSocket.OPEN) {
        resolve();
        return;
      }
      // Fix: keep a handle on the timeout so cleanup() can clear it.
      // Previously the timer was never cleared, so after a successful open it
      // still fired, ran cleanup()/reject() and kept the event loop alive for
      // up to timeoutMs.
      let timeoutTimer;
      const cleanup = () => {
        clearTimeout(timeoutTimer);
        this.ws?.removeListener("open", onOpen);
        this.ws?.removeListener("error", onError);
        this.ws?.removeListener("close", onClose);
      };
      const onOpen = () => {
        cleanup();
        resolve();
      };
      const onError = (error) => {
        cleanup();
        reject(new Error(`WebSocket connection failed: ${error.message}`));
      };
      const onClose = () => {
        cleanup();
        reject(new Error("WebSocket connection closed before opening"));
      };
      this.ws.once("open", onOpen);
      this.ws.once("error", onError);
      this.ws.once("close", onClose);
      timeoutTimer = setTimeout(() => {
        cleanup();
        reject(new GeminiLiveError("connection_failed" /* CONNECTION_FAILED */, "WebSocket connection timeout"));
      }, this.timeoutMs);
    });
  }
  /**
   * Send data through WebSocket
   * @throws {GeminiLiveError} when the socket is missing or not open
   */
  send(data) {
    if (!this.ws) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket not initialized");
    }
    if (this.ws.readyState !== ws.WebSocket.OPEN) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket is not open");
    }
    this.ws.send(data);
  }
  /**
   * Close the WebSocket connection and drop the reference
   */
  close() {
    if (this.ws) {
      this.ws.close();
      this.ws = void 0;
    }
  }
  /**
   * Get connection state as a string: "disconnected" | "connecting" | "connected" | "closed"
   */
  getConnectionState() {
    if (!this.ws) return "disconnected";
    switch (this.ws.readyState) {
      case ws.WebSocket.CONNECTING:
        return "connecting";
      case ws.WebSocket.OPEN:
        return "connected";
      case ws.WebSocket.CLOSED:
        return "closed";
      // CLOSING (and any unknown state) falls through to "disconnected"
      default:
        return "disconnected";
    }
  }
  /**
   * Validate WebSocket state for operations
   * @throws {GeminiLiveError} when the socket is missing or not open
   */
  validateWebSocketState() {
    if (!this.ws) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket not initialized");
    }
    if (this.ws.readyState !== ws.WebSocket.OPEN) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket is not open");
    }
  }
  /**
   * Log message if debug is enabled
   */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[ConnectionManager] ${message}`, ...args);
    }
  }
};
850
+
851
+ // src/managers/ContextManager.ts
852
// Rolling conversation-history store with per-entry truncation, a max-entry
// cap, and optional lossy compression of the middle of the history.
var ContextManager = class {
  // Ordered list of { role, content, timestamp } entries
  contextHistory = [];
  maxEntries;
  maxContentLength;
  compressionThreshold;
  compressionEnabled;
  /**
   * @param {{ maxEntries?: number, maxContentLength?: number,
   *           compressionThreshold?: number, compressionEnabled?: boolean }} [config]
   */
  constructor(config = {}) {
    const { maxEntries, maxContentLength, compressionThreshold, compressionEnabled } = config;
    this.maxEntries = maxEntries || 100;
    this.maxContentLength = maxContentLength || 1e4;
    this.compressionThreshold = compressionThreshold || 50;
    this.compressionEnabled = compressionEnabled ?? false;
  }
  /**
   * Append an entry, truncating over-long content and trimming/compressing
   * the history once it exceeds maxEntries.
   */
  addEntry(role, content) {
    const truncated = content.length > this.maxContentLength
      ? `${content.substring(0, this.maxContentLength)}...`
      : content;
    this.contextHistory.push({ role, content: truncated, timestamp: Date.now() });
    if (this.contextHistory.length <= this.maxEntries) return;
    if (this.compressionEnabled) {
      this.compressContext();
    } else {
      // No compression: simply keep the newest maxEntries entries
      this.contextHistory = this.contextHistory.slice(-this.maxEntries);
    }
  }
  /** Return a shallow copy of the full history. */
  getContextHistory() {
    return this.contextHistory.slice();
  }
  /** Return the history as plain { role, content } pairs. */
  getContextArray() {
    return this.contextHistory.map(({ role, content }) => ({ role, content }));
  }
  /** Discard all stored entries. */
  clearContext() {
    this.contextHistory = [];
  }
  /** Number of stored entries. */
  getContextSize() {
    return this.contextHistory.length;
  }
  /**
   * Collapse the middle of the history into a single placeholder entry,
   * keeping the first and last compressionThreshold/3 entries intact.
   */
  compressContext() {
    const history = this.contextHistory;
    if (!this.compressionEnabled || history.length <= this.compressionThreshold) return;
    const keepCount = Math.floor(this.compressionThreshold / 3);
    const head = history.slice(0, keepCount);
    const tail = history.slice(-keepCount);
    const middle = history.slice(keepCount, -keepCount);
    this.contextHistory = middle.length > 0
      ? [
          ...head,
          {
            role: "assistant",
            content: `[Compressed ${middle.length} previous messages]`,
            timestamp: Date.now()
          },
          ...tail
        ]
      : [...head, ...tail];
  }
  /** Toggle compression at runtime. */
  setCompressionEnabled(enabled) {
    this.compressionEnabled = enabled;
  }
  /**
   * Summarize the history: entry counts per role and the timestamp range
   * (nulls when the history is empty).
   */
  getContextSummary() {
    const history = this.contextHistory;
    if (history.length === 0) {
      return {
        totalEntries: 0,
        userEntries: 0,
        assistantEntries: 0,
        oldestTimestamp: null,
        newestTimestamp: null
      };
    }
    let userEntries = 0;
    let assistantEntries = 0;
    let oldestTimestamp = Infinity;
    let newestTimestamp = -Infinity;
    for (const { role, timestamp } of history) {
      if (role === "user") userEntries += 1;
      if (role === "assistant") assistantEntries += 1;
      if (timestamp < oldestTimestamp) oldestTimestamp = timestamp;
      if (timestamp > newestTimestamp) newestTimestamp = timestamp;
    }
    return {
      totalEntries: history.length,
      userEntries,
      assistantEntries,
      oldestTimestamp,
      newestTimestamp
    };
  }
  /**
   * Case-insensitive substring search over entry content, optionally limited
   * to one role.
   */
  searchContext(query, role) {
    const needle = query.toLowerCase();
    return this.contextHistory.filter(
      (entry) => (!role || entry.role === role) && entry.content.toLowerCase().includes(needle)
    );
  }
  /** Return the most recent `count` entries. */
  getRecentEntries(count) {
    return this.contextHistory.slice(-count);
  }
  /** Return every entry with the given role. */
  getEntriesByRole(role) {
    return this.contextHistory.filter((entry) => entry.role === role);
  }
};
989
var AuthManager = class {
  // Lazily-created GoogleAuth client (Vertex AI mode only).
  authClient;
  // Most recently fetched OAuth access token (Vertex AI mode only).
  accessToken;
  // Absolute epoch-ms timestamp at which `accessToken` expires.
  // Unset until a token has actually been fetched.
  tokenExpirationTime;
  // Configured token lifetime in milliseconds. Defaults to 50 minutes,
  // comfortably below Google's 60-minute OAuth token TTL.
  tokenLifetimeMs;
  config;
  /**
   * @param config Auth configuration: either `apiKey` (Live API) or
   *   `vertexAI: true` plus `project` (Vertex AI), with optional service
   *   account settings, `debug`, and `tokenExpirationTime` (token lifetime
   *   in milliseconds).
   */
  constructor(config) {
    this.config = config;
    // BUGFIX: `config.tokenExpirationTime` is a *lifetime* (duration), not an
    // absolute expiry. The original stored it in `tokenExpirationTime` (a
    // timestamp field) and then ignored it on refresh, hard-coding 50 minutes.
    // Keep the duration in its own field and derive timestamps from it.
    this.tokenLifetimeMs = config.tokenExpirationTime ?? 50 * 60 * 1e3;
  }
  /**
   * Initialize authentication based on configuration.
   * @throws GeminiLiveError when neither an API key nor Vertex AI settings exist
   */
  async initialize() {
    if (this.config.vertexAI) {
      await this.initializeVertexAI();
    } else if (this.config.apiKey) {
      return;
    } else {
      throw new GeminiLiveError(
        "api_key_missing" /* API_KEY_MISSING */,
        "Either API key or Vertex AI configuration is required"
      );
    }
  }
  /**
   * Initialize Vertex AI authentication.
   * @throws GeminiLiveError when the project ID is missing or the auth client fails to build
   */
  async initializeVertexAI() {
    if (!this.config.project) {
      throw new GeminiLiveError(
        "project_id_missing" /* PROJECT_ID_MISSING */,
        "Google Cloud project ID is required when using Vertex AI"
      );
    }
    const authOptions = {
      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
      projectId: this.config.project
    };
    if (this.config.serviceAccountKeyFile) {
      authOptions.keyFilename = this.config.serviceAccountKeyFile;
      this.log("Using service account key file for authentication:", this.config.serviceAccountKeyFile);
    }
    if (this.config.serviceAccountEmail) {
      authOptions.clientOptions = { subject: this.config.serviceAccountEmail };
      this.log("Using service account impersonation:", this.config.serviceAccountEmail);
    }
    try {
      this.authClient = new googleAuthLibrary.GoogleAuth(authOptions);
    } catch (error) {
      throw new GeminiLiveError(
        "authentication_failed" /* AUTHENTICATION_FAILED */,
        `Failed to initialize Vertex AI authentication: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  /**
   * Get an access token for Vertex AI, reusing the cached token until expiry.
   * @returns A bearer token string
   * @throws GeminiLiveError when not configured for Vertex AI, not initialized, or the fetch fails
   */
  async getAccessToken() {
    if (!this.config.vertexAI) {
      throw new GeminiLiveError("authentication_failed" /* AUTHENTICATION_FAILED */, "Vertex AI authentication not configured");
    }
    if (!this.authClient) {
      throw new GeminiLiveError("authentication_failed" /* AUTHENTICATION_FAILED */, "Authentication client not initialized");
    }
    // Serve the cached token while it is still valid.
    if (this.accessToken && this.tokenExpirationTime && Date.now() < this.tokenExpirationTime) {
      return this.accessToken;
    }
    try {
      const client = await this.authClient.getClient();
      const token = await client.getAccessToken();
      if (!token.token) {
        throw new Error("No access token received");
      }
      this.accessToken = token.token;
      // BUGFIX: honor the configured lifetime instead of a hard-coded 50 min.
      this.tokenExpirationTime = Date.now() + this.tokenLifetimeMs;
      this.log("Successfully obtained new access token");
      return this.accessToken;
    } catch (error) {
      throw new GeminiLiveError(
        "authentication_failed" /* AUTHENTICATION_FAILED */,
        `Failed to get access token: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  /**
   * Get the API key when using API-key authentication (undefined in Vertex AI mode).
   */
  getApiKey() {
    if (this.config.vertexAI) {
      return void 0;
    }
    return this.config.apiKey;
  }
  /**
   * Check if using Vertex AI authentication.
   */
  isUsingVertexAI() {
    return this.config.vertexAI === true;
  }
  /**
   * Check if authentication is configured (API key, or Vertex AI + project).
   */
  isConfigured() {
    return !!(this.config.apiKey || this.config.vertexAI && this.config.project);
  }
  /**
   * Check whether a cached access token exists and is still valid (Vertex AI only).
   */
  hasValidToken() {
    if (!this.config.vertexAI) return false;
    return !!(this.accessToken && this.tokenExpirationTime && Date.now() < this.tokenExpirationTime);
  }
  /**
   * Clear cached authentication data (token and its expiry).
   */
  clearCache() {
    this.accessToken = void 0;
    this.tokenExpirationTime = void 0;
  }
  /**
   * Get a shallow copy of the authentication configuration.
   */
  getConfig() {
    return { ...this.config };
  }
  /**
   * Log message if debug is enabled.
   */
  log(message, ...args) {
    if (this.config.debug) {
      console.log(`[AuthManager] ${message}`, ...args);
    }
  }
};
1124
var EventManager = class {
  eventEmitter;
  debug;
  eventCounts = {};
  /**
   * @param config Options object; `config.debug` enables verbose logging.
   */
  constructor(config) {
    this.eventEmitter = new events.EventEmitter();
    this.debug = config.debug;
  }
  /**
   * Emit an event with data, tracking how often each event fires.
   * @returns true when at least one listener received the event
   */
  emit(event, data) {
    this.incrementEventCount(event);
    const delivered = this.eventEmitter.emit(event, data);
    if (this.debug) {
      this.log(`Emitted event: ${event}`, data);
    }
    return delivered;
  }
  /**
   * Add event listener.
   */
  on(event, callback) {
    this.eventEmitter.on(event, callback);
    if (this.debug) {
      this.log(`Added listener for event: ${event}`);
    }
  }
  /**
   * Remove event listener.
   */
  off(event, callback) {
    this.eventEmitter.off(event, callback);
    if (this.debug) {
      this.log(`Removed listener for event: ${event}`);
    }
  }
  /**
   * Add one-time event listener.
   */
  once(event, callback) {
    this.eventEmitter.once(event, callback);
    if (this.debug) {
      this.log(`Added one-time listener for event: ${event}`);
    }
  }
  /**
   * Remove all listeners for an event (or for every event when omitted).
   */
  removeAllListeners(event) {
    this.eventEmitter.removeAllListeners(event);
    if (this.debug) {
      this.log(`Removed all listeners${event ? ` for event: ${event}` : ""}`);
    }
  }
  /**
   * Get listener count for one event.
   */
  getListenerCount(event) {
    return this.eventEmitter.listenerCount(event);
  }
  /**
   * Get a map of event name -> current listener count.
   */
  getEventListenerInfo() {
    const info = {};
    for (const registered of this.eventEmitter.eventNames()) {
      const key = typeof registered === "string" ? registered : registered.toString();
      info[key] = this.eventEmitter.listenerCount(registered);
    }
    return info;
  }
  /**
   * Get a copy of the per-event emission counts.
   */
  getEventCounts() {
    return Object.assign({}, this.eventCounts);
  }
  /**
   * Reset event emission counts.
   */
  resetEventCounts() {
    this.eventCounts = {};
  }
  /**
   * Detach every listener and reset counters.
   */
  cleanup() {
    this.eventEmitter.removeAllListeners();
    this.resetEventCounts();
    if (this.debug) {
      this.log("Cleaned up all event listeners");
    }
  }
  /**
   * Get the underlying EventEmitter.
   */
  getEventEmitter() {
    return this.eventEmitter;
  }
  /**
   * Increment the emission counter for one event.
   */
  incrementEventCount(event) {
    const prior = this.eventCounts[event] || 0;
    this.eventCounts[event] = prior + 1;
  }
  /**
   * Log message if debug is enabled.
   */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[EventManager] ${message}`, ...args);
    }
  }
};
1240
+
1241
+ // src/index.ts
1242
// Default Gemini Live model used when the caller does not configure one.
var DEFAULT_MODEL = "gemini-2.0-flash-exp";
// Default prebuilt voice used when the caller does not pick a speaker.
var DEFAULT_VOICE = "Puck";
1244
+ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
1245
  // WebSocket to the Gemini Live endpoint; set by connect(), cleared by disconnect().
  ws;
  // Event dispatch (EventEmitter wrapper that also counts emissions).
  eventManager;
  // Connection lifecycle state; "disconnected" until connect() succeeds.
  state = "disconnected";
  // Opaque handle saved on disconnect() when session resumption is enabled.
  sessionHandle;
  // Verbose logging flag (mirrors options.debug).
  debug;
  // Effective audio settings: defaults merged with options.audioConfig.
  audioConfig;
  // NOTE(review): appears unused in the visible code — confirm before removing.
  queue = [];
  // Managers
  connectionManager;
  contextManager;
  authManager;
  // Audio chunk concatenation - optimized stream management
  audioStreamManager;
  // Session management properties
  sessionId;
  sessionStartTime;
  isResuming = false;
  sessionDurationTimeout;
  // Tool integration properties
  tools;
  runtimeContext;
  // Store the configuration options
  options;
1268
+ /**
1269
+ * Normalize configuration to ensure proper VoiceConfig format
1270
+ * Handles backward compatibility with direct GeminiLiveVoiceConfig
1271
+ * @private
1272
+ */
1273
+ static normalizeConfig(config) {
1274
+ if ("realtimeConfig" in config || "speechModel" in config || "listeningModel" in config) {
1275
+ return config;
1276
+ }
1277
+ const geminiConfig = config;
1278
+ return {
1279
+ speechModel: {
1280
+ name: geminiConfig.model || DEFAULT_MODEL,
1281
+ apiKey: geminiConfig.apiKey
1282
+ },
1283
+ speaker: geminiConfig.speaker || DEFAULT_VOICE,
1284
+ realtimeConfig: {
1285
+ model: geminiConfig.model || DEFAULT_MODEL,
1286
+ apiKey: geminiConfig.apiKey,
1287
+ options: geminiConfig
1288
+ }
1289
+ };
1290
+ }
1291
+ /**
1292
+ * Creates a new GeminiLiveVoice instance
1293
+ *
1294
+ * @param config Configuration options
1295
+ */
1296
  constructor(config = {}) {
    // Accept both the VoiceConfig shape and the legacy flat config.
    const normalizedConfig = _GeminiLiveVoice.normalizeConfig(config);
    super(normalizedConfig);
    this.options = normalizedConfig.realtimeConfig?.options || {};
    const apiKey = this.options.apiKey;
    // One auth path is mandatory: an API key (Live API) or Vertex AI mode.
    if (!apiKey && !this.options.vertexAI) {
      throw new GeminiLiveError(
        "api_key_missing" /* API_KEY_MISSING */,
        "Google API key is required. Set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
      );
    }
    this.debug = this.options.debug || false;
    // Caller-supplied audio settings override the defaults field-by-field.
    this.audioConfig = {
      ...AudioStreamManager.getDefaultAudioConfig(),
      ...this.options.audioConfig
    };
    this.audioStreamManager = new AudioStreamManager(this.audioConfig, this.debug);
    // The stream manager delivers outbound messages through sendEvent().
    this.audioStreamManager.setSender((type, message) => this.sendEvent(type, message));
    this.eventManager = new EventManager({ debug: this.debug });
    this.connectionManager = new ConnectionManager({ debug: this.debug, timeoutMs: 3e4 });
    this.contextManager = new ContextManager({
      maxEntries: 100,
      compressionThreshold: 50,
      compressionEnabled: this.options.sessionConfig?.contextCompression ?? false
    });
    this.authManager = new AuthManager({
      apiKey: this.options.apiKey,
      vertexAI: this.options.vertexAI,
      project: this.options.project,
      serviceAccountKeyFile: this.options.serviceAccountKeyFile,
      serviceAccountEmail: this.options.serviceAccountEmail,
      debug: this.debug,
      tokenExpirationTime: this.options.tokenExpirationTime
    });
    // Vertex AI mode additionally requires a Google Cloud project ID.
    if (this.options.vertexAI && !this.options.project) {
      throw new GeminiLiveError(
        "project_id_missing" /* PROJECT_ID_MISSING */,
        "Google Cloud project ID is required when using Vertex AI. Set GOOGLE_CLOUD_PROJECT environment variable or pass project to constructor"
      );
    }
  }
1337
+ /**
1338
+ * Register an event listener
1339
+ * @param event Event name (e.g., 'speaking', 'writing', 'error', 'speaker')
1340
+ * @param callback Callback function that receives event data
1341
+ *
1342
+ * @example
1343
+ * ```typescript
1344
+ * // Listen for audio responses
1345
+ * voice.on('speaking', ({ audio, audioData, sampleRate }) => {
1346
+ * console.log('Received audio chunk:', audioData.length);
1347
+ * });
1348
+ *
1349
+ * // Listen for text responses and transcriptions
1350
+ * voice.on('writing', ({ text, role }) => {
1351
+ * console.log(`${role}: ${text}`);
1352
+ * });
1353
+ *
1354
+ * // Listen for audio streams (for concatenated playback)
1355
+ * voice.on('speaker', (audioStream) => {
1356
+ * audioStream.pipe(playbackDevice);
1357
+ * });
1358
+ *
1359
+ * // Handle errors
1360
+ * voice.on('error', ({ message, code, details }) => {
1361
+ * console.error('Voice error:', message);
1362
+ * });
1363
+ * ```
1364
+ */
1365
  on(event, callback) {
    try {
      this.eventManager.on(event, callback);
      this.log(`Event listener registered for: ${event}`);
    } catch (error) {
      // Registration failures are surfaced to the caller (unlike off(),
      // which is deliberately best-effort).
      this.log(`Failed to register event listener for ${event}:`, error);
      throw error;
    }
  }
1374
+ /**
1375
+ * Remove an event listener
1376
+ * @param event Event name
1377
+ * @param callback Callback function to remove
1378
+ */
1379
  off(event, callback) {
    try {
      this.eventManager.off(event, callback);
      this.log(`Event listener removed for: ${event}`);
    } catch (error) {
      // Best-effort removal: failures are logged but intentionally not rethrown.
      this.log(`Failed to remove event listener for ${event}:`, error);
    }
  }
1387
+ /**
1388
+ * Register a one-time event listener that automatically removes itself after the first emission
1389
+ * @param event Event name
1390
+ * @param callback Callback function that receives event data
1391
+ */
1392
  once(event, callback) {
    try {
      this.eventManager.once(event, callback);
      this.log(`One-time event listener registered for: ${event}`);
    } catch (error) {
      // Mirrors on(): registration failures propagate to the caller.
      this.log(`Failed to register one-time event listener for ${event}:`, error);
      throw error;
    }
  }
1401
+ /**
1402
+ * Emit an event to listeners with improved error handling
1403
+ * @private
1404
+ */
1405
  emit(event, data) {
    try {
      const listenerCount = this.eventManager.getListenerCount(event);
      if (listenerCount === 0 && this.debug) {
        this.log(`No listeners for event: ${String(event)}`);
      }
      const result = this.eventManager.emit(event, data);
      if (this.debug && listenerCount > 0) {
        this.log(`Emitted event: ${String(event)} to ${listenerCount} listeners`);
      }
      return result;
    } catch (error) {
      this.log(`Error emitting event ${String(event)}:`, error);
      // Surface listener failures as an 'error' event — but never when the
      // failing event IS 'error', which would recurse forever.
      if (event !== "error") {
        try {
          this.eventManager.getEventEmitter().emit("error", {
            message: `Failed to emit event: ${String(event)}`,
            code: "event_emission_error",
            details: error
          });
        } catch (nestedError) {
          // Last resort: even the error event could not be delivered.
          this.log("Critical: Failed to emit error event:", nestedError);
        }
      }
      return false;
    }
  }
1432
+ /**
1433
+ * Clean up event listeners to prevent memory leaks
1434
+ * @private
1435
+ */
1436
+ cleanupEventListeners() {
1437
+ try {
1438
+ const events = this.eventManager.getEventEmitter().eventNames();
1439
+ if (this.debug && events.length > 0) {
1440
+ this.log(
1441
+ "Cleaning up event listeners:",
1442
+ events.map((event) => `${String(event)}: ${this.eventManager.getListenerCount(String(event))}`).join(", ")
1443
+ );
1444
+ }
1445
+ this.eventManager.cleanup();
1446
+ this.log("Event listeners cleaned up");
1447
+ } catch (error) {
1448
+ this.log("Error cleaning up event listeners:", error);
1449
+ }
1450
+ }
1451
+ /**
1452
+ * Get current event listener information for debugging
1453
+ * @returns Object with event names and listener counts
1454
+ */
1455
+ getEventListenerInfo() {
1456
+ try {
1457
+ return this.eventManager.getEventListenerInfo();
1458
+ } catch (error) {
1459
+ this.log("Error getting event listener info:", error);
1460
+ return {};
1461
+ }
1462
+ }
1463
+ /**
1464
+ * Create and emit a standardized error
1465
+ * @private
1466
+ */
1467
+ createAndEmitError(code, message, details) {
1468
+ const error = new GeminiLiveError(code, message, details);
1469
+ this.log(`Error [${code}]: ${message}`, details);
1470
+ this.emit("error", error.toEventData());
1471
+ return error;
1472
+ }
1473
+ /**
1474
+ * Handle connection state validation with standardized errors
1475
+ * @private
1476
+ */
1477
+ validateConnectionState() {
1478
+ if (this.state !== "connected") {
1479
+ throw this.createAndEmitError(
1480
+ "not_connected" /* NOT_CONNECTED */,
1481
+ "Not connected to Gemini Live API. Call connect() first.",
1482
+ { currentState: this.state }
1483
+ );
1484
+ }
1485
+ }
1486
+ /**
1487
+ * Handle WebSocket state validation with standardized errors
1488
+ * @private
1489
+ */
1490
+ validateWebSocketState() {
1491
+ if (!this.connectionManager.isConnected()) {
1492
+ throw this.createAndEmitError("websocket_error" /* WEBSOCKET_ERROR */, "WebSocket is not open", {
1493
+ wsExists: !!this.connectionManager.getWebSocket(),
1494
+ readyState: this.connectionManager.getWebSocket()?.readyState,
1495
+ expectedState: ws.WebSocket.OPEN
1496
+ });
1497
+ }
1498
+ }
1499
+ /**
1500
+ * Establish connection to the Gemini Live API
1501
+ */
1502
  async connect({ runtimeContext } = {}) {
    return this.traced(async () => {
      // Idempotent: calling connect() while already connected is a no-op.
      if (this.state === "connected") {
        this.log("Already connected to Gemini Live API");
        return;
      }
      this.runtimeContext = runtimeContext;
      this.emit("session", { state: "connecting" });
      try {
        let wsUrl;
        let headers = {};
        if (this.options.vertexAI) {
          // NOTE(review): assumes options.location is set in Vertex AI mode;
          // an undefined location would produce "wss://undefined-aiplatform..."
          // — confirm upstream validation.
          wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
          await this.authManager.initialize();
          const accessToken = await this.authManager.getAccessToken();
          headers = { headers: { Authorization: `Bearer ${accessToken}` } };
          this.log("Using Vertex AI authentication with OAuth token");
        } else {
          wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
          headers = {
            headers: {
              "x-goog-api-key": this.options.apiKey || "",
              "Content-Type": "application/json"
            }
          };
          this.log("Using Live API authentication with API key");
        }
        this.log("Connecting to:", wsUrl);
        this.ws = new ws.WebSocket(wsUrl, void 0, headers);
        this.connectionManager.setWebSocket(this.ws);
        this.setupEventListeners();
        await this.connectionManager.waitForOpen();
        // Either resume a previous session via its handle, or start a new one.
        if (this.isResuming && this.sessionHandle) {
          await this.sendSessionResumption();
        } else {
          this.sendInitialConfig();
          this.sessionStartTime = Date.now();
          this.sessionId = crypto.randomUUID();
        }
        await this.waitForSessionCreated();
        this.state = "connected";
        this.emit("session", {
          state: "connected",
          config: {
            sessionId: this.sessionId,
            isResuming: this.isResuming,
            toolCount: Object.keys(this.tools || {}).length
          }
        });
        this.log("Successfully connected to Gemini Live API", {
          sessionId: this.sessionId,
          isResuming: this.isResuming,
          toolCount: Object.keys(this.tools || {}).length
        });
        // Optionally enforce a maximum session duration.
        if (this.options.sessionConfig?.maxDuration) {
          this.startSessionDurationMonitor();
        }
      } catch (error) {
        // Roll back to a clean state so a retry can start fresh.
        this.state = "disconnected";
        this.log("Connection failed", error);
        throw error;
      }
    }, "gemini-live.connect")();
  }
1566
+ /**
1567
+ * Disconnect from the Gemini Live API
1568
+ */
1569
  async disconnect() {
    // Idempotent: disconnecting twice is a no-op.
    if (this.state === "disconnected") {
      this.log("Already disconnected");
      return;
    }
    this.emit("session", { state: "disconnecting" });
    // Stop the max-duration watchdog, if one was started.
    if (this.sessionDurationTimeout) {
      clearTimeout(this.sessionDurationTimeout);
      this.sessionDurationTimeout = void 0;
    }
    // Preserve a handle so the session can be resumed later.
    if (this.options.sessionConfig?.enableResumption && this.sessionId) {
      this.sessionHandle = this.sessionId;
      this.log("Session handle saved for resumption", { handle: this.sessionHandle });
    }
    if (this.ws) {
      this.connectionManager.close();
      this.ws = void 0;
    }
    // Release audio streams and cached credentials.
    this.audioStreamManager.cleanupSpeakerStreams();
    this.authManager.clearCache();
    this.state = "disconnected";
    this.isResuming = false;
    // Emit the final state BEFORE tearing down listeners, so subscribers
    // still receive the 'disconnected' notification.
    this.emit("session", { state: "disconnected" });
    this.cleanupEventListeners();
    this.log("Disconnected from Gemini Live API", {
      sessionId: this.sessionId,
      sessionDuration: this.sessionStartTime ? Date.now() - this.sessionStartTime : void 0
    });
  }
1598
+ /**
1599
+ * Send text to be converted to speech
1600
+ */
1601
  async speak(input, options) {
    return this.traced(async () => {
      this.validateConnectionState();
      // Accept a chunk stream as well as a plain string; drain it fully first.
      if (typeof input !== "string") {
        const chunks = [];
        for await (const chunk of input) {
          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
        }
        input = Buffer.concat(chunks).toString("utf-8");
      }
      if (input.trim().length === 0) {
        throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
      }
      // Record the user turn for session continuity/resumption.
      this.addToContext("user", input);
      const textMessage = {
        client_content: {
          turns: [
            {
              role: "user",
              parts: [
                {
                  text: input
                }
              ]
            }
          ],
          turnComplete: true
        }
      };
      // Apply per-turn overrides (voice, language, modalities) before the
      // turn itself. This is best-effort: a failed update is logged and the
      // text turn is still sent.
      if (options && (options.speaker || options.languageCode || options.responseModalities)) {
        const updateMessage = {
          type: "session.update",
          session: {
            generation_config: {
              ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
              speech_config: {
                ...options.languageCode ? { language_code: options.languageCode } : {},
                ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
              }
            }
          }
        };
        try {
          this.sendEvent("session.update", updateMessage);
          this.log("Applied per-turn runtime options", options);
        } catch (error) {
          this.log("Failed to apply per-turn runtime options", error);
        }
      }
      try {
        this.sendEvent("client_content", textMessage);
        this.log("Text message sent", { text: input });
      } catch (error) {
        this.log("Failed to send text message", error);
        throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
      }
    }, "gemini-live.speak")();
  }
1659
+ /**
1660
+ * Send audio stream for processing
1661
+ */
1662
  async send(audioData) {
    return this.traced(async () => {
      this.validateConnectionState();
      // Duck-type a readable stream: forward each chunk as realtime input.
      if ("readable" in audioData && typeof audioData.on === "function") {
        // NOTE: this local shadows the module-level `stream` import.
        const stream = audioData;
        stream.on("data", (chunk) => {
          try {
            const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
            const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
            this.sendEvent("realtime_input", message);
          } catch (error) {
            // Per-chunk failures are reported via the 'error' event; the
            // stream keeps flowing rather than aborting.
            this.log("Failed to process audio chunk", error);
            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
          }
        });
        stream.on("error", (error) => {
          this.log("Audio stream error", error);
          this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
        });
        stream.on("end", () => {
          this.log("Audio stream ended");
        });
      } else {
        // Non-stream input: validate, convert to Int16, and send one message.
        const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
        const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
        const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
        this.sendEvent("realtime_input", message);
      }
    }, "gemini-live.send")();
  }
1692
+ /**
1693
+ * Process speech from audio stream (traditional STT interface)
1694
+ */
1695
  async listen(audioStream, _options) {
    return this.traced(async () => {
      this.validateConnectionState();
      let transcriptionText = "";
      // Accumulate user-role transcription fragments as they stream in.
      const onWriting = (data) => {
        if (data.role === "user") {
          transcriptionText += data.text;
          this.log("Received transcription text:", { text: data.text, total: transcriptionText });
        }
      };
      // NOTE(review): throwing from an event callback propagates out of the
      // emitter's emit() call, not into this promise chain — confirm this is
      // the intended failure mode for these two handlers.
      const onError = (error) => {
        throw new Error(`Transcription failed: ${error.message}`);
      };
      const onSession = (data) => {
        if (data.state === "disconnected") {
          throw new Error("Session disconnected during transcription");
        }
      };
      this.on("writing", onWriting);
      this.on("error", onError);
      this.on("session", onSession);
      try {
        const result = await this.audioStreamManager.handleAudioTranscription(
          audioStream,
          (base64Audio) => {
            return new Promise((resolve, reject) => {
              try {
                const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
                // One-shot listeners: detach both on either outcome.
                const cleanup = () => {
                  this.off("turnComplete", onTurnComplete);
                  this.off("error", onErr);
                };
                const onTurnComplete = () => {
                  cleanup();
                  // The model signaled end-of-turn: the accumulated text is final.
                  resolve(transcriptionText.trim());
                };
                const onErr = (e) => {
                  cleanup();
                  reject(new Error(e.message));
                };
                this.on("turnComplete", onTurnComplete);
                this.on("error", onErr);
                this.sendEvent("client_content", message);
                this.log("Sent audio for transcription");
              } catch (err) {
                reject(err);
              }
            });
          },
          (error) => {
            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
          }
        );
        return result;
      } finally {
        // Always detach the long-lived listeners added above.
        this.off("writing", onWriting);
        this.off("error", onError);
        this.off("session", onSession);
      }
    }, "gemini-live.listen")();
  }
1756
+ /**
1757
+ * Get available speakers/voices
1758
+ */
1759
+ async getSpeakers() {
1760
+ return this.traced(async () => {
1761
+ return [
1762
+ { voiceId: "Puck", description: "Conversational, friendly" },
1763
+ { voiceId: "Charon", description: "Deep, authoritative" },
1764
+ { voiceId: "Kore", description: "Neutral, professional" },
1765
+ { voiceId: "Fenrir", description: "Warm, approachable" }
1766
+ ];
1767
+ }, "gemini-live.getSpeakers")();
1768
+ }
1769
+ /**
1770
+ * Resume a previous session using a session handle
1771
+ */
1772
+ async resumeSession(handle, context) {
1773
+ if (this.state === "connected") {
1774
+ throw new Error("Cannot resume session while already connected. Disconnect first.");
1775
+ }
1776
+ this.log("Attempting to resume session", { handle });
1777
+ this.sessionHandle = handle;
1778
+ this.isResuming = true;
1779
+ if (context && context.length > 0) {
1780
+ this.contextManager.clearContext();
1781
+ for (const item of context) {
1782
+ this.contextManager.addEntry(item.role, item.content);
1783
+ }
1784
+ }
1785
+ try {
1786
+ await this.connect();
1787
+ this.log("Session resumed successfully", { handle, contextItems: context?.length || 0 });
1788
+ } catch (error) {
1789
+ this.isResuming = false;
1790
+ this.sessionHandle = void 0;
1791
+ throw new Error(`Failed to resume session: ${error instanceof Error ? error.message : "Unknown error"}`);
1792
+ }
1793
+ }
1794
+ /**
1795
+ * Update session configuration during an active session
1796
+ * Allows dynamic updates to voice, instructions, tools, and other settings
1797
+ *
1798
+ * @param config Partial configuration to update
1799
+ * @throws Error if not connected or update fails
1800
+ *
1801
+ * @example
1802
+ * ```typescript
1803
+ * // Change voice during conversation
1804
+ * await voice.updateSessionConfig({
1805
+ * speaker: 'Charon'
1806
+ * });
1807
+ *
1808
+ * // Update instructions
1809
+ * await voice.updateSessionConfig({
1810
+ * instructions: 'You are now a helpful coding assistant'
1811
+ * });
1812
+ *
1813
+ * // Add or update tools
1814
+ * await voice.updateSessionConfig({
1815
+ * tools: [{ name: 'new_tool', ... }]
1816
+ * });
1817
+ * ```
1818
+ */
1819
  async updateSessionConfig(config) {
    this.validateConnectionState();
    this.validateWebSocketState();
    return new Promise((resolve, reject) => {
      // Immutable-during-session settings are warned about, then ignored.
      if (config.model) {
        this.log("Warning: Model cannot be changed during an active session. Ignoring model update.");
      }
      if (config.vertexAI !== void 0 || config.project !== void 0 || config.location !== void 0) {
        this.log("Warning: Authentication settings cannot be changed during an active session.");
      }
      const updateMessage = {
        type: "session.update",
        session: {}
      };
      // Tracks whether anything worth sending was accumulated.
      let hasUpdates = false;
      if (config.speaker) {
        hasUpdates = true;
        updateMessage.session.generation_config = {
          ...updateMessage.session.generation_config,
          speech_config: {
            voice_config: {
              prebuilt_voice_config: {
                voice_name: config.speaker
              }
            }
          }
        };
        // Keep the local speaker field in sync with the requested voice.
        this.speaker = config.speaker;
        this.log("Updating speaker to:", config.speaker);
      }
      if (config.instructions !== void 0) {
        hasUpdates = true;
        updateMessage.session.system_instruction = {
          parts: [{ text: config.instructions }]
        };
        this.log("Updating instructions");
      }
      if (config.tools !== void 0) {
        hasUpdates = true;
        // An explicit empty array clears the tool set.
        if (config.tools.length > 0) {
          updateMessage.session.tools = config.tools.map((tool) => ({
            function_declarations: [
              {
                name: tool.name,
                description: tool.description,
                parameters: tool.parameters
              }
            ]
          }));
        } else {
          updateMessage.session.tools = [];
        }
        this.log("Updating tools:", config.tools.length, "tools");
      }
      // Tools registered via addTools(): converted here as well.
      // NOTE(review): when both config.tools and this.tools are set, this
      // branch overwrites session.tools built above — confirm precedence.
      if (this.tools && Object.keys(this.tools).length > 0) {
        hasUpdates = true;
        const allTools = [];
        for (const [toolName, tool] of Object.entries(this.tools)) {
          try {
            let parameters;
            if ("inputSchema" in tool && tool.inputSchema) {
              // "safeParse" presence is used as a Zod-schema marker.
              if (typeof tool.inputSchema === "object" && "safeParse" in tool.inputSchema) {
                parameters = this.convertZodSchemaToJsonSchema(tool.inputSchema);
              } else {
                parameters = tool.inputSchema;
              }
            } else if ("parameters" in tool && tool.parameters) {
              parameters = tool.parameters;
            } else {
              // No schema at all: declare a parameterless tool.
              parameters = { type: "object", properties: {} };
            }
            allTools.push({
              function_declarations: [
                {
                  name: toolName,
                  description: tool.description || `Tool: ${toolName}`,
                  parameters
                }
              ]
            });
          } catch (error) {
            // A single bad tool is skipped; the rest are still sent.
            this.log("Failed to process tool for session update", { toolName, error });
          }
        }
        if (allTools.length > 0) {
          updateMessage.session.tools = allTools;
          this.log("Updating tools from addTools method:", allTools.length, "tools");
        }
      }
      if (config.sessionConfig) {
        if (config.sessionConfig.vad) {
          hasUpdates = true;
          updateMessage.session.vad = {
            enabled: config.sessionConfig.vad.enabled ?? true,
            sensitivity: config.sessionConfig.vad.sensitivity ?? 0.5,
            silence_duration_ms: config.sessionConfig.vad.silenceDurationMs ?? 1e3
          };
          this.log("Updating VAD settings:", config.sessionConfig.vad);
        }
        if (config.sessionConfig.interrupts) {
          hasUpdates = true;
          updateMessage.session.interrupts = {
            enabled: config.sessionConfig.interrupts.enabled ?? true,
            allow_user_interruption: config.sessionConfig.interrupts.allowUserInterruption ?? true
          };
          this.log("Updating interrupt settings:", config.sessionConfig.interrupts);
        }
        if (config.sessionConfig.contextCompression !== void 0) {
          hasUpdates = true;
          updateMessage.session.context_compression = config.sessionConfig.contextCompression;
          this.log("Updating context compression:", config.sessionConfig.contextCompression);
          // Keep the local context manager in sync with the remote setting.
          this.contextManager.setCompressionEnabled(config.sessionConfig.contextCompression);
        }
      }
      if (!hasUpdates) {
        this.log("No valid configuration updates to send");
        resolve();
        return;
      }
      // The update is acknowledged asynchronously: wait for either a
      // 'session.updated' event, an 'error' event, or a 10s timeout.
      const timeout = setTimeout(() => {
        cleanup();
        reject(new Error("Session configuration update timeout - no response received"));
      }, 1e4);
      const onSessionUpdated = (data) => {
        cleanup();
        this.log("Session configuration updated successfully", data);
        resolve();
      };
      const onError = (error) => {
        cleanup();
        this.log("Session configuration update failed", error);
        reject(new Error(`Failed to update session configuration: ${error.message || "Unknown error"}`));
      };
      // Detach both listeners and cancel the timer, whichever path fired.
      const cleanup = () => {
        clearTimeout(timeout);
        this.eventManager.getEventEmitter().removeListener("session.updated", onSessionUpdated);
        this.eventManager.getEventEmitter().removeListener("error", onError);
      };
      this.eventManager.getEventEmitter().once("session.updated", onSessionUpdated);
      this.eventManager.getEventEmitter().once("error", onError);
      try {
        this.sendEvent("session.update", updateMessage);
        this.log("Sent session configuration update", updateMessage);
      } catch (error) {
        cleanup();
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        this.log("Failed to send session configuration update", error);
        reject(new Error(`Failed to send session configuration update: ${errorMessage}`));
      }
    });
  }
1970
  /**
   * Get current connection state.
   * @returns The lifecycle state string (e.g. "connected" or "disconnected")
   */
  getConnectionState() {
    return this.state;
  }
1976
+ /**
1977
+ * Check if currently connected
1978
+ */
1979
+ isConnected() {
1980
+ return this.state === "connected";
1981
+ }
1982
+ /**
1983
+ * Get current speaker stream for audio concatenation
1984
+ * This allows external access to the current audio stream being built
1985
+ */
1986
+ getCurrentSpeakerStream() {
1987
+ return this.audioStreamManager.getCurrentSpeakerStream();
1988
+ }
1989
+ /**
1990
+ * Get session handle for resumption
1991
+ */
1992
+ getSessionHandle() {
1993
+ return this.sessionHandle;
1994
+ }
1995
+ /**
1996
+ * Get comprehensive session information
1997
+ */
1998
+ getSessionInfo() {
1999
+ return {
2000
+ id: this.sessionId,
2001
+ handle: this.sessionHandle,
2002
+ startTime: this.sessionStartTime ? new Date(this.sessionStartTime) : void 0,
2003
+ duration: this.sessionStartTime ? Date.now() - this.sessionStartTime : void 0,
2004
+ state: this.state,
2005
+ config: this.options.sessionConfig,
2006
+ contextSize: this.contextManager.getContextSize()
2007
+ };
2008
+ }
2009
+ /**
2010
+ * Get session context history
2011
+ */
2012
+ getContextHistory() {
2013
+ return this.contextManager.getContextHistory();
2014
+ }
2015
+ /**
2016
+ * Add to context history for session continuity
2017
+ */
2018
+ addToContext(role, content) {
2019
+ this.contextManager.addEntry(role, content);
2020
+ }
2021
+ /**
2022
+ * Clear session context
2023
+ */
2024
+ clearContext() {
2025
+ this.contextManager.clearContext();
2026
+ this.log("Session context cleared");
2027
+ }
2028
+ /**
2029
+ * Enable or disable automatic reconnection
2030
+ */
2031
+ setAutoReconnect(enabled) {
2032
+ if (!this.options.sessionConfig) {
2033
+ this.options.sessionConfig = {};
2034
+ }
2035
+ this.options.sessionConfig.enableResumption = enabled;
2036
+ this.log(`Auto-reconnect ${enabled ? "enabled" : "disabled"}`);
2037
+ }
2038
+ /**
2039
+ * Send session resumption message
2040
+ * @private
2041
+ */
2042
+ async sendSessionResumption() {
2043
+ if (!this.sessionHandle) {
2044
+ throw new Error("No session handle available for resumption");
2045
+ }
2046
+ const context = this.contextManager.getContextArray();
2047
+ const resumeMessage = {
2048
+ session_resume: {
2049
+ handle: this.sessionHandle,
2050
+ ...context.length > 0 && {
2051
+ context
2052
+ }
2053
+ }
2054
+ };
2055
+ try {
2056
+ if (this.ws?.readyState !== ws.WebSocket.OPEN) {
2057
+ throw new Error("WebSocket not ready for session resumption");
2058
+ }
2059
+ this.sendEvent("session_resume", resumeMessage);
2060
+ this.log("Session resumption message sent", { handle: this.sessionHandle });
2061
+ } catch (error) {
2062
+ this.log("Failed to send session resumption", error);
2063
+ throw new Error(`Failed to send session resumption: ${error instanceof Error ? error.message : "Unknown error"}`);
2064
+ }
2065
+ }
2066
+ /**
2067
+ * Start monitoring session duration
2068
+ * @private
2069
+ */
2070
+ startSessionDurationMonitor() {
2071
+ if (!this.options.sessionConfig?.maxDuration) {
2072
+ return;
2073
+ }
2074
+ const durationMs = this.parseDuration(this.options.sessionConfig.maxDuration);
2075
+ if (!durationMs) {
2076
+ this.log("Invalid session duration format", { duration: this.options.sessionConfig.maxDuration });
2077
+ return;
2078
+ }
2079
+ if (this.sessionDurationTimeout) {
2080
+ clearTimeout(this.sessionDurationTimeout);
2081
+ }
2082
+ const warningTime = durationMs - 5 * 60 * 1e3;
2083
+ if (warningTime > 0) {
2084
+ setTimeout(() => {
2085
+ this.emit("sessionExpiring", {
2086
+ expiresIn: 5 * 60 * 1e3,
2087
+ sessionId: this.sessionId
2088
+ });
2089
+ }, warningTime);
2090
+ }
2091
+ this.sessionDurationTimeout = setTimeout(() => {
2092
+ this.log("Session duration limit reached, disconnecting");
2093
+ void this.disconnect();
2094
+ }, durationMs);
2095
+ }
2096
+ /**
2097
+ * Parse duration string to milliseconds
2098
+ * @private
2099
+ */
2100
+ parseDuration(duration) {
2101
+ const match = duration.match(/^(\d+)([hms])$/);
2102
+ if (!match) return null;
2103
+ const value = parseInt(match[1], 10);
2104
+ const unit = match[2];
2105
+ switch (unit) {
2106
+ case "h":
2107
+ return value * 60 * 60 * 1e3;
2108
+ case "m":
2109
+ return value * 60 * 1e3;
2110
+ case "s":
2111
+ return value * 1e3;
2112
+ default:
2113
+ return null;
2114
+ }
2115
+ }
2116
+ /**
2117
+ * Compress context history to manage memory
2118
+ * @private
2119
+ */
2120
+ compressContext() {
2121
+ this.log("compressContext is deprecated; handled by ContextManager");
2122
+ }
2123
  /**
   * Register WebSocket event handlers for the Gemini Live connection:
   * lifecycle logging, state transitions on close/error, and JSON message
   * dispatch into handleGeminiMessage.
   *
   * @throws {Error} If the WebSocket has not been created yet.
   * @private
   */
  setupEventListeners() {
    if (!this.ws) {
      throw new Error("WebSocket not initialized");
    }
    // "open" is informational only — the session becomes usable after the
    // setup handshake completes, not merely on socket open.
    this.ws.on("open", () => {
      this.log("WebSocket connection opened");
    });
    this.ws.on("close", (code, reason) => {
      this.log("WebSocket connection closed", { code, reason: reason.toString() });
      this.state = "disconnected";
      this.emit("session", { state: "disconnected" });
    });
    // An error marks the session disconnected AND surfaces a structured
    // error event to listeners.
    this.ws.on("error", (error) => {
      this.log("WebSocket error", error);
      this.state = "disconnected";
      this.emit("session", { state: "disconnected" });
      this.emit("error", {
        message: error.message,
        code: "websocket_error",
        details: error
      });
    });
    // Every inbound frame is expected to be JSON; parse failures are reported
    // rather than thrown so one bad frame cannot crash the process.
    this.ws.on("message", async (message) => {
      try {
        const data = JSON.parse(message.toString());
        await this.handleGeminiMessage(data);
      } catch (error) {
        this.log("Failed to parse WebSocket message", error);
        this.emit("error", {
          message: "Failed to parse WebSocket message",
          code: "parse_error",
          details: error
        });
      }
    });
  }
2163
+ /**
2164
+ * Handle different types of messages from Gemini Live API
2165
+ * @private
2166
+ */
2167
+ async handleGeminiMessage(data) {
2168
+ this.log("Received message:", JSON.stringify(data, null, 2));
2169
+ if (data.responseId) {
2170
+ this.setCurrentResponseId(data.responseId);
2171
+ this.log("Set current response ID:", data.responseId);
2172
+ }
2173
+ if (data.setup) {
2174
+ this.log("Processing setup message");
2175
+ this.handleSetupComplete(data);
2176
+ } else if (data.setupComplete) {
2177
+ this.log("Processing setupComplete message");
2178
+ this.handleSetupComplete(data);
2179
+ } else if (data.serverContent) {
2180
+ this.log("Processing server content message");
2181
+ this.handleServerContent(data.serverContent);
2182
+ } else if (data.toolCall) {
2183
+ this.log("Processing tool call message");
2184
+ await this.handleToolCall(data);
2185
+ } else if (data.usageMetadata) {
2186
+ this.log("Processing usage metadata message");
2187
+ this.handleUsageUpdate(data);
2188
+ } else if (data.sessionEnd) {
2189
+ this.log("Processing session end message");
2190
+ this.handleSessionEnd(data);
2191
+ } else if (data.error) {
2192
+ this.log("Processing error message");
2193
+ this.handleError(data.error);
2194
+ } else {
2195
+ const messageData = data;
2196
+ if (messageData.type === "setup" || messageData.type === "session.ready" || messageData.type === "ready") {
2197
+ this.log("Processing alternative setup message with type:", messageData.type);
2198
+ this.handleSetupComplete(data);
2199
+ } else if (messageData.sessionHandle) {
2200
+ this.log("Processing session handle message");
2201
+ this.handleSetupComplete(data);
2202
+ } else if (messageData.session || messageData.ready || messageData.status === "ready" || messageData.status === "setup_complete") {
2203
+ this.log("Processing setup completion message with status:", messageData.status);
2204
+ this.handleSetupComplete(data);
2205
+ } else if (messageData.candidates || messageData.promptFeedback) {
2206
+ this.log("Processing BidiGenerateContent response");
2207
+ this.handleSetupComplete(data);
2208
+ } else if (messageData.contents && Array.isArray(messageData.contents)) {
2209
+ this.log("Processing content response");
2210
+ this.handleServerContent({ modelTurn: { parts: messageData.contents.flatMap((c) => c.parts || []) } });
2211
+ this.handleSetupComplete(data);
2212
+ } else if (messageData.candidates && Array.isArray(messageData.candidates)) {
2213
+ this.log("Processing candidates response");
2214
+ this.handleSetupComplete(data);
2215
+ } else {
2216
+ this.log("Unknown message format - no recognized fields found");
2217
+ }
2218
+ }
2219
+ }
2220
  /**
   * Handle setup completion: drain any messages that were queued while the
   * connection was not yet ready, then notify internal listeners.
   *
   * @param {object} data - The raw setup-completion payload.
   * @private
   */
  handleSetupComplete(data) {
    this.log("Setup completed");
    // Atomically take ownership of the queued messages so new messages queued
    // during the drain are not processed twice.
    const queue = this.queue.splice(0, this.queue.length);
    if (queue.length > 0) {
      this.log("Processing queued messages:", queue.length);
      for (const queuedMessage of queue) {
        try {
          this.connectionManager.send(JSON.stringify(queuedMessage));
          this.log("Sent queued message:", queuedMessage);
        } catch (err) {
          // On failure, put the message back at the front and stop draining —
          // remaining messages stay queued in their original order.
          this.log("Failed to send queued message, re-queuing:", err);
          this.queue.unshift(queuedMessage);
          break;
        }
      }
    }
    // Signals waitForSessionCreated() and any other internal listeners.
    this.eventManager.getEventEmitter().emit("setupComplete", data);
  }
2242
+ /**
2243
+ * Handle session update confirmation
2244
+ * @private
2245
+ */
2246
+ handleSessionUpdated(data) {
2247
+ this.log("Session updated", data);
2248
+ this.eventManager.getEventEmitter().emit("session.updated", data);
2249
+ this.emit("session", {
2250
+ state: "updated",
2251
+ config: data
2252
+ });
2253
+ }
2254
  /**
   * Handle server content (text and/or audio parts of a model turn).
   *
   * Text parts are emitted as "writing" events and accumulated into the
   * assistant's context entry. Audio parts (base64 PCM) are decoded, written
   * into a per-response speaker stream (created lazily, with lifecycle
   * cleanup handlers), and also emitted via "speaking". On turnComplete the
   * speaker streams are finalized and a "turnComplete" event is emitted.
   *
   * @param {object} data - The serverContent payload from Gemini.
   * @private
   */
  handleServerContent(data) {
    if (!data) {
      return;
    }
    let assistantResponse = "";
    if (data.modelTurn?.parts) {
      for (const part of data.modelTurn.parts) {
        if (part.text) {
          assistantResponse += part.text;
          this.emit("writing", {
            text: part.text,
            role: "assistant"
          });
        }
        // Audio chunks arrive as base64-encoded data with an audio mime type.
        if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
          try {
            const audioData = part.inlineData.data;
            const int16Array = this.audioStreamManager.base64ToInt16Array(audioData);
            // Fall back to a random ID if no response ID was announced.
            const responseId = this.getCurrentResponseId() || crypto.randomUUID();
            let speakerStream = this.audioStreamManager.getSpeakerStream(responseId);
            if (!speakerStream) {
              // Before creating a new stream, reclaim stale ones and enforce
              // the concurrent-stream cap.
              this.audioStreamManager.cleanupStaleStreams();
              this.audioStreamManager.enforceStreamLimits();
              speakerStream = this.audioStreamManager.createSpeakerStream(responseId);
              // Any terminal stream event removes the stream from tracking.
              speakerStream.on("error", (streamError) => {
                this.log(`Speaker stream error for ${responseId}:`, streamError);
                this.audioStreamManager.removeSpeakerStream(responseId);
                this.emit("error", {
                  message: "Speaker stream error",
                  code: "speaker_stream_error",
                  details: { responseId, error: streamError }
                });
              });
              speakerStream.on("end", () => {
                this.log(`Speaker stream ended for response: ${responseId}`);
                this.audioStreamManager.removeSpeakerStream(responseId);
              });
              speakerStream.on("close", () => {
                this.log(`Speaker stream closed for response: ${responseId}`);
                this.audioStreamManager.removeSpeakerStream(responseId);
              });
              this.log("Created new speaker stream for response:", responseId);
              // Consumers receive the stream once, when it is first created.
              this.emit("speaker", speakerStream);
            }
            const audioBuffer = Buffer.from(int16Array.buffer, int16Array.byteOffset, int16Array.byteLength);
            speakerStream.write(audioBuffer);
            this.log("Wrote audio chunk to stream:", {
              responseId,
              chunkSize: audioBuffer.length,
              totalStreams: this.audioStreamManager.getActiveStreamCount()
            });
            this.emit("speaking", {
              audio: audioData,
              // Base64 string
              audioData: int16Array,
              sampleRate: this.audioConfig.outputSampleRate
              // Gemini Live outputs at 24kHz
            });
          } catch (error) {
            // A bad audio chunk is reported but does not abort the turn.
            this.log("Error processing audio data:", error);
            this.emit("error", {
              message: "Failed to process audio data",
              code: "audio_processing_error",
              details: error
            });
          }
        }
      }
    }
    // Record the assistant's full text reply for session continuity.
    if (assistantResponse.trim()) {
      this.addToContext("assistant", assistantResponse);
    }
    if (data.turnComplete) {
      this.log("Turn completed");
      this.audioStreamManager.cleanupSpeakerStreams();
      this.emit("turnComplete", {
        timestamp: Date.now()
      });
    }
  }
2338
  /**
   * Handle a tool-call request from the model: emit a "toolCall" event, look
   * up the tool, execute it, and send a `tool_result` message back. Execution
   * failures are reported both to the model (as an error result) and locally
   * (as a structured error event).
   *
   * @param {object} data - Message containing a `toolCall` payload.
   * @private
   */
  async handleToolCall(data) {
    if (!data.toolCall) {
      return;
    }
    const toolName = data.toolCall.name || "";
    const toolArgs = data.toolCall.args || {};
    // Generate an ID if the model did not supply one, so results can be correlated.
    const toolId = data.toolCall.id || crypto.randomUUID();
    this.log("Processing tool call", { toolName, toolArgs, toolId });
    this.emit("toolCall", {
      name: toolName,
      args: toolArgs,
      id: toolId
    });
    const tool = this.tools?.[toolName];
    if (!tool) {
      // Unknown tool: emit an error but do NOT send a tool_result back.
      this.log("Tool not found", { toolName });
      this.createAndEmitError("tool_not_found" /* TOOL_NOT_FOUND */, `Tool "${toolName}" not found`, {
        toolName,
        availableTools: Object.keys(this.tools || {})
      });
      return;
    }
    try {
      let result;
      if (tool.execute) {
        this.log("Executing tool", { toolName, toolArgs });
        result = await tool.execute(
          { context: toolArgs, runtimeContext: this.runtimeContext },
          {
            toolCallId: toolId,
            messages: []
          }
        );
        this.log("Tool executed successfully", { toolName, result });
      } else {
        // A tool without execute still gets a (error-shaped) result sent back.
        this.log("Tool has no execute function", { toolName });
        result = { error: "Tool has no execute function" };
      }
      const toolResultMessage = {
        tool_result: {
          tool_call_id: toolId,
          result
        }
      };
      this.sendEvent("tool_result", toolResultMessage);
      this.log("Tool result sent", { toolName, toolId, result });
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : "Unknown error";
      this.log("Tool execution failed", { toolName, error: errorMessage });
      // The model is informed of the failure so it can recover conversationally.
      const errorResultMessage = {
        tool_result: {
          tool_call_id: toolId,
          result: { error: errorMessage }
        }
      };
      this.sendEvent("tool_result", errorResultMessage);
      this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
        toolName,
        toolArgs,
        error
      });
    }
  }
2405
+ /**
2406
+ * Handle token usage information
2407
+ * @private
2408
+ */
2409
+ handleUsageUpdate(data) {
2410
+ if (data.usageMetadata) {
2411
+ this.emit("usage", {
2412
+ inputTokens: data.usageMetadata.promptTokenCount || 0,
2413
+ outputTokens: data.usageMetadata.responseTokenCount || 0,
2414
+ totalTokens: data.usageMetadata.totalTokenCount || 0,
2415
+ modality: this.determineModality(data)
2416
+ });
2417
+ }
2418
+ }
2419
+ /**
2420
+ * Handle session end
2421
+ * @private
2422
+ */
2423
+ handleSessionEnd(data) {
2424
+ this.log("Session ended", data.sessionEnd?.reason);
2425
+ this.state = "disconnected";
2426
+ this.emit("session", { state: "disconnected" });
2427
+ }
2428
+ /**
2429
+ * Handle errors
2430
+ * @private
2431
+ */
2432
+ handleError(error) {
2433
+ if (!error) {
2434
+ this.log("Received error from Gemini Live API (no error details)");
2435
+ return;
2436
+ }
2437
+ this.log("Received error from Gemini Live API", error);
2438
+ this.emit("error", {
2439
+ message: error.message || "Unknown error",
2440
+ code: error.code || "unknown_error",
2441
+ details: error.details
2442
+ });
2443
+ }
2444
+ /**
2445
+ * Determine the modality from message data
2446
+ * @private
2447
+ */
2448
+ determineModality(data) {
2449
+ if (data.serverContent?.modelTurn?.parts?.some((part) => part.inlineData?.mimeType?.includes("audio"))) {
2450
+ return "audio";
2451
+ }
2452
+ if (data.serverContent?.modelTurn?.parts?.some((part) => part.inlineData?.mimeType?.includes("video"))) {
2453
+ return "video";
2454
+ }
2455
+ return "text";
2456
+ }
2457
  /**
   * Send the initial Live API setup message: model name, optional system
   * instructions, and function declarations assembled from both the
   * constructor-provided tool list and tools registered via addTools()
   * (whose Zod input schemas are converted to JSON Schema).
   *
   * @throws {Error} If the WebSocket is not connected or sending fails.
   * @private
   */
  sendInitialConfig() {
    if (!this.ws || !this.connectionManager.isConnected()) {
      throw new Error("WebSocket not connected");
    }
    const setupMessage = {
      setup: {
        model: `models/${this.options.model}`
      }
    };
    if (this.options.instructions) {
      setupMessage.setup.systemInstruction = {
        parts: [{ text: this.options.instructions }]
      };
    }
    const allTools = [];
    // Tools passed at construction time already carry JSON-Schema parameters.
    if (this.options.tools && this.options.tools.length > 0) {
      for (const tool of this.options.tools) {
        allTools.push({
          functionDeclarations: [
            {
              name: tool.name,
              description: tool.description,
              parameters: tool.parameters
            }
          ]
        });
      }
    }
    // Tools added via addTools() may use Zod schemas and need conversion.
    // NOTE(review): a tool present in both sources is declared twice — TODO confirm intended.
    if (this.tools && Object.keys(this.tools).length > 0) {
      for (const [toolName, tool] of Object.entries(this.tools)) {
        try {
          let parameters;
          if ("inputSchema" in tool && tool.inputSchema) {
            // Presence of safeParse marks a Zod schema; plain objects pass through.
            if (typeof tool.inputSchema === "object" && "safeParse" in tool.inputSchema) {
              parameters = this.convertZodSchemaToJsonSchema(tool.inputSchema);
            } else {
              parameters = tool.inputSchema;
            }
          } else if ("parameters" in tool && tool.parameters) {
            parameters = tool.parameters;
          } else {
            // No schema available: declare an empty-object parameter set.
            parameters = { type: "object", properties: {} };
          }
          allTools.push({
            functionDeclarations: [
              {
                name: toolName,
                description: tool.description || `Tool: ${toolName}`,
                parameters
              }
            ]
          });
        } catch (error) {
          // A single malformed tool must not prevent the session setup.
          this.log("Failed to process tool", { toolName, error });
        }
      }
    }
    if (allTools.length > 0) {
      setupMessage.setup.tools = allTools;
      this.log("Including tools in setup message", { toolCount: allTools.length });
    }
    this.log("Sending Live API setup message:", setupMessage);
    try {
      this.sendEvent("setup", setupMessage);
    } catch (error) {
      this.log("Failed to send Live API setup message:", error);
      throw new Error(
        `Failed to send Live API setup message: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
2532
+ /**
2533
+ * Wait for Gemini Live session to be created and ready
2534
+ * @private
2535
+ */
2536
+ waitForSessionCreated() {
2537
+ return new Promise((resolve, reject) => {
2538
+ let isResolved = false;
2539
+ const onSetupComplete = () => {
2540
+ if (!isResolved) {
2541
+ isResolved = true;
2542
+ cleanup();
2543
+ resolve();
2544
+ }
2545
+ };
2546
+ const onError = (errorData) => {
2547
+ if (!isResolved) {
2548
+ isResolved = true;
2549
+ cleanup();
2550
+ reject(new Error(`Session creation failed: ${errorData.message || "Unknown error"}`));
2551
+ }
2552
+ };
2553
+ const onSessionEnd = () => {
2554
+ if (!isResolved) {
2555
+ isResolved = true;
2556
+ cleanup();
2557
+ reject(new Error("Session ended before setup completed"));
2558
+ }
2559
+ };
2560
+ const cleanup = () => {
2561
+ this.eventManager.getEventEmitter().removeListener("setupComplete", onSetupComplete);
2562
+ this.eventManager.getEventEmitter().removeListener("error", onError);
2563
+ this.eventManager.getEventEmitter().removeListener("sessionEnd", onSessionEnd);
2564
+ };
2565
+ this.eventManager.getEventEmitter().once("setupComplete", onSetupComplete);
2566
+ this.eventManager.getEventEmitter().once("error", onError);
2567
+ this.eventManager.getEventEmitter().once("sessionEnd", onSessionEnd);
2568
+ setTimeout(() => {
2569
+ if (!isResolved) {
2570
+ isResolved = true;
2571
+ cleanup();
2572
+ reject(new Error("Session creation timeout"));
2573
+ }
2574
+ }, 3e4);
2575
+ });
2576
+ }
2577
+ /**
2578
+ * Get OAuth access token for Vertex AI authentication
2579
+ * Implements token caching and automatic refresh
2580
+ * @private
2581
+ */
2582
+ async getAccessToken() {
2583
+ if (!this.options.vertexAI) {
2584
+ throw new Error("getAccessToken should only be called for Vertex AI mode");
2585
+ }
2586
+ return this.authManager.getAccessToken();
2587
+ }
2588
+ /**
2589
+ * Get the current response ID from the server message
2590
+ * This is needed to associate audio chunks with their respective responses.
2591
+ * @private
2592
+ */
2593
+ getCurrentResponseId() {
2594
+ return this.audioStreamManager.getCurrentResponseId();
2595
+ }
2596
+ /**
2597
+ * Set the current response ID for the next audio chunk.
2598
+ * This is used to track the response ID for the current turn.
2599
+ * @private
2600
+ */
2601
+ setCurrentResponseId(responseId) {
2602
+ this.audioStreamManager.setCurrentResponseId(responseId);
2603
+ }
2604
+ /**
2605
+ * Send an event to the Gemini Live API with queueing support
2606
+ * @private
2607
+ */
2608
+ sendEvent(type, data) {
2609
+ let message;
2610
+ if (type === "setup" && data.setup) {
2611
+ message = data;
2612
+ } else if (type === "client_content" && data.client_content) {
2613
+ message = data;
2614
+ } else if (type === "realtime_input" && data.realtime_input) {
2615
+ message = data;
2616
+ } else if (type === "session.update" && data.session) {
2617
+ message = data;
2618
+ } else {
2619
+ message = { type, ...data };
2620
+ }
2621
+ if (!this.ws || !this.connectionManager.isConnected()) {
2622
+ this.queue.push(message);
2623
+ this.log("Queued message:", { type, data });
2624
+ } else {
2625
+ this.connectionManager.send(JSON.stringify(message));
2626
+ this.log("Sent message:", { type, data });
2627
+ }
2628
+ }
2629
+ /**
2630
+ * Equip the voice provider with tools
2631
+ * @param tools Object containing tool definitions that can be called by the voice model
2632
+ *
2633
+ * @example
2634
+ * ```typescript
2635
+ * const weatherTool = createTool({
2636
+ * id: "getWeather",
2637
+ * description: "Get the current weather for a location",
2638
+ * inputSchema: z.object({
2639
+ * location: z.string().describe("The city and state, e.g. San Francisco, CA"),
2640
+ * }),
2641
+ * execute: async ({ context }) => {
2642
+ * // Fetch weather data from an API
2643
+ * const response = await fetch(
2644
+ * `https://api.weather.com?location=${encodeURIComponent(context.location)}`,
2645
+ * );
2646
+ * const data = await response.json();
2647
+ * return {
2648
+ * message: `The current temperature in ${context.location} is ${data.temperature}°F with ${data.conditions}.`,
2649
+ * };
2650
+ * },
2651
+ * });
2652
+ *
2653
+ * voice.addTools({
2654
+ * getWeather: weatherTool,
2655
+ * });
2656
+ * ```
2657
+ */
2658
+ addTools(tools) {
2659
+ this.tools = tools;
2660
+ this.log("Tools added to Gemini Live Voice", { toolCount: Object.keys(tools || {}).length });
2661
+ }
2662
+ /**
2663
+ * Get the current tools configured for this voice instance
2664
+ * @returns Object containing the current tools
2665
+ */
2666
+ getTools() {
2667
+ return this.tools;
2668
+ }
2669
+ log(message, ...args) {
2670
+ if (this.debug) {
2671
+ console.log(`[GeminiLiveVoice] ${message}`, ...args);
2672
+ }
2673
+ }
2674
+ /**
2675
+ * Convert Zod schema to JSON Schema for tool parameters
2676
+ * @private
2677
+ */
2678
+ convertZodSchemaToJsonSchema(schema) {
2679
+ try {
2680
+ if (typeof schema.toJSON === "function") {
2681
+ return schema.toJSON();
2682
+ }
2683
+ if (schema._def) {
2684
+ return this.convertZodDefToJsonSchema(schema._def);
2685
+ }
2686
+ if (typeof schema === "object" && !schema.safeParse) {
2687
+ return schema;
2688
+ }
2689
+ return {
2690
+ type: "object",
2691
+ properties: {},
2692
+ description: schema.description || ""
2693
+ };
2694
+ } catch (error) {
2695
+ this.log("Failed to convert Zod schema to JSON schema", { error, schema });
2696
+ return {
2697
+ type: "object",
2698
+ properties: {},
2699
+ description: "Schema conversion failed"
2700
+ };
2701
+ }
2702
+ }
2703
+ /**
2704
+ * Convert Zod definition to JSON Schema
2705
+ * @private
2706
+ */
2707
+ convertZodDefToJsonSchema(def) {
2708
+ switch (def.typeName) {
2709
+ case "ZodString":
2710
+ return {
2711
+ type: "string",
2712
+ description: def.description || ""
2713
+ };
2714
+ case "ZodNumber":
2715
+ return {
2716
+ type: "number",
2717
+ description: def.description || ""
2718
+ };
2719
+ case "ZodBoolean":
2720
+ return {
2721
+ type: "boolean",
2722
+ description: def.description || ""
2723
+ };
2724
+ case "ZodArray":
2725
+ return {
2726
+ type: "array",
2727
+ items: this.convertZodDefToJsonSchema(def.type._def),
2728
+ description: def.description || ""
2729
+ };
2730
+ case "ZodObject":
2731
+ const properties = {};
2732
+ const required = [];
2733
+ for (const [key, value] of Object.entries(def.shape())) {
2734
+ properties[key] = this.convertZodDefToJsonSchema(value._def);
2735
+ if (value._def.typeName === "ZodOptional") ; else {
2736
+ required.push(key);
2737
+ }
2738
+ }
2739
+ return {
2740
+ type: "object",
2741
+ properties,
2742
+ required: required.length > 0 ? required : void 0,
2743
+ description: def.description || ""
2744
+ };
2745
+ case "ZodOptional":
2746
+ return this.convertZodDefToJsonSchema(def.innerType._def);
2747
+ case "ZodEnum":
2748
+ return {
2749
+ type: "string",
2750
+ enum: def.values,
2751
+ description: def.description || ""
2752
+ };
2753
+ default:
2754
+ return {
2755
+ type: "object",
2756
+ properties: {},
2757
+ description: def.description || ""
2758
+ };
2759
+ }
2760
+ }
2761
+ /**
2762
+ * Close the connection (alias for disconnect)
2763
+ */
2764
+ close() {
2765
+ void this.disconnect();
2766
+ }
2767
+ /**
2768
+ * Trigger voice provider to respond
2769
+ */
2770
+ async answer(_options) {
2771
+ this.validateConnectionState();
2772
+ this.sendEvent("response.create", {});
2773
+ }
2774
+ /**
2775
+ * Equip the voice provider with instructions
2776
+ * @param instructions Instructions to add
2777
+ */
2778
+ addInstructions(instructions) {
2779
+ if (instructions) {
2780
+ this.options.instructions = instructions;
2781
+ this.log("Instructions added:", instructions);
2782
+ }
2783
+ }
2784
+ };
2785
+
2786
+ exports.GeminiLiveVoice = GeminiLiveVoice;
2787
+ //# sourceMappingURL=index.cjs.map