@mastra/voice-google-gemini-live 0.0.0-add-libsql-changeset-20250910154739
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +159 -0
- package/LICENSE.md +15 -0
- package/README.md +459 -0
- package/dist/index.cjs +2788 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +436 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2786 -0
- package/dist/index.js.map +1 -0
- package/dist/managers/AudioStreamManager.d.ts +207 -0
- package/dist/managers/AudioStreamManager.d.ts.map +1 -0
- package/dist/managers/AuthManager.d.ts +57 -0
- package/dist/managers/AuthManager.d.ts.map +1 -0
- package/dist/managers/ConnectionManager.d.ts +57 -0
- package/dist/managers/ConnectionManager.d.ts.map +1 -0
- package/dist/managers/ContextManager.d.ts +73 -0
- package/dist/managers/ContextManager.d.ts.map +1 -0
- package/dist/managers/EventManager.d.ts +64 -0
- package/dist/managers/EventManager.d.ts.map +1 -0
- package/dist/managers/SessionManager.d.ts +84 -0
- package/dist/managers/SessionManager.d.ts.map +1 -0
- package/dist/managers/index.d.ts +12 -0
- package/dist/managers/index.d.ts.map +1 -0
- package/dist/types.d.ts +319 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/utils/errors.d.ts +17 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/package.json +66 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,2788 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var crypto = require('crypto');
|
|
4
|
+
var voice = require('@mastra/core/voice');
|
|
5
|
+
var ws = require('ws');
|
|
6
|
+
var stream = require('stream');
|
|
7
|
+
var events = require('events');
|
|
8
|
+
var googleAuthLibrary = require('google-auth-library');
|
|
9
|
+
|
|
10
|
+
// src/index.ts
|
|
11
|
+
// Baseline audio settings merged underneath any caller-supplied configuration:
// 16 kHz input, 24 kHz output, PCM16 encoding, single channel.
var DEFAULT_AUDIO_CONFIG = {
  inputSampleRate: 16000,
  outputSampleRate: 24000,
  encoding: "pcm16",
  channels: 1
};
|
|
17
|
+
/**
 * Manages audio moving between the application and the Gemini Live API:
 *  - keeps the per-response speaker (output) streams and bounds their number,
 *  - validates, splits, encodes and sends input audio chunks through a sender callback,
 *  - collects an audio stream into a single buffer for transcription.
 */
var AudioStreamManager = class {
  // Speaker streams keyed by response ID; each stream carries `id` and `created` metadata.
  speakerStreams = /* @__PURE__ */ new Map();
  currentResponseId;
  MAX_CONCURRENT_STREAMS = 10;
  STREAM_TIMEOUT_MS = 3e4;
  // 30 seconds
  debug;
  audioConfig;
  maxChunkSize = 32768;
  // 32KB max chunk size per Gemini limits
  minSendInterval = 0;
  // No throttling - let the stream control the pace
  lastSendTime = 0;
  // FIFO queue of `{ chunk, timestamp }` entries waiting for the throttle window.
  pendingChunks = [];
  pendingTimer;
  sendToGemini;
  // Audio buffer management constants
  MAX_BUFFER_SIZE = 50 * 1024 * 1024;
  // 50MB maximum buffer size
  MAX_AUDIO_DURATION = 300;
  // 5 minutes maximum audio duration

  /**
   * @param audioConfig Audio configuration (sample rates, encoding, channels).
   * @param debug When true, `log` writes to the console.
   */
  constructor(audioConfig, debug = false) {
    this.audioConfig = audioConfig;
    this.debug = debug;
  }

  /**
   * Provide a sender callback that will be used to deliver messages to Gemini.
   * It is invoked as `sender("realtime_input", message)`.
   */
  setSender(sender) {
    this.sendToGemini = sender;
  }

  /**
   * Get a copy of the default audio configuration.
   */
  static getDefaultAudioConfig() {
    return { ...DEFAULT_AUDIO_CONFIG };
  }

  /**
   * Create a merged audio configuration; keys in `customConfig` override the defaults.
   */
  static createAudioConfig(customConfig) {
    return {
      ...DEFAULT_AUDIO_CONFIG,
      ...customConfig
    };
  }

  /**
   * Get the current response ID for the next audio chunk.
   */
  getCurrentResponseId() {
    return this.currentResponseId;
  }

  /**
   * Set the current response ID for the next audio chunk.
   */
  setCurrentResponseId(responseId) {
    this.currentResponseId = responseId;
  }

  /**
   * Get the speaker stream of the current response.
   * @returns The stream, or `null` when there is no current response or no stream for it.
   */
  getCurrentSpeakerStream() {
    const currentResponseId = this.getCurrentResponseId();
    if (!currentResponseId) {
      return null;
    }
    return this.speakerStreams.get(currentResponseId) ?? null;
  }

  /**
   * Register a speaker stream for a response. The stream object itself is
   * tagged with `id` and `created`, then the concurrent-stream limit is enforced.
   */
  addSpeakerStream(responseId, stream) {
    const streamWithMetadata = Object.assign(stream, {
      id: responseId,
      created: Date.now()
    });
    this.speakerStreams.set(responseId, streamWithMetadata);
    this.log(`Added speaker stream for response: ${responseId}`);
    this.enforceStreamLimits();
  }

  /**
   * End a stream that is still alive. When `forceDestroyAfterMs` is given, the
   * stream is destroyed after that delay if ending it did not already do so.
   */
  #endStream(responseId, stream, forceDestroyAfterMs) {
    if (!stream || stream.destroyed) {
      return;
    }
    stream.end();
    if (forceDestroyAfterMs === void 0) {
      return;
    }
    setTimeout(() => {
      if (!stream.destroyed) {
        stream.destroy();
        this.log(`Force destroyed stream for response: ${responseId}`);
      }
    }, forceDestroyAfterMs);
  }

  /**
   * Remove a specific speaker stream: end it, force-destroy it after one
   * second if needed, and drop it from the registry.
   */
  removeSpeakerStream(responseId) {
    this.#endStream(responseId, this.speakerStreams.get(responseId), 1e3);
    this.speakerStreams.delete(responseId);
    this.log(`Removed speaker stream for response: ${responseId}`);
  }

  /**
   * Clean up all speaker streams and forget the current response ID.
   * Never throws: failures are logged and the registry is cleared regardless.
   */
  cleanupSpeakerStreams() {
    try {
      if (this.speakerStreams.size === 0) {
        return;
      }
      this.log(`Cleaning up ${this.speakerStreams.size} speaker streams`);
      for (const [responseId, stream] of this.speakerStreams.entries()) {
        try {
          this.#endStream(responseId, stream, 1e3);
          this.speakerStreams.delete(responseId);
          this.log(`Cleaned up speaker stream for response: ${responseId}`);
        } catch (streamError) {
          this.log(`Error cleaning up stream ${responseId}:`, streamError);
          this.speakerStreams.delete(responseId);
        }
      }
      this.currentResponseId = void 0;
      this.log("All speaker streams cleaned up");
    } catch (error) {
      this.log("Error during speaker stream cleanup:", error);
      this.speakerStreams.clear();
      this.currentResponseId = void 0;
    }
  }

  /**
   * Clean up old/stale streams to prevent memory leaks. A stream is stale when
   * it was created more than STREAM_TIMEOUT_MS ago (or has no `created` stamp).
   */
  cleanupStaleStreams() {
    try {
      const staleCutoff = Date.now() - this.STREAM_TIMEOUT_MS;
      const staleIds = [];
      for (const [responseId, stream] of this.speakerStreams.entries()) {
        if ((stream.created || 0) < staleCutoff) {
          staleIds.push(responseId);
        }
      }
      if (staleIds.length === 0) {
        return;
      }
      this.log(`Cleaning up ${staleIds.length} stale streams`);
      for (const responseId of staleIds) {
        this.#endStream(responseId, this.speakerStreams.get(responseId));
        this.speakerStreams.delete(responseId);
      }
    } catch (error) {
      this.log("Error cleaning up stale streams:", error);
    }
  }

  /**
   * Enforce stream limits to prevent memory exhaustion: when more than
   * MAX_CONCURRENT_STREAMS are registered, the oldest ones are ended and removed.
   */
  enforceStreamLimits() {
    try {
      const excess = this.speakerStreams.size - this.MAX_CONCURRENT_STREAMS;
      if (excess <= 0) {
        return;
      }
      this.log(
        `Stream limit exceeded (${this.speakerStreams.size}/${this.MAX_CONCURRENT_STREAMS}), cleaning up oldest streams`
      );
      const oldestFirst = Array.from(this.speakerStreams.entries()).sort(
        ([, a], [, b]) => (a.created || 0) - (b.created || 0)
      );
      for (const [responseId, stream] of oldestFirst.slice(0, excess)) {
        this.#endStream(responseId, stream);
        this.speakerStreams.delete(responseId);
        this.log(`Removed old stream for response: ${responseId}`);
      }
    } catch (error) {
      this.log("Error enforcing stream limits:", error);
    }
  }

  /**
   * Get information about current streams for debugging.
   */
  getStreamInfo() {
    const streamDetails = Array.from(this.speakerStreams.entries()).map(([responseId, stream]) => ({
      responseId,
      created: stream.created || 0,
      destroyed: stream.destroyed
    }));
    return {
      totalStreams: this.speakerStreams.size,
      currentResponseId: this.currentResponseId,
      streamDetails
    };
  }

  /**
   * Convert Int16Array audio data to a base64 string for WebSocket transmission.
   * Samples are serialized as little-endian 16-bit integers.
   */
  int16ArrayToBase64(int16Array) {
    const buffer = new ArrayBuffer(int16Array.length * 2);
    const view = new DataView(buffer);
    for (let i = 0; i < int16Array.length; i++) {
      view.setInt16(i * 2, int16Array[i], true);
    }
    return Buffer.from(buffer).toString("base64");
  }

  /**
   * View a byte container (Buffer/Uint8Array of even length) as 16-bit samples.
   * An Int16Array view must start on a 2-byte boundary, which a sliced or
   * pooled Buffer does not guarantee; misaligned input is copied into a fresh,
   * aligned buffer first. Aligned input is viewed in place (no copy).
   */
  #asInt16Samples(bytes) {
    if (bytes.byteOffset % 2 === 0) {
      return new Int16Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 2);
    }
    const aligned = new Uint8Array(bytes.byteLength);
    aligned.set(bytes);
    return new Int16Array(aligned.buffer, 0, aligned.byteLength / 2);
  }

  /**
   * Convert a base64 string to Int16Array audio data.
   * @throws {Error} When the decoded data has an odd byte length or cannot be decoded.
   */
  base64ToInt16Array(base64Audio) {
    try {
      const buffer = Buffer.from(base64Audio, "base64");
      if (buffer.length % 2 !== 0) {
        throw new Error("Invalid audio data: buffer length must be even for 16-bit audio");
      }
      return this.#asInt16Samples(buffer);
    } catch (error) {
      throw new Error(
        `Failed to decode base64 audio data: ${error instanceof Error ? error.message : "Unknown error"}`,
        { cause: error }
      );
    }
  }

  /**
   * Validate and convert audio data to the required format for Gemini Live API
   * Gemini Live expects 16kHz PCM16 for input
   * @throws {Error} For odd-length buffers or unsupported input types.
   */
  validateAndConvertAudioInput(audioData) {
    if (Buffer.isBuffer(audioData)) {
      if (audioData.length % 2 !== 0) {
        throw new Error("Audio buffer length must be even for 16-bit audio");
      }
      return this.#asInt16Samples(audioData);
    }
    if (audioData instanceof Int16Array) {
      return audioData;
    }
    throw new Error("Unsupported audio data format. Expected Buffer or Int16Array");
  }

  /**
   * Process audio chunk for streaming - handles format validation and conversion.
   * @param chunk Int16Array, Buffer or Uint8Array of PCM16 audio.
   * @returns Base64 encoding of the chunk's samples.
   * @throws {Error} For odd-length byte input or unsupported chunk types.
   */
  processAudioChunk(chunk) {
    let samples;
    if (chunk instanceof Int16Array) {
      samples = chunk;
    } else if (Buffer.isBuffer(chunk) || chunk instanceof Uint8Array) {
      if (chunk.length % 2 !== 0) {
        throw new Error("Audio chunk length must be even for 16-bit audio");
      }
      samples = this.#asInt16Samples(chunk);
    } else {
      throw new Error("Unsupported audio chunk format");
    }
    return this.int16ArrayToBase64(samples);
  }

  /**
   * Validate audio format and sample rate for Gemini Live API requirements.
   * A sample-rate mismatch is only logged; a channel mismatch throws.
   */
  validateAudioFormat(sampleRate, channels) {
    if (sampleRate && sampleRate !== this.audioConfig.inputSampleRate) {
      this.log(
        `Warning: Audio sample rate ${sampleRate}Hz does not match expected ${this.audioConfig.inputSampleRate}Hz`
      );
    }
    if (channels && channels !== this.audioConfig.channels) {
      throw new Error(`Unsupported channel count: ${channels}. Gemini Live API requires mono audio (1 channel)`);
    }
  }

  /**
   * Create an audio message for the Gemini Live API.
   * @param audioData Base64-encoded audio.
   * @param messageType "input" builds a complete `client_content` turn; anything
   *   else builds a `realtime_input` media chunk.
   */
  createAudioMessage(audioData, messageType = "realtime") {
    if (messageType === "input") {
      return {
        client_content: {
          turns: [
            {
              role: "user",
              parts: [
                {
                  inlineData: {
                    mimeType: "audio/pcm",
                    data: audioData
                  }
                }
              ]
            }
          ],
          turnComplete: true
        }
      };
    }
    return {
      realtime_input: {
        media_chunks: [
          {
            mime_type: "audio/pcm",
            data: audioData
          }
        ]
      }
    };
  }

  /**
   * Get a speaker stream by response ID (undefined when not registered).
   */
  getSpeakerStream(responseId) {
    return this.speakerStreams.get(responseId);
  }

  /**
   * Create and register a new PassThrough speaker stream for a response ID.
   */
  createSpeakerStream(responseId) {
    const stream$1 = new stream.PassThrough();
    // addSpeakerStream stamps `id` and `created` onto the stream object.
    this.addSpeakerStream(responseId, stream$1);
    return stream$1;
  }

  /**
   * Get the number of registered streams.
   */
  getActiveStreamCount() {
    return this.speakerStreams.size;
  }

  /**
   * Check if a specific response ID has a registered stream.
   */
  hasStream(responseId) {
    return this.speakerStreams.has(responseId);
  }

  /**
   * Get all registered response IDs.
   */
  getActiveResponseIds() {
    return Array.from(this.speakerStreams.keys());
  }

  /**
   * Reset the manager state (useful for testing or reconnection). Also drops
   * any throttled chunks that were still queued so they are not sent later.
   */
  reset() {
    this.cleanupSpeakerStreams();
    this.currentResponseId = void 0;
    if (this.pendingTimer) {
      clearTimeout(this.pendingTimer);
      this.pendingTimer = void 0;
    }
    this.pendingChunks = [];
    this.log("AudioStreamManager reset");
  }

  /**
   * Size of a chunk in bytes. Typed arrays and Buffers report `byteLength`;
   * anything else falls back to `length`.
   */
  #byteSizeOf(chunk) {
    return chunk.byteLength ?? chunk.length;
  }

  /**
   * Validate audio chunk size and format. Sizes are measured in bytes, so an
   * Int16Array is judged by its byte length rather than its sample count.
   * @throws {Error} When the chunk is empty, larger than `maxChunkSize`, or has an odd byte size.
   */
  validateAudioChunk(chunk) {
    const byteSize = this.#byteSizeOf(chunk);
    if (byteSize === 0) {
      throw new Error("Audio chunk cannot be empty");
    }
    if (byteSize > this.maxChunkSize) {
      throw new Error(`Audio chunk size ${byteSize} exceeds maximum allowed size ${this.maxChunkSize}`);
    }
    if (byteSize % 2 !== 0) {
      throw new Error("Audio chunk length must be even for 16-bit audio");
    }
  }

  /**
   * Arm the single flush timer for queued chunks, unless one is already armed.
   */
  #schedulePendingFlush(delayMs) {
    if (this.pendingTimer) {
      return;
    }
    this.pendingTimer = setTimeout(
      () => {
        this.pendingTimer = void 0;
        this.processPendingChunks();
      },
      Math.max(0, delayMs)
    );
  }

  /**
   * Send audio chunk with throttling and validation. Chunks are delivered in
   * arrival order: while earlier chunks are still queued, a new chunk is queued
   * behind them instead of overtaking them.
   * @throws Re-throws any validation or send error after logging it.
   */
  sendAudioChunk(chunk) {
    try {
      this.validateAudioChunk(chunk);
      const now = Date.now();
      const throttled = now - this.lastSendTime < this.minSendInterval;
      if (throttled || this.pendingChunks.length > 0) {
        this.pendingChunks.push({ chunk, timestamp: now });
        // Sends whatever is already due (oldest first) and schedules the rest.
        this.processPendingChunks();
        return;
      }
      this.processChunk(chunk);
    } catch (error) {
      this.log("Error sending audio chunk:", error);
      throw error;
    }
  }

  /**
   * Handle audio stream processing: every data chunk is sent (split first when
   * it exceeds `maxChunkSize`). Resolves on "end"; rejects on a stream error or
   * on the first chunk that fails to send. Only the listeners installed here
   * are removed afterwards, so listeners owned by the caller stay attached.
   */
  async handleAudioStream(stream) {
    return new Promise((resolve, reject) => {
      const onData = (chunk) => {
        try {
          if (this.#byteSizeOf(chunk) > this.maxChunkSize) {
            for (const subChunk of this.splitAudioChunk(chunk)) {
              this.sendAudioChunk(subChunk);
            }
          } else {
            this.sendAudioChunk(chunk);
          }
        } catch (error) {
          cleanup();
          reject(error);
        }
      };
      const onEnd = () => {
        cleanup();
        resolve();
      };
      const onError = (error) => {
        cleanup();
        reject(error);
      };
      const cleanup = () => {
        stream.removeListener("data", onData);
        stream.removeListener("end", onEnd);
        stream.removeListener("error", onError);
      };
      stream.on("data", onData);
      stream.on("end", onEnd);
      stream.on("error", onError);
    });
  }

  /**
   * Split large audio chunks into pieces of at most `maxChunkSize` bytes.
   * Pieces are `subarray` views over the input, not copies.
   */
  splitAudioChunk(chunk) {
    const bytesPerElement = chunk.BYTES_PER_ELEMENT ?? 1;
    const step = Math.max(1, Math.floor(this.maxChunkSize / bytesPerElement));
    const chunks = [];
    for (let offset = 0; offset < chunk.length; offset += step) {
      chunks.push(chunk.subarray(offset, offset + step));
    }
    return chunks;
  }

  /**
   * Calculate audio duration in seconds from a byte length, assuming 2 bytes
   * per sample. Falls back to the configured input sample rate.
   */
  calculateAudioDuration(bufferLength, sampleRate) {
    const effectiveSampleRate = sampleRate || this.audioConfig.inputSampleRate;
    return bufferLength / (effectiveSampleRate * 2);
  }

  /**
   * Validate audio buffer size and duration.
   * @throws {Error} When the buffer is empty, too large, odd-length, or too long.
   */
  validateAudioBuffer(buffer) {
    if (buffer.length === 0) {
      throw new Error("Audio buffer cannot be empty");
    }
    if (buffer.length > this.MAX_BUFFER_SIZE) {
      throw new Error(
        `Audio buffer size ${buffer.length} exceeds maximum allowed size ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`
      );
    }
    if (buffer.length % 2 !== 0) {
      throw new Error("Audio buffer length must be even for 16-bit audio");
    }
    const duration = this.calculateAudioDuration(buffer.length);
    if (duration > this.MAX_AUDIO_DURATION) {
      throw new Error(
        `Audio duration ${duration.toFixed(2)}s exceeds maximum allowed duration ${this.MAX_AUDIO_DURATION}s`
      );
    }
  }

  /**
   * Process audio buffer for transcription: checks the length is even and
   * returns its base64 encoding, duration in seconds and size in bytes.
   */
  processAudioBufferForTranscription(audioBuffer) {
    if (audioBuffer.length % 2 !== 0) {
      throw new Error("Invalid audio data: buffer length must be even for 16-bit audio");
    }
    return {
      base64Audio: audioBuffer.toString("base64"),
      duration: this.calculateAudioDuration(audioBuffer.length),
      size: audioBuffer.length
    };
  }

  /**
   * Process audio chunks for transcription with buffer management:
   * rejects oversize input, concatenates the chunks and encodes the result.
   */
  processAudioChunksForTranscription(chunks, totalBufferSize) {
    if (totalBufferSize > this.MAX_BUFFER_SIZE) {
      throw new Error(`Audio data exceeds maximum size of ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`);
    }
    const audioBuffer = Buffer.concat(chunks);
    return {
      audioBuffer,
      ...this.processAudioBufferForTranscription(audioBuffer)
    };
  }

  /**
   * Validate audio chunks and calculate total size.
   * @returns `{ totalSize, isValid, error? }`; never throws for bad chunks.
   */
  validateAudioChunks(chunks) {
    let totalSize = 0;
    for (const chunk of chunks) {
      if (!Buffer.isBuffer(chunk)) {
        return { totalSize: 0, isValid: false, error: "Invalid chunk format" };
      }
      totalSize += chunk.length;
      if (totalSize > this.MAX_BUFFER_SIZE) {
        return {
          totalSize,
          isValid: false,
          error: `Total size ${totalSize} exceeds maximum allowed size ${this.MAX_BUFFER_SIZE}`
        };
      }
    }
    return { totalSize, isValid: true };
  }

  /**
   * Get audio buffer limits and configuration.
   */
  getAudioBufferLimits() {
    return {
      maxBufferSize: this.MAX_BUFFER_SIZE,
      maxAudioDuration: this.MAX_AUDIO_DURATION,
      maxChunkSize: this.maxChunkSize
    };
  }

  /**
   * Get audio configuration.
   */
  getAudioConfig() {
    return this.audioConfig;
  }

  /**
   * Log message if debug is enabled.
   */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[AudioStreamManager] ${message}`, ...args);
    }
  }

  /**
   * Handle complete audio transcription workflow: buffers the whole stream,
   * hands its base64 encoding to `sendAndAwaitTranscript`, and resolves with
   * the trimmed transcript.
   *
   * The promise settles exactly once. Every failure path (timeout, oversize
   * input, stream error, transcription error, a transcript that cannot be
   * trimmed) rejects, and the timer and listeners are always released.
   *
   * @param audioStream Readable stream of audio data.
   * @param sendAndAwaitTranscript Async callback receiving the base64 audio.
   * @param onError Not used by this method; kept so the signature is unchanged.
   * @param timeoutMs Overall deadline, measured from the call.
   */
  async handleAudioTranscription(audioStream, sendAndAwaitTranscript, onError, timeoutMs = 3e4) {
    return new Promise((resolve, reject) => {
      const chunks = [];
      let totalBufferSize = 0;
      let isSettled = false;
      let timeout;
      const cleanup = () => {
        clearTimeout(timeout);
        audioStream.removeListener("data", onStreamData);
        audioStream.removeListener("error", onStreamError);
        audioStream.removeListener("end", onStreamEnd);
        chunks.length = 0;
      };
      // Single exit point: the first caller wins, later calls are ignored.
      const settle = (finish, value) => {
        if (isSettled) return;
        isSettled = true;
        cleanup();
        finish(value);
      };
      const fail = (prefix, error) => {
        const reason = error instanceof Error ? error.message : "Unknown error";
        settle(reject, new Error(`${prefix}: ${reason}`, { cause: error }));
      };
      const onStreamData = (chunk) => {
        try {
          const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
          totalBufferSize += buffer.length;
          if (totalBufferSize > this.MAX_BUFFER_SIZE) {
            settle(reject, new Error(`Audio data exceeds maximum size of ${this.MAX_BUFFER_SIZE / (1024 * 1024)}MB`));
            return;
          }
          chunks.push(buffer);
        } catch (error) {
          fail("Failed to process audio chunk", error);
        }
      };
      const onStreamError = (error) => {
        settle(reject, new Error(`Audio stream error: ${error.message}`, { cause: error }));
      };
      const onStreamEnd = async () => {
        let result;
        try {
          audioStream.removeListener("data", onStreamData);
          audioStream.removeListener("error", onStreamError);
          result = this.processAudioChunksForTranscription(chunks, totalBufferSize);
          this.log("Processing audio for transcription:", {
            chunks: chunks.length,
            totalSize: result.size,
            duration: result.duration
          });
        } catch (error) {
          fail("Failed to process audio stream", error);
          return;
        }
        try {
          const transcript = await sendAndAwaitTranscript(result.base64Audio);
          // Trim before settling so a non-string transcript rejects instead of
          // throwing after the promise has been marked as done.
          const text = transcript.trim();
          settle(resolve, text);
        } catch (error) {
          fail("Failed to obtain transcription", error);
        }
      };
      timeout = setTimeout(() => {
        settle(reject, new Error(`Transcription timeout - no response received within ${timeoutMs / 1e3} seconds`));
      }, timeoutMs);
      audioStream.on("data", onStreamData);
      audioStream.on("error", onStreamError);
      audioStream.on("end", onStreamEnd);
    });
  }

  /**
   * Encode one chunk and hand it to the sender as a realtime message.
   * Without a configured sender the chunk is dropped (and logged as such).
   * `lastSendTime` is updated in both cases.
   */
  processChunk(chunk) {
    const base64Audio = this.processAudioChunk(chunk);
    const message = this.createAudioMessage(base64Audio, "realtime");
    if (this.sendToGemini) {
      this.sendToGemini("realtime_input", message);
      this.log(`Sent audio chunk of size: ${this.#byteSizeOf(chunk)} bytes`);
    } else {
      this.log("No sender configured for AudioStreamManager; dropping audio chunk");
    }
    this.lastSendTime = Date.now();
  }

  /**
   * Send queued chunks, oldest first, for as long as the throttle interval
   * allows; schedules a timer for the remainder.
   */
  processPendingChunks() {
    while (this.pendingChunks.length > 0) {
      const elapsed = Date.now() - this.lastSendTime;
      if (elapsed < this.minSendInterval) {
        this.#schedulePendingFlush(this.minSendInterval - elapsed);
        break;
      }
      const nextChunk = this.pendingChunks.shift();
      this.processChunk(nextChunk.chunk);
    }
  }
};
|
|
690
|
+
|
|
691
|
+
// src/utils/errors.ts
|
|
692
|
+
/**
 * Error type carrying a machine-readable code, optional details and the
 * creation time (milliseconds since the epoch) alongside the message.
 */
var GeminiLiveError = class extends Error {
  code;
  details;
  timestamp;

  /**
   * @param code Error code identifying the failure category.
   * @param message Human-readable description.
   * @param details Optional extra data attached to the error.
   */
  constructor(code, message, details) {
    super(message);
    this.name = "GeminiLiveError";
    Object.assign(this, { code, details, timestamp: Date.now() });
  }

  /**
   * Plain-object snapshot of this error (message, code, details, timestamp).
   */
  toEventData() {
    const { message, code, details, timestamp } = this;
    return { message, code, details, timestamp };
  }
};
|
|
712
|
+
|
|
713
|
+
// src/managers/ConnectionManager.ts
|
|
714
|
+
/**
 * Thin wrapper around a WebSocket instance: stores the socket, reports its
 * state, waits for it to open (with a deadline) and sends data through it.
 */
var ConnectionManager = class {
  ws;
  eventEmitter;
  debug;
  timeoutMs;

  /**
   * @param config `debug` enables console logging; `timeoutMs` is the
   *   deadline used by `waitForOpen` (defaults to 30 seconds when falsy).
   */
  constructor(config) {
    this.eventEmitter = new events.EventEmitter();
    this.debug = config.debug;
    this.timeoutMs = config.timeoutMs || 3e4;
  }

  /**
   * Set the WebSocket instance
   */
  setWebSocket(ws) {
    this.ws = ws;
  }

  /**
   * Get the current WebSocket instance
   */
  getWebSocket() {
    return this.ws;
  }

  /**
   * Check if WebSocket is connected
   */
  isConnected() {
    return this.ws?.readyState === ws.WebSocket.OPEN;
  }

  /**
   * Check if WebSocket is connecting
   */
  isConnecting() {
    return this.ws?.readyState === ws.WebSocket.CONNECTING;
  }

  /**
   * Check if WebSocket is closed
   */
  isClosed() {
    return this.ws?.readyState === ws.WebSocket.CLOSED;
  }

  /**
   * Wait for WebSocket to open.
   *
   * Resolves immediately when the socket is already open. Otherwise resolves
   * on "open" and rejects on "error", on "close", or when `timeoutMs` elapses.
   * Whichever happens first also cancels the deadline timer and detaches the
   * listeners from the socket that was current when the wait started.
   */
  async waitForOpen() {
    return new Promise((resolve, reject) => {
      // Capture the socket so cleanup targets it even if `this.ws` changes later.
      const socket = this.ws;
      if (!socket) {
        reject(new Error("WebSocket not initialized"));
        return;
      }
      if (socket.readyState === ws.WebSocket.OPEN) {
        resolve();
        return;
      }
      let timer;
      const cleanup = () => {
        clearTimeout(timer);
        socket.removeListener("open", onOpen);
        socket.removeListener("error", onError);
        socket.removeListener("close", onClose);
      };
      const onOpen = () => {
        cleanup();
        resolve();
      };
      const onError = (error) => {
        cleanup();
        reject(new Error(`WebSocket connection failed: ${error.message}`));
      };
      const onClose = () => {
        cleanup();
        reject(new Error("WebSocket connection closed before opening"));
      };
      socket.once("open", onOpen);
      socket.once("error", onError);
      socket.once("close", onClose);
      timer = setTimeout(() => {
        cleanup();
        reject(new GeminiLiveError("connection_failed" /* CONNECTION_FAILED */, "WebSocket connection timeout"));
      }, this.timeoutMs);
    });
  }

  /**
   * Send data through WebSocket
   * @throws {GeminiLiveError} When no socket is set or it is not open.
   */
  send(data) {
    this.validateWebSocketState();
    this.ws.send(data);
  }

  /**
   * Close the WebSocket connection and forget the socket.
   */
  close() {
    if (this.ws) {
      this.ws.close();
      this.ws = void 0;
    }
  }

  /**
   * Get connection state: "connecting", "connected" or "closed" for the
   * matching ready states, "disconnected" when there is no socket or the
   * ready state is anything else.
   */
  getConnectionState() {
    if (!this.ws) return "disconnected";
    switch (this.ws.readyState) {
      case ws.WebSocket.CONNECTING:
        return "connecting";
      case ws.WebSocket.OPEN:
        return "connected";
      case ws.WebSocket.CLOSED:
        return "closed";
      default:
        return "disconnected";
    }
  }

  /**
   * Validate WebSocket state for operations
   * @throws {GeminiLiveError} When no socket is set or it is not open.
   */
  validateWebSocketState() {
    if (!this.ws) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket not initialized");
    }
    if (this.ws.readyState !== ws.WebSocket.OPEN) {
      throw new GeminiLiveError("connection_not_established" /* CONNECTION_NOT_ESTABLISHED */, "WebSocket is not open");
    }
  }

  /**
   * Log message if debug is enabled
   */
  log(message, ...args) {
    if (this.debug) {
      console.log(`[ConnectionManager] ${message}`, ...args);
    }
  }
};
|
|
850
|
+
|
|
851
|
+
// src/managers/ContextManager.ts
|
|
852
|
+
var ContextManager = class {
  // Chronological list of { role, content, timestamp } entries.
  contextHistory = [];
  maxEntries;
  maxContentLength;
  compressionThreshold;
  compressionEnabled;
  /**
   * @param config Optional limits. Falsy `maxEntries`, `maxContentLength` and
   *   `compressionThreshold` fall back to 100, 10000 and 50 respectively;
   *   `compressionEnabled` defaults to false.
   */
  constructor(config = {}) {
    this.maxEntries = config.maxEntries || 100;
    this.maxContentLength = config.maxContentLength || 1e4;
    this.compressionThreshold = config.compressionThreshold || 50;
    this.compressionEnabled = config.compressionEnabled ?? false;
  }
  /**
   * Append an entry to the history.
   *
   * Content longer than `maxContentLength` is cut to that length and suffixed
   * with "...". When the history then exceeds `maxEntries` it is either
   * compressed (compression enabled) or trimmed to the newest `maxEntries`.
   *
   * @param role Speaker role stored on the entry.
   * @param content Text of the entry.
   */
  addEntry(role, content) {
    let processedContent = content;
    if (content.length > this.maxContentLength) {
      processedContent = content.substring(0, this.maxContentLength) + "...";
    }
    const entry = {
      role,
      content: processedContent,
      timestamp: Date.now()
    };
    this.contextHistory.push(entry);
    if (this.contextHistory.length > this.maxEntries) {
      if (this.compressionEnabled) {
        this.compressContext();
      } else {
        this.contextHistory = this.contextHistory.slice(-this.maxEntries);
      }
    }
  }
  /**
   * @returns A shallow copy of the history array (entry objects are shared).
   */
  getContextHistory() {
    return [...this.contextHistory];
  }
  /**
   * @returns The history as new `{ role, content }` objects, without timestamps.
   */
  getContextArray() {
    return this.contextHistory.map((entry) => ({
      role: entry.role,
      content: entry.content
    }));
  }
  /**
   * Remove every entry from the history.
   */
  clearContext() {
    this.contextHistory = [];
  }
  /**
   * @returns Number of entries currently stored.
   */
  getContextSize() {
    return this.contextHistory.length;
  }
  /**
   * Replace the middle of the history with a single placeholder entry.
   *
   * Does nothing when compression is disabled or the history is not longer
   * than `compressionThreshold`. Otherwise the first and last
   * `floor(compressionThreshold / 3)` entries (at least one each) are kept
   * and everything between them becomes one
   * "[Compressed N previous messages]" assistant entry.
   */
  compressContext() {
    const total = this.contextHistory.length;
    if (!this.compressionEnabled || total <= this.compressionThreshold) {
      return;
    }
    // Keep at least one entry per side: with keepCount === 0, slice(-0)
    // would return the whole array and nothing would ever be compressed.
    const keepCount = Math.max(1, Math.floor(this.compressionThreshold / 3));
    if (total <= keepCount * 2) {
      // Head and tail already cover everything; there is no middle to fold.
      return;
    }
    // Explicit indices instead of negative offsets avoid the -0 pitfall.
    const tailStart = total - keepCount;
    const firstEntries = this.contextHistory.slice(0, keepCount);
    const middleEntries = this.contextHistory.slice(keepCount, tailStart);
    const lastEntries = this.contextHistory.slice(tailStart);
    const compressedEntry = {
      role: "assistant",
      content: `[Compressed ${middleEntries.length} previous messages]`,
      timestamp: Date.now()
    };
    this.contextHistory = [...firstEntries, compressedEntry, ...lastEntries];
  }
  /**
   * Enable or disable compression at runtime.
   *
   * @param enabled New value of the compression flag.
   */
  setCompressionEnabled(enabled) {
    this.compressionEnabled = enabled;
  }
  /**
   * Summarise the history.
   *
   * @returns Total, user and assistant entry counts plus the smallest and
   *   largest entry timestamps (both null when the history is empty).
   */
  getContextSummary() {
    if (this.contextHistory.length === 0) {
      return {
        totalEntries: 0,
        userEntries: 0,
        assistantEntries: 0,
        oldestTimestamp: null,
        newestTimestamp: null
      };
    }
    const userEntries = this.contextHistory.filter((entry) => entry.role === "user").length;
    const assistantEntries = this.contextHistory.filter((entry) => entry.role === "assistant").length;
    const timestamps = this.contextHistory.map((entry) => entry.timestamp);
    return {
      totalEntries: this.contextHistory.length,
      userEntries,
      assistantEntries,
      oldestTimestamp: Math.min(...timestamps),
      newestTimestamp: Math.max(...timestamps)
    };
  }
  /**
   * Case-insensitive substring search over entry content.
   *
   * @param query Text to look for.
   * @param role Optional role filter; when omitted every role matches.
   * @returns Matching entries in history order.
   */
  searchContext(query, role) {
    const searchQuery = query.toLowerCase();
    return this.contextHistory.filter((entry) => {
      const matchesRole = role ? entry.role === role : true;
      const matchesContent = entry.content.toLowerCase().includes(searchQuery);
      return matchesRole && matchesContent;
    });
  }
  /**
   * Get the newest entries.
   *
   * @param count Maximum number of entries to return.
   * @returns Up to `count` newest entries; an empty array when `count` is
   *   zero or negative.
   */
  getRecentEntries(count) {
    // slice(-0) is slice(0), which would return the entire history.
    if (count <= 0) {
      return [];
    }
    return this.contextHistory.slice(-count);
  }
  /**
   * @param role Role to select.
   * @returns Entries whose role strictly equals `role`.
   */
  getEntriesByRole(role) {
    return this.contextHistory.filter((entry) => entry.role === role);
  }
};
|
|
989
|
+
var AuthManager = class {
  authClient;
  accessToken;
  // Absolute epoch-ms instant at which the cached access token stops being used.
  tokenExpirationTime;
  // How long (ms) a freshly fetched access token is cached.
  tokenLifetimeMs;
  config;
  /**
   * @param config Authentication settings. `tokenExpirationTime` is the cache
   *   lifetime for access tokens in milliseconds (default 50 minutes).
   */
  constructor(config) {
    this.config = config;
    // Keep the configured lifetime apart from the absolute expiry timestamp;
    // sharing one field meant the configured value was overwritten by a
    // hard-coded 50 minutes as soon as a token was fetched.
    this.tokenLifetimeMs = config.tokenExpirationTime ?? 50 * 60 * 1e3;
    this.tokenExpirationTime = void 0;
  }
  /**
   * Prepare authentication for the configured mode.
   *
   * Vertex AI mode creates the auth client; API-key mode needs no setup.
   *
   * @throws {GeminiLiveError} `api_key_missing` when neither `vertexAI` nor
   *   `apiKey` is configured.
   */
  async initialize() {
    if (this.config.vertexAI) {
      await this.initializeVertexAI();
    } else if (this.config.apiKey) {
      return;
    } else {
      throw new GeminiLiveError(
        "api_key_missing" /* API_KEY_MISSING */,
        "Either API key or Vertex AI configuration is required"
      );
    }
  }
  /**
   * Create the GoogleAuth client used for Vertex AI.
   *
   * Requests the cloud-platform scope for the configured project. An optional
   * `serviceAccountKeyFile` is passed as `keyFilename`, and an optional
   * `serviceAccountEmail` as `clientOptions.subject`.
   *
   * @throws {GeminiLiveError} `project_id_missing` when no project is
   *   configured; `authentication_failed` when client construction throws.
   */
  async initializeVertexAI() {
    if (!this.config.project) {
      throw new GeminiLiveError(
        "project_id_missing" /* PROJECT_ID_MISSING */,
        "Google Cloud project ID is required when using Vertex AI"
      );
    }
    const authOptions = {
      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
      projectId: this.config.project
    };
    if (this.config.serviceAccountKeyFile) {
      authOptions.keyFilename = this.config.serviceAccountKeyFile;
      this.log("Using service account key file for authentication:", this.config.serviceAccountKeyFile);
    }
    if (this.config.serviceAccountEmail) {
      authOptions.clientOptions = { subject: this.config.serviceAccountEmail };
      this.log("Using service account impersonation:", this.config.serviceAccountEmail);
    }
    try {
      this.authClient = new googleAuthLibrary.GoogleAuth(authOptions);
    } catch (error) {
      throw new GeminiLiveError(
        "authentication_failed" /* AUTHENTICATION_FAILED */,
        `Failed to initialize Vertex AI authentication: ${error instanceof Error ? error.message : "Unknown error"}`,
        error
      );
    }
  }
  /**
   * Return an access token for Vertex AI, reusing the cached one while it is
   * still within its lifetime.
   *
   * @returns The access token string.
   * @throws {GeminiLiveError} `authentication_failed` when Vertex AI is not
   *   configured, the client is not initialized, or fetching a token fails.
   */
  async getAccessToken() {
    if (!this.config.vertexAI) {
      throw new GeminiLiveError("authentication_failed" /* AUTHENTICATION_FAILED */, "Vertex AI authentication not configured");
    }
    if (!this.authClient) {
      throw new GeminiLiveError("authentication_failed" /* AUTHENTICATION_FAILED */, "Authentication client not initialized");
    }
    if (this.hasValidToken()) {
      return this.accessToken;
    }
    try {
      const client = await this.authClient.getClient();
      const token = await client.getAccessToken();
      if (!token.token) {
        throw new Error("No access token received");
      }
      this.accessToken = token.token;
      // Honour the configured lifetime instead of a fixed 50 minutes.
      this.tokenExpirationTime = Date.now() + this.tokenLifetimeMs;
      this.log("Successfully obtained new access token");
      return this.accessToken;
    } catch (error) {
      throw new GeminiLiveError(
        "authentication_failed" /* AUTHENTICATION_FAILED */,
        `Failed to get access token: ${error instanceof Error ? error.message : "Unknown error"}`,
        error
      );
    }
  }
  /**
   * @returns The configured API key, or undefined in Vertex AI mode.
   */
  getApiKey() {
    if (this.config.vertexAI) {
      return void 0;
    }
    return this.config.apiKey;
  }
  /**
   * @returns True only when `config.vertexAI` is exactly `true`.
   */
  isUsingVertexAI() {
    return this.config.vertexAI === true;
  }
  /**
   * @returns True when an API key is set, or Vertex AI is enabled with a project.
   */
  isConfigured() {
    return !!(this.config.apiKey || this.config.vertexAI && this.config.project);
  }
  /**
   * @returns True when in Vertex AI mode and a cached token has not yet
   *   reached its expiry instant.
   */
  hasValidToken() {
    if (!this.config.vertexAI) return false;
    return !!(this.accessToken && this.tokenExpirationTime && Date.now() < this.tokenExpirationTime);
  }
  /**
   * Forget the cached token and its expiry. The configured lifetime is kept,
   * so tokens fetched afterwards are still cached for the configured duration.
   */
  clearCache() {
    this.accessToken = void 0;
    this.tokenExpirationTime = void 0;
  }
  /**
   * @returns A shallow copy of the authentication configuration.
   */
  getConfig() {
    return { ...this.config };
  }
  /**
   * Write an "[AuthManager]"-prefixed line to the console when `config.debug`
   * is set.
   */
  log(message, ...args) {
    if (this.config.debug) {
      console.log(`[AuthManager] ${message}`, ...args);
    }
  }
};
|
|
1124
|
+
var EventManager = class {
  eventEmitter;
  debug;
  // Number of emit() calls seen per event name.
  eventCounts = {};
  /**
   * @param config Object whose `debug` flag turns on console logging.
   */
  constructor(config) {
    this.eventEmitter = new events.EventEmitter();
    this.debug = config.debug;
  }
  /**
   * Count the emission, then dispatch `data` to the event's listeners.
   *
   * @returns The underlying emitter's result for the dispatch.
   */
  emit(event, data) {
    this.incrementEventCount(event);
    const delivered = this.eventEmitter.emit(event, data);
    if (this.debug) {
      this.log(`Emitted event: ${event}`, data);
    }
    return delivered;
  }
  /**
   * Subscribe `callback` to `event`.
   */
  on(event, callback) {
    this.eventEmitter.on(event, callback);
    if (this.debug) {
      this.log(`Added listener for event: ${event}`);
    }
  }
  /**
   * Unsubscribe `callback` from `event`.
   */
  off(event, callback) {
    this.eventEmitter.off(event, callback);
    if (this.debug) {
      this.log(`Removed listener for event: ${event}`);
    }
  }
  /**
   * Subscribe `callback` to the next emission of `event` only.
   */
  once(event, callback) {
    this.eventEmitter.once(event, callback);
    if (this.debug) {
      this.log(`Added one-time listener for event: ${event}`);
    }
  }
  /**
   * Drop all listeners for `event`, or for every event when it is omitted.
   */
  removeAllListeners(event) {
    this.eventEmitter.removeAllListeners(event);
    if (this.debug) {
      const suffix = event ? ` for event: ${event}` : "";
      this.log(`Removed all listeners${suffix}`);
    }
  }
  /**
   * @returns Number of listeners currently attached to `event`.
   */
  getListenerCount(event) {
    return this.eventEmitter.listenerCount(event);
  }
  /**
   * @returns Map from event name (converted to a string) to listener count,
   *   for every event that currently has listeners.
   */
  getEventListenerInfo() {
    const info = {};
    for (const name of this.eventEmitter.eventNames()) {
      const key = typeof name === "string" ? name : name.toString();
      info[key] = this.eventEmitter.listenerCount(name);
    }
    return info;
  }
  /**
   * @returns A shallow copy of the per-event emission counters.
   */
  getEventCounts() {
    return { ...this.eventCounts };
  }
  /**
   * Zero the emission counters by replacing them with an empty object.
   */
  resetEventCounts() {
    this.eventCounts = {};
  }
  /**
   * Remove every listener and reset the emission counters.
   */
  cleanup() {
    this.eventEmitter.removeAllListeners();
    this.resetEventCounts();
    if (this.debug) {
      this.log("Cleaned up all event listeners");
    }
  }
  /**
   * @returns The wrapped EventEmitter instance.
   */
  getEventEmitter() {
    return this.eventEmitter;
  }
  /**
   * Add one to the emission counter for `event`, starting from zero.
   */
  incrementEventCount(event) {
    const previous = this.eventCounts[event] || 0;
    this.eventCounts[event] = previous + 1;
  }
  /**
   * Write an "[EventManager]"-prefixed line to the console when debug is on.
   */
  log(message, ...args) {
    if (!this.debug) {
      return;
    }
    console.log(`[EventManager] ${message}`, ...args);
  }
};
|
|
1240
|
+
|
|
1241
|
+
// src/index.ts
|
|
1242
|
+
// Fallback model name used by normalizeConfig when the supplied config has no `model`.
var DEFAULT_MODEL = "gemini-2.0-flash-exp";
|
|
1243
|
+
// Fallback speaker (voice) name used by normalizeConfig when the supplied config has no `speaker`.
var DEFAULT_VOICE = "Puck";
|
|
1244
|
+
var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
1245
|
+
ws;
|
|
1246
|
+
eventManager;
|
|
1247
|
+
state = "disconnected";
|
|
1248
|
+
sessionHandle;
|
|
1249
|
+
debug;
|
|
1250
|
+
audioConfig;
|
|
1251
|
+
queue = [];
|
|
1252
|
+
// Managers
|
|
1253
|
+
connectionManager;
|
|
1254
|
+
contextManager;
|
|
1255
|
+
authManager;
|
|
1256
|
+
// Audio chunk concatenation - optimized stream management
|
|
1257
|
+
audioStreamManager;
|
|
1258
|
+
// Session management properties
|
|
1259
|
+
sessionId;
|
|
1260
|
+
sessionStartTime;
|
|
1261
|
+
isResuming = false;
|
|
1262
|
+
sessionDurationTimeout;
|
|
1263
|
+
// Tool integration properties
|
|
1264
|
+
tools;
|
|
1265
|
+
runtimeContext;
|
|
1266
|
+
// Store the configuration options
|
|
1267
|
+
options;
|
|
1268
|
+
/**
|
|
1269
|
+
* Normalize configuration to ensure proper VoiceConfig format
|
|
1270
|
+
* Handles backward compatibility with direct GeminiLiveVoiceConfig
|
|
1271
|
+
* @private
|
|
1272
|
+
*/
|
|
1273
|
+
static normalizeConfig(config) {
|
|
1274
|
+
if ("realtimeConfig" in config || "speechModel" in config || "listeningModel" in config) {
|
|
1275
|
+
return config;
|
|
1276
|
+
}
|
|
1277
|
+
const geminiConfig = config;
|
|
1278
|
+
return {
|
|
1279
|
+
speechModel: {
|
|
1280
|
+
name: geminiConfig.model || DEFAULT_MODEL,
|
|
1281
|
+
apiKey: geminiConfig.apiKey
|
|
1282
|
+
},
|
|
1283
|
+
speaker: geminiConfig.speaker || DEFAULT_VOICE,
|
|
1284
|
+
realtimeConfig: {
|
|
1285
|
+
model: geminiConfig.model || DEFAULT_MODEL,
|
|
1286
|
+
apiKey: geminiConfig.apiKey,
|
|
1287
|
+
options: geminiConfig
|
|
1288
|
+
}
|
|
1289
|
+
};
|
|
1290
|
+
}
|
|
1291
|
+
/**
|
|
1292
|
+
* Creates a new GeminiLiveVoice instance
|
|
1293
|
+
*
|
|
1294
|
+
* @param config Configuration options
|
|
1295
|
+
*/
|
|
1296
|
+
constructor(config = {}) {
|
|
1297
|
+
const normalizedConfig = _GeminiLiveVoice.normalizeConfig(config);
|
|
1298
|
+
super(normalizedConfig);
|
|
1299
|
+
this.options = normalizedConfig.realtimeConfig?.options || {};
|
|
1300
|
+
const apiKey = this.options.apiKey;
|
|
1301
|
+
if (!apiKey && !this.options.vertexAI) {
|
|
1302
|
+
throw new GeminiLiveError(
|
|
1303
|
+
"api_key_missing" /* API_KEY_MISSING */,
|
|
1304
|
+
"Google API key is required. Set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
|
|
1305
|
+
);
|
|
1306
|
+
}
|
|
1307
|
+
this.debug = this.options.debug || false;
|
|
1308
|
+
this.audioConfig = {
|
|
1309
|
+
...AudioStreamManager.getDefaultAudioConfig(),
|
|
1310
|
+
...this.options.audioConfig
|
|
1311
|
+
};
|
|
1312
|
+
this.audioStreamManager = new AudioStreamManager(this.audioConfig, this.debug);
|
|
1313
|
+
this.audioStreamManager.setSender((type, message) => this.sendEvent(type, message));
|
|
1314
|
+
this.eventManager = new EventManager({ debug: this.debug });
|
|
1315
|
+
this.connectionManager = new ConnectionManager({ debug: this.debug, timeoutMs: 3e4 });
|
|
1316
|
+
this.contextManager = new ContextManager({
|
|
1317
|
+
maxEntries: 100,
|
|
1318
|
+
compressionThreshold: 50,
|
|
1319
|
+
compressionEnabled: this.options.sessionConfig?.contextCompression ?? false
|
|
1320
|
+
});
|
|
1321
|
+
this.authManager = new AuthManager({
|
|
1322
|
+
apiKey: this.options.apiKey,
|
|
1323
|
+
vertexAI: this.options.vertexAI,
|
|
1324
|
+
project: this.options.project,
|
|
1325
|
+
serviceAccountKeyFile: this.options.serviceAccountKeyFile,
|
|
1326
|
+
serviceAccountEmail: this.options.serviceAccountEmail,
|
|
1327
|
+
debug: this.debug,
|
|
1328
|
+
tokenExpirationTime: this.options.tokenExpirationTime
|
|
1329
|
+
});
|
|
1330
|
+
if (this.options.vertexAI && !this.options.project) {
|
|
1331
|
+
throw new GeminiLiveError(
|
|
1332
|
+
"project_id_missing" /* PROJECT_ID_MISSING */,
|
|
1333
|
+
"Google Cloud project ID is required when using Vertex AI. Set GOOGLE_CLOUD_PROJECT environment variable or pass project to constructor"
|
|
1334
|
+
);
|
|
1335
|
+
}
|
|
1336
|
+
}
|
|
1337
|
+
/**
|
|
1338
|
+
* Register an event listener
|
|
1339
|
+
* @param event Event name (e.g., 'speaking', 'writing', 'error', 'speaker')
|
|
1340
|
+
* @param callback Callback function that receives event data
|
|
1341
|
+
*
|
|
1342
|
+
* @example
|
|
1343
|
+
* ```typescript
|
|
1344
|
+
* // Listen for audio responses
|
|
1345
|
+
* voice.on('speaking', ({ audio, audioData, sampleRate }) => {
|
|
1346
|
+
* console.log('Received audio chunk:', audioData.length);
|
|
1347
|
+
* });
|
|
1348
|
+
*
|
|
1349
|
+
* // Listen for text responses and transcriptions
|
|
1350
|
+
* voice.on('writing', ({ text, role }) => {
|
|
1351
|
+
* console.log(`${role}: ${text}`);
|
|
1352
|
+
* });
|
|
1353
|
+
*
|
|
1354
|
+
* // Listen for audio streams (for concatenated playback)
|
|
1355
|
+
* voice.on('speaker', (audioStream) => {
|
|
1356
|
+
* audioStream.pipe(playbackDevice);
|
|
1357
|
+
* });
|
|
1358
|
+
*
|
|
1359
|
+
* // Handle errors
|
|
1360
|
+
* voice.on('error', ({ message, code, details }) => {
|
|
1361
|
+
* console.error('Voice error:', message);
|
|
1362
|
+
* });
|
|
1363
|
+
* ```
|
|
1364
|
+
*/
|
|
1365
|
+
on(event, callback) {
|
|
1366
|
+
try {
|
|
1367
|
+
this.eventManager.on(event, callback);
|
|
1368
|
+
this.log(`Event listener registered for: ${event}`);
|
|
1369
|
+
} catch (error) {
|
|
1370
|
+
this.log(`Failed to register event listener for ${event}:`, error);
|
|
1371
|
+
throw error;
|
|
1372
|
+
}
|
|
1373
|
+
}
|
|
1374
|
+
/**
|
|
1375
|
+
* Remove an event listener
|
|
1376
|
+
* @param event Event name
|
|
1377
|
+
* @param callback Callback function to remove
|
|
1378
|
+
*/
|
|
1379
|
+
off(event, callback) {
|
|
1380
|
+
try {
|
|
1381
|
+
this.eventManager.off(event, callback);
|
|
1382
|
+
this.log(`Event listener removed for: ${event}`);
|
|
1383
|
+
} catch (error) {
|
|
1384
|
+
this.log(`Failed to remove event listener for ${event}:`, error);
|
|
1385
|
+
}
|
|
1386
|
+
}
|
|
1387
|
+
/**
|
|
1388
|
+
* Register a one-time event listener that automatically removes itself after the first emission
|
|
1389
|
+
* @param event Event name
|
|
1390
|
+
* @param callback Callback function that receives event data
|
|
1391
|
+
*/
|
|
1392
|
+
once(event, callback) {
|
|
1393
|
+
try {
|
|
1394
|
+
this.eventManager.once(event, callback);
|
|
1395
|
+
this.log(`One-time event listener registered for: ${event}`);
|
|
1396
|
+
} catch (error) {
|
|
1397
|
+
this.log(`Failed to register one-time event listener for ${event}:`, error);
|
|
1398
|
+
throw error;
|
|
1399
|
+
}
|
|
1400
|
+
}
|
|
1401
|
+
/**
|
|
1402
|
+
* Emit an event to listeners with improved error handling
|
|
1403
|
+
* @private
|
|
1404
|
+
*/
|
|
1405
|
+
emit(event, data) {
|
|
1406
|
+
try {
|
|
1407
|
+
const listenerCount = this.eventManager.getListenerCount(event);
|
|
1408
|
+
if (listenerCount === 0 && this.debug) {
|
|
1409
|
+
this.log(`No listeners for event: ${String(event)}`);
|
|
1410
|
+
}
|
|
1411
|
+
const result = this.eventManager.emit(event, data);
|
|
1412
|
+
if (this.debug && listenerCount > 0) {
|
|
1413
|
+
this.log(`Emitted event: ${String(event)} to ${listenerCount} listeners`);
|
|
1414
|
+
}
|
|
1415
|
+
return result;
|
|
1416
|
+
} catch (error) {
|
|
1417
|
+
this.log(`Error emitting event ${String(event)}:`, error);
|
|
1418
|
+
if (event !== "error") {
|
|
1419
|
+
try {
|
|
1420
|
+
this.eventManager.getEventEmitter().emit("error", {
|
|
1421
|
+
message: `Failed to emit event: ${String(event)}`,
|
|
1422
|
+
code: "event_emission_error",
|
|
1423
|
+
details: error
|
|
1424
|
+
});
|
|
1425
|
+
} catch (nestedError) {
|
|
1426
|
+
this.log("Critical: Failed to emit error event:", nestedError);
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1429
|
+
return false;
|
|
1430
|
+
}
|
|
1431
|
+
}
|
|
1432
|
+
/**
|
|
1433
|
+
* Clean up event listeners to prevent memory leaks
|
|
1434
|
+
* @private
|
|
1435
|
+
*/
|
|
1436
|
+
cleanupEventListeners() {
|
|
1437
|
+
try {
|
|
1438
|
+
const events = this.eventManager.getEventEmitter().eventNames();
|
|
1439
|
+
if (this.debug && events.length > 0) {
|
|
1440
|
+
this.log(
|
|
1441
|
+
"Cleaning up event listeners:",
|
|
1442
|
+
events.map((event) => `${String(event)}: ${this.eventManager.getListenerCount(String(event))}`).join(", ")
|
|
1443
|
+
);
|
|
1444
|
+
}
|
|
1445
|
+
this.eventManager.cleanup();
|
|
1446
|
+
this.log("Event listeners cleaned up");
|
|
1447
|
+
} catch (error) {
|
|
1448
|
+
this.log("Error cleaning up event listeners:", error);
|
|
1449
|
+
}
|
|
1450
|
+
}
|
|
1451
|
+
/**
|
|
1452
|
+
* Get current event listener information for debugging
|
|
1453
|
+
* @returns Object with event names and listener counts
|
|
1454
|
+
*/
|
|
1455
|
+
getEventListenerInfo() {
|
|
1456
|
+
try {
|
|
1457
|
+
return this.eventManager.getEventListenerInfo();
|
|
1458
|
+
} catch (error) {
|
|
1459
|
+
this.log("Error getting event listener info:", error);
|
|
1460
|
+
return {};
|
|
1461
|
+
}
|
|
1462
|
+
}
|
|
1463
|
+
/**
|
|
1464
|
+
* Create and emit a standardized error
|
|
1465
|
+
* @private
|
|
1466
|
+
*/
|
|
1467
|
+
createAndEmitError(code, message, details) {
|
|
1468
|
+
const error = new GeminiLiveError(code, message, details);
|
|
1469
|
+
this.log(`Error [${code}]: ${message}`, details);
|
|
1470
|
+
this.emit("error", error.toEventData());
|
|
1471
|
+
return error;
|
|
1472
|
+
}
|
|
1473
|
+
/**
|
|
1474
|
+
* Handle connection state validation with standardized errors
|
|
1475
|
+
* @private
|
|
1476
|
+
*/
|
|
1477
|
+
validateConnectionState() {
|
|
1478
|
+
if (this.state !== "connected") {
|
|
1479
|
+
throw this.createAndEmitError(
|
|
1480
|
+
"not_connected" /* NOT_CONNECTED */,
|
|
1481
|
+
"Not connected to Gemini Live API. Call connect() first.",
|
|
1482
|
+
{ currentState: this.state }
|
|
1483
|
+
);
|
|
1484
|
+
}
|
|
1485
|
+
}
|
|
1486
|
+
/**
|
|
1487
|
+
* Handle WebSocket state validation with standardized errors
|
|
1488
|
+
* @private
|
|
1489
|
+
*/
|
|
1490
|
+
validateWebSocketState() {
|
|
1491
|
+
if (!this.connectionManager.isConnected()) {
|
|
1492
|
+
throw this.createAndEmitError("websocket_error" /* WEBSOCKET_ERROR */, "WebSocket is not open", {
|
|
1493
|
+
wsExists: !!this.connectionManager.getWebSocket(),
|
|
1494
|
+
readyState: this.connectionManager.getWebSocket()?.readyState,
|
|
1495
|
+
expectedState: ws.WebSocket.OPEN
|
|
1496
|
+
});
|
|
1497
|
+
}
|
|
1498
|
+
}
|
|
1499
|
+
/**
|
|
1500
|
+
* Establish connection to the Gemini Live API
|
|
1501
|
+
*/
|
|
1502
|
+
async connect({ runtimeContext } = {}) {
|
|
1503
|
+
return this.traced(async () => {
|
|
1504
|
+
if (this.state === "connected") {
|
|
1505
|
+
this.log("Already connected to Gemini Live API");
|
|
1506
|
+
return;
|
|
1507
|
+
}
|
|
1508
|
+
this.runtimeContext = runtimeContext;
|
|
1509
|
+
this.emit("session", { state: "connecting" });
|
|
1510
|
+
try {
|
|
1511
|
+
let wsUrl;
|
|
1512
|
+
let headers = {};
|
|
1513
|
+
if (this.options.vertexAI) {
|
|
1514
|
+
wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
|
|
1515
|
+
await this.authManager.initialize();
|
|
1516
|
+
const accessToken = await this.authManager.getAccessToken();
|
|
1517
|
+
headers = { headers: { Authorization: `Bearer ${accessToken}` } };
|
|
1518
|
+
this.log("Using Vertex AI authentication with OAuth token");
|
|
1519
|
+
} else {
|
|
1520
|
+
wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
|
|
1521
|
+
headers = {
|
|
1522
|
+
headers: {
|
|
1523
|
+
"x-goog-api-key": this.options.apiKey || "",
|
|
1524
|
+
"Content-Type": "application/json"
|
|
1525
|
+
}
|
|
1526
|
+
};
|
|
1527
|
+
this.log("Using Live API authentication with API key");
|
|
1528
|
+
}
|
|
1529
|
+
this.log("Connecting to:", wsUrl);
|
|
1530
|
+
this.ws = new ws.WebSocket(wsUrl, void 0, headers);
|
|
1531
|
+
this.connectionManager.setWebSocket(this.ws);
|
|
1532
|
+
this.setupEventListeners();
|
|
1533
|
+
await this.connectionManager.waitForOpen();
|
|
1534
|
+
if (this.isResuming && this.sessionHandle) {
|
|
1535
|
+
await this.sendSessionResumption();
|
|
1536
|
+
} else {
|
|
1537
|
+
this.sendInitialConfig();
|
|
1538
|
+
this.sessionStartTime = Date.now();
|
|
1539
|
+
this.sessionId = crypto.randomUUID();
|
|
1540
|
+
}
|
|
1541
|
+
await this.waitForSessionCreated();
|
|
1542
|
+
this.state = "connected";
|
|
1543
|
+
this.emit("session", {
|
|
1544
|
+
state: "connected",
|
|
1545
|
+
config: {
|
|
1546
|
+
sessionId: this.sessionId,
|
|
1547
|
+
isResuming: this.isResuming,
|
|
1548
|
+
toolCount: Object.keys(this.tools || {}).length
|
|
1549
|
+
}
|
|
1550
|
+
});
|
|
1551
|
+
this.log("Successfully connected to Gemini Live API", {
|
|
1552
|
+
sessionId: this.sessionId,
|
|
1553
|
+
isResuming: this.isResuming,
|
|
1554
|
+
toolCount: Object.keys(this.tools || {}).length
|
|
1555
|
+
});
|
|
1556
|
+
if (this.options.sessionConfig?.maxDuration) {
|
|
1557
|
+
this.startSessionDurationMonitor();
|
|
1558
|
+
}
|
|
1559
|
+
} catch (error) {
|
|
1560
|
+
this.state = "disconnected";
|
|
1561
|
+
this.log("Connection failed", error);
|
|
1562
|
+
throw error;
|
|
1563
|
+
}
|
|
1564
|
+
}, "gemini-live.connect")();
|
|
1565
|
+
}
|
|
1566
|
+
/**
|
|
1567
|
+
* Disconnect from the Gemini Live API
|
|
1568
|
+
*/
|
|
1569
|
+
async disconnect() {
|
|
1570
|
+
if (this.state === "disconnected") {
|
|
1571
|
+
this.log("Already disconnected");
|
|
1572
|
+
return;
|
|
1573
|
+
}
|
|
1574
|
+
this.emit("session", { state: "disconnecting" });
|
|
1575
|
+
if (this.sessionDurationTimeout) {
|
|
1576
|
+
clearTimeout(this.sessionDurationTimeout);
|
|
1577
|
+
this.sessionDurationTimeout = void 0;
|
|
1578
|
+
}
|
|
1579
|
+
if (this.options.sessionConfig?.enableResumption && this.sessionId) {
|
|
1580
|
+
this.sessionHandle = this.sessionId;
|
|
1581
|
+
this.log("Session handle saved for resumption", { handle: this.sessionHandle });
|
|
1582
|
+
}
|
|
1583
|
+
if (this.ws) {
|
|
1584
|
+
this.connectionManager.close();
|
|
1585
|
+
this.ws = void 0;
|
|
1586
|
+
}
|
|
1587
|
+
this.audioStreamManager.cleanupSpeakerStreams();
|
|
1588
|
+
this.authManager.clearCache();
|
|
1589
|
+
this.state = "disconnected";
|
|
1590
|
+
this.isResuming = false;
|
|
1591
|
+
this.emit("session", { state: "disconnected" });
|
|
1592
|
+
this.cleanupEventListeners();
|
|
1593
|
+
this.log("Disconnected from Gemini Live API", {
|
|
1594
|
+
sessionId: this.sessionId,
|
|
1595
|
+
sessionDuration: this.sessionStartTime ? Date.now() - this.sessionStartTime : void 0
|
|
1596
|
+
});
|
|
1597
|
+
}
|
|
1598
|
+
/**
|
|
1599
|
+
* Send text to be converted to speech
|
|
1600
|
+
*/
|
|
1601
|
+
async speak(input, options) {
|
|
1602
|
+
return this.traced(async () => {
|
|
1603
|
+
this.validateConnectionState();
|
|
1604
|
+
if (typeof input !== "string") {
|
|
1605
|
+
const chunks = [];
|
|
1606
|
+
for await (const chunk of input) {
|
|
1607
|
+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
|
|
1608
|
+
}
|
|
1609
|
+
input = Buffer.concat(chunks).toString("utf-8");
|
|
1610
|
+
}
|
|
1611
|
+
if (input.trim().length === 0) {
|
|
1612
|
+
throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
|
|
1613
|
+
}
|
|
1614
|
+
this.addToContext("user", input);
|
|
1615
|
+
const textMessage = {
|
|
1616
|
+
client_content: {
|
|
1617
|
+
turns: [
|
|
1618
|
+
{
|
|
1619
|
+
role: "user",
|
|
1620
|
+
parts: [
|
|
1621
|
+
{
|
|
1622
|
+
text: input
|
|
1623
|
+
}
|
|
1624
|
+
]
|
|
1625
|
+
}
|
|
1626
|
+
],
|
|
1627
|
+
turnComplete: true
|
|
1628
|
+
}
|
|
1629
|
+
};
|
|
1630
|
+
if (options && (options.speaker || options.languageCode || options.responseModalities)) {
|
|
1631
|
+
const updateMessage = {
|
|
1632
|
+
type: "session.update",
|
|
1633
|
+
session: {
|
|
1634
|
+
generation_config: {
|
|
1635
|
+
...options.responseModalities ? { response_modalities: options.responseModalities } : {},
|
|
1636
|
+
speech_config: {
|
|
1637
|
+
...options.languageCode ? { language_code: options.languageCode } : {},
|
|
1638
|
+
...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1641
|
+
}
|
|
1642
|
+
};
|
|
1643
|
+
try {
|
|
1644
|
+
this.sendEvent("session.update", updateMessage);
|
|
1645
|
+
this.log("Applied per-turn runtime options", options);
|
|
1646
|
+
} catch (error) {
|
|
1647
|
+
this.log("Failed to apply per-turn runtime options", error);
|
|
1648
|
+
}
|
|
1649
|
+
}
|
|
1650
|
+
try {
|
|
1651
|
+
this.sendEvent("client_content", textMessage);
|
|
1652
|
+
this.log("Text message sent", { text: input });
|
|
1653
|
+
} catch (error) {
|
|
1654
|
+
this.log("Failed to send text message", error);
|
|
1655
|
+
throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
|
|
1656
|
+
}
|
|
1657
|
+
}, "gemini-live.speak")();
|
|
1658
|
+
}
|
|
1659
|
+
/**
|
|
1660
|
+
* Send audio stream for processing
|
|
1661
|
+
*/
|
|
1662
|
+
async send(audioData) {
|
|
1663
|
+
return this.traced(async () => {
|
|
1664
|
+
this.validateConnectionState();
|
|
1665
|
+
if ("readable" in audioData && typeof audioData.on === "function") {
|
|
1666
|
+
const stream = audioData;
|
|
1667
|
+
stream.on("data", (chunk) => {
|
|
1668
|
+
try {
|
|
1669
|
+
const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
|
|
1670
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
|
|
1671
|
+
this.sendEvent("realtime_input", message);
|
|
1672
|
+
} catch (error) {
|
|
1673
|
+
this.log("Failed to process audio chunk", error);
|
|
1674
|
+
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
|
|
1675
|
+
}
|
|
1676
|
+
});
|
|
1677
|
+
stream.on("error", (error) => {
|
|
1678
|
+
this.log("Audio stream error", error);
|
|
1679
|
+
this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
|
|
1680
|
+
});
|
|
1681
|
+
stream.on("end", () => {
|
|
1682
|
+
this.log("Audio stream ended");
|
|
1683
|
+
});
|
|
1684
|
+
} else {
|
|
1685
|
+
const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
|
|
1686
|
+
const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
|
|
1687
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
|
|
1688
|
+
this.sendEvent("realtime_input", message);
|
|
1689
|
+
}
|
|
1690
|
+
}, "gemini-live.send")();
|
|
1691
|
+
}
|
|
1692
|
+
/**
|
|
1693
|
+
* Process speech from audio stream (traditional STT interface)
|
|
1694
|
+
*/
|
|
1695
|
+
async listen(audioStream, _options) {
|
|
1696
|
+
return this.traced(async () => {
|
|
1697
|
+
this.validateConnectionState();
|
|
1698
|
+
let transcriptionText = "";
|
|
1699
|
+
const onWriting = (data) => {
|
|
1700
|
+
if (data.role === "user") {
|
|
1701
|
+
transcriptionText += data.text;
|
|
1702
|
+
this.log("Received transcription text:", { text: data.text, total: transcriptionText });
|
|
1703
|
+
}
|
|
1704
|
+
};
|
|
1705
|
+
const onError = (error) => {
|
|
1706
|
+
throw new Error(`Transcription failed: ${error.message}`);
|
|
1707
|
+
};
|
|
1708
|
+
const onSession = (data) => {
|
|
1709
|
+
if (data.state === "disconnected") {
|
|
1710
|
+
throw new Error("Session disconnected during transcription");
|
|
1711
|
+
}
|
|
1712
|
+
};
|
|
1713
|
+
this.on("writing", onWriting);
|
|
1714
|
+
this.on("error", onError);
|
|
1715
|
+
this.on("session", onSession);
|
|
1716
|
+
try {
|
|
1717
|
+
const result = await this.audioStreamManager.handleAudioTranscription(
|
|
1718
|
+
audioStream,
|
|
1719
|
+
(base64Audio) => {
|
|
1720
|
+
return new Promise((resolve, reject) => {
|
|
1721
|
+
try {
|
|
1722
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
|
|
1723
|
+
const cleanup = () => {
|
|
1724
|
+
this.off("turnComplete", onTurnComplete);
|
|
1725
|
+
this.off("error", onErr);
|
|
1726
|
+
};
|
|
1727
|
+
const onTurnComplete = () => {
|
|
1728
|
+
cleanup();
|
|
1729
|
+
resolve(transcriptionText.trim());
|
|
1730
|
+
};
|
|
1731
|
+
const onErr = (e) => {
|
|
1732
|
+
cleanup();
|
|
1733
|
+
reject(new Error(e.message));
|
|
1734
|
+
};
|
|
1735
|
+
this.on("turnComplete", onTurnComplete);
|
|
1736
|
+
this.on("error", onErr);
|
|
1737
|
+
this.sendEvent("client_content", message);
|
|
1738
|
+
this.log("Sent audio for transcription");
|
|
1739
|
+
} catch (err) {
|
|
1740
|
+
reject(err);
|
|
1741
|
+
}
|
|
1742
|
+
});
|
|
1743
|
+
},
|
|
1744
|
+
(error) => {
|
|
1745
|
+
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
|
|
1746
|
+
}
|
|
1747
|
+
);
|
|
1748
|
+
return result;
|
|
1749
|
+
} finally {
|
|
1750
|
+
this.off("writing", onWriting);
|
|
1751
|
+
this.off("error", onError);
|
|
1752
|
+
this.off("session", onSession);
|
|
1753
|
+
}
|
|
1754
|
+
}, "gemini-live.listen")();
|
|
1755
|
+
}
|
|
1756
|
+
/**
|
|
1757
|
+
* Get available speakers/voices
|
|
1758
|
+
*/
|
|
1759
|
+
async getSpeakers() {
|
|
1760
|
+
return this.traced(async () => {
|
|
1761
|
+
return [
|
|
1762
|
+
{ voiceId: "Puck", description: "Conversational, friendly" },
|
|
1763
|
+
{ voiceId: "Charon", description: "Deep, authoritative" },
|
|
1764
|
+
{ voiceId: "Kore", description: "Neutral, professional" },
|
|
1765
|
+
{ voiceId: "Fenrir", description: "Warm, approachable" }
|
|
1766
|
+
];
|
|
1767
|
+
}, "gemini-live.getSpeakers")();
|
|
1768
|
+
}
|
|
1769
|
+
/**
|
|
1770
|
+
* Resume a previous session using a session handle
|
|
1771
|
+
*/
|
|
1772
|
+
async resumeSession(handle, context) {
|
|
1773
|
+
if (this.state === "connected") {
|
|
1774
|
+
throw new Error("Cannot resume session while already connected. Disconnect first.");
|
|
1775
|
+
}
|
|
1776
|
+
this.log("Attempting to resume session", { handle });
|
|
1777
|
+
this.sessionHandle = handle;
|
|
1778
|
+
this.isResuming = true;
|
|
1779
|
+
if (context && context.length > 0) {
|
|
1780
|
+
this.contextManager.clearContext();
|
|
1781
|
+
for (const item of context) {
|
|
1782
|
+
this.contextManager.addEntry(item.role, item.content);
|
|
1783
|
+
}
|
|
1784
|
+
}
|
|
1785
|
+
try {
|
|
1786
|
+
await this.connect();
|
|
1787
|
+
this.log("Session resumed successfully", { handle, contextItems: context?.length || 0 });
|
|
1788
|
+
} catch (error) {
|
|
1789
|
+
this.isResuming = false;
|
|
1790
|
+
this.sessionHandle = void 0;
|
|
1791
|
+
throw new Error(`Failed to resume session: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1792
|
+
}
|
|
1793
|
+
}
|
|
1794
|
+
/**
|
|
1795
|
+
* Update session configuration during an active session
|
|
1796
|
+
* Allows dynamic updates to voice, instructions, tools, and other settings
|
|
1797
|
+
*
|
|
1798
|
+
* @param config Partial configuration to update
|
|
1799
|
+
* @throws Error if not connected or update fails
|
|
1800
|
+
*
|
|
1801
|
+
* @example
|
|
1802
|
+
* ```typescript
|
|
1803
|
+
* // Change voice during conversation
|
|
1804
|
+
* await voice.updateSessionConfig({
|
|
1805
|
+
* speaker: 'Charon'
|
|
1806
|
+
* });
|
|
1807
|
+
*
|
|
1808
|
+
* // Update instructions
|
|
1809
|
+
* await voice.updateSessionConfig({
|
|
1810
|
+
* instructions: 'You are now a helpful coding assistant'
|
|
1811
|
+
* });
|
|
1812
|
+
*
|
|
1813
|
+
* // Add or update tools
|
|
1814
|
+
* await voice.updateSessionConfig({
|
|
1815
|
+
* tools: [{ name: 'new_tool', ... }]
|
|
1816
|
+
* });
|
|
1817
|
+
* ```
|
|
1818
|
+
*/
|
|
1819
|
+
async updateSessionConfig(config) {
|
|
1820
|
+
this.validateConnectionState();
|
|
1821
|
+
this.validateWebSocketState();
|
|
1822
|
+
return new Promise((resolve, reject) => {
|
|
1823
|
+
if (config.model) {
|
|
1824
|
+
this.log("Warning: Model cannot be changed during an active session. Ignoring model update.");
|
|
1825
|
+
}
|
|
1826
|
+
if (config.vertexAI !== void 0 || config.project !== void 0 || config.location !== void 0) {
|
|
1827
|
+
this.log("Warning: Authentication settings cannot be changed during an active session.");
|
|
1828
|
+
}
|
|
1829
|
+
const updateMessage = {
|
|
1830
|
+
type: "session.update",
|
|
1831
|
+
session: {}
|
|
1832
|
+
};
|
|
1833
|
+
let hasUpdates = false;
|
|
1834
|
+
if (config.speaker) {
|
|
1835
|
+
hasUpdates = true;
|
|
1836
|
+
updateMessage.session.generation_config = {
|
|
1837
|
+
...updateMessage.session.generation_config,
|
|
1838
|
+
speech_config: {
|
|
1839
|
+
voice_config: {
|
|
1840
|
+
prebuilt_voice_config: {
|
|
1841
|
+
voice_name: config.speaker
|
|
1842
|
+
}
|
|
1843
|
+
}
|
|
1844
|
+
}
|
|
1845
|
+
};
|
|
1846
|
+
this.speaker = config.speaker;
|
|
1847
|
+
this.log("Updating speaker to:", config.speaker);
|
|
1848
|
+
}
|
|
1849
|
+
if (config.instructions !== void 0) {
|
|
1850
|
+
hasUpdates = true;
|
|
1851
|
+
updateMessage.session.system_instruction = {
|
|
1852
|
+
parts: [{ text: config.instructions }]
|
|
1853
|
+
};
|
|
1854
|
+
this.log("Updating instructions");
|
|
1855
|
+
}
|
|
1856
|
+
if (config.tools !== void 0) {
|
|
1857
|
+
hasUpdates = true;
|
|
1858
|
+
if (config.tools.length > 0) {
|
|
1859
|
+
updateMessage.session.tools = config.tools.map((tool) => ({
|
|
1860
|
+
function_declarations: [
|
|
1861
|
+
{
|
|
1862
|
+
name: tool.name,
|
|
1863
|
+
description: tool.description,
|
|
1864
|
+
parameters: tool.parameters
|
|
1865
|
+
}
|
|
1866
|
+
]
|
|
1867
|
+
}));
|
|
1868
|
+
} else {
|
|
1869
|
+
updateMessage.session.tools = [];
|
|
1870
|
+
}
|
|
1871
|
+
this.log("Updating tools:", config.tools.length, "tools");
|
|
1872
|
+
}
|
|
1873
|
+
if (this.tools && Object.keys(this.tools).length > 0) {
|
|
1874
|
+
hasUpdates = true;
|
|
1875
|
+
const allTools = [];
|
|
1876
|
+
for (const [toolName, tool] of Object.entries(this.tools)) {
|
|
1877
|
+
try {
|
|
1878
|
+
let parameters;
|
|
1879
|
+
if ("inputSchema" in tool && tool.inputSchema) {
|
|
1880
|
+
if (typeof tool.inputSchema === "object" && "safeParse" in tool.inputSchema) {
|
|
1881
|
+
parameters = this.convertZodSchemaToJsonSchema(tool.inputSchema);
|
|
1882
|
+
} else {
|
|
1883
|
+
parameters = tool.inputSchema;
|
|
1884
|
+
}
|
|
1885
|
+
} else if ("parameters" in tool && tool.parameters) {
|
|
1886
|
+
parameters = tool.parameters;
|
|
1887
|
+
} else {
|
|
1888
|
+
parameters = { type: "object", properties: {} };
|
|
1889
|
+
}
|
|
1890
|
+
allTools.push({
|
|
1891
|
+
function_declarations: [
|
|
1892
|
+
{
|
|
1893
|
+
name: toolName,
|
|
1894
|
+
description: tool.description || `Tool: ${toolName}`,
|
|
1895
|
+
parameters
|
|
1896
|
+
}
|
|
1897
|
+
]
|
|
1898
|
+
});
|
|
1899
|
+
} catch (error) {
|
|
1900
|
+
this.log("Failed to process tool for session update", { toolName, error });
|
|
1901
|
+
}
|
|
1902
|
+
}
|
|
1903
|
+
if (allTools.length > 0) {
|
|
1904
|
+
updateMessage.session.tools = allTools;
|
|
1905
|
+
this.log("Updating tools from addTools method:", allTools.length, "tools");
|
|
1906
|
+
}
|
|
1907
|
+
}
|
|
1908
|
+
if (config.sessionConfig) {
|
|
1909
|
+
if (config.sessionConfig.vad) {
|
|
1910
|
+
hasUpdates = true;
|
|
1911
|
+
updateMessage.session.vad = {
|
|
1912
|
+
enabled: config.sessionConfig.vad.enabled ?? true,
|
|
1913
|
+
sensitivity: config.sessionConfig.vad.sensitivity ?? 0.5,
|
|
1914
|
+
silence_duration_ms: config.sessionConfig.vad.silenceDurationMs ?? 1e3
|
|
1915
|
+
};
|
|
1916
|
+
this.log("Updating VAD settings:", config.sessionConfig.vad);
|
|
1917
|
+
}
|
|
1918
|
+
if (config.sessionConfig.interrupts) {
|
|
1919
|
+
hasUpdates = true;
|
|
1920
|
+
updateMessage.session.interrupts = {
|
|
1921
|
+
enabled: config.sessionConfig.interrupts.enabled ?? true,
|
|
1922
|
+
allow_user_interruption: config.sessionConfig.interrupts.allowUserInterruption ?? true
|
|
1923
|
+
};
|
|
1924
|
+
this.log("Updating interrupt settings:", config.sessionConfig.interrupts);
|
|
1925
|
+
}
|
|
1926
|
+
if (config.sessionConfig.contextCompression !== void 0) {
|
|
1927
|
+
hasUpdates = true;
|
|
1928
|
+
updateMessage.session.context_compression = config.sessionConfig.contextCompression;
|
|
1929
|
+
this.log("Updating context compression:", config.sessionConfig.contextCompression);
|
|
1930
|
+
this.contextManager.setCompressionEnabled(config.sessionConfig.contextCompression);
|
|
1931
|
+
}
|
|
1932
|
+
}
|
|
1933
|
+
if (!hasUpdates) {
|
|
1934
|
+
this.log("No valid configuration updates to send");
|
|
1935
|
+
resolve();
|
|
1936
|
+
return;
|
|
1937
|
+
}
|
|
1938
|
+
const timeout = setTimeout(() => {
|
|
1939
|
+
cleanup();
|
|
1940
|
+
reject(new Error("Session configuration update timeout - no response received"));
|
|
1941
|
+
}, 1e4);
|
|
1942
|
+
const onSessionUpdated = (data) => {
|
|
1943
|
+
cleanup();
|
|
1944
|
+
this.log("Session configuration updated successfully", data);
|
|
1945
|
+
resolve();
|
|
1946
|
+
};
|
|
1947
|
+
const onError = (error) => {
|
|
1948
|
+
cleanup();
|
|
1949
|
+
this.log("Session configuration update failed", error);
|
|
1950
|
+
reject(new Error(`Failed to update session configuration: ${error.message || "Unknown error"}`));
|
|
1951
|
+
};
|
|
1952
|
+
const cleanup = () => {
|
|
1953
|
+
clearTimeout(timeout);
|
|
1954
|
+
this.eventManager.getEventEmitter().removeListener("session.updated", onSessionUpdated);
|
|
1955
|
+
this.eventManager.getEventEmitter().removeListener("error", onError);
|
|
1956
|
+
};
|
|
1957
|
+
this.eventManager.getEventEmitter().once("session.updated", onSessionUpdated);
|
|
1958
|
+
this.eventManager.getEventEmitter().once("error", onError);
|
|
1959
|
+
try {
|
|
1960
|
+
this.sendEvent("session.update", updateMessage);
|
|
1961
|
+
this.log("Sent session configuration update", updateMessage);
|
|
1962
|
+
} catch (error) {
|
|
1963
|
+
cleanup();
|
|
1964
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
1965
|
+
this.log("Failed to send session configuration update", error);
|
|
1966
|
+
reject(new Error(`Failed to send session configuration update: ${errorMessage}`));
|
|
1967
|
+
}
|
|
1968
|
+
});
|
|
1969
|
+
}
|
|
1970
|
+
/**
|
|
1971
|
+
* Get current connection state
|
|
1972
|
+
*/
|
|
1973
|
+
getConnectionState() {
|
|
1974
|
+
return this.state;
|
|
1975
|
+
}
|
|
1976
|
+
/**
|
|
1977
|
+
* Check if currently connected
|
|
1978
|
+
*/
|
|
1979
|
+
isConnected() {
|
|
1980
|
+
return this.state === "connected";
|
|
1981
|
+
}
|
|
1982
|
+
/**
|
|
1983
|
+
* Get current speaker stream for audio concatenation
|
|
1984
|
+
* This allows external access to the current audio stream being built
|
|
1985
|
+
*/
|
|
1986
|
+
getCurrentSpeakerStream() {
|
|
1987
|
+
return this.audioStreamManager.getCurrentSpeakerStream();
|
|
1988
|
+
}
|
|
1989
|
+
/**
|
|
1990
|
+
* Get session handle for resumption
|
|
1991
|
+
*/
|
|
1992
|
+
getSessionHandle() {
|
|
1993
|
+
return this.sessionHandle;
|
|
1994
|
+
}
|
|
1995
|
+
/**
|
|
1996
|
+
* Get comprehensive session information
|
|
1997
|
+
*/
|
|
1998
|
+
getSessionInfo() {
|
|
1999
|
+
return {
|
|
2000
|
+
id: this.sessionId,
|
|
2001
|
+
handle: this.sessionHandle,
|
|
2002
|
+
startTime: this.sessionStartTime ? new Date(this.sessionStartTime) : void 0,
|
|
2003
|
+
duration: this.sessionStartTime ? Date.now() - this.sessionStartTime : void 0,
|
|
2004
|
+
state: this.state,
|
|
2005
|
+
config: this.options.sessionConfig,
|
|
2006
|
+
contextSize: this.contextManager.getContextSize()
|
|
2007
|
+
};
|
|
2008
|
+
}
|
|
2009
|
+
/**
|
|
2010
|
+
* Get session context history
|
|
2011
|
+
*/
|
|
2012
|
+
getContextHistory() {
|
|
2013
|
+
return this.contextManager.getContextHistory();
|
|
2014
|
+
}
|
|
2015
|
+
/**
|
|
2016
|
+
* Add to context history for session continuity
|
|
2017
|
+
*/
|
|
2018
|
+
addToContext(role, content) {
|
|
2019
|
+
this.contextManager.addEntry(role, content);
|
|
2020
|
+
}
|
|
2021
|
+
/**
|
|
2022
|
+
* Clear session context
|
|
2023
|
+
*/
|
|
2024
|
+
clearContext() {
|
|
2025
|
+
this.contextManager.clearContext();
|
|
2026
|
+
this.log("Session context cleared");
|
|
2027
|
+
}
|
|
2028
|
+
/**
|
|
2029
|
+
* Enable or disable automatic reconnection
|
|
2030
|
+
*/
|
|
2031
|
+
setAutoReconnect(enabled) {
|
|
2032
|
+
if (!this.options.sessionConfig) {
|
|
2033
|
+
this.options.sessionConfig = {};
|
|
2034
|
+
}
|
|
2035
|
+
this.options.sessionConfig.enableResumption = enabled;
|
|
2036
|
+
this.log(`Auto-reconnect ${enabled ? "enabled" : "disabled"}`);
|
|
2037
|
+
}
|
|
2038
|
+
/**
|
|
2039
|
+
* Send session resumption message
|
|
2040
|
+
* @private
|
|
2041
|
+
*/
|
|
2042
|
+
async sendSessionResumption() {
|
|
2043
|
+
if (!this.sessionHandle) {
|
|
2044
|
+
throw new Error("No session handle available for resumption");
|
|
2045
|
+
}
|
|
2046
|
+
const context = this.contextManager.getContextArray();
|
|
2047
|
+
const resumeMessage = {
|
|
2048
|
+
session_resume: {
|
|
2049
|
+
handle: this.sessionHandle,
|
|
2050
|
+
...context.length > 0 && {
|
|
2051
|
+
context
|
|
2052
|
+
}
|
|
2053
|
+
}
|
|
2054
|
+
};
|
|
2055
|
+
try {
|
|
2056
|
+
if (this.ws?.readyState !== ws.WebSocket.OPEN) {
|
|
2057
|
+
throw new Error("WebSocket not ready for session resumption");
|
|
2058
|
+
}
|
|
2059
|
+
this.sendEvent("session_resume", resumeMessage);
|
|
2060
|
+
this.log("Session resumption message sent", { handle: this.sessionHandle });
|
|
2061
|
+
} catch (error) {
|
|
2062
|
+
this.log("Failed to send session resumption", error);
|
|
2063
|
+
throw new Error(`Failed to send session resumption: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2064
|
+
}
|
|
2065
|
+
}
|
|
2066
|
+
/**
|
|
2067
|
+
* Start monitoring session duration
|
|
2068
|
+
* @private
|
|
2069
|
+
*/
|
|
2070
|
+
startSessionDurationMonitor() {
|
|
2071
|
+
if (!this.options.sessionConfig?.maxDuration) {
|
|
2072
|
+
return;
|
|
2073
|
+
}
|
|
2074
|
+
const durationMs = this.parseDuration(this.options.sessionConfig.maxDuration);
|
|
2075
|
+
if (!durationMs) {
|
|
2076
|
+
this.log("Invalid session duration format", { duration: this.options.sessionConfig.maxDuration });
|
|
2077
|
+
return;
|
|
2078
|
+
}
|
|
2079
|
+
if (this.sessionDurationTimeout) {
|
|
2080
|
+
clearTimeout(this.sessionDurationTimeout);
|
|
2081
|
+
}
|
|
2082
|
+
const warningTime = durationMs - 5 * 60 * 1e3;
|
|
2083
|
+
if (warningTime > 0) {
|
|
2084
|
+
setTimeout(() => {
|
|
2085
|
+
this.emit("sessionExpiring", {
|
|
2086
|
+
expiresIn: 5 * 60 * 1e3,
|
|
2087
|
+
sessionId: this.sessionId
|
|
2088
|
+
});
|
|
2089
|
+
}, warningTime);
|
|
2090
|
+
}
|
|
2091
|
+
this.sessionDurationTimeout = setTimeout(() => {
|
|
2092
|
+
this.log("Session duration limit reached, disconnecting");
|
|
2093
|
+
void this.disconnect();
|
|
2094
|
+
}, durationMs);
|
|
2095
|
+
}
|
|
2096
|
+
/**
|
|
2097
|
+
* Parse duration string to milliseconds
|
|
2098
|
+
* @private
|
|
2099
|
+
*/
|
|
2100
|
+
parseDuration(duration) {
|
|
2101
|
+
const match = duration.match(/^(\d+)([hms])$/);
|
|
2102
|
+
if (!match) return null;
|
|
2103
|
+
const value = parseInt(match[1], 10);
|
|
2104
|
+
const unit = match[2];
|
|
2105
|
+
switch (unit) {
|
|
2106
|
+
case "h":
|
|
2107
|
+
return value * 60 * 60 * 1e3;
|
|
2108
|
+
case "m":
|
|
2109
|
+
return value * 60 * 1e3;
|
|
2110
|
+
case "s":
|
|
2111
|
+
return value * 1e3;
|
|
2112
|
+
default:
|
|
2113
|
+
return null;
|
|
2114
|
+
}
|
|
2115
|
+
}
|
|
2116
|
+
/**
|
|
2117
|
+
* Deprecated no-op kept for compatibility; context compression is handled by ContextManager
|
|
2118
|
+
* @private
|
|
2119
|
+
*/
|
|
2120
|
+
compressContext() {
|
|
2121
|
+
this.log("compressContext is deprecated; handled by ContextManager");
|
|
2122
|
+
}
|
|
2123
|
+
/**
|
|
2124
|
+
* Setup WebSocket event listeners for Gemini Live API messages
|
|
2125
|
+
* @private
|
|
2126
|
+
*/
|
|
2127
|
+
setupEventListeners() {
|
|
2128
|
+
if (!this.ws) {
|
|
2129
|
+
throw new Error("WebSocket not initialized");
|
|
2130
|
+
}
|
|
2131
|
+
this.ws.on("open", () => {
|
|
2132
|
+
this.log("WebSocket connection opened");
|
|
2133
|
+
});
|
|
2134
|
+
this.ws.on("close", (code, reason) => {
|
|
2135
|
+
this.log("WebSocket connection closed", { code, reason: reason.toString() });
|
|
2136
|
+
this.state = "disconnected";
|
|
2137
|
+
this.emit("session", { state: "disconnected" });
|
|
2138
|
+
});
|
|
2139
|
+
this.ws.on("error", (error) => {
|
|
2140
|
+
this.log("WebSocket error", error);
|
|
2141
|
+
this.state = "disconnected";
|
|
2142
|
+
this.emit("session", { state: "disconnected" });
|
|
2143
|
+
this.emit("error", {
|
|
2144
|
+
message: error.message,
|
|
2145
|
+
code: "websocket_error",
|
|
2146
|
+
details: error
|
|
2147
|
+
});
|
|
2148
|
+
});
|
|
2149
|
+
this.ws.on("message", async (message) => {
|
|
2150
|
+
try {
|
|
2151
|
+
const data = JSON.parse(message.toString());
|
|
2152
|
+
await this.handleGeminiMessage(data);
|
|
2153
|
+
} catch (error) {
|
|
2154
|
+
this.log("Failed to parse WebSocket message", error);
|
|
2155
|
+
this.emit("error", {
|
|
2156
|
+
message: "Failed to parse WebSocket message",
|
|
2157
|
+
code: "parse_error",
|
|
2158
|
+
details: error
|
|
2159
|
+
});
|
|
2160
|
+
}
|
|
2161
|
+
});
|
|
2162
|
+
}
|
|
2163
|
+
/**
|
|
2164
|
+
* Handle different types of messages from Gemini Live API
|
|
2165
|
+
* @private
|
|
2166
|
+
*/
|
|
2167
|
+
async handleGeminiMessage(data) {
|
|
2168
|
+
this.log("Received message:", JSON.stringify(data, null, 2));
|
|
2169
|
+
if (data.responseId) {
|
|
2170
|
+
this.setCurrentResponseId(data.responseId);
|
|
2171
|
+
this.log("Set current response ID:", data.responseId);
|
|
2172
|
+
}
|
|
2173
|
+
if (data.setup) {
|
|
2174
|
+
this.log("Processing setup message");
|
|
2175
|
+
this.handleSetupComplete(data);
|
|
2176
|
+
} else if (data.setupComplete) {
|
|
2177
|
+
this.log("Processing setupComplete message");
|
|
2178
|
+
this.handleSetupComplete(data);
|
|
2179
|
+
} else if (data.serverContent) {
|
|
2180
|
+
this.log("Processing server content message");
|
|
2181
|
+
this.handleServerContent(data.serverContent);
|
|
2182
|
+
} else if (data.toolCall) {
|
|
2183
|
+
this.log("Processing tool call message");
|
|
2184
|
+
await this.handleToolCall(data);
|
|
2185
|
+
} else if (data.usageMetadata) {
|
|
2186
|
+
this.log("Processing usage metadata message");
|
|
2187
|
+
this.handleUsageUpdate(data);
|
|
2188
|
+
} else if (data.sessionEnd) {
|
|
2189
|
+
this.log("Processing session end message");
|
|
2190
|
+
this.handleSessionEnd(data);
|
|
2191
|
+
} else if (data.error) {
|
|
2192
|
+
this.log("Processing error message");
|
|
2193
|
+
this.handleError(data.error);
|
|
2194
|
+
} else {
|
|
2195
|
+
const messageData = data;
|
|
2196
|
+
if (messageData.type === "setup" || messageData.type === "session.ready" || messageData.type === "ready") {
|
|
2197
|
+
this.log("Processing alternative setup message with type:", messageData.type);
|
|
2198
|
+
this.handleSetupComplete(data);
|
|
2199
|
+
} else if (messageData.sessionHandle) {
|
|
2200
|
+
this.log("Processing session handle message");
|
|
2201
|
+
this.handleSetupComplete(data);
|
|
2202
|
+
} else if (messageData.session || messageData.ready || messageData.status === "ready" || messageData.status === "setup_complete") {
|
|
2203
|
+
this.log("Processing setup completion message with status:", messageData.status);
|
|
2204
|
+
this.handleSetupComplete(data);
|
|
2205
|
+
} else if (messageData.candidates || messageData.promptFeedback) {
|
|
2206
|
+
this.log("Processing BidiGenerateContent response");
|
|
2207
|
+
this.handleSetupComplete(data);
|
|
2208
|
+
} else if (messageData.contents && Array.isArray(messageData.contents)) {
|
|
2209
|
+
this.log("Processing content response");
|
|
2210
|
+
this.handleServerContent({ modelTurn: { parts: messageData.contents.flatMap((c) => c.parts || []) } });
|
|
2211
|
+
this.handleSetupComplete(data);
|
|
2212
|
+
} else if (messageData.candidates && Array.isArray(messageData.candidates)) {
|
|
2213
|
+
this.log("Processing candidates response");
|
|
2214
|
+
this.handleSetupComplete(data);
|
|
2215
|
+
} else {
|
|
2216
|
+
this.log("Unknown message format - no recognized fields found");
|
|
2217
|
+
}
|
|
2218
|
+
}
|
|
2219
|
+
}
|
|
2220
|
+
/**
|
|
2221
|
+
* Handle setup completion message
|
|
2222
|
+
* @private
|
|
2223
|
+
*/
|
|
2224
|
+
handleSetupComplete(data) {
|
|
2225
|
+
this.log("Setup completed");
|
|
2226
|
+
const queue = this.queue.splice(0, this.queue.length);
|
|
2227
|
+
if (queue.length > 0) {
|
|
2228
|
+
this.log("Processing queued messages:", queue.length);
|
|
2229
|
+
for (const queuedMessage of queue) {
|
|
2230
|
+
try {
|
|
2231
|
+
this.connectionManager.send(JSON.stringify(queuedMessage));
|
|
2232
|
+
this.log("Sent queued message:", queuedMessage);
|
|
2233
|
+
} catch (err) {
|
|
2234
|
+
this.log("Failed to send queued message, re-queuing:", err);
|
|
2235
|
+
this.queue.unshift(queuedMessage);
|
|
2236
|
+
break;
|
|
2237
|
+
}
|
|
2238
|
+
}
|
|
2239
|
+
}
|
|
2240
|
+
this.eventManager.getEventEmitter().emit("setupComplete", data);
|
|
2241
|
+
}
|
|
2242
|
+
/**
|
|
2243
|
+
* Handle session update confirmation
|
|
2244
|
+
* @private
|
|
2245
|
+
*/
|
|
2246
|
+
handleSessionUpdated(data) {
|
|
2247
|
+
this.log("Session updated", data);
|
|
2248
|
+
this.eventManager.getEventEmitter().emit("session.updated", data);
|
|
2249
|
+
this.emit("session", {
|
|
2250
|
+
state: "updated",
|
|
2251
|
+
config: data
|
|
2252
|
+
});
|
|
2253
|
+
}
|
|
2254
|
+
/**
|
|
2255
|
+
* Handle server content (text/audio responses)
|
|
2256
|
+
* @private
|
|
2257
|
+
*/
|
|
2258
|
+
handleServerContent(data) {
|
|
2259
|
+
if (!data) {
|
|
2260
|
+
return;
|
|
2261
|
+
}
|
|
2262
|
+
let assistantResponse = "";
|
|
2263
|
+
if (data.modelTurn?.parts) {
|
|
2264
|
+
for (const part of data.modelTurn.parts) {
|
|
2265
|
+
if (part.text) {
|
|
2266
|
+
assistantResponse += part.text;
|
|
2267
|
+
this.emit("writing", {
|
|
2268
|
+
text: part.text,
|
|
2269
|
+
role: "assistant"
|
|
2270
|
+
});
|
|
2271
|
+
}
|
|
2272
|
+
if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
|
|
2273
|
+
try {
|
|
2274
|
+
const audioData = part.inlineData.data;
|
|
2275
|
+
const int16Array = this.audioStreamManager.base64ToInt16Array(audioData);
|
|
2276
|
+
const responseId = this.getCurrentResponseId() || crypto.randomUUID();
|
|
2277
|
+
let speakerStream = this.audioStreamManager.getSpeakerStream(responseId);
|
|
2278
|
+
if (!speakerStream) {
|
|
2279
|
+
this.audioStreamManager.cleanupStaleStreams();
|
|
2280
|
+
this.audioStreamManager.enforceStreamLimits();
|
|
2281
|
+
speakerStream = this.audioStreamManager.createSpeakerStream(responseId);
|
|
2282
|
+
speakerStream.on("error", (streamError) => {
|
|
2283
|
+
this.log(`Speaker stream error for ${responseId}:`, streamError);
|
|
2284
|
+
this.audioStreamManager.removeSpeakerStream(responseId);
|
|
2285
|
+
this.emit("error", {
|
|
2286
|
+
message: "Speaker stream error",
|
|
2287
|
+
code: "speaker_stream_error",
|
|
2288
|
+
details: { responseId, error: streamError }
|
|
2289
|
+
});
|
|
2290
|
+
});
|
|
2291
|
+
speakerStream.on("end", () => {
|
|
2292
|
+
this.log(`Speaker stream ended for response: ${responseId}`);
|
|
2293
|
+
this.audioStreamManager.removeSpeakerStream(responseId);
|
|
2294
|
+
});
|
|
2295
|
+
speakerStream.on("close", () => {
|
|
2296
|
+
this.log(`Speaker stream closed for response: ${responseId}`);
|
|
2297
|
+
this.audioStreamManager.removeSpeakerStream(responseId);
|
|
2298
|
+
});
|
|
2299
|
+
this.log("Created new speaker stream for response:", responseId);
|
|
2300
|
+
this.emit("speaker", speakerStream);
|
|
2301
|
+
}
|
|
2302
|
+
const audioBuffer = Buffer.from(int16Array.buffer, int16Array.byteOffset, int16Array.byteLength);
|
|
2303
|
+
speakerStream.write(audioBuffer);
|
|
2304
|
+
this.log("Wrote audio chunk to stream:", {
|
|
2305
|
+
responseId,
|
|
2306
|
+
chunkSize: audioBuffer.length,
|
|
2307
|
+
totalStreams: this.audioStreamManager.getActiveStreamCount()
|
|
2308
|
+
});
|
|
2309
|
+
this.emit("speaking", {
|
|
2310
|
+
audio: audioData,
|
|
2311
|
+
// Base64 string
|
|
2312
|
+
audioData: int16Array,
|
|
2313
|
+
sampleRate: this.audioConfig.outputSampleRate
|
|
2314
|
+
// Gemini Live outputs at 24kHz
|
|
2315
|
+
});
|
|
2316
|
+
} catch (error) {
|
|
2317
|
+
this.log("Error processing audio data:", error);
|
|
2318
|
+
this.emit("error", {
|
|
2319
|
+
message: "Failed to process audio data",
|
|
2320
|
+
code: "audio_processing_error",
|
|
2321
|
+
details: error
|
|
2322
|
+
});
|
|
2323
|
+
}
|
|
2324
|
+
}
|
|
2325
|
+
}
|
|
2326
|
+
}
|
|
2327
|
+
if (assistantResponse.trim()) {
|
|
2328
|
+
this.addToContext("assistant", assistantResponse);
|
|
2329
|
+
}
|
|
2330
|
+
if (data.turnComplete) {
|
|
2331
|
+
this.log("Turn completed");
|
|
2332
|
+
this.audioStreamManager.cleanupSpeakerStreams();
|
|
2333
|
+
this.emit("turnComplete", {
|
|
2334
|
+
timestamp: Date.now()
|
|
2335
|
+
});
|
|
2336
|
+
}
|
|
2337
|
+
}
|
|
2338
|
+
/**
|
|
2339
|
+
* Handle tool call requests from the model
|
|
2340
|
+
* @private
|
|
2341
|
+
*/
|
|
2342
|
+
async handleToolCall(data) {
|
|
2343
|
+
if (!data.toolCall) {
|
|
2344
|
+
return;
|
|
2345
|
+
}
|
|
2346
|
+
const toolName = data.toolCall.name || "";
|
|
2347
|
+
const toolArgs = data.toolCall.args || {};
|
|
2348
|
+
const toolId = data.toolCall.id || crypto.randomUUID();
|
|
2349
|
+
this.log("Processing tool call", { toolName, toolArgs, toolId });
|
|
2350
|
+
this.emit("toolCall", {
|
|
2351
|
+
name: toolName,
|
|
2352
|
+
args: toolArgs,
|
|
2353
|
+
id: toolId
|
|
2354
|
+
});
|
|
2355
|
+
const tool = this.tools?.[toolName];
|
|
2356
|
+
if (!tool) {
|
|
2357
|
+
this.log("Tool not found", { toolName });
|
|
2358
|
+
this.createAndEmitError("tool_not_found" /* TOOL_NOT_FOUND */, `Tool "${toolName}" not found`, {
|
|
2359
|
+
toolName,
|
|
2360
|
+
availableTools: Object.keys(this.tools || {})
|
|
2361
|
+
});
|
|
2362
|
+
return;
|
|
2363
|
+
}
|
|
2364
|
+
try {
|
|
2365
|
+
let result;
|
|
2366
|
+
if (tool.execute) {
|
|
2367
|
+
this.log("Executing tool", { toolName, toolArgs });
|
|
2368
|
+
result = await tool.execute(
|
|
2369
|
+
{ context: toolArgs, runtimeContext: this.runtimeContext },
|
|
2370
|
+
{
|
|
2371
|
+
toolCallId: toolId,
|
|
2372
|
+
messages: []
|
|
2373
|
+
}
|
|
2374
|
+
);
|
|
2375
|
+
this.log("Tool executed successfully", { toolName, result });
|
|
2376
|
+
} else {
|
|
2377
|
+
this.log("Tool has no execute function", { toolName });
|
|
2378
|
+
result = { error: "Tool has no execute function" };
|
|
2379
|
+
}
|
|
2380
|
+
const toolResultMessage = {
|
|
2381
|
+
tool_result: {
|
|
2382
|
+
tool_call_id: toolId,
|
|
2383
|
+
result
|
|
2384
|
+
}
|
|
2385
|
+
};
|
|
2386
|
+
this.sendEvent("tool_result", toolResultMessage);
|
|
2387
|
+
this.log("Tool result sent", { toolName, toolId, result });
|
|
2388
|
+
} catch (error) {
|
|
2389
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
2390
|
+
this.log("Tool execution failed", { toolName, error: errorMessage });
|
|
2391
|
+
const errorResultMessage = {
|
|
2392
|
+
tool_result: {
|
|
2393
|
+
tool_call_id: toolId,
|
|
2394
|
+
result: { error: errorMessage }
|
|
2395
|
+
}
|
|
2396
|
+
};
|
|
2397
|
+
this.sendEvent("tool_result", errorResultMessage);
|
|
2398
|
+
this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
|
|
2399
|
+
toolName,
|
|
2400
|
+
toolArgs,
|
|
2401
|
+
error
|
|
2402
|
+
});
|
|
2403
|
+
}
|
|
2404
|
+
}
|
|
2405
|
+
/**
|
|
2406
|
+
* Handle token usage information
|
|
2407
|
+
* @private
|
|
2408
|
+
*/
|
|
2409
|
+
handleUsageUpdate(data) {
|
|
2410
|
+
if (data.usageMetadata) {
|
|
2411
|
+
this.emit("usage", {
|
|
2412
|
+
inputTokens: data.usageMetadata.promptTokenCount || 0,
|
|
2413
|
+
outputTokens: data.usageMetadata.responseTokenCount || 0,
|
|
2414
|
+
totalTokens: data.usageMetadata.totalTokenCount || 0,
|
|
2415
|
+
modality: this.determineModality(data)
|
|
2416
|
+
});
|
|
2417
|
+
}
|
|
2418
|
+
}
|
|
2419
|
+
/**
|
|
2420
|
+
* Handle session end
|
|
2421
|
+
* @private
|
|
2422
|
+
*/
|
|
2423
|
+
handleSessionEnd(data) {
|
|
2424
|
+
this.log("Session ended", data.sessionEnd?.reason);
|
|
2425
|
+
this.state = "disconnected";
|
|
2426
|
+
this.emit("session", { state: "disconnected" });
|
|
2427
|
+
}
|
|
2428
|
+
/**
|
|
2429
|
+
* Handle errors
|
|
2430
|
+
* @private
|
|
2431
|
+
*/
|
|
2432
|
+
handleError(error) {
  // Re-emits a server-reported error as an "error" event, substituting
  // defaults for a missing message or code. A falsy error is only logged.
  if (error) {
    this.log("Received error from Gemini Live API", error);
    const message = error.message || "Unknown error";
    const code = error.code || "unknown_error";
    this.emit("error", { message, code, details: error.details });
    return;
  }
  this.log("Received error from Gemini Live API (no error details)");
}
|
|
2444
|
+
/**
|
|
2445
|
+
* Determine the modality from message data
|
|
2446
|
+
* @private
|
|
2447
|
+
*/
|
|
2448
|
+
determineModality(data) {
|
|
2449
|
+
if (data.serverContent?.modelTurn?.parts?.some((part) => part.inlineData?.mimeType?.includes("audio"))) {
|
|
2450
|
+
return "audio";
|
|
2451
|
+
}
|
|
2452
|
+
if (data.serverContent?.modelTurn?.parts?.some((part) => part.inlineData?.mimeType?.includes("video"))) {
|
|
2453
|
+
return "video";
|
|
2454
|
+
}
|
|
2455
|
+
return "text";
|
|
2456
|
+
}
|
|
2457
|
+
/**
|
|
2458
|
+
* Send initial configuration to Gemini Live API
|
|
2459
|
+
* @private
|
|
2460
|
+
*/
|
|
2461
|
+
sendInitialConfig() {
|
|
2462
|
+
if (!this.ws || !this.connectionManager.isConnected()) {
|
|
2463
|
+
throw new Error("WebSocket not connected");
|
|
2464
|
+
}
|
|
2465
|
+
const setupMessage = {
|
|
2466
|
+
setup: {
|
|
2467
|
+
model: `models/${this.options.model}`
|
|
2468
|
+
}
|
|
2469
|
+
};
|
|
2470
|
+
if (this.options.instructions) {
|
|
2471
|
+
setupMessage.setup.systemInstruction = {
|
|
2472
|
+
parts: [{ text: this.options.instructions }]
|
|
2473
|
+
};
|
|
2474
|
+
}
|
|
2475
|
+
const allTools = [];
|
|
2476
|
+
if (this.options.tools && this.options.tools.length > 0) {
|
|
2477
|
+
for (const tool of this.options.tools) {
|
|
2478
|
+
allTools.push({
|
|
2479
|
+
functionDeclarations: [
|
|
2480
|
+
{
|
|
2481
|
+
name: tool.name,
|
|
2482
|
+
description: tool.description,
|
|
2483
|
+
parameters: tool.parameters
|
|
2484
|
+
}
|
|
2485
|
+
]
|
|
2486
|
+
});
|
|
2487
|
+
}
|
|
2488
|
+
}
|
|
2489
|
+
if (this.tools && Object.keys(this.tools).length > 0) {
|
|
2490
|
+
for (const [toolName, tool] of Object.entries(this.tools)) {
|
|
2491
|
+
try {
|
|
2492
|
+
let parameters;
|
|
2493
|
+
if ("inputSchema" in tool && tool.inputSchema) {
|
|
2494
|
+
if (typeof tool.inputSchema === "object" && "safeParse" in tool.inputSchema) {
|
|
2495
|
+
parameters = this.convertZodSchemaToJsonSchema(tool.inputSchema);
|
|
2496
|
+
} else {
|
|
2497
|
+
parameters = tool.inputSchema;
|
|
2498
|
+
}
|
|
2499
|
+
} else if ("parameters" in tool && tool.parameters) {
|
|
2500
|
+
parameters = tool.parameters;
|
|
2501
|
+
} else {
|
|
2502
|
+
parameters = { type: "object", properties: {} };
|
|
2503
|
+
}
|
|
2504
|
+
allTools.push({
|
|
2505
|
+
functionDeclarations: [
|
|
2506
|
+
{
|
|
2507
|
+
name: toolName,
|
|
2508
|
+
description: tool.description || `Tool: ${toolName}`,
|
|
2509
|
+
parameters
|
|
2510
|
+
}
|
|
2511
|
+
]
|
|
2512
|
+
});
|
|
2513
|
+
} catch (error) {
|
|
2514
|
+
this.log("Failed to process tool", { toolName, error });
|
|
2515
|
+
}
|
|
2516
|
+
}
|
|
2517
|
+
}
|
|
2518
|
+
if (allTools.length > 0) {
|
|
2519
|
+
setupMessage.setup.tools = allTools;
|
|
2520
|
+
this.log("Including tools in setup message", { toolCount: allTools.length });
|
|
2521
|
+
}
|
|
2522
|
+
this.log("Sending Live API setup message:", setupMessage);
|
|
2523
|
+
try {
|
|
2524
|
+
this.sendEvent("setup", setupMessage);
|
|
2525
|
+
} catch (error) {
|
|
2526
|
+
this.log("Failed to send Live API setup message:", error);
|
|
2527
|
+
throw new Error(
|
|
2528
|
+
`Failed to send Live API setup message: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
2529
|
+
);
|
|
2530
|
+
}
|
|
2531
|
+
}
|
|
2532
|
+
/**
|
|
2533
|
+
* Wait for Gemini Live session to be created and ready
|
|
2534
|
+
* @private
|
|
2535
|
+
*/
|
|
2536
|
+
waitForSessionCreated() {
|
|
2537
|
+
return new Promise((resolve, reject) => {
|
|
2538
|
+
let isResolved = false;
|
|
2539
|
+
const onSetupComplete = () => {
|
|
2540
|
+
if (!isResolved) {
|
|
2541
|
+
isResolved = true;
|
|
2542
|
+
cleanup();
|
|
2543
|
+
resolve();
|
|
2544
|
+
}
|
|
2545
|
+
};
|
|
2546
|
+
const onError = (errorData) => {
|
|
2547
|
+
if (!isResolved) {
|
|
2548
|
+
isResolved = true;
|
|
2549
|
+
cleanup();
|
|
2550
|
+
reject(new Error(`Session creation failed: ${errorData.message || "Unknown error"}`));
|
|
2551
|
+
}
|
|
2552
|
+
};
|
|
2553
|
+
const onSessionEnd = () => {
|
|
2554
|
+
if (!isResolved) {
|
|
2555
|
+
isResolved = true;
|
|
2556
|
+
cleanup();
|
|
2557
|
+
reject(new Error("Session ended before setup completed"));
|
|
2558
|
+
}
|
|
2559
|
+
};
|
|
2560
|
+
const cleanup = () => {
|
|
2561
|
+
this.eventManager.getEventEmitter().removeListener("setupComplete", onSetupComplete);
|
|
2562
|
+
this.eventManager.getEventEmitter().removeListener("error", onError);
|
|
2563
|
+
this.eventManager.getEventEmitter().removeListener("sessionEnd", onSessionEnd);
|
|
2564
|
+
};
|
|
2565
|
+
this.eventManager.getEventEmitter().once("setupComplete", onSetupComplete);
|
|
2566
|
+
this.eventManager.getEventEmitter().once("error", onError);
|
|
2567
|
+
this.eventManager.getEventEmitter().once("sessionEnd", onSessionEnd);
|
|
2568
|
+
setTimeout(() => {
|
|
2569
|
+
if (!isResolved) {
|
|
2570
|
+
isResolved = true;
|
|
2571
|
+
cleanup();
|
|
2572
|
+
reject(new Error("Session creation timeout"));
|
|
2573
|
+
}
|
|
2574
|
+
}, 3e4);
|
|
2575
|
+
});
|
|
2576
|
+
}
|
|
2577
|
+
/**
|
|
2578
|
+
* Get OAuth access token for Vertex AI authentication
|
|
2579
|
+
* Implements token caching and automatic refresh
|
|
2580
|
+
* @private
|
|
2581
|
+
*/
|
|
2582
|
+
async getAccessToken() {
|
|
2583
|
+
if (!this.options.vertexAI) {
|
|
2584
|
+
throw new Error("getAccessToken should only be called for Vertex AI mode");
|
|
2585
|
+
}
|
|
2586
|
+
return this.authManager.getAccessToken();
|
|
2587
|
+
}
|
|
2588
|
+
/**
|
|
2589
|
+
* Get the current response ID from the server message
|
|
2590
|
+
* This is needed to associate audio chunks with their respective responses.
|
|
2591
|
+
* @private
|
|
2592
|
+
*/
|
|
2593
|
+
getCurrentResponseId() {
|
|
2594
|
+
return this.audioStreamManager.getCurrentResponseId();
|
|
2595
|
+
}
|
|
2596
|
+
/**
|
|
2597
|
+
* Set the current response ID for the next audio chunk.
|
|
2598
|
+
* This is used to track the response ID for the current turn.
|
|
2599
|
+
* @private
|
|
2600
|
+
*/
|
|
2601
|
+
setCurrentResponseId(responseId) {
|
|
2602
|
+
this.audioStreamManager.setCurrentResponseId(responseId);
|
|
2603
|
+
}
|
|
2604
|
+
/**
|
|
2605
|
+
* Send an event to the Gemini Live API with queueing support
|
|
2606
|
+
* @private
|
|
2607
|
+
*/
|
|
2608
|
+
sendEvent(type, data) {
|
|
2609
|
+
let message;
|
|
2610
|
+
if (type === "setup" && data.setup) {
|
|
2611
|
+
message = data;
|
|
2612
|
+
} else if (type === "client_content" && data.client_content) {
|
|
2613
|
+
message = data;
|
|
2614
|
+
} else if (type === "realtime_input" && data.realtime_input) {
|
|
2615
|
+
message = data;
|
|
2616
|
+
} else if (type === "session.update" && data.session) {
|
|
2617
|
+
message = data;
|
|
2618
|
+
} else {
|
|
2619
|
+
message = { type, ...data };
|
|
2620
|
+
}
|
|
2621
|
+
if (!this.ws || !this.connectionManager.isConnected()) {
|
|
2622
|
+
this.queue.push(message);
|
|
2623
|
+
this.log("Queued message:", { type, data });
|
|
2624
|
+
} else {
|
|
2625
|
+
this.connectionManager.send(JSON.stringify(message));
|
|
2626
|
+
this.log("Sent message:", { type, data });
|
|
2627
|
+
}
|
|
2628
|
+
}
|
|
2629
|
+
/**
|
|
2630
|
+
* Equip the voice provider with tools
|
|
2631
|
+
* @param tools Object containing tool definitions that can be called by the voice model
|
|
2632
|
+
*
|
|
2633
|
+
* @example
|
|
2634
|
+
* ```typescript
|
|
2635
|
+
* const weatherTool = createTool({
|
|
2636
|
+
* id: "getWeather",
|
|
2637
|
+
* description: "Get the current weather for a location",
|
|
2638
|
+
* inputSchema: z.object({
|
|
2639
|
+
* location: z.string().describe("The city and state, e.g. San Francisco, CA"),
|
|
2640
|
+
* }),
|
|
2641
|
+
* execute: async ({ context }) => {
|
|
2642
|
+
* // Fetch weather data from an API
|
|
2643
|
+
* const response = await fetch(
|
|
2644
|
+
* `https://api.weather.com?location=${encodeURIComponent(context.location)}`,
|
|
2645
|
+
* );
|
|
2646
|
+
* const data = await response.json();
|
|
2647
|
+
* return {
|
|
2648
|
+
* message: `The current temperature in ${context.location} is ${data.temperature}°F with ${data.conditions}.`,
|
|
2649
|
+
* };
|
|
2650
|
+
* },
|
|
2651
|
+
* });
|
|
2652
|
+
*
|
|
2653
|
+
* voice.addTools({
|
|
2654
|
+
* getWeather: weatherTool,
|
|
2655
|
+
* });
|
|
2656
|
+
* ```
|
|
2657
|
+
*/
|
|
2658
|
+
addTools(tools) {
|
|
2659
|
+
this.tools = tools;
|
|
2660
|
+
this.log("Tools added to Gemini Live Voice", { toolCount: Object.keys(tools || {}).length });
|
|
2661
|
+
}
|
|
2662
|
+
/**
|
|
2663
|
+
* Get the current tools configured for this voice instance
|
|
2664
|
+
* @returns Object containing the current tools
|
|
2665
|
+
*/
|
|
2666
|
+
getTools() {
|
|
2667
|
+
return this.tools;
|
|
2668
|
+
}
|
|
2669
|
+
log(message, ...args) {
|
|
2670
|
+
if (this.debug) {
|
|
2671
|
+
console.log(`[GeminiLiveVoice] ${message}`, ...args);
|
|
2672
|
+
}
|
|
2673
|
+
}
|
|
2674
|
+
/**
|
|
2675
|
+
* Convert Zod schema to JSON Schema for tool parameters
|
|
2676
|
+
* @private
|
|
2677
|
+
*/
|
|
2678
|
+
convertZodSchemaToJsonSchema(schema) {
|
|
2679
|
+
try {
|
|
2680
|
+
if (typeof schema.toJSON === "function") {
|
|
2681
|
+
return schema.toJSON();
|
|
2682
|
+
}
|
|
2683
|
+
if (schema._def) {
|
|
2684
|
+
return this.convertZodDefToJsonSchema(schema._def);
|
|
2685
|
+
}
|
|
2686
|
+
if (typeof schema === "object" && !schema.safeParse) {
|
|
2687
|
+
return schema;
|
|
2688
|
+
}
|
|
2689
|
+
return {
|
|
2690
|
+
type: "object",
|
|
2691
|
+
properties: {},
|
|
2692
|
+
description: schema.description || ""
|
|
2693
|
+
};
|
|
2694
|
+
} catch (error) {
|
|
2695
|
+
this.log("Failed to convert Zod schema to JSON schema", { error, schema });
|
|
2696
|
+
return {
|
|
2697
|
+
type: "object",
|
|
2698
|
+
properties: {},
|
|
2699
|
+
description: "Schema conversion failed"
|
|
2700
|
+
};
|
|
2701
|
+
}
|
|
2702
|
+
}
|
|
2703
|
+
/**
|
|
2704
|
+
* Convert Zod definition to JSON Schema
|
|
2705
|
+
* @private
|
|
2706
|
+
*/
|
|
2707
|
+
convertZodDefToJsonSchema(def) {
|
|
2708
|
+
switch (def.typeName) {
|
|
2709
|
+
case "ZodString":
|
|
2710
|
+
return {
|
|
2711
|
+
type: "string",
|
|
2712
|
+
description: def.description || ""
|
|
2713
|
+
};
|
|
2714
|
+
case "ZodNumber":
|
|
2715
|
+
return {
|
|
2716
|
+
type: "number",
|
|
2717
|
+
description: def.description || ""
|
|
2718
|
+
};
|
|
2719
|
+
case "ZodBoolean":
|
|
2720
|
+
return {
|
|
2721
|
+
type: "boolean",
|
|
2722
|
+
description: def.description || ""
|
|
2723
|
+
};
|
|
2724
|
+
case "ZodArray":
|
|
2725
|
+
return {
|
|
2726
|
+
type: "array",
|
|
2727
|
+
items: this.convertZodDefToJsonSchema(def.type._def),
|
|
2728
|
+
description: def.description || ""
|
|
2729
|
+
};
|
|
2730
|
+
case "ZodObject":
|
|
2731
|
+
const properties = {};
|
|
2732
|
+
const required = [];
|
|
2733
|
+
for (const [key, value] of Object.entries(def.shape())) {
|
|
2734
|
+
properties[key] = this.convertZodDefToJsonSchema(value._def);
|
|
2735
|
+
if (value._def.typeName === "ZodOptional") ; else {
|
|
2736
|
+
required.push(key);
|
|
2737
|
+
}
|
|
2738
|
+
}
|
|
2739
|
+
return {
|
|
2740
|
+
type: "object",
|
|
2741
|
+
properties,
|
|
2742
|
+
required: required.length > 0 ? required : void 0,
|
|
2743
|
+
description: def.description || ""
|
|
2744
|
+
};
|
|
2745
|
+
case "ZodOptional":
|
|
2746
|
+
return this.convertZodDefToJsonSchema(def.innerType._def);
|
|
2747
|
+
case "ZodEnum":
|
|
2748
|
+
return {
|
|
2749
|
+
type: "string",
|
|
2750
|
+
enum: def.values,
|
|
2751
|
+
description: def.description || ""
|
|
2752
|
+
};
|
|
2753
|
+
default:
|
|
2754
|
+
return {
|
|
2755
|
+
type: "object",
|
|
2756
|
+
properties: {},
|
|
2757
|
+
description: def.description || ""
|
|
2758
|
+
};
|
|
2759
|
+
}
|
|
2760
|
+
}
|
|
2761
|
+
/**
|
|
2762
|
+
* Close the connection (alias for disconnect)
|
|
2763
|
+
*/
|
|
2764
|
+
close() {
|
|
2765
|
+
void this.disconnect();
|
|
2766
|
+
}
|
|
2767
|
+
/**
|
|
2768
|
+
* Trigger voice provider to respond
|
|
2769
|
+
*/
|
|
2770
|
+
async answer(_options) {
|
|
2771
|
+
this.validateConnectionState();
|
|
2772
|
+
this.sendEvent("response.create", {});
|
|
2773
|
+
}
|
|
2774
|
+
/**
|
|
2775
|
+
* Equip the voice provider with instructions
|
|
2776
|
+
* @param instructions Instructions to add
|
|
2777
|
+
*/
|
|
2778
|
+
addInstructions(instructions) {
|
|
2779
|
+
if (instructions) {
|
|
2780
|
+
this.options.instructions = instructions;
|
|
2781
|
+
this.log("Instructions added:", instructions);
|
|
2782
|
+
}
|
|
2783
|
+
}
|
|
2784
|
+
};
|
|
2785
|
+
|
|
2786
|
+
// CommonJS export: expose GeminiLiveVoice to consumers that `require` this bundle.
exports.GeminiLiveVoice = GeminiLiveVoice;
|
|
2787
|
+
//# sourceMappingURL=index.cjs.map
|
|
2788
|
+
//# sourceMappingURL=index.cjs.map
|