@just-every/ensemble 0.2.78 → 0.2.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/tool_execution.d.ts.map +1 -1
- package/dist/config/tool_execution.js +2 -11
- package/dist/config/tool_execution.js.map +1 -1
- package/dist/core/ensemble_embed.d.ts.map +1 -1
- package/dist/core/ensemble_embed.js +2 -4
- package/dist/core/ensemble_embed.js.map +1 -1
- package/dist/core/ensemble_image.d.ts.map +1 -1
- package/dist/core/ensemble_image.js +1 -1
- package/dist/core/ensemble_image.js.map +1 -1
- package/dist/core/ensemble_listen.d.ts.map +1 -1
- package/dist/core/ensemble_listen.js +3 -5
- package/dist/core/ensemble_listen.js.map +1 -1
- package/dist/core/ensemble_live.d.ts +14 -0
- package/dist/core/ensemble_live.d.ts.map +1 -0
- package/dist/core/ensemble_live.js +382 -0
- package/dist/core/ensemble_live.js.map +1 -0
- package/dist/core/ensemble_request.d.ts.map +1 -1
- package/dist/core/ensemble_request.js +5 -13
- package/dist/core/ensemble_request.js.map +1 -1
- package/dist/core/ensemble_voice.d.ts.map +1 -1
- package/dist/core/ensemble_voice.js +1 -1
- package/dist/core/ensemble_voice.js.map +1 -1
- package/dist/data/model_data.d.ts.map +1 -1
- package/dist/data/model_data.js +2 -11
- package/dist/data/model_data.js.map +1 -1
- package/dist/index.d.ts +6 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -5
- package/dist/index.js.map +1 -1
- package/dist/model_providers/base_provider.d.ts.map +1 -1
- package/dist/model_providers/base_provider.js +1 -1
- package/dist/model_providers/base_provider.js.map +1 -1
- package/dist/model_providers/claude.d.ts.map +1 -1
- package/dist/model_providers/claude.js +48 -101
- package/dist/model_providers/claude.js.map +1 -1
- package/dist/model_providers/deepseek.d.ts.map +1 -1
- package/dist/model_providers/deepseek.js +7 -18
- package/dist/model_providers/deepseek.js.map +1 -1
- package/dist/model_providers/elevenlabs.d.ts.map +1 -1
- package/dist/model_providers/elevenlabs.js +3 -7
- package/dist/model_providers/elevenlabs.js.map +1 -1
- package/dist/model_providers/gemini.d.ts +3 -4
- package/dist/model_providers/gemini.d.ts.map +1 -1
- package/dist/model_providers/gemini.js +499 -111
- package/dist/model_providers/gemini.js.map +1 -1
- package/dist/model_providers/grok.d.ts.map +1 -1
- package/dist/model_providers/grok.js +1 -2
- package/dist/model_providers/grok.js.map +1 -1
- package/dist/model_providers/model_provider.d.ts.map +1 -1
- package/dist/model_providers/model_provider.js +10 -20
- package/dist/model_providers/model_provider.js.map +1 -1
- package/dist/model_providers/openai.d.ts.map +1 -1
- package/dist/model_providers/openai.js +50 -100
- package/dist/model_providers/openai.js.map +1 -1
- package/dist/model_providers/openai_chat.d.ts.map +1 -1
- package/dist/model_providers/openai_chat.js +39 -72
- package/dist/model_providers/openai_chat.js.map +1 -1
- package/dist/model_providers/test_provider.d.ts.map +1 -1
- package/dist/model_providers/test_provider.js +7 -17
- package/dist/model_providers/test_provider.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/errors.d.ts.map +1 -1
- package/dist/types/errors.js.map +1 -1
- package/dist/types/types.d.ts +158 -7
- package/dist/types/types.d.ts.map +1 -1
- package/dist/utils/agent.d.ts.map +1 -1
- package/dist/utils/agent.js +5 -16
- package/dist/utils/agent.js.map +1 -1
- package/dist/utils/citation_tracker.d.ts.map +1 -1
- package/dist/utils/citation_tracker.js.map +1 -1
- package/dist/utils/config_manager.d.ts.map +1 -1
- package/dist/utils/config_manager.js +12 -4
- package/dist/utils/config_manager.js.map +1 -1
- package/dist/utils/cost_tracker.d.ts.map +1 -1
- package/dist/utils/cost_tracker.js +13 -26
- package/dist/utils/cost_tracker.js.map +1 -1
- package/dist/utils/create_tool_function.d.ts.map +1 -1
- package/dist/utils/create_tool_function.js +4 -16
- package/dist/utils/create_tool_function.js.map +1 -1
- package/dist/utils/delta_buffer.d.ts.map +1 -1
- package/dist/utils/delta_buffer.js +1 -2
- package/dist/utils/delta_buffer.js.map +1 -1
- package/dist/utils/ensemble_result.d.ts.map +1 -1
- package/dist/utils/ensemble_result.js +9 -24
- package/dist/utils/ensemble_result.js.map +1 -1
- package/dist/utils/event_controller.d.ts.map +1 -1
- package/dist/utils/event_controller.js.map +1 -1
- package/dist/utils/external_models.d.ts.map +1 -1
- package/dist/utils/external_models.js.map +1 -1
- package/dist/utils/image_to_text.d.ts.map +1 -1
- package/dist/utils/image_to_text.js +1 -2
- package/dist/utils/image_to_text.js.map +1 -1
- package/dist/utils/image_utils.d.ts.map +1 -1
- package/dist/utils/image_utils.js.map +1 -1
- package/dist/utils/image_validation.d.ts.map +1 -1
- package/dist/utils/image_validation.js.map +1 -1
- package/dist/utils/llm_logger.d.ts.map +1 -1
- package/dist/utils/llm_logger.js.map +1 -1
- package/dist/utils/message_history.d.ts.map +1 -1
- package/dist/utils/message_history.js +9 -20
- package/dist/utils/message_history.js.map +1 -1
- package/dist/utils/model_class_config.d.ts.map +1 -1
- package/dist/utils/model_class_config.js +1 -1
- package/dist/utils/model_class_config.js.map +1 -1
- package/dist/utils/pause_controller.d.ts.map +1 -1
- package/dist/utils/pause_controller.js.map +1 -1
- package/dist/utils/quota_tracker.d.ts.map +1 -1
- package/dist/utils/quota_tracker.js +19 -49
- package/dist/utils/quota_tracker.js.map +1 -1
- package/dist/utils/retry_handler.d.ts.map +1 -1
- package/dist/utils/retry_handler.js.map +1 -1
- package/dist/utils/running_tool_tracker.d.ts.map +1 -1
- package/dist/utils/running_tool_tracker.js.map +1 -1
- package/dist/utils/sequential_queue.d.ts.map +1 -1
- package/dist/utils/sequential_queue.js.map +1 -1
- package/dist/utils/stream_handler.d.ts.map +1 -1
- package/dist/utils/stream_handler.js +1 -1
- package/dist/utils/stream_handler.js.map +1 -1
- package/dist/utils/summary_utils.d.ts.map +1 -1
- package/dist/utils/summary_utils.js +3 -8
- package/dist/utils/summary_utils.js.map +1 -1
- package/dist/utils/test_utils.d.ts.map +1 -1
- package/dist/utils/test_utils.js +1 -3
- package/dist/utils/test_utils.js.map +1 -1
- package/dist/utils/tool_execution_manager.d.ts.map +1 -1
- package/dist/utils/tool_execution_manager.js +3 -9
- package/dist/utils/tool_execution_manager.js.map +1 -1
- package/dist/utils/tool_parameter_utils.d.ts.map +1 -1
- package/dist/utils/tool_parameter_utils.js +2 -6
- package/dist/utils/tool_parameter_utils.js.map +1 -1
- package/dist/utils/tool_result_processor.d.ts.map +1 -1
- package/dist/utils/tool_result_processor.js +7 -18
- package/dist/utils/tool_result_processor.js.map +1 -1
- package/dist/utils/verification.d.ts.map +1 -1
- package/dist/utils/verification.js.map +1 -1
- package/package.json +4 -2
|
@@ -2,9 +2,9 @@ import { GoogleGenAI, Type, FunctionCallingConfigMode, Modality, } from '@google
|
|
|
2
2
|
import { v4 as uuidv4 } from 'uuid';
|
|
3
3
|
import { BaseModelProvider } from './base_provider.js';
|
|
4
4
|
import { costTracker } from '../index.js';
|
|
5
|
-
import { log_llm_error, log_llm_request, log_llm_response
|
|
5
|
+
import { log_llm_error, log_llm_request, log_llm_response } from '../utils/llm_logger.js';
|
|
6
6
|
import { isPaused } from '../utils/pause_controller.js';
|
|
7
|
-
import { appendMessageWithImage, resizeAndTruncateForGemini
|
|
7
|
+
import { appendMessageWithImage, resizeAndTruncateForGemini } from '../utils/image_utils.js';
|
|
8
8
|
function convertParameterToGeminiFormat(param) {
|
|
9
9
|
let type = Type.STRING;
|
|
10
10
|
switch (param.type) {
|
|
@@ -85,8 +85,7 @@ function convertParameterToGeminiFormat(param) {
|
|
|
85
85
|
if (param.properties && typeof param.properties === 'object') {
|
|
86
86
|
result.properties = {};
|
|
87
87
|
for (const [propName, propSchema] of Object.entries(param.properties)) {
|
|
88
|
-
result.properties[propName] =
|
|
89
|
-
convertParameterToGeminiFormat(propSchema);
|
|
88
|
+
result.properties[propName] = convertParameterToGeminiFormat(propSchema);
|
|
90
89
|
}
|
|
91
90
|
}
|
|
92
91
|
else {
|
|
@@ -161,9 +160,7 @@ async function convertToGeminiFunctionDeclarations(tools) {
|
|
|
161
160
|
parameters: {
|
|
162
161
|
type: Type.OBJECT,
|
|
163
162
|
properties,
|
|
164
|
-
required: Array.isArray(resolvedParams?.required)
|
|
165
|
-
? resolvedParams.required
|
|
166
|
-
: [],
|
|
163
|
+
required: Array.isArray(resolvedParams?.required) ? resolvedParams.required : [],
|
|
167
164
|
},
|
|
168
165
|
};
|
|
169
166
|
}));
|
|
@@ -218,10 +215,7 @@ async function convertToGeminiContents(model, messages) {
|
|
|
218
215
|
let args = {};
|
|
219
216
|
try {
|
|
220
217
|
const parsedArgs = JSON.parse(msg.arguments || '{}');
|
|
221
|
-
args =
|
|
222
|
-
typeof parsedArgs === 'object' && parsedArgs !== null
|
|
223
|
-
? parsedArgs
|
|
224
|
-
: { value: parsedArgs };
|
|
218
|
+
args = typeof parsedArgs === 'object' && parsedArgs !== null ? parsedArgs : { value: parsedArgs };
|
|
225
219
|
}
|
|
226
220
|
catch (e) {
|
|
227
221
|
console.error(`Failed to parse function call arguments for ${msg.name}:`, msg.arguments, e);
|
|
@@ -264,8 +258,7 @@ async function convertToGeminiContents(model, messages) {
|
|
|
264
258
|
contents = await appendMessageWithImage(model, contents, message, {
|
|
265
259
|
read: () => textOutput,
|
|
266
260
|
write: value => {
|
|
267
|
-
message.parts[0].functionResponse.response.content =
|
|
268
|
-
value;
|
|
261
|
+
message.parts[0].functionResponse.response.content = value;
|
|
269
262
|
return message;
|
|
270
263
|
},
|
|
271
264
|
}, addImagesToInput);
|
|
@@ -275,9 +268,7 @@ async function convertToGeminiContents(model, messages) {
|
|
|
275
268
|
if (typeof msg.content === 'string') {
|
|
276
269
|
textContent = msg.content;
|
|
277
270
|
}
|
|
278
|
-
else if (msg.content &&
|
|
279
|
-
typeof msg.content === 'object' &&
|
|
280
|
-
'text' in msg.content) {
|
|
271
|
+
else if (msg.content && typeof msg.content === 'object' && 'text' in msg.content) {
|
|
281
272
|
textContent = msg.content.text;
|
|
282
273
|
}
|
|
283
274
|
else {
|
|
@@ -326,15 +317,14 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
326
317
|
this._client = new GoogleGenAI({
|
|
327
318
|
apiKey: apiKey,
|
|
328
319
|
vertexai: false,
|
|
320
|
+
httpOptions: { apiVersion: 'v1alpha' },
|
|
329
321
|
});
|
|
330
322
|
}
|
|
331
323
|
return this._client;
|
|
332
324
|
}
|
|
333
325
|
async createEmbedding(input, model, opts) {
|
|
334
326
|
try {
|
|
335
|
-
let actualModelId = model.startsWith('gemini/')
|
|
336
|
-
? model.substring(7)
|
|
337
|
-
: model;
|
|
327
|
+
let actualModelId = model.startsWith('gemini/') ? model.substring(7) : model;
|
|
338
328
|
let thinkingConfig = null;
|
|
339
329
|
for (const [suffix, budget] of Object.entries(THINKING_BUDGET_CONFIGS)) {
|
|
340
330
|
if (actualModelId.endsWith(suffix)) {
|
|
@@ -355,9 +345,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
355
345
|
payload.config.thinkingConfig = thinkingConfig;
|
|
356
346
|
}
|
|
357
347
|
const response = await this.client.models.embedContent(payload);
|
|
358
|
-
console.log('[Gemini] Embedding response structure:', JSON.stringify(response, (key, value) => key === 'values' &&
|
|
359
|
-
Array.isArray(value) &&
|
|
360
|
-
value.length > 10
|
|
348
|
+
console.log('[Gemini] Embedding response structure:', JSON.stringify(response, (key, value) => key === 'values' && Array.isArray(value) && value.length > 10
|
|
361
349
|
? `[${value.length} items]`
|
|
362
350
|
: value, 2));
|
|
363
351
|
if (!response.embeddings || !Array.isArray(response.embeddings)) {
|
|
@@ -376,11 +364,8 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
376
364
|
}
|
|
377
365
|
else {
|
|
378
366
|
console.warn('[Gemini] Could not find expected "values" property in embeddings response');
|
|
379
|
-
extractedValues =
|
|
380
|
-
|
|
381
|
-
dimensions = Array.isArray(extractedValues[0])
|
|
382
|
-
? extractedValues[0].length
|
|
383
|
-
: 0;
|
|
367
|
+
extractedValues = response.embeddings;
|
|
368
|
+
dimensions = Array.isArray(extractedValues[0]) ? extractedValues[0].length : 0;
|
|
384
369
|
}
|
|
385
370
|
}
|
|
386
371
|
costTracker.addUsage({
|
|
@@ -396,8 +381,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
396
381
|
}
|
|
397
382
|
else {
|
|
398
383
|
let result;
|
|
399
|
-
if (Array.isArray(extractedValues) &&
|
|
400
|
-
extractedValues.length >= 1) {
|
|
384
|
+
if (Array.isArray(extractedValues) && extractedValues.length >= 1) {
|
|
401
385
|
const firstValue = extractedValues[0];
|
|
402
386
|
if (Array.isArray(firstValue)) {
|
|
403
387
|
result = firstValue;
|
|
@@ -417,10 +401,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
417
401
|
adjustedResult = result.slice(0, 3072);
|
|
418
402
|
}
|
|
419
403
|
else {
|
|
420
|
-
adjustedResult = [
|
|
421
|
-
...result,
|
|
422
|
-
...Array(3072 - result.length).fill(0),
|
|
423
|
-
];
|
|
404
|
+
adjustedResult = [...result, ...Array(3072 - result.length).fill(0)];
|
|
424
405
|
}
|
|
425
406
|
}
|
|
426
407
|
return adjustedResult;
|
|
@@ -444,8 +425,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
444
425
|
catch (error) {
|
|
445
426
|
attempts++;
|
|
446
427
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
447
|
-
if (errorMsg.includes('Incomplete JSON segment') &&
|
|
448
|
-
attempts <= maxRetries) {
|
|
428
|
+
if (errorMsg.includes('Incomplete JSON segment') && attempts <= maxRetries) {
|
|
449
429
|
console.warn(`[Gemini] Incomplete JSON segment error, retrying (${attempts}/${maxRetries})...`);
|
|
450
430
|
await new Promise(resolve => setTimeout(resolve, 1000 * attempts));
|
|
451
431
|
continue;
|
|
@@ -456,9 +436,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
456
436
|
}
|
|
457
437
|
async *createResponseStream(messages, model, agent) {
|
|
458
438
|
const { getToolsFromAgent } = await import('../utils/agent.js');
|
|
459
|
-
const tools = agent
|
|
460
|
-
? await getToolsFromAgent(agent)
|
|
461
|
-
: [];
|
|
439
|
+
const tools = agent ? await getToolsFromAgent(agent) : [];
|
|
462
440
|
const settings = agent?.modelSettings;
|
|
463
441
|
let messageId = uuidv4();
|
|
464
442
|
let contentBuffer = '';
|
|
@@ -526,8 +504,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
526
504
|
if ('additionalProperties' in obj) {
|
|
527
505
|
delete obj.additionalProperties;
|
|
528
506
|
}
|
|
529
|
-
if (obj.properties &&
|
|
530
|
-
typeof obj.properties === 'object') {
|
|
507
|
+
if (obj.properties && typeof obj.properties === 'object') {
|
|
531
508
|
Object.values(obj.properties).forEach(prop => {
|
|
532
509
|
removeAdditionalProperties(prop);
|
|
533
510
|
});
|
|
@@ -559,9 +536,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
559
536
|
settings.tool_choice?.type === 'function' &&
|
|
560
537
|
settings.tool_choice?.function?.name) {
|
|
561
538
|
toolChoice = FunctionCallingConfigMode.ANY;
|
|
562
|
-
allowedFunctionNames = [
|
|
563
|
-
settings.tool_choice.function.name,
|
|
564
|
-
];
|
|
539
|
+
allowedFunctionNames = [settings.tool_choice.function.name];
|
|
565
540
|
}
|
|
566
541
|
else if (settings.tool_choice === 'required') {
|
|
567
542
|
toolChoice = FunctionCallingConfigMode.ANY;
|
|
@@ -579,8 +554,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
579
554
|
},
|
|
580
555
|
};
|
|
581
556
|
if (allowedFunctionNames.length > 0) {
|
|
582
|
-
config.toolConfig.functionCallingConfig.allowedFunctionNames =
|
|
583
|
-
allowedFunctionNames;
|
|
557
|
+
config.toolConfig.functionCallingConfig.allowedFunctionNames = allowedFunctionNames;
|
|
584
558
|
}
|
|
585
559
|
}
|
|
586
560
|
}
|
|
@@ -697,8 +671,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
697
671
|
message_id: messageId,
|
|
698
672
|
order: eventOrder++,
|
|
699
673
|
};
|
|
700
|
-
contentBuffer +=
|
|
701
|
-
'\n\nSearch Results:\n' + formatted + '\n';
|
|
674
|
+
contentBuffer += '\n\nSearch Results:\n' + formatted + '\n';
|
|
702
675
|
}
|
|
703
676
|
}
|
|
704
677
|
}
|
|
@@ -743,9 +716,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
743
716
|
}
|
|
744
717
|
catch (error) {
|
|
745
718
|
log_llm_error(requestId, error);
|
|
746
|
-
const errorMessage = error instanceof Error
|
|
747
|
-
? error.stack || error.message
|
|
748
|
-
: String(error);
|
|
719
|
+
const errorMessage = error instanceof Error ? error.stack || error.message : String(error);
|
|
749
720
|
if (errorMessage.includes('Incomplete JSON segment')) {
|
|
750
721
|
console.error('[Gemini] Stream terminated with incomplete JSON. This may indicate network issues or timeouts.');
|
|
751
722
|
}
|
|
@@ -796,8 +767,7 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
796
767
|
},
|
|
797
768
|
});
|
|
798
769
|
const images = [];
|
|
799
|
-
if (response.generatedImages &&
|
|
800
|
-
response.generatedImages.length > 0) {
|
|
770
|
+
if (response.generatedImages && response.generatedImages.length > 0) {
|
|
801
771
|
for (const generatedImage of response.generatedImages) {
|
|
802
772
|
if (generatedImage.image?.imageBytes) {
|
|
803
773
|
const base64Image = `data:image/png;base64,${generatedImage.image.imageBytes}`;
|
|
@@ -879,14 +849,12 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
879
849
|
throw new Error('No audio generated from Gemini TTS');
|
|
880
850
|
}
|
|
881
851
|
const candidate = response.candidates[0];
|
|
882
|
-
if (!candidate.content.parts ||
|
|
883
|
-
candidate.content.parts.length === 0) {
|
|
852
|
+
if (!candidate.content.parts || candidate.content.parts.length === 0) {
|
|
884
853
|
throw new Error('No audio parts in Gemini TTS response');
|
|
885
854
|
}
|
|
886
855
|
let audioData;
|
|
887
856
|
for (const part of candidate.content.parts) {
|
|
888
|
-
if (part.inlineData &&
|
|
889
|
-
part.inlineData.mimeType?.includes('audio')) {
|
|
857
|
+
if (part.inlineData && part.inlineData.mimeType?.includes('audio')) {
|
|
890
858
|
audioData = part.inlineData.data;
|
|
891
859
|
break;
|
|
892
860
|
}
|
|
@@ -974,40 +942,59 @@ export class GeminiProvider extends BaseModelProvider {
|
|
|
974
942
|
console.warn(`[Gemini] Unknown voice '${voice}', using default voice 'Kore'`);
|
|
975
943
|
return 'Kore';
|
|
976
944
|
}
|
|
977
|
-
async *createTranscription(audio, model, opts) {
|
|
945
|
+
async *createTranscription(audio, agent, model, opts) {
|
|
978
946
|
let session = null;
|
|
979
947
|
let audioBuffer = Buffer.alloc(0);
|
|
980
948
|
let isConnected = false;
|
|
981
949
|
try {
|
|
982
|
-
const ai = new GoogleGenAI({
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
950
|
+
const ai = new GoogleGenAI({
|
|
951
|
+
apiKey: this.apiKey,
|
|
952
|
+
httpOptions: { apiVersion: 'v1alpha' },
|
|
953
|
+
});
|
|
954
|
+
const realtimeInputConfig = opts?.realtimeInputConfig || {
|
|
955
|
+
automaticActivityDetection: {
|
|
956
|
+
disabled: false,
|
|
957
|
+
startOfSpeechSensitivity: 'START_SENSITIVITY_HIGH',
|
|
958
|
+
endOfSpeechSensitivity: 'END_SENSITIVITY_LOW',
|
|
959
|
+
},
|
|
960
|
+
};
|
|
961
|
+
const speechConfig = opts?.speechConfig || {
|
|
962
|
+
languageCode: 'en-US',
|
|
987
963
|
};
|
|
988
|
-
const systemInstruction =
|
|
964
|
+
const systemInstruction = agent.instructions ||
|
|
989
965
|
`You are a real-time transcription assistant. Your only task is to transcribe speech as you hear it. DO NOT ADD YOUR OWN RESPONSE OR COMMENTARY. TRANSCRIBE WHAT YOU HEAR ONLY.
|
|
990
966
|
Respond immediately with transcribed text as you process the audio.
|
|
991
967
|
If quick corrections are used e.g. "Let's go to Point A, no Point B" then just remove incorrect part e.g. respond with "Let's go to Point B".
|
|
992
|
-
When it makes the transcription clearer, remove filler words (like "um") add punctuation, correct obvious grammar issues and add in missing words
|
|
968
|
+
When it makes the transcription clearer, remove filler words (like "um") add punctuation, correct obvious grammar issues and add in missing words.
|
|
969
|
+
|
|
970
|
+
EXAMPLES:
|
|
971
|
+
User: What capital of France
|
|
972
|
+
Model: What's the capital of France?
|
|
973
|
+
|
|
974
|
+
User: How about um we then do no actually how about you tell me the weather
|
|
975
|
+
Model: How about you tell me the weather?
|
|
976
|
+
|
|
977
|
+
User: Ok ignore all that lets start again
|
|
978
|
+
Model: Ok ignore all that, let's start again.`;
|
|
993
979
|
console.log('[Gemini] Connecting to Live API for transcription...');
|
|
994
980
|
const connectionPromise = new Promise((resolve, reject) => {
|
|
995
981
|
const timeout = setTimeout(() => {
|
|
996
982
|
reject(new Error('Connection timeout'));
|
|
997
983
|
}, 30000);
|
|
984
|
+
const config = {
|
|
985
|
+
responseModalities: [Modality.TEXT],
|
|
986
|
+
speechConfig,
|
|
987
|
+
realtimeInputConfig,
|
|
988
|
+
systemInstruction: {
|
|
989
|
+
parts: [{ text: systemInstruction }],
|
|
990
|
+
},
|
|
991
|
+
inputAudioTranscription: {},
|
|
992
|
+
};
|
|
993
|
+
console.dir(config, { depth: null });
|
|
998
994
|
ai.live
|
|
999
995
|
.connect({
|
|
1000
996
|
model: model,
|
|
1001
|
-
config
|
|
1002
|
-
responseModalities: [Modality.TEXT],
|
|
1003
|
-
systemInstruction: {
|
|
1004
|
-
parts: [{ text: systemInstruction }],
|
|
1005
|
-
},
|
|
1006
|
-
realtimeInputConfig: {
|
|
1007
|
-
automaticActivityDetection: realtimeConfig,
|
|
1008
|
-
},
|
|
1009
|
-
inputAudioTranscription: true,
|
|
1010
|
-
},
|
|
997
|
+
config,
|
|
1011
998
|
callbacks: {
|
|
1012
999
|
onopen: () => {
|
|
1013
1000
|
clearTimeout(timeout);
|
|
@@ -1016,27 +1003,18 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
|
|
|
1016
1003
|
resolve();
|
|
1017
1004
|
},
|
|
1018
1005
|
onmessage: async (msg) => {
|
|
1019
|
-
if (msg.serverContent?.
|
|
1020
|
-
const
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
type: 'transcription_preview',
|
|
1029
|
-
timestamp: new Date().toISOString(),
|
|
1030
|
-
text: transcriptionText,
|
|
1031
|
-
isFinal: true,
|
|
1032
|
-
};
|
|
1033
|
-
transcriptEvents.push(previewEvent);
|
|
1034
|
-
console.debug('[Gemini] Received input transcription:', transcriptionText);
|
|
1035
|
-
}
|
|
1006
|
+
if (msg.serverContent?.inputTranscription?.text) {
|
|
1007
|
+
const previewEvent = {
|
|
1008
|
+
type: 'transcription_preview',
|
|
1009
|
+
timestamp: new Date().toISOString(),
|
|
1010
|
+
text: msg.serverContent.inputTranscription.text,
|
|
1011
|
+
isFinal: true,
|
|
1012
|
+
};
|
|
1013
|
+
transcriptEvents.push(previewEvent);
|
|
1014
|
+
console.debug('[Gemini] Received input transcription:', msg.serverContent.inputTranscription.text);
|
|
1036
1015
|
}
|
|
1037
1016
|
if (msg.serverContent?.modelTurn?.parts) {
|
|
1038
|
-
for (const part of msg.serverContent
|
|
1039
|
-
.modelTurn.parts) {
|
|
1017
|
+
for (const part of msg.serverContent.modelTurn.parts) {
|
|
1040
1018
|
if (part.text && part.text.trim()) {
|
|
1041
1019
|
const deltaEvent = {
|
|
1042
1020
|
type: 'transcription_delta',
|
|
@@ -1058,26 +1036,34 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
|
|
|
1058
1036
|
if (msg.usageMetadata) {
|
|
1059
1037
|
costTracker.addUsage({
|
|
1060
1038
|
model: model,
|
|
1061
|
-
input_tokens: msg.usageMetadata
|
|
1062
|
-
|
|
1063
|
-
output_tokens: msg.usageMetadata
|
|
1064
|
-
.responseTokenCount || 0,
|
|
1039
|
+
input_tokens: msg.usageMetadata.promptTokenCount || 0,
|
|
1040
|
+
output_tokens: msg.usageMetadata.responseTokenCount || 0,
|
|
1065
1041
|
input_modality: 'audio',
|
|
1066
1042
|
output_modality: 'text',
|
|
1067
1043
|
metadata: {
|
|
1068
|
-
totalTokens: msg.usageMetadata
|
|
1069
|
-
.totalTokenCount || 0,
|
|
1044
|
+
totalTokens: msg.usageMetadata.totalTokenCount || 0,
|
|
1070
1045
|
source: 'gemini-live-transcription',
|
|
1071
1046
|
},
|
|
1072
1047
|
});
|
|
1073
1048
|
}
|
|
1074
1049
|
},
|
|
1075
1050
|
onerror: (err) => {
|
|
1076
|
-
console.error('[Gemini] Live API error:',
|
|
1051
|
+
console.error('[Gemini] Live API error:', {
|
|
1052
|
+
code: err.code,
|
|
1053
|
+
reason: err.reason,
|
|
1054
|
+
wasClean: err.wasClean,
|
|
1055
|
+
});
|
|
1077
1056
|
connectionError = err;
|
|
1078
1057
|
},
|
|
1079
|
-
onclose: () => {
|
|
1058
|
+
onclose: (event) => {
|
|
1080
1059
|
console.log('[Gemini] Live session closed');
|
|
1060
|
+
if (event) {
|
|
1061
|
+
console.log('[Gemini] Close event details:', {
|
|
1062
|
+
code: event.code,
|
|
1063
|
+
reason: event.reason,
|
|
1064
|
+
wasClean: event.wasClean,
|
|
1065
|
+
});
|
|
1066
|
+
}
|
|
1081
1067
|
isConnected = false;
|
|
1082
1068
|
},
|
|
1083
1069
|
},
|
|
@@ -1124,10 +1110,7 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
|
|
|
1124
1110
|
if (done)
|
|
1125
1111
|
break;
|
|
1126
1112
|
if (value) {
|
|
1127
|
-
audioBuffer = Buffer.concat([
|
|
1128
|
-
audioBuffer,
|
|
1129
|
-
Buffer.from(value),
|
|
1130
|
-
]);
|
|
1113
|
+
audioBuffer = Buffer.concat([audioBuffer, Buffer.from(value)]);
|
|
1131
1114
|
while (audioBuffer.length >= chunkSize) {
|
|
1132
1115
|
const chunk = audioBuffer.slice(0, chunkSize);
|
|
1133
1116
|
audioBuffer = audioBuffer.slice(chunkSize);
|
|
@@ -1170,21 +1153,34 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
|
|
|
1170
1153
|
const errorEvent = {
|
|
1171
1154
|
type: 'error',
|
|
1172
1155
|
timestamp: new Date().toISOString(),
|
|
1173
|
-
error: error instanceof Error
|
|
1174
|
-
? error.message
|
|
1175
|
-
: 'Transcription failed',
|
|
1156
|
+
error: error instanceof Error ? error.message : 'Transcription failed',
|
|
1176
1157
|
};
|
|
1177
1158
|
yield errorEvent;
|
|
1178
1159
|
}
|
|
1179
1160
|
}
|
|
1161
|
+
async createLiveSession(config, agent, model, opts) {
|
|
1162
|
+
console.log(`[Gemini] Creating Live session with model ${model}`);
|
|
1163
|
+
const liveModels = [
|
|
1164
|
+
'gemini-2.0-flash-live-001',
|
|
1165
|
+
'gemini-live-2.5-flash-preview',
|
|
1166
|
+
'gemini-2.5-flash-preview-native-audio-dialog',
|
|
1167
|
+
'gemini-2.5-flash-exp-native-audio-thinking-dialog',
|
|
1168
|
+
'gemini-2.0-flash-exp',
|
|
1169
|
+
];
|
|
1170
|
+
if (!liveModels.some(m => model.includes(m))) {
|
|
1171
|
+
throw new Error(`Model ${model} does not support Live API. Supported models: ${liveModels.join(', ')}`);
|
|
1172
|
+
}
|
|
1173
|
+
const sessionId = uuidv4();
|
|
1174
|
+
const liveSession = new GeminiLiveSession(sessionId, this.client, model, config, agent, opts);
|
|
1175
|
+
await liveSession.initialize();
|
|
1176
|
+
return liveSession;
|
|
1177
|
+
}
|
|
1180
1178
|
}
|
|
1181
1179
|
function normalizeAudioSource(source) {
|
|
1182
1180
|
if (source instanceof ReadableStream) {
|
|
1183
1181
|
return source;
|
|
1184
1182
|
}
|
|
1185
|
-
if (typeof source === 'object' &&
|
|
1186
|
-
source !== null &&
|
|
1187
|
-
Symbol.asyncIterator in source) {
|
|
1183
|
+
if (typeof source === 'object' && source !== null && Symbol.asyncIterator in source) {
|
|
1188
1184
|
return new ReadableStream({
|
|
1189
1185
|
async start(controller) {
|
|
1190
1186
|
try {
|
|
@@ -1214,5 +1210,397 @@ function normalizeAudioSource(source) {
|
|
|
1214
1210
|
}
|
|
1215
1211
|
throw new Error(`Unsupported audio source type: ${typeof source}`);
|
|
1216
1212
|
}
|
|
1213
|
+
/**
 * One bidirectional streaming conversation with the Gemini Live API.
 *
 * The session connects through `ai.live.connect`, converts incoming server
 * messages into event objects (`live_ready`, `audio_output`, `text_delta`,
 * `tool_call`, `turn_complete`, ...) and hands them to consumers through the
 * async generator returned by `getEventStream()`.
 */
class GeminiLiveSession {
    sessionId;
    ai;
    model;
    config;
    agent;
    options;
    /** Handle resolved by `ai.live.connect`; stays null until the connection exists. */
    session = null;
    /** Events produced while no consumer was waiting for one. */
    eventQueue = [];
    /** Resolve callbacks of consumers currently blocked inside `getEventStream()`. */
    eventResolvers = [];
    _isActive = true;
    sessionClosed = false;
    messageHistory = [];
    currentTurn = null;

    /**
     * @param {string} sessionId - Identifier used in log messages.
     * @param {object} ai - Client exposing `live.connect(...)`.
     * @param {string} model - Model name passed to `live.connect` and to cost tracking.
     * @param {object} config - Session settings (tools, modalities, transcription flags, ...).
     * @param {object} agent - Agent whose `instructions`, when set, become the system instruction.
     * @param {object} options - Stored as-is; not read by this class.
     */
    constructor(sessionId, ai, model, config, agent, options) {
        this.sessionId = sessionId;
        this.ai = ai;
        this.model = model;
        this.config = config;
        this.agent = agent;
        this.options = options;
    }

    /**
     * Opens the live connection.
     *
     * Resolves only once the connection has reported `onopen` AND the session
     * handle returned by `connect` has been stored, so callers may send input
     * immediately afterwards.
     *
     * @returns {Promise<void>}
     * @throws {Error} When connecting fails, the socket errors or closes before
     *   it is ready, or nothing happens within 30 seconds ('Connection timeout').
     */
    async initialize() {
        const liveConfig = this.buildLiveConfig();
        console.log('[Gemini] Connecting with config:', JSON.stringify(liveConfig, null, 2));
        await new Promise((resolve, reject) => {
            // 'pending' -> 'ready' | 'failed'; guarantees the promise settles exactly once.
            let state = 'pending';
            let opened = false;
            let timer = null;
            const fail = (error) => {
                if (state !== 'pending') {
                    return;
                }
                state = 'failed';
                clearTimeout(timer);
                reject(error);
            };
            const succeedIfReady = () => {
                if (state !== 'pending' || !opened || !this.session) {
                    return;
                }
                state = 'ready';
                clearTimeout(timer);
                resolve();
            };
            timer = setTimeout(() => fail(new Error('Connection timeout')), 30000);

            const callbacks = {
                onopen: () => {
                    console.log('[Gemini] Live session connected');
                    opened = true;
                    this.pushEvent({
                        type: 'live_ready',
                        timestamp: new Date().toISOString(),
                    });
                    succeedIfReady();
                },
                onmessage: (msg) => {
                    this.handleMessage(msg);
                },
                onerror: (err) => {
                    console.error('[Gemini] Live API error:', err);
                    // Error events may reference their socket (circular), so stringify defensively.
                    console.error('[Gemini] Error details:', this.stringifyForLog(err));
                    this.pushEvent({
                        type: 'error',
                        timestamp: new Date().toISOString(),
                        error: err?.message || String(err),
                        code: err?.code,
                        recoverable: true,
                    });
                    if (!opened) {
                        // An error before the connection opened means it never will.
                        fail(err instanceof Error ? err : new Error(err?.message || 'Connection error'));
                    }
                },
                onclose: (event) => {
                    console.log('[Gemini] Live session closed', event);
                    if (event) {
                        console.log('[Gemini] Close event details:', {
                            code: event.code,
                            reason: event.reason,
                            wasClean: event.wasClean,
                        });
                    }
                    this.markClosed();
                    fail(new Error('Connection closed before the session was ready'));
                },
            };

            let connecting;
            try {
                connecting = this.ai.live.connect({
                    model: this.model,
                    config: liveConfig,
                    callbacks,
                });
            }
            catch (error) {
                fail(error);
                return;
            }
            Promise.resolve(connecting)
                .then((handle) => {
                    this.session = handle;
                    if (state === 'failed' || !this._isActive) {
                        // initialize() already gave up (or close() was requested meanwhile):
                        // nobody will use this connection, so release it instead of leaking it.
                        Promise.resolve()
                            .then(() => handle?.close?.())
                            .catch((error) => console.error('[Gemini] Failed to close unused live session:', error));
                        fail(new Error('Session closed before it was ready'));
                        return;
                    }
                    succeedIfReady();
                })
                .catch(fail);
        });
    }

    /**
     * Builds the `config` object passed to `ai.live.connect` from `this.config`
     * and `this.agent`.
     *
     * @returns {object} Connection configuration.
     */
    buildLiveConfig() {
        const tools = [];
        for (const toolGroup of this.config.tools || []) {
            if (toolGroup.functionDeclarations) {
                const functionDeclarations = toolGroup.functionDeclarations.map((func) => ({
                    name: func.name,
                    description: func.description,
                    parameters: convertParameterToGeminiFormat(func.parameters),
                }));
                tools.push({ functionDeclarations });
            }
            if (toolGroup.codeExecution) {
                tools.push({ codeExecution: {} });
            }
            if (toolGroup.googleSearch) {
                tools.push({ googleSearch: {} });
            }
        }

        const instructions = this.agent?.instructions;
        const systemInstruction = instructions ? { parts: [{ text: instructions }] } : undefined;

        // Only the first requested modality is honoured; anything but 'AUDIO' means text.
        const wantsAudio = this.config.responseModalities?.[0] === 'AUDIO';
        const liveConfig = {
            responseModalities: wantsAudio ? [Modality.AUDIO] : [Modality.TEXT],
            systemInstruction,
            tools: tools.length > 0 ? tools : undefined,
        };
        if (wantsAudio && this.config.speechConfig) {
            liveConfig.speechConfig = {
                voiceConfig: this.config.speechConfig.voiceConfig,
            };
        }
        if (this.config.realtimeInputConfig) {
            const detection = this.config.realtimeInputConfig.automaticActivityDetection;
            liveConfig.realtimeInputConfig = {
                automaticActivityDetection: detection ? { disabled: detection.disabled } : undefined,
            };
        }
        if (this.config.inputAudioTranscription) {
            liveConfig.inputAudioTranscription = true;
        }
        if (this.config.outputAudioTranscription) {
            liveConfig.outputAudioTranscription = true;
        }
        if (this.config.enableAffectiveDialog) {
            liveConfig.enableAffectiveDialog = true;
        }
        if (this.config.proactivity) {
            liveConfig.proactivity = this.config.proactivity;
        }
        return liveConfig;
    }

    /**
     * Translates one server message into zero or more events.
     *
     * @param {object} msg - Message delivered to the `onmessage` callback.
     */
    handleMessage(msg) {
        console.log('[Gemini] Received message:', this.stringifyForLog(msg));
        if (msg.error) {
            console.error('[Gemini] Error in message:', msg.error);
            this.pushEvent({
                type: 'error',
                timestamp: new Date().toISOString(),
                error: msg.error.message || JSON.stringify(msg.error),
                code: msg.error.code || 'UNKNOWN_ERROR',
                recoverable: false,
            });
            return;
        }

        const content = msg.serverContent;
        const parts = content?.modelTurn?.parts;
        if (parts) {
            // Tool calls are emitted after all audio/text events of the same message.
            const requestedCalls = [];
            for (const part of parts) {
                if (part.inlineData?.mimeType?.startsWith('audio/')) {
                    this.pushEvent({
                        type: 'audio_output',
                        timestamp: new Date().toISOString(),
                        data: part.inlineData.data,
                        format: {
                            sampleRate: 24000,
                            channels: 1,
                            encoding: 'pcm',
                        },
                    });
                }
                if (part.text) {
                    this.appendModelText(part.text);
                }
                if (part.functionCall) {
                    requestedCalls.push(part.functionCall);
                }
            }
            for (const functionCall of requestedCalls) {
                this.emitToolCall(functionCall);
            }
        }

        // NOTE(review): the Live API is understood to deliver function calls in a
        // dedicated top-level `toolCall` message as well — confirm against the API reference.
        if (Array.isArray(msg.toolCall?.functionCalls)) {
            for (const functionCall of msg.toolCall.functionCalls) {
                this.emitToolCall(functionCall);
            }
        }

        // `inputTranscription` is the documented field name (TODO confirm); the
        // legacy name is kept as a fallback.
        const heard = content?.inputTranscription ?? content?.inputAudioTranscription;
        if (heard) {
            const text = heard.text || heard.transcript || '';
            if (text) {
                this.pushEvent({
                    type: 'transcription_input',
                    timestamp: new Date().toISOString(),
                    text,
                });
            }
        }
        if (content?.outputTranscription) {
            const text = content.outputTranscription.text || '';
            if (text) {
                this.pushEvent({
                    type: 'transcription_output',
                    timestamp: new Date().toISOString(),
                    text,
                });
            }
        }

        if (content?.turnComplete && this.currentTurn) {
            const { role, text } = this.currentTurn;
            const message = role === 'model'
                ? { type: 'message', role: 'assistant', content: text, status: 'completed' }
                : { type: 'message', role: 'user', content: text };
            this.messageHistory.push(message);
            this.pushEvent({
                type: 'turn_complete',
                timestamp: new Date().toISOString(),
                role,
                message,
            });
            this.currentTurn = null;
        }

        if (content?.interrupted) {
            const cancelledToolCalls = (content.cancelledFunctionCalls || []).map((fc) => fc.id);
            this.pushEvent({
                type: 'interrupted',
                timestamp: new Date().toISOString(),
                cancelledToolCalls,
            });
        }

        if (msg.usageMetadata) {
            const usage = msg.usageMetadata;
            const inputTokens = usage.promptTokenCount || 0;
            // `responseTokenCount` is a fallback for Live usage reports — TODO confirm field name.
            const outputTokens = usage.candidatesTokenCount || usage.responseTokenCount || 0;
            const totalTokens = usage.totalTokenCount || 0;
            costTracker.addUsage({
                model: this.model,
                input_tokens: inputTokens,
                output_tokens: outputTokens,
                cached_tokens: usage.cachedContentTokenCount || 0,
                metadata: {
                    total_tokens: totalTokens,
                    source: 'gemini-live',
                },
            });
            this.pushEvent({
                type: 'cost_update',
                timestamp: new Date().toISOString(),
                usage: {
                    inputTokens,
                    outputTokens,
                    totalTokens,
                    // Costs are not computed here; the keys are kept so the event shape is stable.
                    inputCost: undefined,
                    outputCost: undefined,
                    totalCost: undefined,
                },
            });
        }
    }

    /**
     * Adds streamed model text to the current turn, starting a model turn
     * (and announcing it with `turn_start`) when none is in progress.
     *
     * @param {string} text - Text fragment from a model part.
     */
    appendModelText(text) {
        if (!this.currentTurn || this.currentTurn.role !== 'model') {
            this.currentTurn = { role: 'model', text: '' };
            this.pushEvent({
                type: 'turn_start',
                timestamp: new Date().toISOString(),
                role: 'model',
            });
        }
        this.currentTurn.text += text;
        this.pushEvent({
            type: 'text_delta',
            timestamp: new Date().toISOString(),
            delta: text,
        });
        this.pushEvent({
            type: 'message_delta',
            timestamp: new Date().toISOString(),
            delta: text,
        });
    }

    /**
     * Emits a `tool_call` event for one function call requested by the model.
     *
     * @param {{id?: string, name: string, args?: object}} functionCall
     */
    emitToolCall(functionCall) {
        const toolCall = {
            // Keep the server-issued id when present so that the id later sent back by
            // sendToolResponse() matches the call; generate one only as a fallback.
            id: functionCall.id || uuidv4(),
            type: 'function',
            function: {
                name: functionCall.name,
                // Always a JSON string, even when the call carries no arguments.
                arguments: JSON.stringify(functionCall.args ?? {}),
            },
        };
        this.pushEvent({
            type: 'tool_call',
            timestamp: new Date().toISOString(),
            toolCalls: [toolCall],
        });
    }

    /**
     * Sends one chunk of base64-encoded audio and emits an `audio_input` event
     * carrying the decoded byte size.
     *
     * @param {{data: string, mimeType: string}} audio
     * @throws {Error} When the session is not active, or when sending fails.
     */
    async sendAudio(audio) {
        if (!this.session || !this._isActive) {
            console.error(`[GeminiLiveSession ${this.sessionId}] Cannot send audio - session not active`);
            throw new Error('Session is not active');
        }
        console.log(`[GeminiLiveSession ${this.sessionId}] Sending audio: ${audio.data.length} chars (base64), mimeType: ${audio.mimeType}`);
        try {
            await this.session.sendRealtimeInput({
                media: {
                    mimeType: audio.mimeType,
                    data: audio.data,
                },
            });
            console.log(`[GeminiLiveSession ${this.sessionId}] Audio sent successfully`);
        }
        catch (error) {
            console.error(`[GeminiLiveSession ${this.sessionId}] Error sending audio:`, error);
            throw error;
        }
        // Four base64 characters encode three bytes; '=' padding characters encode none.
        let padding = 0;
        if (audio.data.endsWith('==')) {
            padding = 2;
        }
        else if (audio.data.endsWith('=')) {
            padding = 1;
        }
        const size = Math.floor((audio.data.length * 3) / 4) - padding;
        this.pushEvent({
            type: 'audio_input',
            timestamp: new Date().toISOString(),
            size,
        });
    }

    /**
     * Sends a text turn and emits `turn_start` for it.
     *
     * @param {string} text
     * @param {string} [role='user'] - 'assistant' is sent as 'model'; anything else as 'user'.
     * @throws {Error} When the session is not active.
     */
    async sendText(text, role = 'user') {
        if (!this.session || !this._isActive) {
            throw new Error('Session is not active');
        }
        const geminiRole = role === 'assistant' ? 'model' : 'user';
        await this.session.sendClientContent({
            turns: [{ role: geminiRole, parts: [{ text }] }],
        });
        this.pushEvent({
            type: 'turn_start',
            timestamp: new Date().toISOString(),
            role: geminiRole,
        });
    }

    /**
     * Reports tool results back to the model.
     *
     * @param {Array<object>} toolResults - Each entry provides `call_id` or `id`,
     *   `toolCall.function.name`, and either `error` or `output`.
     * @throws {Error} When the session is not active.
     */
    async sendToolResponse(toolResults) {
        if (!this.session || !this._isActive) {
            throw new Error('Session is not active');
        }
        const functionResponses = toolResults.map((result) => ({
            id: result.call_id || result.id,
            name: result.toolCall.function.name,
            response: result.error ? { error: result.error } : { result: result.output },
        }));
        await this.session.sendToolResponse({ functionResponses });
    }

    /**
     * Yields events in arrival order until the session has ended and every
     * queued event has been delivered.
     */
    async *getEventStream() {
        while (this._isActive || this.eventQueue.length > 0) {
            if (this.eventQueue.length > 0) {
                yield this.eventQueue.shift();
                continue;
            }
            // Nothing queued: park until pushEvent() or resolveAllWaitingEvents() wakes us.
            const next = await new Promise((resolve) => {
                if (this.sessionClosed && this.eventQueue.length === 0) {
                    resolve({ done: true, value: undefined });
                }
                else {
                    this.eventResolvers.push(resolve);
                }
            });
            if (next.done) {
                break;
            }
            if (next.value) {
                yield next.value;
            }
        }
    }

    /**
     * Ends the session. Safe to call repeatedly and before a connection exists;
     * consumers blocked in `getEventStream()` are always released.
     */
    async close() {
        const shouldCloseConnection = Boolean(this.session) && this._isActive;
        this._isActive = false;
        try {
            if (shouldCloseConnection) {
                await this.session.close();
            }
        }
        finally {
            // Do not rely on the transport's onclose callback to unblock consumers.
            this.markClosed();
        }
    }

    /** @returns {boolean} Whether the session still accepts input. */
    isActive() {
        return this._isActive;
    }

    /**
     * Delivers an event to the longest-waiting consumer, or queues it when
     * nobody is waiting.
     *
     * @param {object} event
     */
    pushEvent(event) {
        const waiting = this.eventResolvers.shift();
        if (waiting) {
            waiting({ value: event, done: false });
        }
        else {
            this.eventQueue.push(event);
        }
    }

    /** Tells every waiting consumer that no further events will arrive. */
    resolveAllWaitingEvents() {
        const waiters = this.eventResolvers;
        this.eventResolvers = [];
        for (const waiting of waiters) {
            waiting({ done: true, value: undefined });
        }
    }

    /** Marks the session as ended and releases waiting consumers. */
    markClosed() {
        this._isActive = false;
        this.sessionClosed = true;
        this.resolveAllWaitingEvents();
    }

    /**
     * Pretty-prints a value for logging without ever throwing.
     *
     * @param {unknown} value
     * @returns {string} Indented JSON, or `String(value)` when the value cannot be serialized.
     */
    stringifyForLog(value) {
        try {
            return JSON.stringify(value, null, 2);
        }
        catch {
            return String(value);
        }
    }
}
|
|
1217
1605
|
// Module-level GeminiProvider instance, created at import time and exported as `geminiProvider`.
export const geminiProvider = new GeminiProvider();
|
|
1218
1606
|
//# sourceMappingURL=gemini.js.map
|