@just-every/ensemble 0.2.78 → 0.2.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/dist/config/tool_execution.d.ts.map +1 -1
  2. package/dist/config/tool_execution.js +2 -11
  3. package/dist/config/tool_execution.js.map +1 -1
  4. package/dist/core/ensemble_embed.d.ts.map +1 -1
  5. package/dist/core/ensemble_embed.js +2 -4
  6. package/dist/core/ensemble_embed.js.map +1 -1
  7. package/dist/core/ensemble_image.d.ts.map +1 -1
  8. package/dist/core/ensemble_image.js +1 -1
  9. package/dist/core/ensemble_image.js.map +1 -1
  10. package/dist/core/ensemble_listen.d.ts.map +1 -1
  11. package/dist/core/ensemble_listen.js +3 -5
  12. package/dist/core/ensemble_listen.js.map +1 -1
  13. package/dist/core/ensemble_live.d.ts +14 -0
  14. package/dist/core/ensemble_live.d.ts.map +1 -0
  15. package/dist/core/ensemble_live.js +382 -0
  16. package/dist/core/ensemble_live.js.map +1 -0
  17. package/dist/core/ensemble_request.d.ts.map +1 -1
  18. package/dist/core/ensemble_request.js +5 -13
  19. package/dist/core/ensemble_request.js.map +1 -1
  20. package/dist/core/ensemble_voice.d.ts.map +1 -1
  21. package/dist/core/ensemble_voice.js +1 -1
  22. package/dist/core/ensemble_voice.js.map +1 -1
  23. package/dist/data/model_data.d.ts.map +1 -1
  24. package/dist/data/model_data.js +2 -11
  25. package/dist/data/model_data.js.map +1 -1
  26. package/dist/index.d.ts +6 -5
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +6 -5
  29. package/dist/index.js.map +1 -1
  30. package/dist/model_providers/base_provider.d.ts.map +1 -1
  31. package/dist/model_providers/base_provider.js +1 -1
  32. package/dist/model_providers/base_provider.js.map +1 -1
  33. package/dist/model_providers/claude.d.ts.map +1 -1
  34. package/dist/model_providers/claude.js +48 -101
  35. package/dist/model_providers/claude.js.map +1 -1
  36. package/dist/model_providers/deepseek.d.ts.map +1 -1
  37. package/dist/model_providers/deepseek.js +7 -18
  38. package/dist/model_providers/deepseek.js.map +1 -1
  39. package/dist/model_providers/elevenlabs.d.ts.map +1 -1
  40. package/dist/model_providers/elevenlabs.js +3 -7
  41. package/dist/model_providers/elevenlabs.js.map +1 -1
  42. package/dist/model_providers/gemini.d.ts +3 -4
  43. package/dist/model_providers/gemini.d.ts.map +1 -1
  44. package/dist/model_providers/gemini.js +499 -111
  45. package/dist/model_providers/gemini.js.map +1 -1
  46. package/dist/model_providers/grok.d.ts.map +1 -1
  47. package/dist/model_providers/grok.js +1 -2
  48. package/dist/model_providers/grok.js.map +1 -1
  49. package/dist/model_providers/model_provider.d.ts.map +1 -1
  50. package/dist/model_providers/model_provider.js +10 -20
  51. package/dist/model_providers/model_provider.js.map +1 -1
  52. package/dist/model_providers/openai.d.ts.map +1 -1
  53. package/dist/model_providers/openai.js +50 -100
  54. package/dist/model_providers/openai.js.map +1 -1
  55. package/dist/model_providers/openai_chat.d.ts.map +1 -1
  56. package/dist/model_providers/openai_chat.js +39 -72
  57. package/dist/model_providers/openai_chat.js.map +1 -1
  58. package/dist/model_providers/test_provider.d.ts.map +1 -1
  59. package/dist/model_providers/test_provider.js +7 -17
  60. package/dist/model_providers/test_provider.js.map +1 -1
  61. package/dist/tsconfig.tsbuildinfo +1 -1
  62. package/dist/types/errors.d.ts.map +1 -1
  63. package/dist/types/errors.js.map +1 -1
  64. package/dist/types/types.d.ts +158 -7
  65. package/dist/types/types.d.ts.map +1 -1
  66. package/dist/utils/agent.d.ts.map +1 -1
  67. package/dist/utils/agent.js +5 -16
  68. package/dist/utils/agent.js.map +1 -1
  69. package/dist/utils/citation_tracker.d.ts.map +1 -1
  70. package/dist/utils/citation_tracker.js.map +1 -1
  71. package/dist/utils/config_manager.d.ts.map +1 -1
  72. package/dist/utils/config_manager.js +12 -4
  73. package/dist/utils/config_manager.js.map +1 -1
  74. package/dist/utils/cost_tracker.d.ts.map +1 -1
  75. package/dist/utils/cost_tracker.js +13 -26
  76. package/dist/utils/cost_tracker.js.map +1 -1
  77. package/dist/utils/create_tool_function.d.ts.map +1 -1
  78. package/dist/utils/create_tool_function.js +4 -16
  79. package/dist/utils/create_tool_function.js.map +1 -1
  80. package/dist/utils/delta_buffer.d.ts.map +1 -1
  81. package/dist/utils/delta_buffer.js +1 -2
  82. package/dist/utils/delta_buffer.js.map +1 -1
  83. package/dist/utils/ensemble_result.d.ts.map +1 -1
  84. package/dist/utils/ensemble_result.js +9 -24
  85. package/dist/utils/ensemble_result.js.map +1 -1
  86. package/dist/utils/event_controller.d.ts.map +1 -1
  87. package/dist/utils/event_controller.js.map +1 -1
  88. package/dist/utils/external_models.d.ts.map +1 -1
  89. package/dist/utils/external_models.js.map +1 -1
  90. package/dist/utils/image_to_text.d.ts.map +1 -1
  91. package/dist/utils/image_to_text.js +1 -2
  92. package/dist/utils/image_to_text.js.map +1 -1
  93. package/dist/utils/image_utils.d.ts.map +1 -1
  94. package/dist/utils/image_utils.js.map +1 -1
  95. package/dist/utils/image_validation.d.ts.map +1 -1
  96. package/dist/utils/image_validation.js.map +1 -1
  97. package/dist/utils/llm_logger.d.ts.map +1 -1
  98. package/dist/utils/llm_logger.js.map +1 -1
  99. package/dist/utils/message_history.d.ts.map +1 -1
  100. package/dist/utils/message_history.js +9 -20
  101. package/dist/utils/message_history.js.map +1 -1
  102. package/dist/utils/model_class_config.d.ts.map +1 -1
  103. package/dist/utils/model_class_config.js +1 -1
  104. package/dist/utils/model_class_config.js.map +1 -1
  105. package/dist/utils/pause_controller.d.ts.map +1 -1
  106. package/dist/utils/pause_controller.js.map +1 -1
  107. package/dist/utils/quota_tracker.d.ts.map +1 -1
  108. package/dist/utils/quota_tracker.js +19 -49
  109. package/dist/utils/quota_tracker.js.map +1 -1
  110. package/dist/utils/retry_handler.d.ts.map +1 -1
  111. package/dist/utils/retry_handler.js.map +1 -1
  112. package/dist/utils/running_tool_tracker.d.ts.map +1 -1
  113. package/dist/utils/running_tool_tracker.js.map +1 -1
  114. package/dist/utils/sequential_queue.d.ts.map +1 -1
  115. package/dist/utils/sequential_queue.js.map +1 -1
  116. package/dist/utils/stream_handler.d.ts.map +1 -1
  117. package/dist/utils/stream_handler.js +1 -1
  118. package/dist/utils/stream_handler.js.map +1 -1
  119. package/dist/utils/summary_utils.d.ts.map +1 -1
  120. package/dist/utils/summary_utils.js +3 -8
  121. package/dist/utils/summary_utils.js.map +1 -1
  122. package/dist/utils/test_utils.d.ts.map +1 -1
  123. package/dist/utils/test_utils.js +1 -3
  124. package/dist/utils/test_utils.js.map +1 -1
  125. package/dist/utils/tool_execution_manager.d.ts.map +1 -1
  126. package/dist/utils/tool_execution_manager.js +3 -9
  127. package/dist/utils/tool_execution_manager.js.map +1 -1
  128. package/dist/utils/tool_parameter_utils.d.ts.map +1 -1
  129. package/dist/utils/tool_parameter_utils.js +2 -6
  130. package/dist/utils/tool_parameter_utils.js.map +1 -1
  131. package/dist/utils/tool_result_processor.d.ts.map +1 -1
  132. package/dist/utils/tool_result_processor.js +7 -18
  133. package/dist/utils/tool_result_processor.js.map +1 -1
  134. package/dist/utils/verification.d.ts.map +1 -1
  135. package/dist/utils/verification.js.map +1 -1
  136. package/package.json +4 -2
@@ -2,9 +2,9 @@ import { GoogleGenAI, Type, FunctionCallingConfigMode, Modality, } from '@google
2
2
  import { v4 as uuidv4 } from 'uuid';
3
3
  import { BaseModelProvider } from './base_provider.js';
4
4
  import { costTracker } from '../index.js';
5
- import { log_llm_error, log_llm_request, log_llm_response, } from '../utils/llm_logger.js';
5
+ import { log_llm_error, log_llm_request, log_llm_response } from '../utils/llm_logger.js';
6
6
  import { isPaused } from '../utils/pause_controller.js';
7
- import { appendMessageWithImage, resizeAndTruncateForGemini, } from '../utils/image_utils.js';
7
+ import { appendMessageWithImage, resizeAndTruncateForGemini } from '../utils/image_utils.js';
8
8
  function convertParameterToGeminiFormat(param) {
9
9
  let type = Type.STRING;
10
10
  switch (param.type) {
@@ -85,8 +85,7 @@ function convertParameterToGeminiFormat(param) {
85
85
  if (param.properties && typeof param.properties === 'object') {
86
86
  result.properties = {};
87
87
  for (const [propName, propSchema] of Object.entries(param.properties)) {
88
- result.properties[propName] =
89
- convertParameterToGeminiFormat(propSchema);
88
+ result.properties[propName] = convertParameterToGeminiFormat(propSchema);
90
89
  }
91
90
  }
92
91
  else {
@@ -161,9 +160,7 @@ async function convertToGeminiFunctionDeclarations(tools) {
161
160
  parameters: {
162
161
  type: Type.OBJECT,
163
162
  properties,
164
- required: Array.isArray(resolvedParams?.required)
165
- ? resolvedParams.required
166
- : [],
163
+ required: Array.isArray(resolvedParams?.required) ? resolvedParams.required : [],
167
164
  },
168
165
  };
169
166
  }));
@@ -218,10 +215,7 @@ async function convertToGeminiContents(model, messages) {
218
215
  let args = {};
219
216
  try {
220
217
  const parsedArgs = JSON.parse(msg.arguments || '{}');
221
- args =
222
- typeof parsedArgs === 'object' && parsedArgs !== null
223
- ? parsedArgs
224
- : { value: parsedArgs };
218
+ args = typeof parsedArgs === 'object' && parsedArgs !== null ? parsedArgs : { value: parsedArgs };
225
219
  }
226
220
  catch (e) {
227
221
  console.error(`Failed to parse function call arguments for ${msg.name}:`, msg.arguments, e);
@@ -264,8 +258,7 @@ async function convertToGeminiContents(model, messages) {
264
258
  contents = await appendMessageWithImage(model, contents, message, {
265
259
  read: () => textOutput,
266
260
  write: value => {
267
- message.parts[0].functionResponse.response.content =
268
- value;
261
+ message.parts[0].functionResponse.response.content = value;
269
262
  return message;
270
263
  },
271
264
  }, addImagesToInput);
@@ -275,9 +268,7 @@ async function convertToGeminiContents(model, messages) {
275
268
  if (typeof msg.content === 'string') {
276
269
  textContent = msg.content;
277
270
  }
278
- else if (msg.content &&
279
- typeof msg.content === 'object' &&
280
- 'text' in msg.content) {
271
+ else if (msg.content && typeof msg.content === 'object' && 'text' in msg.content) {
281
272
  textContent = msg.content.text;
282
273
  }
283
274
  else {
@@ -326,15 +317,14 @@ export class GeminiProvider extends BaseModelProvider {
326
317
  this._client = new GoogleGenAI({
327
318
  apiKey: apiKey,
328
319
  vertexai: false,
320
+ httpOptions: { apiVersion: 'v1alpha' },
329
321
  });
330
322
  }
331
323
  return this._client;
332
324
  }
333
325
  async createEmbedding(input, model, opts) {
334
326
  try {
335
- let actualModelId = model.startsWith('gemini/')
336
- ? model.substring(7)
337
- : model;
327
+ let actualModelId = model.startsWith('gemini/') ? model.substring(7) : model;
338
328
  let thinkingConfig = null;
339
329
  for (const [suffix, budget] of Object.entries(THINKING_BUDGET_CONFIGS)) {
340
330
  if (actualModelId.endsWith(suffix)) {
@@ -355,9 +345,7 @@ export class GeminiProvider extends BaseModelProvider {
355
345
  payload.config.thinkingConfig = thinkingConfig;
356
346
  }
357
347
  const response = await this.client.models.embedContent(payload);
358
- console.log('[Gemini] Embedding response structure:', JSON.stringify(response, (key, value) => key === 'values' &&
359
- Array.isArray(value) &&
360
- value.length > 10
348
+ console.log('[Gemini] Embedding response structure:', JSON.stringify(response, (key, value) => key === 'values' && Array.isArray(value) && value.length > 10
361
349
  ? `[${value.length} items]`
362
350
  : value, 2));
363
351
  if (!response.embeddings || !Array.isArray(response.embeddings)) {
@@ -376,11 +364,8 @@ export class GeminiProvider extends BaseModelProvider {
376
364
  }
377
365
  else {
378
366
  console.warn('[Gemini] Could not find expected "values" property in embeddings response');
379
- extractedValues =
380
- response.embeddings;
381
- dimensions = Array.isArray(extractedValues[0])
382
- ? extractedValues[0].length
383
- : 0;
367
+ extractedValues = response.embeddings;
368
+ dimensions = Array.isArray(extractedValues[0]) ? extractedValues[0].length : 0;
384
369
  }
385
370
  }
386
371
  costTracker.addUsage({
@@ -396,8 +381,7 @@ export class GeminiProvider extends BaseModelProvider {
396
381
  }
397
382
  else {
398
383
  let result;
399
- if (Array.isArray(extractedValues) &&
400
- extractedValues.length >= 1) {
384
+ if (Array.isArray(extractedValues) && extractedValues.length >= 1) {
401
385
  const firstValue = extractedValues[0];
402
386
  if (Array.isArray(firstValue)) {
403
387
  result = firstValue;
@@ -417,10 +401,7 @@ export class GeminiProvider extends BaseModelProvider {
417
401
  adjustedResult = result.slice(0, 3072);
418
402
  }
419
403
  else {
420
- adjustedResult = [
421
- ...result,
422
- ...Array(3072 - result.length).fill(0),
423
- ];
404
+ adjustedResult = [...result, ...Array(3072 - result.length).fill(0)];
424
405
  }
425
406
  }
426
407
  return adjustedResult;
@@ -444,8 +425,7 @@ export class GeminiProvider extends BaseModelProvider {
444
425
  catch (error) {
445
426
  attempts++;
446
427
  const errorMsg = error instanceof Error ? error.message : String(error);
447
- if (errorMsg.includes('Incomplete JSON segment') &&
448
- attempts <= maxRetries) {
428
+ if (errorMsg.includes('Incomplete JSON segment') && attempts <= maxRetries) {
449
429
  console.warn(`[Gemini] Incomplete JSON segment error, retrying (${attempts}/${maxRetries})...`);
450
430
  await new Promise(resolve => setTimeout(resolve, 1000 * attempts));
451
431
  continue;
@@ -456,9 +436,7 @@ export class GeminiProvider extends BaseModelProvider {
456
436
  }
457
437
  async *createResponseStream(messages, model, agent) {
458
438
  const { getToolsFromAgent } = await import('../utils/agent.js');
459
- const tools = agent
460
- ? await getToolsFromAgent(agent)
461
- : [];
439
+ const tools = agent ? await getToolsFromAgent(agent) : [];
462
440
  const settings = agent?.modelSettings;
463
441
  let messageId = uuidv4();
464
442
  let contentBuffer = '';
@@ -526,8 +504,7 @@ export class GeminiProvider extends BaseModelProvider {
526
504
  if ('additionalProperties' in obj) {
527
505
  delete obj.additionalProperties;
528
506
  }
529
- if (obj.properties &&
530
- typeof obj.properties === 'object') {
507
+ if (obj.properties && typeof obj.properties === 'object') {
531
508
  Object.values(obj.properties).forEach(prop => {
532
509
  removeAdditionalProperties(prop);
533
510
  });
@@ -559,9 +536,7 @@ export class GeminiProvider extends BaseModelProvider {
559
536
  settings.tool_choice?.type === 'function' &&
560
537
  settings.tool_choice?.function?.name) {
561
538
  toolChoice = FunctionCallingConfigMode.ANY;
562
- allowedFunctionNames = [
563
- settings.tool_choice.function.name,
564
- ];
539
+ allowedFunctionNames = [settings.tool_choice.function.name];
565
540
  }
566
541
  else if (settings.tool_choice === 'required') {
567
542
  toolChoice = FunctionCallingConfigMode.ANY;
@@ -579,8 +554,7 @@ export class GeminiProvider extends BaseModelProvider {
579
554
  },
580
555
  };
581
556
  if (allowedFunctionNames.length > 0) {
582
- config.toolConfig.functionCallingConfig.allowedFunctionNames =
583
- allowedFunctionNames;
557
+ config.toolConfig.functionCallingConfig.allowedFunctionNames = allowedFunctionNames;
584
558
  }
585
559
  }
586
560
  }
@@ -697,8 +671,7 @@ export class GeminiProvider extends BaseModelProvider {
697
671
  message_id: messageId,
698
672
  order: eventOrder++,
699
673
  };
700
- contentBuffer +=
701
- '\n\nSearch Results:\n' + formatted + '\n';
674
+ contentBuffer += '\n\nSearch Results:\n' + formatted + '\n';
702
675
  }
703
676
  }
704
677
  }
@@ -743,9 +716,7 @@ export class GeminiProvider extends BaseModelProvider {
743
716
  }
744
717
  catch (error) {
745
718
  log_llm_error(requestId, error);
746
- const errorMessage = error instanceof Error
747
- ? error.stack || error.message
748
- : String(error);
719
+ const errorMessage = error instanceof Error ? error.stack || error.message : String(error);
749
720
  if (errorMessage.includes('Incomplete JSON segment')) {
750
721
  console.error('[Gemini] Stream terminated with incomplete JSON. This may indicate network issues or timeouts.');
751
722
  }
@@ -796,8 +767,7 @@ export class GeminiProvider extends BaseModelProvider {
796
767
  },
797
768
  });
798
769
  const images = [];
799
- if (response.generatedImages &&
800
- response.generatedImages.length > 0) {
770
+ if (response.generatedImages && response.generatedImages.length > 0) {
801
771
  for (const generatedImage of response.generatedImages) {
802
772
  if (generatedImage.image?.imageBytes) {
803
773
  const base64Image = `data:image/png;base64,${generatedImage.image.imageBytes}`;
@@ -879,14 +849,12 @@ export class GeminiProvider extends BaseModelProvider {
879
849
  throw new Error('No audio generated from Gemini TTS');
880
850
  }
881
851
  const candidate = response.candidates[0];
882
- if (!candidate.content.parts ||
883
- candidate.content.parts.length === 0) {
852
+ if (!candidate.content.parts || candidate.content.parts.length === 0) {
884
853
  throw new Error('No audio parts in Gemini TTS response');
885
854
  }
886
855
  let audioData;
887
856
  for (const part of candidate.content.parts) {
888
- if (part.inlineData &&
889
- part.inlineData.mimeType?.includes('audio')) {
857
+ if (part.inlineData && part.inlineData.mimeType?.includes('audio')) {
890
858
  audioData = part.inlineData.data;
891
859
  break;
892
860
  }
@@ -974,40 +942,59 @@ export class GeminiProvider extends BaseModelProvider {
974
942
  console.warn(`[Gemini] Unknown voice '${voice}', using default voice 'Kore'`);
975
943
  return 'Kore';
976
944
  }
977
- async *createTranscription(audio, model, opts) {
945
+ async *createTranscription(audio, agent, model, opts) {
978
946
  let session = null;
979
947
  let audioBuffer = Buffer.alloc(0);
980
948
  let isConnected = false;
981
949
  try {
982
- const ai = new GoogleGenAI({ apiKey: this.apiKey });
983
- const realtimeConfig = opts?.realtimeConfig
984
- ?.automaticActivityDetection || {
985
- prefixPaddingMs: 20,
986
- silenceDurationMs: 100,
950
+ const ai = new GoogleGenAI({
951
+ apiKey: this.apiKey,
952
+ httpOptions: { apiVersion: 'v1alpha' },
953
+ });
954
+ const realtimeInputConfig = opts?.realtimeInputConfig || {
955
+ automaticActivityDetection: {
956
+ disabled: false,
957
+ startOfSpeechSensitivity: 'START_SENSITIVITY_HIGH',
958
+ endOfSpeechSensitivity: 'END_SENSITIVITY_LOW',
959
+ },
960
+ };
961
+ const speechConfig = opts?.speechConfig || {
962
+ languageCode: 'en-US',
987
963
  };
988
- const systemInstruction = opts?.agent?.instructions ||
964
+ const systemInstruction = agent.instructions ||
989
965
  `You are a real-time transcription assistant. Your only task is to transcribe speech as you hear it. DO NOT ADD YOUR OWN RESPONSE OR COMMENTARY. TRANSCRIBE WHAT YOU HEAR ONLY.
990
966
  Respond immediately with transcribed text as you process the audio.
991
967
  If quick corrections are used e.g. "Let's go to Point A, no Point B" then just remove incorrect part e.g. respond with "Let's go to Point B".
992
- When it makes the transcription clearer, remove filler words (like "um") add punctuation, correct obvious grammar issues and add in missing words.`;
968
+ When it makes the transcription clearer, remove filler words (like "um") add punctuation, correct obvious grammar issues and add in missing words.
969
+
970
+ EXAMPLES:
971
+ User: What capital of France
972
+ Model: What's the capital of France?
973
+
974
+ User: How about um we then do no actually how about you tell me the weather
975
+ Model: How about you tell me the weather?
976
+
977
+ User: Ok ignore all that lets start again
978
+ Model: Ok ignore all that, let's start again.`;
993
979
  console.log('[Gemini] Connecting to Live API for transcription...');
994
980
  const connectionPromise = new Promise((resolve, reject) => {
995
981
  const timeout = setTimeout(() => {
996
982
  reject(new Error('Connection timeout'));
997
983
  }, 30000);
984
+ const config = {
985
+ responseModalities: [Modality.TEXT],
986
+ speechConfig,
987
+ realtimeInputConfig,
988
+ systemInstruction: {
989
+ parts: [{ text: systemInstruction }],
990
+ },
991
+ inputAudioTranscription: {},
992
+ };
993
+ console.dir(config, { depth: null });
998
994
  ai.live
999
995
  .connect({
1000
996
  model: model,
1001
- config: {
1002
- responseModalities: [Modality.TEXT],
1003
- systemInstruction: {
1004
- parts: [{ text: systemInstruction }],
1005
- },
1006
- realtimeInputConfig: {
1007
- automaticActivityDetection: realtimeConfig,
1008
- },
1009
- inputAudioTranscription: true,
1010
- },
997
+ config,
1011
998
  callbacks: {
1012
999
  onopen: () => {
1013
1000
  clearTimeout(timeout);
@@ -1016,27 +1003,18 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
1016
1003
  resolve();
1017
1004
  },
1018
1005
  onmessage: async (msg) => {
1019
- if (msg.serverContent?.inputAudioTranscription) {
1020
- const transcriptionText = msg.serverContent
1021
- .inputAudioTranscription.text ||
1022
- msg.serverContent
1023
- .inputAudioTranscription
1024
- .transcript ||
1025
- '';
1026
- if (transcriptionText) {
1027
- const previewEvent = {
1028
- type: 'transcription_preview',
1029
- timestamp: new Date().toISOString(),
1030
- text: transcriptionText,
1031
- isFinal: true,
1032
- };
1033
- transcriptEvents.push(previewEvent);
1034
- console.debug('[Gemini] Received input transcription:', transcriptionText);
1035
- }
1006
+ if (msg.serverContent?.inputTranscription?.text) {
1007
+ const previewEvent = {
1008
+ type: 'transcription_preview',
1009
+ timestamp: new Date().toISOString(),
1010
+ text: msg.serverContent.inputTranscription.text,
1011
+ isFinal: true,
1012
+ };
1013
+ transcriptEvents.push(previewEvent);
1014
+ console.debug('[Gemini] Received input transcription:', msg.serverContent.inputTranscription.text);
1036
1015
  }
1037
1016
  if (msg.serverContent?.modelTurn?.parts) {
1038
- for (const part of msg.serverContent
1039
- .modelTurn.parts) {
1017
+ for (const part of msg.serverContent.modelTurn.parts) {
1040
1018
  if (part.text && part.text.trim()) {
1041
1019
  const deltaEvent = {
1042
1020
  type: 'transcription_delta',
@@ -1058,26 +1036,34 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
1058
1036
  if (msg.usageMetadata) {
1059
1037
  costTracker.addUsage({
1060
1038
  model: model,
1061
- input_tokens: msg.usageMetadata
1062
- .promptTokenCount || 0,
1063
- output_tokens: msg.usageMetadata
1064
- .responseTokenCount || 0,
1039
+ input_tokens: msg.usageMetadata.promptTokenCount || 0,
1040
+ output_tokens: msg.usageMetadata.responseTokenCount || 0,
1065
1041
  input_modality: 'audio',
1066
1042
  output_modality: 'text',
1067
1043
  metadata: {
1068
- totalTokens: msg.usageMetadata
1069
- .totalTokenCount || 0,
1044
+ totalTokens: msg.usageMetadata.totalTokenCount || 0,
1070
1045
  source: 'gemini-live-transcription',
1071
1046
  },
1072
1047
  });
1073
1048
  }
1074
1049
  },
1075
1050
  onerror: (err) => {
1076
- console.error('[Gemini] Live API error:', err);
1051
+ console.error('[Gemini] Live API error:', {
1052
+ code: err.code,
1053
+ reason: err.reason,
1054
+ wasClean: err.wasClean,
1055
+ });
1077
1056
  connectionError = err;
1078
1057
  },
1079
- onclose: () => {
1058
+ onclose: (event) => {
1080
1059
  console.log('[Gemini] Live session closed');
1060
+ if (event) {
1061
+ console.log('[Gemini] Close event details:', {
1062
+ code: event.code,
1063
+ reason: event.reason,
1064
+ wasClean: event.wasClean,
1065
+ });
1066
+ }
1081
1067
  isConnected = false;
1082
1068
  },
1083
1069
  },
@@ -1124,10 +1110,7 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
1124
1110
  if (done)
1125
1111
  break;
1126
1112
  if (value) {
1127
- audioBuffer = Buffer.concat([
1128
- audioBuffer,
1129
- Buffer.from(value),
1130
- ]);
1113
+ audioBuffer = Buffer.concat([audioBuffer, Buffer.from(value)]);
1131
1114
  while (audioBuffer.length >= chunkSize) {
1132
1115
  const chunk = audioBuffer.slice(0, chunkSize);
1133
1116
  audioBuffer = audioBuffer.slice(chunkSize);
@@ -1170,21 +1153,34 @@ When it makes the transcription clearer, remove filler words (like "um") add pun
1170
1153
  const errorEvent = {
1171
1154
  type: 'error',
1172
1155
  timestamp: new Date().toISOString(),
1173
- error: error instanceof Error
1174
- ? error.message
1175
- : 'Transcription failed',
1156
+ error: error instanceof Error ? error.message : 'Transcription failed',
1176
1157
  };
1177
1158
  yield errorEvent;
1178
1159
  }
1179
1160
  }
1161
+ async createLiveSession(config, agent, model, opts) {
1162
+ console.log(`[Gemini] Creating Live session with model ${model}`);
1163
+ const liveModels = [
1164
+ 'gemini-2.0-flash-live-001',
1165
+ 'gemini-live-2.5-flash-preview',
1166
+ 'gemini-2.5-flash-preview-native-audio-dialog',
1167
+ 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
1168
+ 'gemini-2.0-flash-exp',
1169
+ ];
1170
+ if (!liveModels.some(m => model.includes(m))) {
1171
+ throw new Error(`Model ${model} does not support Live API. Supported models: ${liveModels.join(', ')}`);
1172
+ }
1173
+ const sessionId = uuidv4();
1174
+ const liveSession = new GeminiLiveSession(sessionId, this.client, model, config, agent, opts);
1175
+ await liveSession.initialize();
1176
+ return liveSession;
1177
+ }
1180
1178
  }
1181
1179
  function normalizeAudioSource(source) {
1182
1180
  if (source instanceof ReadableStream) {
1183
1181
  return source;
1184
1182
  }
1185
- if (typeof source === 'object' &&
1186
- source !== null &&
1187
- Symbol.asyncIterator in source) {
1183
+ if (typeof source === 'object' && source !== null && Symbol.asyncIterator in source) {
1188
1184
  return new ReadableStream({
1189
1185
  async start(controller) {
1190
1186
  try {
@@ -1214,5 +1210,397 @@ function normalizeAudioSource(source) {
1214
1210
  }
1215
1211
  throw new Error(`Unsupported audio source type: ${typeof source}`);
1216
1212
  }
1213
+ class GeminiLiveSession {
1214
+ sessionId;
1215
+ ai;
1216
+ model;
1217
+ config;
1218
+ agent;
1219
+ options;
1220
+ session = null;
1221
+ eventQueue = [];
1222
+ eventResolvers = [];
1223
+ _isActive = true;
1224
+ sessionClosed = false;
1225
+ messageHistory = [];
1226
+ currentTurn = null;
1227
+ constructor(sessionId, ai, model, config, agent, options) {
1228
+ this.sessionId = sessionId;
1229
+ this.ai = ai;
1230
+ this.model = model;
1231
+ this.config = config;
1232
+ this.agent = agent;
1233
+ this.options = options;
1234
+ }
1235
+ async initialize() {
1236
+ const connectionPromise = new Promise((resolve, reject) => {
1237
+ const timeout = setTimeout(() => {
1238
+ reject(new Error('Connection timeout'));
1239
+ }, 30000);
1240
+ const tools = [];
1241
+ if (this.config.tools) {
1242
+ for (const toolGroup of this.config.tools) {
1243
+ if (toolGroup.functionDeclarations) {
1244
+ const functionDeclarations = toolGroup.functionDeclarations.map(func => ({
1245
+ name: func.name,
1246
+ description: func.description,
1247
+ parameters: convertParameterToGeminiFormat(func.parameters),
1248
+ }));
1249
+ tools.push({ functionDeclarations });
1250
+ }
1251
+ if (toolGroup.codeExecution) {
1252
+ tools.push({ codeExecution: {} });
1253
+ }
1254
+ if (toolGroup.googleSearch) {
1255
+ tools.push({ googleSearch: {} });
1256
+ }
1257
+ }
1258
+ }
1259
+ let systemInstruction = undefined;
1260
+ if (this.agent.instructions) {
1261
+ systemInstruction = {
1262
+ parts: [{ text: this.agent.instructions }],
1263
+ };
1264
+ }
1265
+ const responseModalities = this.config.responseModalities[0] === 'AUDIO' ? [Modality.AUDIO] : [Modality.TEXT];
1266
+ const config = {
1267
+ responseModalities,
1268
+ systemInstruction,
1269
+ tools: tools.length > 0 ? tools : undefined,
1270
+ };
1271
+ if (this.config.responseModalities[0] === 'AUDIO' && this.config.speechConfig) {
1272
+ config.speechConfig = {
1273
+ voiceConfig: this.config.speechConfig.voiceConfig,
1274
+ };
1275
+ }
1276
+ if (this.config.realtimeInputConfig) {
1277
+ config.realtimeInputConfig = {
1278
+ automaticActivityDetection: this.config.realtimeInputConfig.automaticActivityDetection
1279
+ ? {
1280
+ disabled: this.config.realtimeInputConfig.automaticActivityDetection.disabled,
1281
+ }
1282
+ : undefined,
1283
+ };
1284
+ }
1285
+ if (this.config.inputAudioTranscription) {
1286
+ config.inputAudioTranscription = true;
1287
+ }
1288
+ if (this.config.outputAudioTranscription) {
1289
+ config.outputAudioTranscription = true;
1290
+ }
1291
+ if (this.config.enableAffectiveDialog) {
1292
+ config.enableAffectiveDialog = true;
1293
+ }
1294
+ if (this.config.proactivity) {
1295
+ config.proactivity = this.config.proactivity;
1296
+ }
1297
+ console.log('[Gemini] Connecting with config:', JSON.stringify(config, null, 2));
1298
+ this.ai.live
1299
+ .connect({
1300
+ model: this.model,
1301
+ config,
1302
+ callbacks: {
1303
+ onopen: () => {
1304
+ clearTimeout(timeout);
1305
+ console.log('[Gemini] Live session connected');
1306
+ this.pushEvent({
1307
+ type: 'live_ready',
1308
+ timestamp: new Date().toISOString(),
1309
+ });
1310
+ resolve();
1311
+ },
1312
+ onmessage: (msg) => {
1313
+ this.handleMessage(msg);
1314
+ },
1315
+ onerror: (err) => {
1316
+ console.error('[Gemini] Live API error:', err);
1317
+ console.error('[Gemini] Error details:', JSON.stringify(err, null, 2));
1318
+ this.pushEvent({
1319
+ type: 'error',
1320
+ timestamp: new Date().toISOString(),
1321
+ error: err.message || String(err),
1322
+ code: err.code,
1323
+ recoverable: true,
1324
+ });
1325
+ },
1326
+ onclose: (event) => {
1327
+ console.log('[Gemini] Live session closed', event);
1328
+ if (event) {
1329
+ console.log('[Gemini] Close event details:', {
1330
+ code: event.code,
1331
+ reason: event.reason,
1332
+ wasClean: event.wasClean,
1333
+ });
1334
+ }
1335
+ this._isActive = false;
1336
+ this.sessionClosed = true;
1337
+ this.resolveAllWaitingEvents();
1338
+ },
1339
+ },
1340
+ })
1341
+ .then(s => {
1342
+ this.session = s;
1343
+ });
1344
+ });
1345
+ await connectionPromise;
1346
+ }
1347
+ handleMessage(msg) {
1348
+ console.log('[Gemini] Received message:', JSON.stringify(msg, null, 2));
1349
+ if (msg.error) {
1350
+ console.error('[Gemini] Error in message:', msg.error);
1351
+ this.pushEvent({
1352
+ type: 'error',
1353
+ timestamp: new Date().toISOString(),
1354
+ error: msg.error.message || JSON.stringify(msg.error),
1355
+ code: msg.error.code || 'UNKNOWN_ERROR',
1356
+ recoverable: false,
1357
+ });
1358
+ return;
1359
+ }
1360
+ if (msg.serverContent?.modelTurn?.parts) {
1361
+ for (const part of msg.serverContent.modelTurn.parts) {
1362
+ if (part.inlineData?.mimeType?.startsWith('audio/')) {
1363
+ this.pushEvent({
1364
+ type: 'audio_output',
1365
+ timestamp: new Date().toISOString(),
1366
+ data: part.inlineData.data,
1367
+ format: {
1368
+ sampleRate: 24000,
1369
+ channels: 1,
1370
+ encoding: 'pcm',
1371
+ },
1372
+ });
1373
+ }
1374
+ if (part.text) {
1375
+ if (!this.currentTurn || this.currentTurn.role !== 'model') {
1376
+ this.currentTurn = { role: 'model', text: '' };
1377
+ this.pushEvent({
1378
+ type: 'turn_start',
1379
+ timestamp: new Date().toISOString(),
1380
+ role: 'model',
1381
+ });
1382
+ }
1383
+ this.currentTurn.text += part.text;
1384
+ this.pushEvent({
1385
+ type: 'text_delta',
1386
+ timestamp: new Date().toISOString(),
1387
+ delta: part.text,
1388
+ });
1389
+ this.pushEvent({
1390
+ type: 'message_delta',
1391
+ timestamp: new Date().toISOString(),
1392
+ delta: part.text,
1393
+ });
1394
+ }
1395
+ }
1396
+ }
1397
+ if (msg.serverContent?.modelTurn?.parts) {
1398
+ for (const part of msg.serverContent.modelTurn.parts) {
1399
+ if (part.functionCall) {
1400
+ const toolCall = {
1401
+ id: uuidv4(),
1402
+ type: 'function',
1403
+ function: {
1404
+ name: part.functionCall.name,
1405
+ arguments: JSON.stringify(part.functionCall.args),
1406
+ },
1407
+ };
1408
+ this.pushEvent({
1409
+ type: 'tool_call',
1410
+ timestamp: new Date().toISOString(),
1411
+ toolCalls: [toolCall],
1412
+ });
1413
+ }
1414
+ }
1415
+ }
1416
+ if (msg.serverContent?.inputAudioTranscription) {
1417
+ const text = msg.serverContent.inputAudioTranscription.text ||
1418
+ msg.serverContent.inputAudioTranscription.transcript ||
1419
+ '';
1420
+ if (text) {
1421
+ this.pushEvent({
1422
+ type: 'transcription_input',
1423
+ timestamp: new Date().toISOString(),
1424
+ text,
1425
+ });
1426
+ }
1427
+ }
1428
+ if (msg.serverContent?.outputTranscription) {
1429
+ const text = msg.serverContent.outputTranscription.text || '';
1430
+ if (text) {
1431
+ this.pushEvent({
1432
+ type: 'transcription_output',
1433
+ timestamp: new Date().toISOString(),
1434
+ text,
1435
+ });
1436
+ }
1437
+ }
1438
+ if (msg.serverContent?.turnComplete) {
1439
+ if (this.currentTurn) {
1440
+ const message = this.currentTurn.role === 'model'
1441
+ ? {
1442
+ type: 'message',
1443
+ role: 'assistant',
1444
+ content: this.currentTurn.text,
1445
+ status: 'completed',
1446
+ }
1447
+ : {
1448
+ type: 'message',
1449
+ role: 'user',
1450
+ content: this.currentTurn.text,
1451
+ };
1452
+ this.messageHistory.push(message);
1453
+ this.pushEvent({
1454
+ type: 'turn_complete',
1455
+ timestamp: new Date().toISOString(),
1456
+ role: this.currentTurn.role,
1457
+ message,
1458
+ });
1459
+ this.currentTurn = null;
1460
+ }
1461
+ }
1462
+ if (msg.serverContent?.interrupted) {
1463
+ const cancelledToolCalls = [];
1464
+ if (msg.serverContent.cancelledFunctionCalls) {
1465
+ cancelledToolCalls.push(...msg.serverContent.cancelledFunctionCalls.map((fc) => fc.id));
1466
+ }
1467
+ this.pushEvent({
1468
+ type: 'interrupted',
1469
+ timestamp: new Date().toISOString(),
1470
+ cancelledToolCalls,
1471
+ });
1472
+ }
1473
+ if (msg.usageMetadata) {
1474
+ const usage = msg.usageMetadata;
1475
+ const inputTokens = usage.promptTokenCount || 0;
1476
+ const outputTokens = usage.candidatesTokenCount || 0;
1477
+ const totalTokens = usage.totalTokenCount || 0;
1478
+ costTracker.addUsage({
1479
+ model: this.model,
1480
+ input_tokens: inputTokens,
1481
+ output_tokens: outputTokens,
1482
+ cached_tokens: usage.cachedContentTokenCount || 0,
1483
+ metadata: {
1484
+ total_tokens: totalTokens,
1485
+ source: 'gemini-live',
1486
+ },
1487
+ });
1488
+ const inputCost = undefined;
1489
+ const outputCost = undefined;
1490
+ const totalCost = undefined;
1491
+ this.pushEvent({
1492
+ type: 'cost_update',
1493
+ timestamp: new Date().toISOString(),
1494
+ usage: {
1495
+ inputTokens,
1496
+ outputTokens,
1497
+ totalTokens,
1498
+ inputCost,
1499
+ outputCost,
1500
+ totalCost,
1501
+ },
1502
+ });
1503
+ }
1504
+ }
1505
+ async sendAudio(audio) {
1506
+ if (!this.session || !this._isActive) {
1507
+ console.error(`[GeminiLiveSession ${this.sessionId}] Cannot send audio - session not active`);
1508
+ throw new Error('Session is not active');
1509
+ }
1510
+ console.log(`[GeminiLiveSession ${this.sessionId}] Sending audio: ${audio.data.length} chars (base64), mimeType: ${audio.mimeType}`);
1511
+ try {
1512
+ await this.session.sendRealtimeInput({
1513
+ media: {
1514
+ mimeType: audio.mimeType,
1515
+ data: audio.data,
1516
+ },
1517
+ });
1518
+ console.log(`[GeminiLiveSession ${this.sessionId}] Audio sent successfully`);
1519
+ }
1520
+ catch (error) {
1521
+ console.error(`[GeminiLiveSession ${this.sessionId}] Error sending audio:`, error);
1522
+ throw error;
1523
+ }
1524
+ const size = Math.ceil((audio.data.length * 3) / 4);
1525
+ this.pushEvent({
1526
+ type: 'audio_input',
1527
+ timestamp: new Date().toISOString(),
1528
+ size,
1529
+ });
1530
+ }
1531
+ async sendText(text, role = 'user') {
1532
+ if (!this.session || !this._isActive) {
1533
+ throw new Error('Session is not active');
1534
+ }
1535
+ const message = {
1536
+ role: role === 'assistant' ? 'model' : 'user',
1537
+ parts: [{ text }],
1538
+ };
1539
+ await this.session.sendClientContent({
1540
+ turns: [message],
1541
+ });
1542
+ this.pushEvent({
1543
+ type: 'turn_start',
1544
+ timestamp: new Date().toISOString(),
1545
+ role: role === 'assistant' ? 'model' : 'user',
1546
+ });
1547
+ }
1548
+ async sendToolResponse(toolResults) {
1549
+ if (!this.session || !this._isActive) {
1550
+ throw new Error('Session is not active');
1551
+ }
1552
+ const functionResponses = toolResults.map(result => ({
1553
+ id: result.call_id || result.id,
1554
+ name: result.toolCall.function.name,
1555
+ response: result.error ? { error: result.error } : { result: result.output },
1556
+ }));
1557
+ await this.session.sendToolResponse({ functionResponses });
1558
+ }
1559
+ async *getEventStream() {
1560
+ while (this._isActive || this.eventQueue.length > 0) {
1561
+ if (this.eventQueue.length > 0) {
1562
+ yield this.eventQueue.shift();
1563
+ }
1564
+ else {
1565
+ const result = await new Promise(resolve => {
1566
+ if (this.sessionClosed && this.eventQueue.length === 0) {
1567
+ resolve({ done: true, value: undefined });
1568
+ }
1569
+ else {
1570
+ this.eventResolvers.push(resolve);
1571
+ }
1572
+ });
1573
+ if (result.done)
1574
+ break;
1575
+ if (result.value)
1576
+ yield result.value;
1577
+ }
1578
+ }
1579
+ }
1580
+ async close() {
1581
+ if (this.session && this._isActive) {
1582
+ this._isActive = false;
1583
+ await this.session.close();
1584
+ }
1585
+ }
1586
+ isActive() {
1587
+ return this._isActive;
1588
+ }
1589
+ pushEvent(event) {
1590
+ if (this.eventResolvers.length > 0) {
1591
+ const resolver = this.eventResolvers.shift();
1592
+ resolver({ value: event, done: false });
1593
+ }
1594
+ else {
1595
+ this.eventQueue.push(event);
1596
+ }
1597
+ }
1598
+ resolveAllWaitingEvents() {
1599
+ for (const resolver of this.eventResolvers) {
1600
+ resolver({ done: true, value: undefined });
1601
+ }
1602
+ this.eventResolvers = [];
1603
+ }
1604
+ }
1217
1605
  export const geminiProvider = new GeminiProvider();
1218
1606
  //# sourceMappingURL=gemini.js.map