@just-every/ensemble 0.2.79 → 0.2.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/dist/config/tool_execution.d.ts.map +1 -1
  2. package/dist/config/tool_execution.js +2 -11
  3. package/dist/config/tool_execution.js.map +1 -1
  4. package/dist/core/ensemble_embed.d.ts.map +1 -1
  5. package/dist/core/ensemble_embed.js +2 -4
  6. package/dist/core/ensemble_embed.js.map +1 -1
  7. package/dist/core/ensemble_image.d.ts.map +1 -1
  8. package/dist/core/ensemble_image.js +1 -1
  9. package/dist/core/ensemble_image.js.map +1 -1
  10. package/dist/core/ensemble_listen.d.ts.map +1 -1
  11. package/dist/core/ensemble_listen.js +2 -4
  12. package/dist/core/ensemble_listen.js.map +1 -1
  13. package/dist/core/ensemble_live.d.ts +14 -0
  14. package/dist/core/ensemble_live.d.ts.map +1 -0
  15. package/dist/core/ensemble_live.js +382 -0
  16. package/dist/core/ensemble_live.js.map +1 -0
  17. package/dist/core/ensemble_request.d.ts.map +1 -1
  18. package/dist/core/ensemble_request.js +5 -13
  19. package/dist/core/ensemble_request.js.map +1 -1
  20. package/dist/core/ensemble_voice.d.ts.map +1 -1
  21. package/dist/core/ensemble_voice.js +1 -1
  22. package/dist/core/ensemble_voice.js.map +1 -1
  23. package/dist/data/model_data.d.ts.map +1 -1
  24. package/dist/data/model_data.js +85 -11
  25. package/dist/data/model_data.js.map +1 -1
  26. package/dist/index.d.ts +6 -5
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +6 -5
  29. package/dist/index.js.map +1 -1
  30. package/dist/model_providers/base_provider.d.ts.map +1 -1
  31. package/dist/model_providers/base_provider.js +1 -1
  32. package/dist/model_providers/base_provider.js.map +1 -1
  33. package/dist/model_providers/claude.d.ts.map +1 -1
  34. package/dist/model_providers/claude.js +48 -101
  35. package/dist/model_providers/claude.js.map +1 -1
  36. package/dist/model_providers/deepseek.d.ts.map +1 -1
  37. package/dist/model_providers/deepseek.js +7 -18
  38. package/dist/model_providers/deepseek.js.map +1 -1
  39. package/dist/model_providers/elevenlabs.d.ts.map +1 -1
  40. package/dist/model_providers/elevenlabs.js +3 -7
  41. package/dist/model_providers/elevenlabs.js.map +1 -1
  42. package/dist/model_providers/gemini.d.ts +2 -1
  43. package/dist/model_providers/gemini.d.ts.map +1 -1
  44. package/dist/model_providers/gemini.js +512 -147
  45. package/dist/model_providers/gemini.js.map +1 -1
  46. package/dist/model_providers/grok.d.ts.map +1 -1
  47. package/dist/model_providers/grok.js +1 -2
  48. package/dist/model_providers/grok.js.map +1 -1
  49. package/dist/model_providers/model_provider.d.ts.map +1 -1
  50. package/dist/model_providers/model_provider.js +10 -20
  51. package/dist/model_providers/model_provider.js.map +1 -1
  52. package/dist/model_providers/openai.d.ts +2 -1
  53. package/dist/model_providers/openai.d.ts.map +1 -1
  54. package/dist/model_providers/openai.js +261 -100
  55. package/dist/model_providers/openai.js.map +1 -1
  56. package/dist/model_providers/openai_chat.d.ts.map +1 -1
  57. package/dist/model_providers/openai_chat.js +39 -72
  58. package/dist/model_providers/openai_chat.js.map +1 -1
  59. package/dist/model_providers/test_provider.d.ts.map +1 -1
  60. package/dist/model_providers/test_provider.js +7 -17
  61. package/dist/model_providers/test_provider.js.map +1 -1
  62. package/dist/tsconfig.tsbuildinfo +1 -1
  63. package/dist/types/errors.d.ts.map +1 -1
  64. package/dist/types/errors.js.map +1 -1
  65. package/dist/types/types.d.ts +162 -7
  66. package/dist/types/types.d.ts.map +1 -1
  67. package/dist/utils/agent.d.ts.map +1 -1
  68. package/dist/utils/agent.js +5 -16
  69. package/dist/utils/agent.js.map +1 -1
  70. package/dist/utils/citation_tracker.d.ts.map +1 -1
  71. package/dist/utils/citation_tracker.js.map +1 -1
  72. package/dist/utils/config_manager.d.ts.map +1 -1
  73. package/dist/utils/config_manager.js +12 -4
  74. package/dist/utils/config_manager.js.map +1 -1
  75. package/dist/utils/cost_tracker.d.ts.map +1 -1
  76. package/dist/utils/cost_tracker.js +13 -26
  77. package/dist/utils/cost_tracker.js.map +1 -1
  78. package/dist/utils/create_tool_function.d.ts.map +1 -1
  79. package/dist/utils/create_tool_function.js +4 -16
  80. package/dist/utils/create_tool_function.js.map +1 -1
  81. package/dist/utils/delta_buffer.d.ts.map +1 -1
  82. package/dist/utils/delta_buffer.js +1 -2
  83. package/dist/utils/delta_buffer.js.map +1 -1
  84. package/dist/utils/ensemble_result.d.ts.map +1 -1
  85. package/dist/utils/ensemble_result.js +9 -24
  86. package/dist/utils/ensemble_result.js.map +1 -1
  87. package/dist/utils/event_controller.d.ts.map +1 -1
  88. package/dist/utils/event_controller.js.map +1 -1
  89. package/dist/utils/external_models.d.ts.map +1 -1
  90. package/dist/utils/external_models.js.map +1 -1
  91. package/dist/utils/image_to_text.d.ts.map +1 -1
  92. package/dist/utils/image_to_text.js +1 -2
  93. package/dist/utils/image_to_text.js.map +1 -1
  94. package/dist/utils/image_utils.d.ts.map +1 -1
  95. package/dist/utils/image_utils.js.map +1 -1
  96. package/dist/utils/image_validation.d.ts.map +1 -1
  97. package/dist/utils/image_validation.js.map +1 -1
  98. package/dist/utils/llm_logger.d.ts.map +1 -1
  99. package/dist/utils/llm_logger.js.map +1 -1
  100. package/dist/utils/message_history.d.ts.map +1 -1
  101. package/dist/utils/message_history.js +9 -20
  102. package/dist/utils/message_history.js.map +1 -1
  103. package/dist/utils/model_class_config.d.ts.map +1 -1
  104. package/dist/utils/model_class_config.js +1 -1
  105. package/dist/utils/model_class_config.js.map +1 -1
  106. package/dist/utils/pause_controller.d.ts.map +1 -1
  107. package/dist/utils/pause_controller.js.map +1 -1
  108. package/dist/utils/quota_tracker.d.ts.map +1 -1
  109. package/dist/utils/quota_tracker.js +19 -49
  110. package/dist/utils/quota_tracker.js.map +1 -1
  111. package/dist/utils/retry_handler.d.ts.map +1 -1
  112. package/dist/utils/retry_handler.js.map +1 -1
  113. package/dist/utils/running_tool_tracker.d.ts.map +1 -1
  114. package/dist/utils/running_tool_tracker.js.map +1 -1
  115. package/dist/utils/sequential_queue.d.ts.map +1 -1
  116. package/dist/utils/sequential_queue.js.map +1 -1
  117. package/dist/utils/stream_handler.d.ts.map +1 -1
  118. package/dist/utils/stream_handler.js +1 -1
  119. package/dist/utils/stream_handler.js.map +1 -1
  120. package/dist/utils/summary_utils.d.ts.map +1 -1
  121. package/dist/utils/summary_utils.js +3 -8
  122. package/dist/utils/summary_utils.js.map +1 -1
  123. package/dist/utils/test_utils.d.ts.map +1 -1
  124. package/dist/utils/test_utils.js +1 -3
  125. package/dist/utils/test_utils.js.map +1 -1
  126. package/dist/utils/tool_execution_manager.d.ts.map +1 -1
  127. package/dist/utils/tool_execution_manager.js +3 -9
  128. package/dist/utils/tool_execution_manager.js.map +1 -1
  129. package/dist/utils/tool_parameter_utils.d.ts.map +1 -1
  130. package/dist/utils/tool_parameter_utils.js +2 -6
  131. package/dist/utils/tool_parameter_utils.js.map +1 -1
  132. package/dist/utils/tool_result_processor.d.ts.map +1 -1
  133. package/dist/utils/tool_result_processor.js +7 -18
  134. package/dist/utils/tool_result_processor.js.map +1 -1
  135. package/dist/utils/verification.d.ts.map +1 -1
  136. package/dist/utils/verification.js.map +1 -1
  137. package/package.json +4 -2
@@ -1,11 +1,11 @@
1
1
  import { BaseModelProvider } from './base_provider.js';
2
2
  import OpenAI, { toFile } from 'openai';
3
3
  import { costTracker } from '../index.js';
4
- import { log_llm_request, log_llm_response, log_llm_error, } from '../utils/llm_logger.js';
4
+ import { log_llm_request, log_llm_response, log_llm_error } from '../utils/llm_logger.js';
5
5
  import { isPaused } from '../utils/pause_controller.js';
6
- import { appendMessageWithImage, resizeAndSplitForOpenAI, } from '../utils/image_utils.js';
7
- import { bufferDelta, flushBufferedDeltas, } from '../utils/delta_buffer.js';
8
- import { createCitationTracker, formatCitation, generateFootnotes, } from '../utils/citation_tracker.js';
6
+ import { appendMessageWithImage, resizeAndSplitForOpenAI } from '../utils/image_utils.js';
7
+ import { bufferDelta, flushBufferedDeltas } from '../utils/delta_buffer.js';
8
+ import { createCitationTracker, formatCitation, generateFootnotes } from '../utils/citation_tracker.js';
9
9
  const BROWSER_WIDTH = 1024;
10
10
  const BROWSER_HEIGHT = 1536;
11
11
  function processSchemaForOpenAI(schema, originalProperties) {
@@ -47,8 +47,7 @@ function processSchemaForOpenAI(schema, originalProperties) {
47
47
  delete schema[keyword];
48
48
  }
49
49
  });
50
- const isObject = schema.type === 'object' ||
51
- (schema.type === undefined && schema.properties !== undefined);
50
+ const isObject = schema.type === 'object' || (schema.type === undefined && schema.properties !== undefined);
52
51
  for (const key of ['anyOf', 'allOf']) {
53
52
  if (Array.isArray(schema[key])) {
54
53
  schema[key].forEach((variantSchema) => processSchemaRecursively(variantSchema));
@@ -98,8 +97,7 @@ function processSchemaForOpenAI(schema, originalProperties) {
98
97
  delete processedSchema.required;
99
98
  }
100
99
  }
101
- if (processedSchema.properties &&
102
- processedSchema.additionalProperties === undefined) {
100
+ if (processedSchema.properties && processedSchema.additionalProperties === undefined) {
103
101
  processedSchema.additionalProperties = false;
104
102
  }
105
103
  return processedSchema;
@@ -258,9 +256,7 @@ export class OpenAIProvider extends BaseModelProvider {
258
256
  input_tokens: inputTokens,
259
257
  output_tokens: 0,
260
258
  metadata: {
261
- dimensions: response.data[0]?.embedding.length ||
262
- opts?.dimensions ||
263
- 1536,
259
+ dimensions: response.data[0]?.embedding.length || opts?.dimensions || 1536,
264
260
  },
265
261
  });
266
262
  if (Array.isArray(input) && input.length > 1) {
@@ -284,21 +280,17 @@ export class OpenAIProvider extends BaseModelProvider {
284
280
  quality = 'medium';
285
281
  else if (opts?.quality === 'hd')
286
282
  quality = 'high';
287
- else if (opts?.quality === 'low' ||
288
- opts?.quality === 'medium' ||
289
- opts?.quality === 'high') {
283
+ else if (opts?.quality === 'low' || opts?.quality === 'medium' || opts?.quality === 'high') {
290
284
  quality = opts.quality;
291
285
  }
292
286
  let size = 'auto';
293
287
  if (opts?.size === 'square' || opts?.size === '1024x1024') {
294
288
  size = '1024x1024';
295
289
  }
296
- else if (opts?.size === 'landscape' ||
297
- opts?.size === '1536x1024') {
290
+ else if (opts?.size === 'landscape' || opts?.size === '1536x1024') {
298
291
  size = '1536x1024';
299
292
  }
300
- else if (opts?.size === 'portrait' ||
301
- opts?.size === '1024x1536') {
293
+ else if (opts?.size === 'portrait' || opts?.size === '1024x1536') {
302
294
  size = '1024x1536';
303
295
  }
304
296
  const background = 'auto';
@@ -307,17 +299,16 @@ export class OpenAIProvider extends BaseModelProvider {
307
299
  let response;
308
300
  if (source_images) {
309
301
  console.log('[OpenAI] Using images.edit with source_images');
310
- const imageArray = Array.isArray(source_images)
311
- ? source_images
312
- : [source_images];
302
+ const imageArray = Array.isArray(source_images) ? source_images : [source_images];
313
303
  const imageFiles = [];
314
304
  for (const sourceImg of imageArray) {
315
305
  let imageFile;
316
- if (sourceImg.startsWith('http://') ||
317
- sourceImg.startsWith('https://')) {
306
+ if (sourceImg.startsWith('http://') || sourceImg.startsWith('https://')) {
318
307
  const imageResponse = await fetch(sourceImg);
319
308
  const imageBuffer = await imageResponse.arrayBuffer();
320
- imageFile = await toFile(new Uint8Array(imageBuffer), `image_${imageFiles.length}.png`, { type: 'image/png' });
309
+ imageFile = await toFile(new Uint8Array(imageBuffer), `image_${imageFiles.length}.png`, {
310
+ type: 'image/png',
311
+ });
321
312
  }
322
313
  else {
323
314
  let base64Data = sourceImg;
@@ -325,7 +316,9 @@ export class OpenAIProvider extends BaseModelProvider {
325
316
  base64Data = sourceImg.split(',')[1];
326
317
  }
327
318
  const binaryData = Buffer.from(base64Data, 'base64');
328
- imageFile = await toFile(new Uint8Array(binaryData), `image_${imageFiles.length}.png`, { type: 'image/png' });
319
+ imageFile = await toFile(new Uint8Array(binaryData), `image_${imageFiles.length}.png`, {
320
+ type: 'image/png',
321
+ });
329
322
  }
330
323
  imageFiles.push(imageFile);
331
324
  }
@@ -468,45 +461,36 @@ export class OpenAIProvider extends BaseModelProvider {
468
461
  }
469
462
  async *createResponseStream(messages, model, agent) {
470
463
  const { getToolsFromAgent } = await import('../utils/agent.js');
471
- const tools = agent
472
- ? await getToolsFromAgent(agent)
473
- : [];
464
+ const tools = agent ? await getToolsFromAgent(agent) : [];
474
465
  const settings = agent?.modelSettings;
475
466
  let requestId;
476
467
  try {
477
468
  let input = [];
478
469
  for (const messageFull of messages) {
479
470
  let message = { ...messageFull };
480
- const originalModel = message
481
- .model;
471
+ const originalModel = message.model;
482
472
  delete message.timestamp;
483
473
  delete message.model;
484
474
  delete message.pinned;
485
475
  if (message.type === 'thinking') {
486
- if (model.startsWith('o') &&
487
- message.thinking_id &&
488
- model === originalModel) {
476
+ if (model.startsWith('o') && message.thinking_id && model === originalModel) {
489
477
  console.log(`[OpenAI] Processing thinking message with ID: ${message.thinking_id}`, message);
490
478
  const match = message.thinking_id.match(/^(rs_[A-Za-z0-9]+)-(\d)$/);
491
479
  if (match) {
492
480
  const reasoningId = match[1];
493
481
  const summaryIndex = parseInt(match[2], 10);
494
- const summaryText = typeof message.content === 'string'
495
- ? message.content
496
- : JSON.stringify(message.content);
482
+ const summaryText = typeof message.content === 'string' ? message.content : JSON.stringify(message.content);
497
483
  const summaryEntry = {
498
484
  type: 'summary_text',
499
485
  text: summaryText,
500
486
  };
501
- const existingIndex = input.findIndex((item) => item.type === 'reasoning' &&
502
- item.id === reasoningId);
487
+ const existingIndex = input.findIndex((item) => item.type === 'reasoning' && item.id === reasoningId);
503
488
  if (existingIndex !== -1) {
504
489
  const existingItem = input[existingIndex];
505
490
  if (!existingItem.summary) {
506
491
  existingItem.summary = [];
507
492
  }
508
- existingItem.summary[summaryIndex] =
509
- summaryEntry;
493
+ existingItem.summary[summaryIndex] = summaryEntry;
510
494
  input[existingIndex] = existingItem;
511
495
  }
512
496
  else {
@@ -530,9 +514,7 @@ export class OpenAIProvider extends BaseModelProvider {
530
514
  continue;
531
515
  }
532
516
  if (message.type === 'function_call') {
533
- if (message.id &&
534
- (!message.id.startsWith('fc_') ||
535
- model !== originalModel)) {
517
+ if (message.id && (!message.id.startsWith('fc_') || model !== originalModel)) {
536
518
  const { id, ...rest } = message;
537
519
  message = rest;
538
520
  }
@@ -545,12 +527,8 @@ export class OpenAIProvider extends BaseModelProvider {
545
527
  input = await appendMessageWithImage(model, input, messageToAdd, 'output', addImagesToInput, `function call output of ${message.name}`);
546
528
  continue;
547
529
  }
548
- if ((message.type ?? 'message') === 'message' &&
549
- 'content' in message) {
550
- if ('id' in message &&
551
- message.id &&
552
- (!message.id.startsWith('msg_') ||
553
- model !== originalModel)) {
530
+ if ((message.type ?? 'message') === 'message' && 'content' in message) {
531
+ if ('id' in message && message.id && (!message.id.startsWith('msg_') || model !== originalModel)) {
554
532
  const { id, ...rest } = message;
555
533
  message = rest;
556
534
  console.log(`[OpenAI] Removed message ID: ${id} model: ${model} originalModel: ${originalModel}`);
@@ -580,11 +558,7 @@ export class OpenAIProvider extends BaseModelProvider {
580
558
  requestParams.top_p = settings.top_p;
581
559
  }
582
560
  }
583
- const REASONING_EFFORT_CONFIGS = [
584
- 'low',
585
- 'medium',
586
- 'high',
587
- ];
561
+ const REASONING_EFFORT_CONFIGS = ['low', 'medium', 'high'];
588
562
  let hasEffortSuffix = false;
589
563
  for (const effort of REASONING_EFFORT_CONFIGS) {
590
564
  const suffix = `-${effort}`;
@@ -651,22 +625,18 @@ export class OpenAIProvider extends BaseModelProvider {
651
625
  }
652
626
  if (event.type === 'response.in_progress') {
653
627
  }
654
- else if (event.type === 'response.completed' &&
655
- event.response?.usage) {
628
+ else if (event.type === 'response.completed' && event.response?.usage) {
656
629
  costTracker.addUsage({
657
630
  model,
658
631
  input_tokens: event.response.usage.input_tokens || 0,
659
632
  output_tokens: event.response.usage.output_tokens || 0,
660
- cached_tokens: event.response.usage.input_tokens_details
661
- ?.cached_tokens || 0,
633
+ cached_tokens: event.response.usage.input_tokens_details?.cached_tokens || 0,
662
634
  metadata: {
663
- reasoning_tokens: event.response.usage.output_tokens_details
664
- ?.reasoning_tokens || 0,
635
+ reasoning_tokens: event.response.usage.output_tokens_details?.reasoning_tokens || 0,
665
636
  },
666
637
  });
667
638
  }
668
- else if (event.type === 'response.failed' &&
669
- event.response?.error) {
639
+ else if (event.type === 'response.failed' && event.response?.error) {
670
640
  const errorInfo = event.response.error;
671
641
  log_llm_error(requestId, errorInfo);
672
642
  console.error(`Response ${event.response.id} failed: [${errorInfo.code}] ${errorInfo.message}`);
@@ -675,8 +645,7 @@ export class OpenAIProvider extends BaseModelProvider {
675
645
  error: `OpenAI response failed: [${errorInfo.code}] ${errorInfo.message}`,
676
646
  };
677
647
  }
678
- else if (event.type === 'response.incomplete' &&
679
- event.response?.incomplete_details) {
648
+ else if (event.type === 'response.incomplete' && event.response?.incomplete_details) {
680
649
  const reason = event.response.incomplete_details.reason;
681
650
  log_llm_error(requestId, 'OpenAI response incomplete: ' + reason);
682
651
  console.warn(`Response ${event.response.id} incomplete: ${reason}`);
@@ -685,8 +654,7 @@ export class OpenAIProvider extends BaseModelProvider {
685
654
  error: 'OpenAI response incomplete: ' + reason,
686
655
  };
687
656
  }
688
- else if (event.type === 'response.output_item.added' &&
689
- event.item) {
657
+ else if (event.type === 'response.output_item.added' && event.item) {
690
658
  if (event.item.type === 'function_call') {
691
659
  if (!toolCallStates.has(event.item.id)) {
692
660
  toolCallStates.set(event.item.id, {
@@ -704,10 +672,8 @@ export class OpenAIProvider extends BaseModelProvider {
704
672
  }
705
673
  }
706
674
  }
707
- else if (event.type === 'response.output_item.done' &&
708
- event.item) {
709
- if (event.item.type === 'reasoning' &&
710
- !event.item.summary.length) {
675
+ else if (event.type === 'response.output_item.done' && event.item) {
676
+ if (event.item.type === 'reasoning' && !event.item.summary.length) {
711
677
  yield {
712
678
  type: 'message_complete',
713
679
  content: '',
@@ -716,14 +682,11 @@ export class OpenAIProvider extends BaseModelProvider {
716
682
  };
717
683
  }
718
684
  }
719
- else if (event.type === 'response.content_part.added' &&
720
- event.part) {
685
+ else if (event.type === 'response.content_part.added' && event.part) {
721
686
  }
722
- else if (event.type === 'response.content_part.done' &&
723
- event.part) {
687
+ else if (event.type === 'response.content_part.done' && event.part) {
724
688
  }
725
- else if (event.type === 'response.output_text.delta' &&
726
- event.delta) {
689
+ else if (event.type === 'response.output_text.delta' && event.delta) {
727
690
  const itemId = event.item_id;
728
691
  let position = messagePositions.get(itemId) ?? 0;
729
692
  for (const ev of bufferDelta(deltaBuffers, itemId, event.delta, content => ({
@@ -736,15 +699,12 @@ export class OpenAIProvider extends BaseModelProvider {
736
699
  }
737
700
  messagePositions.set(itemId, position);
738
701
  }
739
- else if (event.type ===
740
- 'response.output_text.annotation.added' &&
702
+ else if (event.type === 'response.output_text.annotation.added' &&
741
703
  event.annotation) {
742
704
  const eventData = event;
743
- if (eventData.annotation?.type === 'url_citation' &&
744
- eventData.annotation.url) {
705
+ if (eventData.annotation?.type === 'url_citation' && eventData.annotation.url) {
745
706
  const marker = formatCitation(citationTracker, {
746
- title: eventData.annotation.title ||
747
- eventData.annotation.url,
707
+ title: eventData.annotation.title || eventData.annotation.url,
748
708
  url: eventData.annotation.url,
749
709
  });
750
710
  let position = messagePositions.get(eventData.item_id) ?? 0;
@@ -760,8 +720,7 @@ export class OpenAIProvider extends BaseModelProvider {
760
720
  console.log('Annotation added:', eventData.annotation);
761
721
  }
762
722
  }
763
- else if (event.type === 'response.output_text.done' &&
764
- event.text !== undefined) {
723
+ else if (event.type === 'response.output_text.done' && event.text !== undefined) {
765
724
  const itemId = event.item_id;
766
725
  let finalText = event.text;
767
726
  if (citationTracker.citations.size > 0) {
@@ -775,12 +734,10 @@ export class OpenAIProvider extends BaseModelProvider {
775
734
  };
776
735
  messagePositions.delete(itemId);
777
736
  }
778
- else if (event.type === 'response.refusal.delta' &&
779
- event.delta) {
737
+ else if (event.type === 'response.refusal.delta' && event.delta) {
780
738
  console.log(`Refusal delta for item ${event.item_id}: ${event.delta}`);
781
739
  }
782
- else if (event.type === 'response.refusal.done' &&
783
- event.refusal) {
740
+ else if (event.type === 'response.refusal.done' && event.refusal) {
784
741
  log_llm_error(requestId, 'OpenAI refusal error: ' + event.refusal);
785
742
  console.log(`Refusal done for item ${event.item_id}: ${event.refusal}`);
786
743
  yield {
@@ -788,9 +745,7 @@ export class OpenAIProvider extends BaseModelProvider {
788
745
  error: 'OpenAI refusal error: ' + event.refusal,
789
746
  };
790
747
  }
791
- else if (event.type ===
792
- 'response.function_call_arguments.delta' &&
793
- event.delta) {
748
+ else if (event.type === 'response.function_call_arguments.delta' && event.delta) {
794
749
  const currentCall = toolCallStates.get(event.item_id);
795
750
  if (currentCall) {
796
751
  currentCall.function.arguments += event.delta;
@@ -799,8 +754,7 @@ export class OpenAIProvider extends BaseModelProvider {
799
754
  console.warn(`Received function_call_arguments.delta for unknown item_id: ${event.item_id}`);
800
755
  }
801
756
  }
802
- else if (event.type ===
803
- 'response.function_call_arguments.done' &&
757
+ else if (event.type === 'response.function_call_arguments.done' &&
804
758
  event.arguments !== undefined) {
805
759
  const currentCall = toolCallStates.get(event.item_id);
806
760
  if (currentCall) {
@@ -839,9 +793,7 @@ export class OpenAIProvider extends BaseModelProvider {
839
793
  else if (event.type === 'response.reasoning_summary_part.done') {
840
794
  console.log(`Reasoning summary part done for item ${event.item_id}, index ${event.summary_index}`);
841
795
  }
842
- else if (event.type ===
843
- 'response.reasoning_summary_text.delta' &&
844
- event.delta) {
796
+ else if (event.type === 'response.reasoning_summary_text.delta' && event.delta) {
845
797
  const itemId = event.item_id + '-' + event.summary_index;
846
798
  let position = reasoningPositions.get(itemId) ?? 0;
847
799
  reasoningAggregates.set(itemId, reasoningAggregates.get(itemId) + event.delta);
@@ -854,8 +806,7 @@ export class OpenAIProvider extends BaseModelProvider {
854
806
  };
855
807
  reasoningPositions.set(itemId, position);
856
808
  }
857
- else if (event.type === 'response.reasoning_summary_text.done' &&
858
- event.text !== undefined) {
809
+ else if (event.type === 'response.reasoning_summary_text.done' && event.text !== undefined) {
859
810
  const itemId = event.item_id + '-' + event.summary_index;
860
811
  const aggregatedThinking = event.text;
861
812
  yield {
@@ -920,11 +871,221 @@ export class OpenAIProvider extends BaseModelProvider {
920
871
  console.error('Error in OpenAI streaming response:', error);
921
872
  yield {
922
873
  type: 'error',
923
- error: 'OpenAI streaming error: ' +
924
- (error instanceof Error ? error.stack : String(error)),
874
+ error: 'OpenAI streaming error: ' + (error instanceof Error ? error.stack : String(error)),
925
875
  };
926
876
  }
927
877
  }
878
+ async *createTranscription(audio, agent, model, opts) {
879
+ const transcriptionModels = ['gpt-4o-transcribe', 'gpt-4o-mini-transcribe', 'whisper-1'];
880
+ if (!transcriptionModels.includes(model)) {
881
+ throw new Error(`Model ${model} does not support transcription. Supported models: ${transcriptionModels.join(', ')}`);
882
+ }
883
+ let ws = null;
884
+ let isConnected = false;
885
+ let connectionError = null;
886
+ try {
887
+ const { WebSocket } = await import('ws');
888
+ const apiKey = this.apiKey || process.env.OPENAI_API_KEY;
889
+ if (!apiKey) {
890
+ throw new Error('Failed to initialize OpenAI transcription. Make sure OPENAI_API_KEY is set.');
891
+ }
892
+ const wsUrl = 'wss://api.openai.com/v1/realtime?intent=transcription';
893
+ ws = new WebSocket(wsUrl, {
894
+ headers: {
895
+ Authorization: 'Bearer ' + apiKey,
896
+ 'OpenAI-Beta': 'realtime=v1',
897
+ },
898
+ });
899
+ const transcriptEvents = [];
900
+ const connectionPromise = new Promise((resolve, reject) => {
901
+ const timeout = setTimeout(() => {
902
+ reject(new Error('Connection timeout'));
903
+ }, 10000);
904
+ ws.on('open', () => {
905
+ clearTimeout(timeout);
906
+ console.log('[OpenAI] WebSocket connected for transcription');
907
+ isConnected = true;
908
+ resolve();
909
+ });
910
+ ws.on('error', error => {
911
+ clearTimeout(timeout);
912
+ connectionError = error;
913
+ reject(error);
914
+ });
915
+ });
916
+ ws.on('message', (data) => {
917
+ try {
918
+ const event = JSON.parse(data.toString());
919
+ switch (event.type) {
920
+ case 'transcription_session.created':
921
+ case 'session.created': {
922
+ const sessionUpdate = {
923
+ type: 'transcription_session.update',
924
+ input_audio_format: opts?.audioFormat?.encoding === 'pcm' ? 'pcm16' : 'pcm16',
925
+ input_audio_transcription: {
926
+ model: model,
927
+ prompt: opts?.prompt || '',
928
+ language: opts?.language || '',
929
+ },
930
+ turn_detection: opts?.vad === false
931
+ ? null
932
+ : {
933
+ type: 'server_vad',
934
+ threshold: 0.5,
935
+ prefix_padding_ms: 300,
936
+ silence_duration_ms: 500,
937
+ },
938
+ input_audio_noise_reduction: opts?.noiseReduction === null
939
+ ? null
940
+ : {
941
+ type: opts?.noiseReduction || 'near_field',
942
+ },
943
+ };
944
+ ws.send(JSON.stringify(sessionUpdate));
945
+ break;
946
+ }
947
+ case 'conversation.item.input_audio_transcription.delta': {
948
+ if (model !== 'whisper-1') {
949
+ const deltaEvent = {
950
+ type: 'transcription_delta',
951
+ timestamp: new Date().toISOString(),
952
+ delta: event.delta,
953
+ partial: true,
954
+ };
955
+ transcriptEvents.push(deltaEvent);
956
+ }
957
+ break;
958
+ }
959
+ case 'conversation.item.input_audio_transcription.completed': {
960
+ const completeText = event.transcript;
961
+ if (model === 'whisper-1') {
962
+ const deltaEvent = {
963
+ type: 'transcription_delta',
964
+ timestamp: new Date().toISOString(),
965
+ delta: completeText,
966
+ partial: false,
967
+ };
968
+ transcriptEvents.push(deltaEvent);
969
+ }
970
+ const turnEvent = {
971
+ type: 'transcription_turn',
972
+ timestamp: new Date().toISOString(),
973
+ text: completeText,
974
+ };
975
+ transcriptEvents.push(turnEvent);
976
+ break;
977
+ }
978
+ case 'input_audio_buffer.speech_started': {
979
+ const previewEvent = {
980
+ type: 'transcription_preview',
981
+ timestamp: new Date().toISOString(),
982
+ text: '',
983
+ isFinal: false,
984
+ };
985
+ transcriptEvents.push(previewEvent);
986
+ break;
987
+ }
988
+ case 'input_audio_buffer.speech_stopped': {
989
+ break;
990
+ }
991
+ case 'error': {
992
+ const errorEvent = {
993
+ type: 'error',
994
+ timestamp: new Date().toISOString(),
995
+ error: event.error?.message || 'Unknown error',
996
+ };
997
+ transcriptEvents.push(errorEvent);
998
+ break;
999
+ }
1000
+ }
1001
+ }
1002
+ catch (error) {
1003
+ console.error('[OpenAI] Error processing message:', error);
1004
+ }
1005
+ });
1006
+ ws.on('close', () => {
1007
+ console.log('[OpenAI] WebSocket closed');
1008
+ isConnected = false;
1009
+ });
1010
+ await connectionPromise;
1011
+ const audioStream = normalizeAudioSource(audio);
1012
+ const reader = audioStream.getReader();
1013
+ try {
1014
+ while (true) {
1015
+ const { done, value } = await reader.read();
1016
+ if (done)
1017
+ break;
1018
+ if (value && ws && isConnected) {
1019
+ const audioEvent = {
1020
+ type: 'input_audio_buffer.append',
1021
+ audio: Buffer.from(value).toString('base64'),
1022
+ };
1023
+ ws.send(JSON.stringify(audioEvent));
1024
+ }
1025
+ if (transcriptEvents.length > 0) {
1026
+ const events = transcriptEvents.splice(0, transcriptEvents.length);
1027
+ for (const event of events) {
1028
+ yield event;
1029
+ }
1030
+ }
1031
+ if (connectionError) {
1032
+ throw connectionError;
1033
+ }
1034
+ }
1035
+ if (opts?.vad === false && ws && isConnected) {
1036
+ ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));
1037
+ }
1038
+ await new Promise(resolve => setTimeout(resolve, 1000));
1039
+ if (transcriptEvents.length > 0) {
1040
+ const events = transcriptEvents.splice(0, transcriptEvents.length);
1041
+ for (const event of events) {
1042
+ yield event;
1043
+ }
1044
+ }
1045
+ const completeEvent = {
1046
+ type: 'transcription_complete',
1047
+ timestamp: new Date().toISOString(),
1048
+ };
1049
+ yield completeEvent;
1050
+ }
1051
+ finally {
1052
+ reader.releaseLock();
1053
+ if (ws && ws.readyState === ws.OPEN) {
1054
+ ws.close();
1055
+ }
1056
+ }
1057
+ }
1058
+ catch (error) {
1059
+ console.error('[OpenAI] Transcription error:', error);
1060
+ const errorEvent = {
1061
+ type: 'error',
1062
+ timestamp: new Date().toISOString(),
1063
+ error: error instanceof Error ? error.message : 'Transcription failed',
1064
+ };
1065
+ yield errorEvent;
1066
+ }
1067
+ }
1068
+ }
1069
+ function normalizeAudioSource(source) {
1070
+ if (source instanceof ReadableStream) {
1071
+ return source;
1072
+ }
1073
+ if (typeof source === 'object' && source !== null && Symbol.asyncIterator in source) {
1074
+ return new ReadableStream({
1075
+ async start(controller) {
1076
+ try {
1077
+ for await (const chunk of source) {
1078
+ controller.enqueue(chunk);
1079
+ }
1080
+ controller.close();
1081
+ }
1082
+ catch (error) {
1083
+ controller.error(error);
1084
+ }
1085
+ },
1086
+ });
1087
+ }
1088
+ throw new Error('Invalid audio source type');
928
1089
  }
929
1090
  export const openaiProvider = new OpenAIProvider();
930
1091
  //# sourceMappingURL=openai.js.map