@just-every/ensemble 0.2.182 → 0.2.183

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { BaseModelProvider } from './base_provider.js';
4
4
  import { costTracker } from '../utils/cost_tracker.js';
5
5
  import { log_llm_error, log_llm_request, log_llm_response } from '../utils/llm_logger.js';
6
6
  import { isPaused } from '../utils/pause_controller.js';
7
- import { appendMessageWithImage, normalizeImageDataUrl, resizeAndTruncateForGemini } from '../utils/image_utils.js';
7
+ import { appendMessageWithImage, normalizeImageDataUrl, resizeAndTruncateForGemini, resizeDataUrl, } from '../utils/image_utils.js';
8
8
  import { hasEventHandler } from '../utils/event_controller.js';
9
9
  import { truncateLargeValues } from '../utils/truncate_utils.js';
10
10
  function convertParameterToGeminiFormat(param) {
@@ -225,6 +225,56 @@ function formatGroundingChunks(chunks) {
225
225
  .map((c, i) => `${i + 1}. ${c.web.title || 'Untitled'} – ${c.web.uri}`)
226
226
  .join('\n');
227
227
  }
228
/**
 * Reduce a raw grounding chunk to a compact `{ uri, image_uri, title }` record.
 *
 * The page URI is taken from `chunk.web.uri`, falling back to `chunk.image.uri`
 * and then `chunk.uri`. The direct image URI is taken from `chunk.image.imageUri`,
 * `chunk.image.image_uri`, or `chunk.image_uri`. The title comes only from
 * `chunk.web.title`. Falsy values are treated as absent.
 *
 * @param {*} chunk - Raw grounding chunk as received from the API response.
 * @returns {{uri?: string, image_uri?: string, title?: string} | null}
 *   The normalized record, or `null` when `chunk` is not an object or carries
 *   neither a page URI nor an image URI.
 */
function normalizeGroundingChunk(chunk) {
    const isObjectLike = Boolean(chunk) && typeof chunk === 'object';
    if (!isObjectLike) {
        return null;
    }

    const { web, image } = chunk;
    const pageUri = web?.uri || image?.uri || chunk.uri;
    const directImageUri = image?.imageUri || image?.image_uri || chunk.image_uri;

    // A chunk without any URI cannot be cited, even if it has a title.
    if (!pageUri && !directImageUri) {
        return null;
    }

    const normalized = {};
    if (pageUri) {
        normalized.uri = pageUri;
    }
    if (directImageUri) {
        normalized.image_uri = directImageUri;
    }
    if (web?.title) {
        normalized.title = web.title;
    }
    return normalized;
}
244
/**
 * Remove duplicate grounding chunks while preserving first-seen order.
 *
 * Two chunks are duplicates when their `uri`, `image_uri`, and `title` all
 * match (missing/falsy fields compare as the empty string).
 *
 * @param {Iterable<{uri?: string, image_uri?: string, title?: string}> | null | undefined} chunks
 *   Normalized grounding chunks. A nullish list is treated as empty, and
 *   non-object entries are skipped.
 * @returns {Array<{uri?: string, image_uri?: string, title?: string}>}
 *   A new array holding the first occurrence of each distinct chunk.
 */
function dedupeGroundingChunks(chunks) {
    const seen = new Set();
    const out = [];
    for (const chunk of chunks ?? []) {
        // Skip holes / malformed entries instead of throwing on property access.
        if (!chunk || typeof chunk !== 'object') {
            continue;
        }
        // Encode the fields as a JSON tuple rather than joining with '|':
        // a plain delimiter makes {uri:'a|b'} and {uri:'a', image_uri:'b|'}
        // collide, silently dropping a distinct citation.
        const key = JSON.stringify([
            String(chunk.uri || ''),
            String(chunk.image_uri || ''),
            String(chunk.title || ''),
        ]);
        if (seen.has(key)) {
            continue;
        }
        seen.add(key);
        out.push(chunk);
    }
    return out;
}
256
/**
 * Combine two image-generation metadata records into a new record.
 *
 * Neither argument is mutated. `source.model` wins over `target.model` when
 * truthy. When `source.grounding` is present, grounding fields are merged:
 * search queries are unioned, grounding chunks are de-duplicated, grounding
 * supports are concatenated, and any other grounding keys from `source`
 * override those from `target`. `thought_signatures` are unioned, `thoughts`
 * are concatenated, and `citations` are de-duplicated.
 *
 * @param {object} target - Metadata accumulated so far.
 * @param {object} source - Newly observed metadata to fold in.
 * @returns {object} The merged metadata record.
 */
function mergeImageMetadata(target, source) {
    const concatOf = (first, second) => [...(first || []), ...(second || [])];
    const unionOf = (first, second) => Array.from(new Set(concatOf(first, second)));

    const merged = {
        ...target,
        model: source.model || target.model,
    };

    const incoming = source.grounding;
    if (incoming) {
        const existing = target.grounding || {};
        merged.grounding = {
            ...existing,
            ...incoming,
            imageSearchQueries: unionOf(existing.imageSearchQueries, incoming.imageSearchQueries),
            webSearchQueries: unionOf(existing.webSearchQueries, incoming.webSearchQueries),
            groundingChunks: dedupeGroundingChunks(concatOf(existing.groundingChunks, incoming.groundingChunks)),
            groundingSupports: concatOf(existing.groundingSupports, incoming.groundingSupports),
        };
    }

    merged.thought_signatures = unionOf(target.thought_signatures, source.thought_signatures);
    merged.thoughts = concatOf(target.thoughts, source.thoughts);
    merged.citations = dedupeGroundingChunks(concatOf(target.citations, source.citations));
    return merged;
}
228
278
  async function addImagesToInput(input, images, source) {
229
279
  for (const [image_id, imageData] of Object.entries(images)) {
230
280
  const processedImageData = await resizeAndTruncateForGemini(imageData);
@@ -396,6 +446,54 @@ const THINKING_BUDGET_CONFIGS = {
396
446
  '-high': 12288,
397
447
  '-max': 24576,
398
448
  };
449
/**
 * Target pixel dimensions for "0.5K" output, keyed by aspect-ratio string.
 *
 * Looked up by aspect ratio when the effective image size is '0.5K' on the
 * gemini-3.1-flash-image-preview path; generated images are then resized to
 * the matching width/height.
 *
 * Each entry should match its aspect-ratio key. The '21:9' entry is 792x336,
 * half of the 1584x672 1K preset for 21:9 — the former 792x168 was ~4.7:1 and
 * would have cropped the image to the wrong shape.
 */
const GEMINI_31_FLASH_IMAGE_05K_DIMENSIONS = {
    '1:1': { width: 512, height: 512 },
    '1:4': { width: 256, height: 1024 },
    '1:8': { width: 192, height: 1536 },
    '2:3': { width: 424, height: 632 },
    '3:2': { width: 632, height: 424 },
    '3:4': { width: 448, height: 600 },
    '4:1': { width: 1024, height: 256 },
    '4:3': { width: 600, height: 448 },
    '4:5': { width: 464, height: 576 },
    '5:4': { width: 576, height: 464 },
    '8:1': { width: 1536, height: 192 },
    '9:16': { width: 384, height: 688 },
    '16:9': { width: 688, height: 384 },
    '21:9': { width: 792, height: 336 },
};
465
/**
 * Maps an explicit "WIDTHxHEIGHT" size string to the aspect ratio and image
 * size tier ('1K' | '2K' | '4K') for gemini-3-pro-image-preview.
 *
 * The 2K and 4K dimensions are exact 2x and 4x multiples of the 1K
 * dimensions, so the table is derived from the 1K base rows. Keys are
 * inserted tier by tier (1K, 2K, 4K), each in the aspect-ratio order below.
 */
const GEMINI_3_PRO_IMAGE_DIMENSION_PRESETS = (() => {
    // [aspect ratio, width at 1K, height at 1K]
    const oneKRows = [
        ['1:1', 1024, 1024],
        ['2:3', 848, 1264],
        ['3:2', 1264, 848],
        ['3:4', 896, 1200],
        ['4:3', 1200, 896],
        ['4:5', 928, 1152],
        ['5:4', 1152, 928],
        ['9:16', 768, 1376],
        ['16:9', 1376, 768],
        ['21:9', 1584, 672],
    ];
    // [image size tier, scale factor relative to 1K]
    const tiers = [
        ['1K', 1],
        ['2K', 2],
        ['4K', 4],
    ];
    const presets = {};
    for (const [imageSize, scale] of tiers) {
        for (const [ar, baseWidth, baseHeight] of oneKRows) {
            presets[`${baseWidth * scale}x${baseHeight * scale}`] = { ar, imageSize };
        }
    }
    return presets;
})();
399
497
  export class GeminiProvider extends BaseModelProvider {
400
498
  _client;
401
499
  apiKey;
@@ -893,14 +991,45 @@ export class GeminiProvider extends BaseModelProvider {
893
991
  if (hasOtherTools) {
894
992
  console.warn('[Gemini] Image generation ignores function tools; only google_web_search is supported.');
895
993
  }
994
+ const explicitWebGrounding = opts?.grounding?.web_search;
995
+ const explicitImageGrounding = opts?.grounding?.image_search;
996
+ const enableWebGrounding = explicitWebGrounding ?? hasGoogleWebSearch ?? false;
997
+ const isGemini31FlashImageModel = model.includes('gemini-3.1-flash-image-preview');
998
+ const enableImageGrounding = explicitImageGrounding === true && isGemini31FlashImageModel;
999
+ if (explicitImageGrounding && !isGemini31FlashImageModel) {
1000
+ console.warn('[Gemini] Image Search grounding is only available for gemini-3.1-flash-image-preview. Ignoring image_search=true.');
1001
+ }
1002
+ const includeThoughts = opts?.thinking?.include_thoughts === true;
1003
+ const requestedThinkingLevel = opts?.thinking?.level;
1004
+ const thinkingLevel = requestedThinkingLevel === 'high' ? 'High' : requestedThinkingLevel ? 'Minimal' : undefined;
1005
+ if (requestedThinkingLevel && !isGemini31FlashImageModel) {
1006
+ console.warn('[Gemini] thinking.level is currently supported for gemini-3.1-flash-image-preview only. Ignoring thinking level.');
1007
+ }
896
1008
  let aspectRatio = '1:1';
897
1009
  if (opts?.size === 'landscape')
898
1010
  aspectRatio = '16:9';
899
1011
  else if (opts?.size === 'portrait')
900
1012
  aspectRatio = '9:16';
901
1013
  console.log(`[Gemini] Generating ${numberOfImages} image(s) with model ${model}, prompt: "${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}"`);
902
- if (model.includes('gemini-2.5-flash-image-preview') || model.includes('gemini-3-pro-image-preview')) {
1014
+ if (model.includes('gemini-2.5-flash-image-preview') ||
1015
+ model.includes('gemini-3.1-flash-image-preview') ||
1016
+ model.includes('gemini-3-pro-image-preview')) {
1017
+ let aggregateMetadata = { model };
903
1018
  const sizeMap = {
1019
+ '1:1': { ar: '1:1' },
1020
+ '1:4': { ar: '1:4' },
1021
+ '1:8': { ar: '1:8' },
1022
+ '2:3': { ar: '2:3' },
1023
+ '3:2': { ar: '3:2' },
1024
+ '3:4': { ar: '3:4' },
1025
+ '4:1': { ar: '4:1' },
1026
+ '4:3': { ar: '4:3' },
1027
+ '4:5': { ar: '4:5' },
1028
+ '5:4': { ar: '5:4' },
1029
+ '8:1': { ar: '8:1' },
1030
+ '9:16': { ar: '9:16' },
1031
+ '16:9': { ar: '16:9' },
1032
+ '21:9': { ar: '21:9' },
904
1033
  square: { ar: '1:1' },
905
1034
  landscape: { ar: '16:9' },
906
1035
  portrait: { ar: '9:16' },
@@ -915,20 +1044,85 @@ export class GeminiProvider extends BaseModelProvider {
915
1044
  '1024x1792': { ar: '9:16' },
916
1045
  };
917
1046
  const sm = opts?.size ? sizeMap[String(opts.size)] : undefined;
1047
+ const gemini3ProDimensionPreset = model.includes('gemini-3-pro-image-preview')
1048
+ ? GEMINI_3_PRO_IMAGE_DIMENSION_PRESETS[String(opts?.size)]
1049
+ : undefined;
918
1050
  const imageConfig = {};
919
1051
  if (sm?.ar)
920
1052
  imageConfig.aspectRatio = sm.ar;
1053
+ if (gemini3ProDimensionPreset?.ar)
1054
+ imageConfig.aspectRatio = gemini3ProDimensionPreset.ar;
921
1055
  const qualityKey = typeof opts?.quality === 'string' ? opts.quality.toLowerCase() : '';
922
- const imageSizeMap = {
923
- low: '1K',
924
- standard: '2K',
925
- medium: '2K',
926
- hd: '4K',
927
- high: '4K',
928
- };
929
- const imageSize = imageSizeMap[qualityKey];
930
- if (imageSize)
931
- imageConfig.imageSize = imageSize;
1056
+ const imageSizeMap = isGemini31FlashImageModel
1057
+ ? {
1058
+ low: '0.5K',
1059
+ standard: '1K',
1060
+ medium: '2K',
1061
+ hd: '4K',
1062
+ high: '4K',
1063
+ }
1064
+ : {
1065
+ low: '1K',
1066
+ standard: '2K',
1067
+ medium: '2K',
1068
+ hd: '4K',
1069
+ high: '4K',
1070
+ };
1071
+ let imageSize = imageSizeMap[qualityKey];
1072
+ if (gemini3ProDimensionPreset?.imageSize) {
1073
+ imageSize = gemini3ProDimensionPreset.imageSize;
1074
+ }
1075
+ if (isGemini31FlashImageModel && opts?.size === '512x512') {
1076
+ imageSize = '0.5K';
1077
+ }
1078
+ const requestImageSize = imageSize === '0.5K' ? undefined : imageSize;
1079
+ if (requestImageSize)
1080
+ imageConfig.imageSize = requestImageSize;
1081
+ const thinkingConfig = {};
1082
+ if (opts?.thinking && 'include_thoughts' in opts.thinking) {
1083
+ thinkingConfig.includeThoughts = includeThoughts;
1084
+ }
1085
+ if (thinkingLevel && isGemini31FlashImageModel) {
1086
+ thinkingConfig.thinkingLevel = thinkingLevel;
1087
+ }
1088
+ const searchTypes = {};
1089
+ if (enableWebGrounding)
1090
+ searchTypes.webSearch = {};
1091
+ if (enableImageGrounding)
1092
+ searchTypes.imageSearch = {};
1093
+ const googleSearchTool = Object.keys(searchTypes).length > 0
1094
+ ? {
1095
+ googleSearch: {
1096
+ searchTypes,
1097
+ },
1098
+ }
1099
+ : undefined;
1100
+ const halfKTargetDimensions = (() => {
1101
+ if (!isGemini31FlashImageModel || imageSize !== '0.5K')
1102
+ return undefined;
1103
+ const ar = imageConfig.aspectRatio || '1:1';
1104
+ const exactDimensions = GEMINI_31_FLASH_IMAGE_05K_DIMENSIONS[ar];
1105
+ if (exactDimensions)
1106
+ return exactDimensions;
1107
+ const match = /^(\d+):(\d+)$/.exec(ar);
1108
+ if (!match)
1109
+ return { width: 512, height: 512 };
1110
+ const wRatio = Number(match[1]);
1111
+ const hRatio = Number(match[2]);
1112
+ if (!Number.isFinite(wRatio) || !Number.isFinite(hRatio) || wRatio <= 0 || hRatio <= 0) {
1113
+ return { width: 512, height: 512 };
1114
+ }
1115
+ if (wRatio >= hRatio) {
1116
+ return {
1117
+ width: Math.max(1, Math.round((wRatio / hRatio) * 512)),
1118
+ height: 512,
1119
+ };
1120
+ }
1121
+ return {
1122
+ width: 512,
1123
+ height: Math.max(1, Math.round((hRatio / wRatio) * 512)),
1124
+ };
1125
+ })();
932
1126
  const perImageCost = this.getImageCost(model, imageSize);
933
1127
  const makeOne = async () => {
934
1128
  const requestParams = {
@@ -989,13 +1183,15 @@ export class GeminiProvider extends BaseModelProvider {
989
1183
  config: {
990
1184
  responseModalities: [Modality.IMAGE, Modality.TEXT],
991
1185
  ...(Object.keys(imageConfig).length ? { imageConfig } : {}),
992
- ...(hasGoogleWebSearch ? { tools: [{ googleSearch: {} }] } : {}),
1186
+ ...(googleSearchTool ? { tools: [googleSearchTool] } : {}),
1187
+ ...(Object.keys(thinkingConfig).length ? { thinkingConfig: thinkingConfig } : {}),
993
1188
  },
994
1189
  };
995
1190
  const loggedRequestId = log_llm_request(agent.agent_id || 'default', 'gemini', model, requestParams, new Date(), requestId, agent.tags);
996
1191
  finalRequestId = loggedRequestId;
997
1192
  const response = await this.client.models.generateContentStream(requestParams);
998
1193
  const images = [];
1194
+ let metadata = { model };
999
1195
  let usageMetadata;
1000
1196
  for await (const chunk of response) {
1001
1197
  if (chunk.usageMetadata) {
@@ -1004,11 +1200,79 @@ export class GeminiProvider extends BaseModelProvider {
1004
1200
  if (!chunk.candidates)
1005
1201
  continue;
1006
1202
  for (const cand of chunk.candidates) {
1203
+ const groundingMetadata = cand.groundingMetadata;
1204
+ if (groundingMetadata) {
1205
+ const chunks = Array.isArray(groundingMetadata.groundingChunks)
1206
+ ? groundingMetadata.groundingChunks
1207
+ .map((c) => normalizeGroundingChunk(c))
1208
+ .filter((c) => !!c)
1209
+ : [];
1210
+ const searchEntryPoint = groundingMetadata.searchEntryPoint;
1211
+ const imageSearchQueries = Array.isArray(groundingMetadata.imageSearchQueries)
1212
+ ? groundingMetadata.imageSearchQueries
1213
+ .map((q) => (typeof q === 'string' ? q : q?.query || q?.text))
1214
+ .filter((q) => typeof q === 'string' && q.length > 0)
1215
+ : [];
1216
+ const webSearchQueries = Array.isArray(groundingMetadata.webSearchQueries)
1217
+ ? groundingMetadata.webSearchQueries
1218
+ .map((q) => (typeof q === 'string' ? q : q?.query || q?.text))
1219
+ .filter((q) => typeof q === 'string' && q.length > 0)
1220
+ : [];
1221
+ metadata = mergeImageMetadata(metadata, {
1222
+ model,
1223
+ grounding: {
1224
+ ...(imageSearchQueries.length ? { imageSearchQueries } : {}),
1225
+ ...(webSearchQueries.length ? { webSearchQueries } : {}),
1226
+ ...(chunks.length ? { groundingChunks: chunks } : {}),
1227
+ ...(Array.isArray(groundingMetadata.groundingSupports)
1228
+ ? { groundingSupports: groundingMetadata.groundingSupports }
1229
+ : {}),
1230
+ ...(searchEntryPoint ? { searchEntryPoint } : {}),
1231
+ },
1232
+ citations: chunks.filter(c => !!c.uri),
1233
+ });
1234
+ }
1007
1235
  const parts = cand.content?.parts || [];
1008
1236
  for (const part of parts) {
1237
+ const thoughtSignature = part.thoughtSignature || part.thought_signature;
1238
+ if (thoughtSignature) {
1239
+ metadata = mergeImageMetadata(metadata, {
1240
+ model,
1241
+ thought_signatures: [thoughtSignature],
1242
+ });
1243
+ }
1244
+ if (part.thought) {
1245
+ if (includeThoughts) {
1246
+ const thoughtPart = {
1247
+ thought: true,
1248
+ type: part.inlineData?.data ? 'image' : 'text',
1249
+ ...(part.text ? { text: part.text } : {}),
1250
+ ...(part.inlineData?.mimeType ? { mime_type: part.inlineData.mimeType } : {}),
1251
+ ...(part.inlineData?.data ? { data: part.inlineData.data } : {}),
1252
+ ...(thoughtSignature ? { thought_signature: thoughtSignature } : {}),
1253
+ };
1254
+ metadata = mergeImageMetadata(metadata, {
1255
+ model,
1256
+ thoughts: [thoughtPart],
1257
+ });
1258
+ }
1259
+ continue;
1260
+ }
1009
1261
  if (part.inlineData?.data) {
1010
1262
  const mime = part.inlineData.mimeType || 'image/png';
1011
- images.push(`data:${mime};base64,${part.inlineData.data}`);
1263
+ let imageData = `data:${mime};base64,${part.inlineData.data}`;
1264
+ if (halfKTargetDimensions) {
1265
+ try {
1266
+ imageData = await resizeDataUrl(imageData, halfKTargetDimensions.width, halfKTargetDimensions.height, {
1267
+ fit: 'cover',
1268
+ });
1269
+ }
1270
+ catch (resizeError) {
1271
+ console.warn('[Gemini] Failed to resize image to 0.5K, returning original image.');
1272
+ console.warn(truncateLargeValues(resizeError));
1273
+ }
1274
+ }
1275
+ images.push(imageData);
1012
1276
  }
1013
1277
  }
1014
1278
  }
@@ -1057,12 +1321,13 @@ export class GeminiProvider extends BaseModelProvider {
1057
1321
  });
1058
1322
  }
1059
1323
  }
1060
- return images;
1324
+ return { images, metadata };
1061
1325
  };
1062
1326
  const allImages = [];
1063
1327
  const calls = Math.max(1, numberOfImages);
1064
1328
  for (let i = 0; i < calls; i++) {
1065
- const imgs = await makeOne();
1329
+ const { images: imgs, metadata } = await makeOne();
1330
+ aggregateMetadata = mergeImageMetadata(aggregateMetadata, metadata);
1066
1331
  for (const img of imgs) {
1067
1332
  if (allImages.length < numberOfImages)
1068
1333
  allImages.push(img);
@@ -1070,6 +1335,12 @@ export class GeminiProvider extends BaseModelProvider {
1070
1335
  if (allImages.length >= numberOfImages)
1071
1336
  break;
1072
1337
  }
1338
+ if (aggregateMetadata.grounding?.groundingChunks) {
1339
+ aggregateMetadata.citations = dedupeGroundingChunks(aggregateMetadata.grounding.groundingChunks.filter(c => !!c.uri));
1340
+ }
1341
+ if (opts?.on_metadata) {
1342
+ opts.on_metadata(aggregateMetadata);
1343
+ }
1073
1344
  if (allImages.length === 0) {
1074
1345
  throw new Error(`No images returned from ${model} model`);
1075
1346
  }
@@ -1131,7 +1402,16 @@ export class GeminiProvider extends BaseModelProvider {
1131
1402
  }
1132
1403
  }
1133
1404
  getImageCost(model, imageSize) {
1134
- if (model.includes('gemini-2.5-flash-image-preview')) {
1405
+ if (model.includes('gemini-3.1-flash-image-preview')) {
1406
+ if (imageSize === '4K')
1407
+ return 0.151;
1408
+ if (imageSize === '2K')
1409
+ return 0.101;
1410
+ if (imageSize === '0.5K')
1411
+ return 0.045;
1412
+ return 0.067;
1413
+ }
1414
+ else if (model.includes('gemini-2.5-flash-image-preview')) {
1135
1415
  return 0.039;
1136
1416
  }
1137
1417
  else if (model.includes('gemini-3-pro-image-preview')) {