@just-every/ensemble 0.2.181 → 0.2.183

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { BaseModelProvider } from './base_provider.js';
4
4
  import { costTracker } from '../utils/cost_tracker.js';
5
5
  import { log_llm_error, log_llm_request, log_llm_response } from '../utils/llm_logger.js';
6
6
  import { isPaused } from '../utils/pause_controller.js';
7
- import { appendMessageWithImage, normalizeImageDataUrl, resizeAndTruncateForGemini } from '../utils/image_utils.js';
7
+ import { appendMessageWithImage, normalizeImageDataUrl, resizeAndTruncateForGemini, resizeDataUrl, } from '../utils/image_utils.js';
8
8
  import { hasEventHandler } from '../utils/event_controller.js';
9
9
  import { truncateLargeValues } from '../utils/truncate_utils.js';
10
10
  function convertParameterToGeminiFormat(param) {
@@ -225,6 +225,56 @@ function formatGroundingChunks(chunks) {
225
225
  .map((c, i) => `${i + 1}. ${c.web.title || 'Untitled'} – ${c.web.uri}`)
226
226
  .join('\n');
227
227
  }
228
/**
 * Flattens one raw grounding chunk into a compact citation record.
 *
 * The page URI is taken from `chunk.web.uri`, falling back to
 * `chunk.image.uri` and then `chunk.uri`. The image URI is taken from
 * `chunk.image.imageUri`, `chunk.image.image_uri` or `chunk.image_uri`,
 * in that order. The title is only ever read from `chunk.web.title`.
 *
 * @param {*} chunk - Raw grounding chunk; anything that is not a non-null
 *   object is rejected.
 * @returns {object|null} A record containing only the fields that were
 *   found (`uri`, `image_uri`, `title`), or `null` when the chunk carries
 *   neither a page URI nor an image URI.
 */
function normalizeGroundingChunk(chunk) {
    if (typeof chunk !== 'object' || chunk === null) {
        return null;
    }
    const { web, image } = chunk;
    const pageUri = web?.uri || image?.uri || chunk.uri;
    const pictureUri = image?.imageUri || image?.image_uri || chunk.image_uri;
    if (!pageUri && !pictureUri) {
        // A title alone is not a usable citation.
        return null;
    }
    const normalized = {};
    if (pageUri) {
        normalized.uri = pageUri;
    }
    if (pictureUri) {
        normalized.image_uri = pictureUri;
    }
    if (web?.title) {
        normalized.title = web.title;
    }
    return normalized;
}
244
/**
 * Removes duplicate grounding chunks while preserving first-seen order.
 *
 * Two chunks are duplicates when their `uri`, `image_uri` and `title` all
 * match; a missing or falsy field is treated as the empty string.
 *
 * @param {Array<object>} chunks - Normalized chunks (`uri`, `image_uri`,
 *   `title` fields). Nullish entries are skipped.
 * @returns {Array<object>} New array holding the first occurrence of each
 *   distinct chunk; the input array is not modified.
 */
function dedupeGroundingChunks(chunks) {
    const seen = new Set();
    const out = [];
    for (const chunk of chunks ?? []) {
        if (chunk == null) {
            continue;
        }
        // Encode the identity as a JSON tuple instead of joining with a
        // delimiter: URLs and page titles may themselves contain '|', which
        // would let two different chunks produce the same joined key.
        const key = JSON.stringify([chunk.uri || '', chunk.image_uri || '', chunk.title || '']);
        if (seen.has(key)) {
            continue;
        }
        seen.add(key);
        out.push(chunk);
    }
    return out;
}
256
/**
 * Produces a new image-metadata object combining `target` with `source`.
 *
 * - `model`: `source.model` wins when truthy, otherwise `target.model`.
 * - `grounding`: only rebuilt when `source.grounding` is set. Scalar fields
 *   from the source override the target; query lists are unioned, grounding
 *   chunks are de-duplicated, and grounding supports are concatenated as-is.
 * - `thought_signatures` (unique), `thoughts` (concatenated) and `citations`
 *   (de-duplicated) are always present on the result, even when empty.
 *
 * Neither argument is mutated.
 *
 * @param {object} target - Metadata accumulated so far.
 * @param {object} source - Newly observed metadata to fold in.
 * @returns {object} The merged metadata.
 */
function mergeImageMetadata(target, source) {
    const joinLists = (first, second) => [...(first || []), ...(second || [])];
    const joinUnique = (first, second) => [...new Set(joinLists(first, second))];
    const merged = { ...target, model: source.model || target.model };
    const incoming = source.grounding;
    if (incoming) {
        const existing = target.grounding || {};
        merged.grounding = {
            ...existing,
            ...incoming,
            imageSearchQueries: joinUnique(existing.imageSearchQueries, incoming.imageSearchQueries),
            webSearchQueries: joinUnique(existing.webSearchQueries, incoming.webSearchQueries),
            groundingChunks: dedupeGroundingChunks(joinLists(existing.groundingChunks, incoming.groundingChunks)),
            // NOTE(review): supports are appended without de-duplication; if
            // their entries reference chunks by index, those indices are not
            // remapped after the chunk list is merged — confirm with callers.
            groundingSupports: joinLists(existing.groundingSupports, incoming.groundingSupports),
        };
    }
    merged.thought_signatures = joinUnique(target.thought_signatures, source.thought_signatures);
    merged.thoughts = joinLists(target.thoughts, source.thoughts);
    merged.citations = dedupeGroundingChunks(joinLists(target.citations, source.citations));
    return merged;
}
228
278
  async function addImagesToInput(input, images, source) {
229
279
  for (const [image_id, imageData] of Object.entries(images)) {
230
280
  const processedImageData = await resizeAndTruncateForGemini(imageData);
@@ -396,6 +446,54 @@ const THINKING_BUDGET_CONFIGS = {
396
446
  '-high': 12288,
397
447
  '-max': 24576,
398
448
  };
449
/**
 * Target pixel dimensions for "0.5K" output, keyed by aspect ratio.
 *
 * Looked up when a 0.5K image is requested from the
 * gemini-3.1-flash-image-preview model so the returned image can be resized
 * to the matching width/height.
 *
 * For every ratio that also has a 1K preset, the entry is exactly half of
 * that preset (e.g. 16:9 -> 1376x768 / 2 = 688x384). The 21:9 entry follows
 * the same rule: 1584x672 / 2 = 792x336. The previous height of 168 gave a
 * ~4.7:1 canvas, which would crop 21:9 images into a thin strip.
 */
const GEMINI_31_FLASH_IMAGE_05K_DIMENSIONS = {
    '1:1': { width: 512, height: 512 },
    '1:4': { width: 256, height: 1024 },
    '1:8': { width: 192, height: 1536 },
    '2:3': { width: 424, height: 632 },
    '3:2': { width: 632, height: 424 },
    '3:4': { width: 448, height: 600 },
    '4:1': { width: 1024, height: 256 },
    '4:3': { width: 600, height: 448 },
    '4:5': { width: 464, height: 576 },
    '5:4': { width: 576, height: 464 },
    '8:1': { width: 1536, height: 192 },
    '9:16': { width: 384, height: 688 },
    '16:9': { width: 688, height: 384 },
    '21:9': { width: 792, height: 336 },
};
465
/**
 * Maps an exact "WIDTHxHEIGHT" size string to the aspect ratio (`ar`) and
 * image-size tier (`imageSize`) used for gemini-3-pro-image-preview requests.
 *
 * The table is derived from the ten 1K base sizes: the 2K and 4K tiers are
 * the same sizes scaled by 2 and 4. Entries are emitted tier by tier (all
 * 1K, then 2K, then 4K), in the ratio order listed below.
 */
const GEMINI_3_PRO_IMAGE_DIMENSION_PRESETS = (() => {
    const baseSizes = [
        ['1:1', 1024, 1024],
        ['2:3', 848, 1264],
        ['3:2', 1264, 848],
        ['3:4', 896, 1200],
        ['4:3', 1200, 896],
        ['4:5', 928, 1152],
        ['5:4', 1152, 928],
        ['9:16', 768, 1376],
        ['16:9', 1376, 768],
        ['21:9', 1584, 672],
    ];
    const tiers = [
        ['1K', 1],
        ['2K', 2],
        ['4K', 4],
    ];
    const presets = {};
    for (const [imageSize, scale] of tiers) {
        for (const [ar, width, height] of baseSizes) {
            presets[`${width * scale}x${height * scale}`] = { ar, imageSize };
        }
    }
    return presets;
})();
399
497
  export class GeminiProvider extends BaseModelProvider {
400
498
  _client;
401
499
  apiKey;
@@ -893,55 +991,139 @@ export class GeminiProvider extends BaseModelProvider {
893
991
  if (hasOtherTools) {
894
992
  console.warn('[Gemini] Image generation ignores function tools; only google_web_search is supported.');
895
993
  }
994
+ const explicitWebGrounding = opts?.grounding?.web_search;
995
+ const explicitImageGrounding = opts?.grounding?.image_search;
996
+ const enableWebGrounding = explicitWebGrounding ?? hasGoogleWebSearch ?? false;
997
+ const isGemini31FlashImageModel = model.includes('gemini-3.1-flash-image-preview');
998
+ const enableImageGrounding = explicitImageGrounding === true && isGemini31FlashImageModel;
999
+ if (explicitImageGrounding && !isGemini31FlashImageModel) {
1000
+ console.warn('[Gemini] Image Search grounding is only available for gemini-3.1-flash-image-preview. Ignoring image_search=true.');
1001
+ }
1002
+ const includeThoughts = opts?.thinking?.include_thoughts === true;
1003
+ const requestedThinkingLevel = opts?.thinking?.level;
1004
+ const thinkingLevel = requestedThinkingLevel === 'high' ? 'High' : requestedThinkingLevel ? 'Minimal' : undefined;
1005
+ if (requestedThinkingLevel && !isGemini31FlashImageModel) {
1006
+ console.warn('[Gemini] thinking.level is currently supported for gemini-3.1-flash-image-preview only. Ignoring thinking level.');
1007
+ }
896
1008
  let aspectRatio = '1:1';
897
1009
  if (opts?.size === 'landscape')
898
1010
  aspectRatio = '16:9';
899
1011
  else if (opts?.size === 'portrait')
900
1012
  aspectRatio = '9:16';
901
1013
  console.log(`[Gemini] Generating ${numberOfImages} image(s) with model ${model}, prompt: "${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}"`);
902
- if (model.includes('gemini-2.5-flash-image-preview') || model.includes('gemini-3-pro-image-preview')) {
903
- const constraints = [];
1014
+ if (model.includes('gemini-2.5-flash-image-preview') ||
1015
+ model.includes('gemini-3.1-flash-image-preview') ||
1016
+ model.includes('gemini-3-pro-image-preview')) {
1017
+ let aggregateMetadata = { model };
904
1018
  const sizeMap = {
905
- square: { ar: '1:1', px: '1024x1024' },
1019
+ '1:1': { ar: '1:1' },
1020
+ '1:4': { ar: '1:4' },
1021
+ '1:8': { ar: '1:8' },
1022
+ '2:3': { ar: '2:3' },
1023
+ '3:2': { ar: '3:2' },
1024
+ '3:4': { ar: '3:4' },
1025
+ '4:1': { ar: '4:1' },
1026
+ '4:3': { ar: '4:3' },
1027
+ '4:5': { ar: '4:5' },
1028
+ '5:4': { ar: '5:4' },
1029
+ '8:1': { ar: '8:1' },
1030
+ '9:16': { ar: '9:16' },
1031
+ '16:9': { ar: '16:9' },
1032
+ '21:9': { ar: '21:9' },
1033
+ square: { ar: '1:1' },
906
1034
  landscape: { ar: '16:9' },
907
1035
  portrait: { ar: '9:16' },
908
- '256x256': { ar: '1:1', px: '256x256' },
909
- '512x512': { ar: '1:1', px: '512x512' },
910
- '1024x1024': { ar: '1:1', px: '1024x1024' },
911
- '1536x1024': { ar: '3:2', px: '1536x1024' },
912
- '1024x1536': { ar: '2:3', px: '1024x1536' },
913
- '1696x2528': { ar: '2:3', px: '1696x2528' },
914
- '2048x2048': { ar: '1:1', px: '2048x2048' },
915
- '1792x1024': { ar: '16:9', px: '1792x1024' },
916
- '1024x1792': { ar: '9:16', px: '1024x1792' },
1036
+ '256x256': { ar: '1:1' },
1037
+ '512x512': { ar: '1:1' },
1038
+ '1024x1024': { ar: '1:1' },
1039
+ '1536x1024': { ar: '3:2' },
1040
+ '1024x1536': { ar: '2:3' },
1041
+ '1696x2528': { ar: '2:3' },
1042
+ '2048x2048': { ar: '1:1' },
1043
+ '1792x1024': { ar: '16:9' },
1044
+ '1024x1792': { ar: '9:16' },
917
1045
  };
918
1046
  const sm = opts?.size ? sizeMap[String(opts.size)] : undefined;
919
- if (sm?.ar)
920
- constraints.push(`Aspect ratio: ${sm.ar}.`);
921
- if (sm?.px)
922
- constraints.push(`Target size: ${sm.px} pixels (approximate).`);
923
- if (opts?.style)
924
- constraints.push(`Style: ${opts.style}.`);
925
- if (opts?.background)
926
- constraints.push(`Background: ${opts.background} (use transparency if supported).`);
1047
+ const gemini3ProDimensionPreset = model.includes('gemini-3-pro-image-preview')
1048
+ ? GEMINI_3_PRO_IMAGE_DIMENSION_PRESETS[String(opts?.size)]
1049
+ : undefined;
927
1050
  const imageConfig = {};
928
1051
  if (sm?.ar)
929
1052
  imageConfig.aspectRatio = sm.ar;
1053
+ if (gemini3ProDimensionPreset?.ar)
1054
+ imageConfig.aspectRatio = gemini3ProDimensionPreset.ar;
930
1055
  const qualityKey = typeof opts?.quality === 'string' ? opts.quality.toLowerCase() : '';
931
- const imageSizeMap = {
932
- low: '1K',
933
- standard: '2K',
934
- medium: '2K',
935
- hd: '4K',
936
- high: '4K',
937
- };
938
- const imageSize = imageSizeMap[qualityKey];
939
- if (imageSize)
940
- imageConfig.imageSize = imageSize;
1056
+ const imageSizeMap = isGemini31FlashImageModel
1057
+ ? {
1058
+ low: '0.5K',
1059
+ standard: '1K',
1060
+ medium: '2K',
1061
+ hd: '4K',
1062
+ high: '4K',
1063
+ }
1064
+ : {
1065
+ low: '1K',
1066
+ standard: '2K',
1067
+ medium: '2K',
1068
+ hd: '4K',
1069
+ high: '4K',
1070
+ };
1071
+ let imageSize = imageSizeMap[qualityKey];
1072
+ if (gemini3ProDimensionPreset?.imageSize) {
1073
+ imageSize = gemini3ProDimensionPreset.imageSize;
1074
+ }
1075
+ if (isGemini31FlashImageModel && opts?.size === '512x512') {
1076
+ imageSize = '0.5K';
1077
+ }
1078
+ const requestImageSize = imageSize === '0.5K' ? undefined : imageSize;
1079
+ if (requestImageSize)
1080
+ imageConfig.imageSize = requestImageSize;
1081
+ const thinkingConfig = {};
1082
+ if (opts?.thinking && 'include_thoughts' in opts.thinking) {
1083
+ thinkingConfig.includeThoughts = includeThoughts;
1084
+ }
1085
+ if (thinkingLevel && isGemini31FlashImageModel) {
1086
+ thinkingConfig.thinkingLevel = thinkingLevel;
1087
+ }
1088
+ const searchTypes = {};
1089
+ if (enableWebGrounding)
1090
+ searchTypes.webSearch = {};
1091
+ if (enableImageGrounding)
1092
+ searchTypes.imageSearch = {};
1093
+ const googleSearchTool = Object.keys(searchTypes).length > 0
1094
+ ? {
1095
+ googleSearch: {
1096
+ searchTypes,
1097
+ },
1098
+ }
1099
+ : undefined;
1100
+ const halfKTargetDimensions = (() => {
1101
+ if (!isGemini31FlashImageModel || imageSize !== '0.5K')
1102
+ return undefined;
1103
+ const ar = imageConfig.aspectRatio || '1:1';
1104
+ const exactDimensions = GEMINI_31_FLASH_IMAGE_05K_DIMENSIONS[ar];
1105
+ if (exactDimensions)
1106
+ return exactDimensions;
1107
+ const match = /^(\d+):(\d+)$/.exec(ar);
1108
+ if (!match)
1109
+ return { width: 512, height: 512 };
1110
+ const wRatio = Number(match[1]);
1111
+ const hRatio = Number(match[2]);
1112
+ if (!Number.isFinite(wRatio) || !Number.isFinite(hRatio) || wRatio <= 0 || hRatio <= 0) {
1113
+ return { width: 512, height: 512 };
1114
+ }
1115
+ if (wRatio >= hRatio) {
1116
+ return {
1117
+ width: Math.max(1, Math.round((wRatio / hRatio) * 512)),
1118
+ height: 512,
1119
+ };
1120
+ }
1121
+ return {
1122
+ width: 512,
1123
+ height: Math.max(1, Math.round((hRatio / wRatio) * 512)),
1124
+ };
1125
+ })();
941
1126
  const perImageCost = this.getImageCost(model, imageSize);
942
- const constraintText = constraints.length
943
- ? `\n\nImage constraints (please prioritize):\n- ${constraints.join('\n- ')}`
944
- : '';
945
1127
  const makeOne = async () => {
946
1128
  const requestParams = {
947
1129
  model,
@@ -994,20 +1176,22 @@ export class GeminiProvider extends BaseModelProvider {
994
1176
  })(),
995
1177
  ].filter(Boolean)
996
1178
  : []),
997
- { text: `${prompt}${constraintText}` },
1179
+ { text: prompt },
998
1180
  ],
999
1181
  },
1000
1182
  ],
1001
1183
  config: {
1002
1184
  responseModalities: [Modality.IMAGE, Modality.TEXT],
1003
1185
  ...(Object.keys(imageConfig).length ? { imageConfig } : {}),
1004
- ...(hasGoogleWebSearch ? { tools: [{ googleSearch: {} }] } : {}),
1186
+ ...(googleSearchTool ? { tools: [googleSearchTool] } : {}),
1187
+ ...(Object.keys(thinkingConfig).length ? { thinkingConfig: thinkingConfig } : {}),
1005
1188
  },
1006
1189
  };
1007
1190
  const loggedRequestId = log_llm_request(agent.agent_id || 'default', 'gemini', model, requestParams, new Date(), requestId, agent.tags);
1008
1191
  finalRequestId = loggedRequestId;
1009
1192
  const response = await this.client.models.generateContentStream(requestParams);
1010
1193
  const images = [];
1194
+ let metadata = { model };
1011
1195
  let usageMetadata;
1012
1196
  for await (const chunk of response) {
1013
1197
  if (chunk.usageMetadata) {
@@ -1016,11 +1200,79 @@ export class GeminiProvider extends BaseModelProvider {
1016
1200
  if (!chunk.candidates)
1017
1201
  continue;
1018
1202
  for (const cand of chunk.candidates) {
1203
+ const groundingMetadata = cand.groundingMetadata;
1204
+ if (groundingMetadata) {
1205
+ const chunks = Array.isArray(groundingMetadata.groundingChunks)
1206
+ ? groundingMetadata.groundingChunks
1207
+ .map((c) => normalizeGroundingChunk(c))
1208
+ .filter((c) => !!c)
1209
+ : [];
1210
+ const searchEntryPoint = groundingMetadata.searchEntryPoint;
1211
+ const imageSearchQueries = Array.isArray(groundingMetadata.imageSearchQueries)
1212
+ ? groundingMetadata.imageSearchQueries
1213
+ .map((q) => (typeof q === 'string' ? q : q?.query || q?.text))
1214
+ .filter((q) => typeof q === 'string' && q.length > 0)
1215
+ : [];
1216
+ const webSearchQueries = Array.isArray(groundingMetadata.webSearchQueries)
1217
+ ? groundingMetadata.webSearchQueries
1218
+ .map((q) => (typeof q === 'string' ? q : q?.query || q?.text))
1219
+ .filter((q) => typeof q === 'string' && q.length > 0)
1220
+ : [];
1221
+ metadata = mergeImageMetadata(metadata, {
1222
+ model,
1223
+ grounding: {
1224
+ ...(imageSearchQueries.length ? { imageSearchQueries } : {}),
1225
+ ...(webSearchQueries.length ? { webSearchQueries } : {}),
1226
+ ...(chunks.length ? { groundingChunks: chunks } : {}),
1227
+ ...(Array.isArray(groundingMetadata.groundingSupports)
1228
+ ? { groundingSupports: groundingMetadata.groundingSupports }
1229
+ : {}),
1230
+ ...(searchEntryPoint ? { searchEntryPoint } : {}),
1231
+ },
1232
+ citations: chunks.filter(c => !!c.uri),
1233
+ });
1234
+ }
1019
1235
  const parts = cand.content?.parts || [];
1020
1236
  for (const part of parts) {
1237
+ const thoughtSignature = part.thoughtSignature || part.thought_signature;
1238
+ if (thoughtSignature) {
1239
+ metadata = mergeImageMetadata(metadata, {
1240
+ model,
1241
+ thought_signatures: [thoughtSignature],
1242
+ });
1243
+ }
1244
+ if (part.thought) {
1245
+ if (includeThoughts) {
1246
+ const thoughtPart = {
1247
+ thought: true,
1248
+ type: part.inlineData?.data ? 'image' : 'text',
1249
+ ...(part.text ? { text: part.text } : {}),
1250
+ ...(part.inlineData?.mimeType ? { mime_type: part.inlineData.mimeType } : {}),
1251
+ ...(part.inlineData?.data ? { data: part.inlineData.data } : {}),
1252
+ ...(thoughtSignature ? { thought_signature: thoughtSignature } : {}),
1253
+ };
1254
+ metadata = mergeImageMetadata(metadata, {
1255
+ model,
1256
+ thoughts: [thoughtPart],
1257
+ });
1258
+ }
1259
+ continue;
1260
+ }
1021
1261
  if (part.inlineData?.data) {
1022
1262
  const mime = part.inlineData.mimeType || 'image/png';
1023
- images.push(`data:${mime};base64,${part.inlineData.data}`);
1263
+ let imageData = `data:${mime};base64,${part.inlineData.data}`;
1264
+ if (halfKTargetDimensions) {
1265
+ try {
1266
+ imageData = await resizeDataUrl(imageData, halfKTargetDimensions.width, halfKTargetDimensions.height, {
1267
+ fit: 'cover',
1268
+ });
1269
+ }
1270
+ catch (resizeError) {
1271
+ console.warn('[Gemini] Failed to resize image to 0.5K, returning original image.');
1272
+ console.warn(truncateLargeValues(resizeError));
1273
+ }
1274
+ }
1275
+ images.push(imageData);
1024
1276
  }
1025
1277
  }
1026
1278
  }
@@ -1069,12 +1321,13 @@ export class GeminiProvider extends BaseModelProvider {
1069
1321
  });
1070
1322
  }
1071
1323
  }
1072
- return images;
1324
+ return { images, metadata };
1073
1325
  };
1074
1326
  const allImages = [];
1075
1327
  const calls = Math.max(1, numberOfImages);
1076
1328
  for (let i = 0; i < calls; i++) {
1077
- const imgs = await makeOne();
1329
+ const { images: imgs, metadata } = await makeOne();
1330
+ aggregateMetadata = mergeImageMetadata(aggregateMetadata, metadata);
1078
1331
  for (const img of imgs) {
1079
1332
  if (allImages.length < numberOfImages)
1080
1333
  allImages.push(img);
@@ -1082,6 +1335,12 @@ export class GeminiProvider extends BaseModelProvider {
1082
1335
  if (allImages.length >= numberOfImages)
1083
1336
  break;
1084
1337
  }
1338
+ if (aggregateMetadata.grounding?.groundingChunks) {
1339
+ aggregateMetadata.citations = dedupeGroundingChunks(aggregateMetadata.grounding.groundingChunks.filter(c => !!c.uri));
1340
+ }
1341
+ if (opts?.on_metadata) {
1342
+ opts.on_metadata(aggregateMetadata);
1343
+ }
1085
1344
  if (allImages.length === 0) {
1086
1345
  throw new Error(`No images returned from ${model} model`);
1087
1346
  }
@@ -1143,7 +1402,16 @@ export class GeminiProvider extends BaseModelProvider {
1143
1402
  }
1144
1403
  }
1145
1404
  getImageCost(model, imageSize) {
1146
- if (model.includes('gemini-2.5-flash-image-preview')) {
1405
+ if (model.includes('gemini-3.1-flash-image-preview')) {
1406
+ if (imageSize === '4K')
1407
+ return 0.151;
1408
+ if (imageSize === '2K')
1409
+ return 0.101;
1410
+ if (imageSize === '0.5K')
1411
+ return 0.045;
1412
+ return 0.067;
1413
+ }
1414
+ else if (model.includes('gemini-2.5-flash-image-preview')) {
1147
1415
  return 0.039;
1148
1416
  }
1149
1417
  else if (model.includes('gemini-3-pro-image-preview')) {