@just-every/ensemble 0.2.182 → 0.2.184

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { BaseModelProvider } from './base_provider.js';
4
4
  import { costTracker } from '../utils/cost_tracker.js';
5
5
  import { log_llm_error, log_llm_request, log_llm_response } from '../utils/llm_logger.js';
6
6
  import { isPaused } from '../utils/pause_controller.js';
7
- import { appendMessageWithImage, normalizeImageDataUrl, resizeAndTruncateForGemini } from '../utils/image_utils.js';
7
+ import { appendMessageWithImage, normalizeImageDataUrl, resizeAndTruncateForGemini, resizeDataUrl, } from '../utils/image_utils.js';
8
8
  import { hasEventHandler } from '../utils/event_controller.js';
9
9
  import { truncateLargeValues } from '../utils/truncate_utils.js';
10
10
  function convertParameterToGeminiFormat(param) {
@@ -225,6 +225,56 @@ function formatGroundingChunks(chunks) {
225
225
  .map((c, i) => `${i + 1}. ${c.web.title || 'Untitled'} – ${c.web.uri}`)
226
226
  .join('\n');
227
227
  }
228
/**
 * Flattens one raw grounding chunk into a compact citation record.
 *
 * Lookup order (first truthy value wins):
 *   - `uri`:       `chunk.web.uri`, then `chunk.image.uri`, then `chunk.uri`
 *   - `image_uri`: `chunk.image.imageUri`, then `chunk.image.image_uri`, then `chunk.image_uri`
 *   - `title`:     `chunk.web.title` only
 *
 * Keys whose value is falsy are left out of the result entirely.
 *
 * @param {*} chunk - Raw grounding chunk; anything that is not a non-null object is rejected.
 * @returns {{uri?: *, image_uri?: *, title?: *} | null} The flattened record, or `null`
 *   when `chunk` is not an object or carries neither a page URI nor an image URI.
 */
function normalizeGroundingChunk(chunk) {
    if (chunk === null || typeof chunk !== 'object') {
        return null;
    }
    const { web, image } = chunk;
    const pageUri = web?.uri || image?.uri || chunk.uri;
    const pictureUri = image?.imageUri || image?.image_uri || chunk.image_uri;
    if (!pageUri && !pictureUri) {
        // A title on its own is not enough to form a usable reference.
        return null;
    }
    const normalized = {};
    if (pageUri) {
        normalized.uri = pageUri;
    }
    if (pictureUri) {
        normalized.image_uri = pictureUri;
    }
    if (web?.title) {
        normalized.title = web.title;
    }
    return normalized;
}
244
/**
 * Removes duplicate grounding chunks while preserving first-seen order.
 *
 * Two chunks are duplicates when their `uri`, `image_uri` and `title` all match;
 * a missing or falsy field is treated as the empty string.
 *
 * @param {Iterable<{uri?: string, image_uri?: string, title?: string}> | null | undefined} chunks
 *   Chunks to de-duplicate. `null`/`undefined` is treated as an empty list.
 * @returns {Array<object>} A new array holding the first occurrence of each distinct chunk.
 *   Entries that are not non-null objects are skipped.
 */
function dedupeGroundingChunks(chunks) {
    const seen = new Set();
    const out = [];
    for (const chunk of chunks ?? []) {
        if (chunk === null || typeof chunk !== 'object') {
            continue;
        }
        // Encode the fields as a JSON tuple rather than joining with a delimiter:
        // page titles and URLs may contain '|', which made distinct chunks such as
        // {uri:'a', image_uri:'b|c'} and {uri:'a', image_uri:'b', title:'c|'} share a key.
        const key = JSON.stringify([
            String(chunk.uri || ''),
            String(chunk.image_uri || ''),
            String(chunk.title || ''),
        ]);
        if (seen.has(key)) {
            continue;
        }
        seen.add(key);
        out.push(chunk);
    }
    return out;
}
256
/**
 * Combines two image-metadata records into a new object; neither input is mutated.
 *
 * - `model`: `source.model` when truthy, otherwise `target.model`.
 * - `grounding`: only rebuilt when `source.grounding` is truthy. Scalar fields are
 *   shallow-merged (source wins); `imageSearchQueries` and `webSearchQueries` are
 *   unioned, `groundingChunks` are concatenated then de-duplicated, and
 *   `groundingSupports` are concatenated as-is.
 * - `thought_signatures`: union of both lists.
 * - `thoughts`: concatenation of both lists.
 * - `citations`: concatenation of both lists, de-duplicated.
 *
 * The three list fields above are always present on the result, even when empty.
 *
 * @param {object} target - Metadata accumulated so far.
 * @param {object} source - Newly observed metadata to fold in.
 * @returns {object} The merged metadata record.
 */
function mergeImageMetadata(target, source) {
    // Small local helpers so every list field is combined the same way.
    const concatOf = (first, second) => [...(first || []), ...(second || [])];
    const unionOf = (first, second) => Array.from(new Set(concatOf(first, second)));

    const merged = { ...target, model: source.model || target.model };

    const incoming = source.grounding;
    if (incoming) {
        const existing = target.grounding || {};
        merged.grounding = {
            ...existing,
            ...incoming,
            imageSearchQueries: unionOf(existing.imageSearchQueries, incoming.imageSearchQueries),
            webSearchQueries: unionOf(existing.webSearchQueries, incoming.webSearchQueries),
            groundingChunks: dedupeGroundingChunks(concatOf(existing.groundingChunks, incoming.groundingChunks)),
            groundingSupports: concatOf(existing.groundingSupports, incoming.groundingSupports),
        };
    }

    merged.thought_signatures = unionOf(target.thought_signatures, source.thought_signatures);
    merged.thoughts = concatOf(target.thoughts, source.thoughts);
    merged.citations = dedupeGroundingChunks(concatOf(target.citations, source.citations));
    return merged;
}
228
278
  async function addImagesToInput(input, images, source) {
229
279
  for (const [image_id, imageData] of Object.entries(images)) {
230
280
  const processedImageData = await resizeAndTruncateForGemini(imageData);
@@ -396,6 +446,54 @@ const THINKING_BUDGET_CONFIGS = {
396
446
  '-high': 12288,
397
447
  '-max': 24576,
398
448
  };
449
/**
 * Pixel dimensions for the "0.5K" image tier, keyed by aspect-ratio string ("W:H").
 *
 * Looked up by aspect ratio to obtain the width/height that a generated image is
 * resized to when the 0.5K tier is selected.
 *
 * Each entry is half of the corresponding 1K dimension (e.g. 16:9 is 1376x768 at 1K
 * and 688x384 here). The '21:9' height was previously 168, which is a ~4.7:1 strip
 * rather than 21:9; it is 336 (half of the 1K height 672).
 */
const GEMINI_31_FLASH_IMAGE_05K_DIMENSIONS = {
    '1:1': { width: 512, height: 512 },
    '1:4': { width: 256, height: 1024 },
    '1:8': { width: 192, height: 1536 },
    '2:3': { width: 424, height: 632 },
    '3:2': { width: 632, height: 424 },
    '3:4': { width: 448, height: 600 },
    '4:1': { width: 1024, height: 256 },
    '4:3': { width: 600, height: 448 },
    '4:5': { width: 464, height: 576 },
    '5:4': { width: 576, height: 464 },
    '8:1': { width: 1536, height: 192 },
    '9:16': { width: 384, height: 688 },
    '16:9': { width: 688, height: 384 },
    '21:9': { width: 792, height: 336 },
};
465
/**
 * Lookup from an exact "WIDTHxHEIGHT" size string to the aspect ratio (`ar`) and
 * image-size tier (`imageSize`) it corresponds to.
 *
 * The 2K and 4K dimensions are exactly 2x and 4x the 1K dimensions, so the table is
 * generated from the 1K base sizes instead of being spelled out row by row. Keys are
 * inserted tier by tier (1K, 2K, 4K), each in the base-size order below.
 */
const GEMINI_3_PRO_IMAGE_DIMENSION_PRESETS = (() => {
    // [aspect ratio, width at 1K, height at 1K]
    const baseDimensions = [
        ['1:1', 1024, 1024],
        ['2:3', 848, 1264],
        ['3:2', 1264, 848],
        ['3:4', 896, 1200],
        ['4:3', 1200, 896],
        ['4:5', 928, 1152],
        ['5:4', 1152, 928],
        ['9:16', 768, 1376],
        ['16:9', 1376, 768],
        ['21:9', 1584, 672],
    ];
    // [tier label, multiplier applied to the 1K dimensions]
    const tiers = [
        ['1K', 1],
        ['2K', 2],
        ['4K', 4],
    ];
    const presets = {};
    for (const [imageSize, scale] of tiers) {
        for (const [ar, width, height] of baseDimensions) {
            presets[`${width * scale}x${height * scale}`] = { ar, imageSize };
        }
    }
    return presets;
})();
399
497
  export class GeminiProvider extends BaseModelProvider {
400
498
  _client;
401
499
  apiKey;
@@ -893,14 +991,54 @@ export class GeminiProvider extends BaseModelProvider {
893
991
  if (hasOtherTools) {
894
992
  console.warn('[Gemini] Image generation ignores function tools; only google_web_search is supported.');
895
993
  }
994
+ const explicitWebGrounding = opts?.grounding?.web_search;
995
+ const explicitImageGrounding = opts?.grounding?.image_search;
996
+ const enableWebGrounding = explicitWebGrounding ?? hasGoogleWebSearch ?? false;
997
+ const isGemini31FlashImageModel = model.includes('gemini-3.1-flash-image-preview');
998
+ const enableImageGrounding = explicitImageGrounding === true && isGemini31FlashImageModel;
999
+ if (explicitImageGrounding && !isGemini31FlashImageModel) {
1000
+ console.warn('[Gemini] Image Search grounding is only available for gemini-3.1-flash-image-preview. Ignoring image_search=true.');
1001
+ }
1002
+ const thinkingOptions = opts?.thinking;
1003
+ const hasThinkingOptionsObject = thinkingOptions !== null &&
1004
+ typeof thinkingOptions === 'object' &&
1005
+ !Array.isArray(thinkingOptions);
1006
+ const includeThoughts = hasThinkingOptionsObject && thinkingOptions.include_thoughts === true;
1007
+ const requestedThinkingLevel = hasThinkingOptionsObject
1008
+ ? thinkingOptions.level
1009
+ : undefined;
1010
+ const thinkingLevel = requestedThinkingLevel === 'high' ? 'High' : requestedThinkingLevel ? 'Minimal' : undefined;
1011
+ if (requestedThinkingLevel && !isGemini31FlashImageModel) {
1012
+ console.warn('[Gemini] thinking.level is currently supported for gemini-3.1-flash-image-preview only. Ignoring thinking level.');
1013
+ }
1014
+ if (hasThinkingOptionsObject && 'include_thoughts' in thinkingOptions && !isGemini31FlashImageModel) {
1015
+ console.warn('[Gemini] thinking.include_thoughts is currently supported for gemini-3.1-flash-image-preview only. Ignoring include_thoughts.');
1016
+ }
896
1017
  let aspectRatio = '1:1';
897
1018
  if (opts?.size === 'landscape')
898
1019
  aspectRatio = '16:9';
899
1020
  else if (opts?.size === 'portrait')
900
1021
  aspectRatio = '9:16';
901
1022
  console.log(`[Gemini] Generating ${numberOfImages} image(s) with model ${model}, prompt: "${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}"`);
902
- if (model.includes('gemini-2.5-flash-image-preview') || model.includes('gemini-3-pro-image-preview')) {
1023
+ if (model.includes('gemini-2.5-flash-image-preview') ||
1024
+ model.includes('gemini-3.1-flash-image-preview') ||
1025
+ model.includes('gemini-3-pro-image-preview')) {
1026
+ let aggregateMetadata = { model };
903
1027
  const sizeMap = {
1028
+ '1:1': { ar: '1:1' },
1029
+ '1:4': { ar: '1:4' },
1030
+ '1:8': { ar: '1:8' },
1031
+ '2:3': { ar: '2:3' },
1032
+ '3:2': { ar: '3:2' },
1033
+ '3:4': { ar: '3:4' },
1034
+ '4:1': { ar: '4:1' },
1035
+ '4:3': { ar: '4:3' },
1036
+ '4:5': { ar: '4:5' },
1037
+ '5:4': { ar: '5:4' },
1038
+ '8:1': { ar: '8:1' },
1039
+ '9:16': { ar: '9:16' },
1040
+ '16:9': { ar: '16:9' },
1041
+ '21:9': { ar: '21:9' },
904
1042
  square: { ar: '1:1' },
905
1043
  landscape: { ar: '16:9' },
906
1044
  portrait: { ar: '9:16' },
@@ -915,20 +1053,85 @@ export class GeminiProvider extends BaseModelProvider {
915
1053
  '1024x1792': { ar: '9:16' },
916
1054
  };
917
1055
  const sm = opts?.size ? sizeMap[String(opts.size)] : undefined;
1056
+ const gemini3ProDimensionPreset = model.includes('gemini-3-pro-image-preview')
1057
+ ? GEMINI_3_PRO_IMAGE_DIMENSION_PRESETS[String(opts?.size)]
1058
+ : undefined;
918
1059
  const imageConfig = {};
919
1060
  if (sm?.ar)
920
1061
  imageConfig.aspectRatio = sm.ar;
1062
+ if (gemini3ProDimensionPreset?.ar)
1063
+ imageConfig.aspectRatio = gemini3ProDimensionPreset.ar;
921
1064
  const qualityKey = typeof opts?.quality === 'string' ? opts.quality.toLowerCase() : '';
922
- const imageSizeMap = {
923
- low: '1K',
924
- standard: '2K',
925
- medium: '2K',
926
- hd: '4K',
927
- high: '4K',
928
- };
929
- const imageSize = imageSizeMap[qualityKey];
930
- if (imageSize)
931
- imageConfig.imageSize = imageSize;
1065
+ const imageSizeMap = isGemini31FlashImageModel
1066
+ ? {
1067
+ low: '0.5K',
1068
+ standard: '1K',
1069
+ medium: '2K',
1070
+ hd: '4K',
1071
+ high: '4K',
1072
+ }
1073
+ : {
1074
+ low: '1K',
1075
+ standard: '2K',
1076
+ medium: '2K',
1077
+ hd: '4K',
1078
+ high: '4K',
1079
+ };
1080
+ let imageSize = imageSizeMap[qualityKey];
1081
+ if (gemini3ProDimensionPreset?.imageSize) {
1082
+ imageSize = gemini3ProDimensionPreset.imageSize;
1083
+ }
1084
+ if (isGemini31FlashImageModel && opts?.size === '512x512') {
1085
+ imageSize = '0.5K';
1086
+ }
1087
+ const requestImageSize = imageSize === '0.5K' ? undefined : imageSize;
1088
+ if (requestImageSize)
1089
+ imageConfig.imageSize = requestImageSize;
1090
+ const thinkingConfig = {};
1091
+ if (hasThinkingOptionsObject && 'include_thoughts' in thinkingOptions && isGemini31FlashImageModel) {
1092
+ thinkingConfig.includeThoughts = includeThoughts;
1093
+ }
1094
+ if (thinkingLevel && isGemini31FlashImageModel) {
1095
+ thinkingConfig.thinkingLevel = thinkingLevel;
1096
+ }
1097
+ const searchTypes = {};
1098
+ if (enableWebGrounding)
1099
+ searchTypes.webSearch = {};
1100
+ if (enableImageGrounding)
1101
+ searchTypes.imageSearch = {};
1102
+ const googleSearchTool = Object.keys(searchTypes).length > 0
1103
+ ? {
1104
+ googleSearch: {
1105
+ searchTypes,
1106
+ },
1107
+ }
1108
+ : undefined;
1109
+ const halfKTargetDimensions = (() => {
1110
+ if (!isGemini31FlashImageModel || imageSize !== '0.5K')
1111
+ return undefined;
1112
+ const ar = imageConfig.aspectRatio || '1:1';
1113
+ const exactDimensions = GEMINI_31_FLASH_IMAGE_05K_DIMENSIONS[ar];
1114
+ if (exactDimensions)
1115
+ return exactDimensions;
1116
+ const match = /^(\d+):(\d+)$/.exec(ar);
1117
+ if (!match)
1118
+ return { width: 512, height: 512 };
1119
+ const wRatio = Number(match[1]);
1120
+ const hRatio = Number(match[2]);
1121
+ if (!Number.isFinite(wRatio) || !Number.isFinite(hRatio) || wRatio <= 0 || hRatio <= 0) {
1122
+ return { width: 512, height: 512 };
1123
+ }
1124
+ if (wRatio >= hRatio) {
1125
+ return {
1126
+ width: Math.max(1, Math.round((wRatio / hRatio) * 512)),
1127
+ height: 512,
1128
+ };
1129
+ }
1130
+ return {
1131
+ width: 512,
1132
+ height: Math.max(1, Math.round((hRatio / wRatio) * 512)),
1133
+ };
1134
+ })();
932
1135
  const perImageCost = this.getImageCost(model, imageSize);
933
1136
  const makeOne = async () => {
934
1137
  const requestParams = {
@@ -989,13 +1192,15 @@ export class GeminiProvider extends BaseModelProvider {
989
1192
  config: {
990
1193
  responseModalities: [Modality.IMAGE, Modality.TEXT],
991
1194
  ...(Object.keys(imageConfig).length ? { imageConfig } : {}),
992
- ...(hasGoogleWebSearch ? { tools: [{ googleSearch: {} }] } : {}),
1195
+ ...(googleSearchTool ? { tools: [googleSearchTool] } : {}),
1196
+ ...(Object.keys(thinkingConfig).length ? { thinkingConfig: thinkingConfig } : {}),
993
1197
  },
994
1198
  };
995
1199
  const loggedRequestId = log_llm_request(agent.agent_id || 'default', 'gemini', model, requestParams, new Date(), requestId, agent.tags);
996
1200
  finalRequestId = loggedRequestId;
997
1201
  const response = await this.client.models.generateContentStream(requestParams);
998
1202
  const images = [];
1203
+ let metadata = { model };
999
1204
  let usageMetadata;
1000
1205
  for await (const chunk of response) {
1001
1206
  if (chunk.usageMetadata) {
@@ -1004,11 +1209,79 @@ export class GeminiProvider extends BaseModelProvider {
1004
1209
  if (!chunk.candidates)
1005
1210
  continue;
1006
1211
  for (const cand of chunk.candidates) {
1212
+ const groundingMetadata = cand.groundingMetadata;
1213
+ if (groundingMetadata) {
1214
+ const chunks = Array.isArray(groundingMetadata.groundingChunks)
1215
+ ? groundingMetadata.groundingChunks
1216
+ .map((c) => normalizeGroundingChunk(c))
1217
+ .filter((c) => !!c)
1218
+ : [];
1219
+ const searchEntryPoint = groundingMetadata.searchEntryPoint;
1220
+ const imageSearchQueries = Array.isArray(groundingMetadata.imageSearchQueries)
1221
+ ? groundingMetadata.imageSearchQueries
1222
+ .map((q) => (typeof q === 'string' ? q : q?.query || q?.text))
1223
+ .filter((q) => typeof q === 'string' && q.length > 0)
1224
+ : [];
1225
+ const webSearchQueries = Array.isArray(groundingMetadata.webSearchQueries)
1226
+ ? groundingMetadata.webSearchQueries
1227
+ .map((q) => (typeof q === 'string' ? q : q?.query || q?.text))
1228
+ .filter((q) => typeof q === 'string' && q.length > 0)
1229
+ : [];
1230
+ metadata = mergeImageMetadata(metadata, {
1231
+ model,
1232
+ grounding: {
1233
+ ...(imageSearchQueries.length ? { imageSearchQueries } : {}),
1234
+ ...(webSearchQueries.length ? { webSearchQueries } : {}),
1235
+ ...(chunks.length ? { groundingChunks: chunks } : {}),
1236
+ ...(Array.isArray(groundingMetadata.groundingSupports)
1237
+ ? { groundingSupports: groundingMetadata.groundingSupports }
1238
+ : {}),
1239
+ ...(searchEntryPoint ? { searchEntryPoint } : {}),
1240
+ },
1241
+ citations: chunks.filter(c => !!c.uri),
1242
+ });
1243
+ }
1007
1244
  const parts = cand.content?.parts || [];
1008
1245
  for (const part of parts) {
1246
+ const thoughtSignature = part.thoughtSignature || part.thought_signature;
1247
+ if (thoughtSignature) {
1248
+ metadata = mergeImageMetadata(metadata, {
1249
+ model,
1250
+ thought_signatures: [thoughtSignature],
1251
+ });
1252
+ }
1253
+ if (part.thought) {
1254
+ if (includeThoughts) {
1255
+ const thoughtPart = {
1256
+ thought: true,
1257
+ type: part.inlineData?.data ? 'image' : 'text',
1258
+ ...(part.text ? { text: part.text } : {}),
1259
+ ...(part.inlineData?.mimeType ? { mime_type: part.inlineData.mimeType } : {}),
1260
+ ...(part.inlineData?.data ? { data: part.inlineData.data } : {}),
1261
+ ...(thoughtSignature ? { thought_signature: thoughtSignature } : {}),
1262
+ };
1263
+ metadata = mergeImageMetadata(metadata, {
1264
+ model,
1265
+ thoughts: [thoughtPart],
1266
+ });
1267
+ }
1268
+ continue;
1269
+ }
1009
1270
  if (part.inlineData?.data) {
1010
1271
  const mime = part.inlineData.mimeType || 'image/png';
1011
- images.push(`data:${mime};base64,${part.inlineData.data}`);
1272
+ let imageData = `data:${mime};base64,${part.inlineData.data}`;
1273
+ if (halfKTargetDimensions) {
1274
+ try {
1275
+ imageData = await resizeDataUrl(imageData, halfKTargetDimensions.width, halfKTargetDimensions.height, {
1276
+ fit: 'cover',
1277
+ });
1278
+ }
1279
+ catch (resizeError) {
1280
+ console.warn('[Gemini] Failed to resize image to 0.5K, returning original image.');
1281
+ console.warn(truncateLargeValues(resizeError));
1282
+ }
1283
+ }
1284
+ images.push(imageData);
1012
1285
  }
1013
1286
  }
1014
1287
  }
@@ -1057,12 +1330,13 @@ export class GeminiProvider extends BaseModelProvider {
1057
1330
  });
1058
1331
  }
1059
1332
  }
1060
- return images;
1333
+ return { images, metadata };
1061
1334
  };
1062
1335
  const allImages = [];
1063
1336
  const calls = Math.max(1, numberOfImages);
1064
1337
  for (let i = 0; i < calls; i++) {
1065
- const imgs = await makeOne();
1338
+ const { images: imgs, metadata } = await makeOne();
1339
+ aggregateMetadata = mergeImageMetadata(aggregateMetadata, metadata);
1066
1340
  for (const img of imgs) {
1067
1341
  if (allImages.length < numberOfImages)
1068
1342
  allImages.push(img);
@@ -1070,6 +1344,12 @@ export class GeminiProvider extends BaseModelProvider {
1070
1344
  if (allImages.length >= numberOfImages)
1071
1345
  break;
1072
1346
  }
1347
+ if (aggregateMetadata.grounding?.groundingChunks) {
1348
+ aggregateMetadata.citations = dedupeGroundingChunks(aggregateMetadata.grounding.groundingChunks.filter(c => !!c.uri));
1349
+ }
1350
+ if (opts?.on_metadata) {
1351
+ opts.on_metadata(aggregateMetadata);
1352
+ }
1073
1353
  if (allImages.length === 0) {
1074
1354
  throw new Error(`No images returned from ${model} model`);
1075
1355
  }
@@ -1131,7 +1411,16 @@ export class GeminiProvider extends BaseModelProvider {
1131
1411
  }
1132
1412
  }
1133
1413
  getImageCost(model, imageSize) {
1134
- if (model.includes('gemini-2.5-flash-image-preview')) {
1414
+ if (model.includes('gemini-3.1-flash-image-preview')) {
1415
+ if (imageSize === '4K')
1416
+ return 0.151;
1417
+ if (imageSize === '2K')
1418
+ return 0.101;
1419
+ if (imageSize === '0.5K')
1420
+ return 0.045;
1421
+ return 0.067;
1422
+ }
1423
+ else if (model.includes('gemini-2.5-flash-image-preview')) {
1135
1424
  return 0.039;
1136
1425
  }
1137
1426
  else if (model.includes('gemini-3-pro-image-preview')) {