@ljoukov/llm 7.0.12 → 7.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,6 +15,7 @@ import {
15
15
  } from "@google/genai";
16
16
  import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
17
17
  import { z as z3 } from "zod";
18
+ import { toFile } from "openai";
18
19
 
19
20
  // src/utils/asyncQueue.ts
20
21
  function createAsyncQueue() {
@@ -216,6 +217,85 @@ var OPENAI_MODEL_IDS = [
216
217
  function isOpenAiModelId(value) {
217
218
  return OPENAI_MODEL_IDS.includes(value);
218
219
  }
220
/** Model identifiers that isOpenAiImageModelId recognizes. */
var OPENAI_IMAGE_MODEL_IDS = ["gpt-image-2"];

/**
 * Reports whether `value` appears in OPENAI_IMAGE_MODEL_IDS.
 *
 * @param {unknown} value - Candidate model identifier.
 * @returns {boolean} True when the identifier is listed.
 */
function isOpenAiImageModelId(value) {
  const imageModelIds = OPENAI_IMAGE_MODEL_IDS;
  if (imageModelIds.includes(value)) {
    return true;
  }
  return false;
}
224
/** Model identifiers that isChatGptImageModelId recognizes. */
var CHATGPT_IMAGE_MODEL_IDS = ["chatgpt-gpt-image-2"];

/**
 * Reports whether `value` appears in CHATGPT_IMAGE_MODEL_IDS.
 *
 * @param {unknown} value - Candidate model identifier.
 * @returns {boolean} True when the identifier is listed.
 */
function isChatGptImageModelId(value) {
  const imageModelIds = CHATGPT_IMAGE_MODEL_IDS;
  if (imageModelIds.includes(value)) {
    return true;
  }
  return false;
}
228
/** Preset gpt-image-2 sizes, written as WIDTHxHEIGHT pixel strings. */
var OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS = [
  "1024x1024",
  "1536x1024",
  "1024x1536",
  "2048x2048",
  "2048x1152",
  "3840x2160",
  "2160x3840"
];

/** Size value that validateOpenAiGptImage2Resolution accepts without dimension checks. */
var OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION = "auto";

/** The preset sizes followed by the "auto" value. */
var OPENAI_GPT_IMAGE_2_RESOLUTIONS = [
  ...OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS,
  OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION
];

/**
 * Numeric limits applied by validateOpenAiGptImage2Resolution.
 * `experimentalTotalPixelsThreshold` is not read by the validator.
 */
var OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS = {
  maxEdgePixels: 3840,
  edgeMultiplePixels: 16,
  maxLongToShortEdgeRatio: 3,
  minTotalPixels: 655360,
  maxTotalPixels: 8294400,
  experimentalTotalPixelsThreshold: 3686400
};

/**
 * Checks a requested image size against OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS.
 *
 * Checks run in this order and the first failure is reported: format, safe
 * integers, per-edge maximum, edge multiple, total pixel range, aspect ratio.
 *
 * @param {unknown} value - "auto" or a WIDTHxHEIGHT pixel string.
 * @returns {{ valid: true } | { valid: false, reason: string }} Validation outcome.
 */
function validateOpenAiGptImage2Resolution(value) {
  if (value === OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION) {
    return { valid: true };
  }
  // RegExp.prototype.exec coerces its argument to a string, so without this
  // guard a value such as ["1024x1024"] would be reported as valid.
  const match = typeof value === "string" ? /^([1-9]\d*)x([1-9]\d*)$/.exec(value) : null;
  if (!match) {
    return { valid: false, reason: 'Expected "auto" or a WIDTHxHEIGHT pixel string.' };
  }
  const width = Number(match[1]);
  const height = Number(match[2]);
  // Very long digit strings parse to values outside the safe integer range.
  if (!Number.isSafeInteger(width) || !Number.isSafeInteger(height)) {
    return { valid: false, reason: "Width and height must be safe integer pixel counts." };
  }
  const constraints = OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS;
  if (width > constraints.maxEdgePixels || height > constraints.maxEdgePixels) {
    return {
      valid: false,
      reason: `Width and height must each be at most ${constraints.maxEdgePixels}px.`
    };
  }
  if (width % constraints.edgeMultiplePixels !== 0 || height % constraints.edgeMultiplePixels !== 0) {
    return {
      valid: false,
      reason: `Width and height must each be multiples of ${constraints.edgeMultiplePixels}px.`
    };
  }
  const totalPixels = width * height;
  if (totalPixels < constraints.minTotalPixels || totalPixels > constraints.maxTotalPixels) {
    return {
      valid: false,
      reason: `Total pixels must be between ${constraints.minTotalPixels} and ${constraints.maxTotalPixels}.`
    };
  }
  const longEdge = Math.max(width, height);
  const shortEdge = Math.min(width, height);
  if (longEdge / shortEdge > constraints.maxLongToShortEdgeRatio) {
    return {
      valid: false,
      reason: `The long edge must be at most ${constraints.maxLongToShortEdgeRatio}:1 relative to the short edge.`
    };
  }
  return { valid: true };
}
293
// Exported lists of gpt-image-2 option values.
var OPENAI_GPT_IMAGE_2_QUALITY_LEVELS = [
  "low",
  "medium",
  "high",
  "auto"
];
var OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS = [
  "png",
  "jpeg",
  "webp"
];
var OPENAI_GPT_IMAGE_2_BACKGROUNDS = [
  "opaque",
  "auto"
];
var OPENAI_GPT_IMAGE_2_MODERATION_LEVELS = [
  "low",
  "auto"
];
// 0 through 3 inclusive.
var OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS = Array.from({ length: 4 }, (_, index) => index);
// 1 through 10 inclusive.
var OPENAI_GPT_IMAGE_2_NUM_IMAGES = Array.from({ length: 10 }, (_, index) => index + 1);
219
299
  var CHATGPT_MODEL_IDS = [
220
300
  "chatgpt-gpt-5.5",
221
301
  "chatgpt-gpt-5.5-fast",
@@ -227,6 +307,7 @@ var CHATGPT_MODEL_IDS = [
227
307
  var FAST_MODEL_SUFFIX = "-fast";
228
308
  var OPENAI_PRIORITY_MODEL_IDS = ["gpt-5.5-fast"];
229
309
  var CHATGPT_PRIORITY_MODEL_IDS = ["chatgpt-gpt-5.5-fast", "chatgpt-gpt-5.4-fast"];
310
+ var CHATGPT_IMAGE_GENERATION_PROVIDER_MODEL = "gpt-5.4";
230
311
  var EXPERIMENTAL_CHATGPT_MODEL_PREFIX = "experimental-chatgpt-";
231
312
  function isExperimentalChatGptModelId(value) {
232
313
  return value.startsWith(EXPERIMENTAL_CHATGPT_MODEL_PREFIX) && value.length > EXPERIMENTAL_CHATGPT_MODEL_PREFIX.length;
@@ -253,6 +334,9 @@ function resolveChatGptProviderModel(model) {
253
334
  const providerModel = stripChatGptPrefix(model);
254
335
  return CHATGPT_PRIORITY_MODEL_IDS.includes(model) ? stripFastSuffix(providerModel) : providerModel;
255
336
  }
337
/**
 * Returns the provider model used for ChatGPT image generation.
 *
 * @param {string} _model - Requested model identifier; not read, the result is
 *   always CHATGPT_IMAGE_GENERATION_PROVIDER_MODEL.
 * @returns {string} The fixed provider model identifier.
 */
function resolveChatGptImageProviderModel(_model) {
  const providerModel = CHATGPT_IMAGE_GENERATION_PROVIDER_MODEL;
  return providerModel;
}
256
340
  function resolveChatGptServiceTier(model) {
257
341
  return CHATGPT_PRIORITY_MODEL_IDS.includes(model) ? "priority" : void 0;
258
342
  }
@@ -298,6 +382,27 @@ var OPENAI_GPT_54_NANO_PRICING = {
298
382
  cachedRate: 5e-3 / 1e6,
299
383
  outputRate: 0.4 / 1e6
300
384
  };
385
/**
 * Per-image price table for gpt-image-2, keyed by quality and then by size.
 * `defaultQuality` and `defaultResolution` name the entry used as the fallback
 * when a request's quality or size has no entry of its own.
 *
 * Each row below is [quality, square price, non-square price]; both
 * non-square sizes share one price.
 * NOTE(review): the non-square prices are lower than the square ones — confirm
 * against the published pricing.
 */
var OPENAI_GPT_IMAGE_2_PRICING = {
  defaultQuality: "medium",
  defaultResolution: "1024x1024",
  imagePrices: Object.fromEntries(
    [
      ["low", 6e-3, 5e-3],
      ["medium", 0.053, 0.041],
      ["high", 0.211, 0.165]
    ].map(([quality, squarePrice, nonSquarePrice]) => [
      quality,
      {
        "1024x1024": squarePrice,
        "1024x1536": nonSquarePrice,
        "1536x1024": nonSquarePrice
      }
    ])
  )
};
301
406
  function getOpenAiPricing(modelId) {
302
407
  if (isExperimentalChatGptModelId(modelId)) {
303
408
  return OPENAI_GPT_54_PRICING;
@@ -325,6 +430,9 @@ function getOpenAiPricing(modelId) {
325
430
  }
326
431
  return void 0;
327
432
  }
433
/**
 * Looks up the image price table for a model.
 *
 * @param {string} modelId - Model identifier.
 * @returns {object | undefined} OPENAI_GPT_IMAGE_2_PRICING when the id is an
 *   OpenAI or ChatGPT image model, otherwise undefined.
 */
function getOpenAiImagePricing(modelId) {
  if (isOpenAiImageModelId(modelId) || isChatGptImageModelId(modelId)) {
    return OPENAI_GPT_IMAGE_2_PRICING;
  }
  return void 0;
}
328
436
 
329
437
  // src/utils/cost.ts
330
438
  function resolveUsageNumber(value) {
@@ -337,8 +445,18 @@ function estimateCallCostUsd({
337
445
  modelId,
338
446
  tokens,
339
447
  responseImages,
340
- imageSize
448
+ imageSize,
449
+ imageQuality
341
450
  }) {
451
+ const openAiImagePricing = getOpenAiImagePricing(modelId);
452
+ if (openAiImagePricing) {
453
+ return estimateOpenAiImageCostUsd({
454
+ pricing: openAiImagePricing,
455
+ responseImages,
456
+ imageSize,
457
+ imageQuality
458
+ });
459
+ }
342
460
  if (!tokens) {
343
461
  return 0;
344
462
  }
@@ -400,6 +518,40 @@ function estimateCallCostUsd({
400
518
  }
401
519
  return 0;
402
520
  }
521
/**
 * Estimates the cost of generated images from a per-image price table.
 *
 * @param {object} options
 * @param {object} options.pricing - Table with `defaultQuality`,
 *   `defaultResolution` and `imagePrices[quality][resolution]`.
 * @param {number} [options.responseImages] - Number of images produced.
 * @param {string} [options.imageSize] - Requested size; mapped to a priced
 *   size by resolveOpenAiImagePriceResolution.
 * @param {string} [options.imageQuality] - "low", "medium" or "high"; any
 *   other value falls back to `pricing.defaultQuality`.
 * @returns {number} Image count multiplied by the per-image price, or 0 when
 *   there are no images to bill.
 */
function estimateOpenAiImageCostUsd({
  pricing,
  responseImages,
  imageSize,
  imageQuality
}) {
  // A missing or non-numeric count used to slip past the `<= 0` guard and
  // produce NaN, which then poisons any running cost total it is added to.
  const imageCount = Number(responseImages);
  if (!Number.isFinite(imageCount) || imageCount <= 0) {
    return 0;
  }
  const quality = imageQuality === "low" || imageQuality === "medium" || imageQuality === "high" ? imageQuality : pricing.defaultQuality;
  const resolution = resolveOpenAiImagePriceResolution(imageSize) ?? pricing.defaultResolution;
  return imageCount * pricing.imagePrices[quality][resolution];
}
534
/**
 * Maps a requested image size onto one of the three priced sizes.
 *
 * A priced size is returned unchanged. Any other WIDTHxHEIGHT string is
 * bucketed by orientation: square, landscape or portrait.
 *
 * @param {string | undefined} imageSize - Requested size, "auto", or nothing.
 * @returns {"1024x1024" | "1024x1536" | "1536x1024" | undefined} The priced
 *   size, or undefined when the input is empty, "auto" or unparseable.
 */
function resolveOpenAiImagePriceResolution(imageSize) {
  switch (imageSize) {
    case "1024x1024":
    case "1024x1536":
    case "1536x1024":
      return imageSize;
    default:
      break;
  }
  if (!imageSize || imageSize === "auto") {
    return void 0;
  }
  const parsed = /^(\d+)x(\d+)$/.exec(imageSize);
  if (parsed === null) {
    return void 0;
  }
  const [, rawWidth, rawHeight] = parsed;
  const width = Number(rawWidth);
  const height = Number(rawHeight);
  const dimensionsUsable = Number.isFinite(width) && Number.isFinite(height) && width > 0 && height > 0;
  if (!dimensionsUsable) {
    return void 0;
  }
  if (width === height) {
    return "1024x1024";
  }
  if (width > height) {
    return "1536x1024";
  }
  return "1024x1536";
}
403
555
 
404
556
  // src/openai/chatgpt-codex.ts
405
557
  import os2 from "os";
@@ -1554,6 +1706,8 @@ async function collectChatGptCodexStream(options) {
1554
1706
  const toolCallOrder = [];
1555
1707
  const webSearchCalls = /* @__PURE__ */ new Map();
1556
1708
  const webSearchCallOrder = [];
1709
+ const imageGenerationCalls = /* @__PURE__ */ new Map();
1710
+ const imageGenerationCallOrder = [];
1557
1711
  let text = "";
1558
1712
  const reasoningText = "";
1559
1713
  let reasoningSummaryText = "";
@@ -1624,6 +1778,20 @@ async function collectChatGptCodexStream(options) {
1624
1778
  action: item.action && typeof item.action === "object" ? item.action : void 0
1625
1779
  });
1626
1780
  }
1781
+ } else if (item.type === "image_generation_call") {
1782
+ const id = typeof item.id === "string" ? item.id : "";
1783
+ const result = typeof item.result === "string" ? item.result : "";
1784
+ if (id && result) {
1785
+ if (!imageGenerationCalls.has(id)) {
1786
+ imageGenerationCallOrder.push(id);
1787
+ }
1788
+ imageGenerationCalls.set(id, {
1789
+ id,
1790
+ status: typeof item.status === "string" ? item.status : void 0,
1791
+ revisedPrompt: typeof item.revised_prompt === "string" ? item.revised_prompt : void 0,
1792
+ result
1793
+ });
1794
+ }
1627
1795
  }
1628
1796
  }
1629
1797
  continue;
@@ -1663,12 +1831,14 @@ async function collectChatGptCodexStream(options) {
1663
1831
  }
1664
1832
  const orderedToolCalls = toolCallOrder.map((id) => toolCalls.get(id)).filter((call) => call !== void 0);
1665
1833
  const orderedWebSearchCalls = webSearchCallOrder.map((id) => webSearchCalls.get(id)).filter((call) => call !== void 0);
1834
+ const orderedImageGenerationCalls = imageGenerationCallOrder.map((id) => imageGenerationCalls.get(id)).filter((call) => call !== void 0);
1666
1835
  return {
1667
1836
  text,
1668
1837
  reasoningText,
1669
1838
  reasoningSummaryText,
1670
1839
  toolCalls: orderedToolCalls,
1671
1840
  webSearchCalls: orderedWebSearchCalls,
1841
+ imageGenerationCalls: orderedImageGenerationCalls,
1672
1842
  usage,
1673
1843
  id: responseId,
1674
1844
  model,
@@ -4380,13 +4550,17 @@ var LLM_TEXT_MODEL_IDS = [
4380
4550
  ...FIREWORKS_MODEL_IDS,
4381
4551
  ...GEMINI_TEXT_MODEL_IDS
4382
4552
  ];
4383
- var LLM_IMAGE_MODEL_IDS = [...GEMINI_IMAGE_MODEL_IDS];
4553
+ var LLM_IMAGE_MODEL_IDS = [
4554
+ ...OPENAI_IMAGE_MODEL_IDS,
4555
+ ...CHATGPT_IMAGE_MODEL_IDS,
4556
+ ...GEMINI_IMAGE_MODEL_IDS
4557
+ ];
4384
4558
  var LLM_MODEL_IDS = [...LLM_TEXT_MODEL_IDS, ...LLM_IMAGE_MODEL_IDS];
4385
4559
  function isLlmTextModelId(value) {
4386
4560
  return isOpenAiModelId(value) || isChatGptModelId(value) || isFireworksModelId(value) || isGeminiTextModelId(value);
4387
4561
  }
4388
4562
  function isLlmImageModelId(value) {
4389
- return isGeminiImageModelId(value);
4563
+ return isOpenAiImageModelId(value) || isChatGptImageModelId(value) || isGeminiImageModelId(value);
4390
4564
  }
4391
4565
  function isLlmModelId(value) {
4392
4566
  return isLlmTextModelId(value) || isLlmImageModelId(value);
@@ -4398,6 +4572,12 @@ var LlmJsonCallError = class extends Error {
4398
4572
  this.name = "LlmJsonCallError";
4399
4573
  }
4400
4574
  };
4575
/**
 * Reports whether an image request targets an OpenAI image model.
 *
 * @param {{ model: string }} request - Image generation request.
 * @returns {boolean} Result of isOpenAiImageModelId for `request.model`.
 */
function isOpenAiGenerateImagesRequest(request) {
  const { model } = request;
  return isOpenAiImageModelId(model);
}
4578
/**
 * Reports whether an image request targets a ChatGPT image model.
 *
 * @param {{ model: string }} request - Image generation request.
 * @returns {boolean} Result of isChatGptImageModelId for `request.model`.
 */
function isChatGptGenerateImagesRequest(request) {
  const { model } = request;
  return isChatGptImageModelId(model);
}
4401
4581
  function tool(options) {
4402
4582
  return {
4403
4583
  type: "function",
@@ -4988,6 +5168,15 @@ function resolveProvider(model) {
4988
5168
  return { provider: "fireworks", model: fireworksModel };
4989
5169
  }
4990
5170
  }
5171
+ if (isOpenAiImageModelId(model)) {
5172
+ return { provider: "openai", model };
5173
+ }
5174
+ if (isChatGptImageModelId(model)) {
5175
+ return {
5176
+ provider: "chatgpt",
5177
+ model: resolveChatGptImageProviderModel(model)
5178
+ };
5179
+ }
4991
5180
  if (isOpenAiModelId(model)) {
4992
5181
  return {
4993
5182
  provider: "openai",
@@ -4995,7 +5184,7 @@ function resolveProvider(model) {
4995
5184
  serviceTier: resolveOpenAiServiceTier(model)
4996
5185
  };
4997
5186
  }
4998
- throw new Error(`Unsupported text model: ${model}`);
5187
+ throw new Error(`Unsupported model: ${model}`);
4999
5188
  }
5000
5189
  function isOpenAiCodexModel(modelId) {
5001
5190
  return modelId.includes("codex");
@@ -6151,8 +6340,11 @@ function mergeTokenUpdates(current, next) {
6151
6340
  }
6152
6341
  return {
6153
6342
  promptTokens: next.promptTokens ?? current.promptTokens,
6343
+ promptTextTokens: next.promptTextTokens ?? current.promptTextTokens,
6344
+ promptImageTokens: next.promptImageTokens ?? current.promptImageTokens,
6154
6345
  cachedTokens: next.cachedTokens ?? current.cachedTokens,
6155
6346
  responseTokens: next.responseTokens ?? current.responseTokens,
6347
+ responseTextTokens: next.responseTextTokens ?? current.responseTextTokens,
6156
6348
  responseImageTokens: next.responseImageTokens ?? current.responseImageTokens,
6157
6349
  thinkingTokens: next.thinkingTokens ?? current.thinkingTokens,
6158
6350
  totalTokens: next.totalTokens ?? current.totalTokens,
@@ -6175,8 +6367,11 @@ function sumUsageTokens(current, next) {
6175
6367
  }
6176
6368
  return {
6177
6369
  promptTokens: sumUsageValue(current?.promptTokens, next.promptTokens),
6370
+ promptTextTokens: sumUsageValue(current?.promptTextTokens, next.promptTextTokens),
6371
+ promptImageTokens: sumUsageValue(current?.promptImageTokens, next.promptImageTokens),
6178
6372
  cachedTokens: sumUsageValue(current?.cachedTokens, next.cachedTokens),
6179
6373
  responseTokens: sumUsageValue(current?.responseTokens, next.responseTokens),
6374
+ responseTextTokens: sumUsageValue(current?.responseTextTokens, next.responseTextTokens),
6180
6375
  responseImageTokens: sumUsageValue(current?.responseImageTokens, next.responseImageTokens),
6181
6376
  thinkingTokens: sumUsageValue(current?.thinkingTokens, next.thinkingTokens),
6182
6377
  totalTokens: sumUsageValue(current?.totalTokens, next.totalTokens),
@@ -6291,10 +6486,22 @@ function extractOpenAiUsageTokens(usage) {
6291
6486
  const cachedTokens = toMaybeNumber(
6292
6487
  usage.input_tokens_details?.cached_tokens
6293
6488
  );
6489
+ const promptTextTokens = toMaybeNumber(
6490
+ usage.input_tokens_details?.text_tokens
6491
+ );
6492
+ const promptImageTokens = toMaybeNumber(
6493
+ usage.input_tokens_details?.image_tokens
6494
+ );
6294
6495
  const outputTokensRaw = toMaybeNumber(usage.output_tokens);
6295
6496
  const reasoningTokens = toMaybeNumber(
6296
6497
  usage.output_tokens_details?.reasoning_tokens
6297
6498
  );
6499
+ const responseTextTokens = toMaybeNumber(
6500
+ usage.output_tokens_details?.text_tokens
6501
+ );
6502
+ const responseImageTokens = toMaybeNumber(
6503
+ usage.output_tokens_details?.image_tokens
6504
+ );
6298
6505
  const totalTokens = toMaybeNumber(usage.total_tokens);
6299
6506
  let responseTokens;
6300
6507
  if (outputTokensRaw !== void 0) {
@@ -6306,8 +6513,12 @@ function extractOpenAiUsageTokens(usage) {
6306
6513
  }
6307
6514
  return {
6308
6515
  promptTokens,
6516
+ promptTextTokens,
6517
+ promptImageTokens,
6309
6518
  cachedTokens,
6310
6519
  responseTokens,
6520
+ responseTextTokens,
6521
+ responseImageTokens,
6311
6522
  thinkingTokens: reasoningTokens,
6312
6523
  totalTokens
6313
6524
  };
@@ -7783,6 +7994,9 @@ async function runTextCall(params) {
7783
7994
  const { result } = await collectFileUploadMetrics(async () => {
7784
7995
  try {
7785
7996
  if (provider === "openai") {
7997
+ if (isOpenAiImageModelId(request.model)) {
7998
+ throw new Error("gpt-image-2 is an image generation model; use generateImages().");
7999
+ }
7786
8000
  const openAiInput = await maybePrepareOpenAiPromptInput(
7787
8001
  toOpenAiInput(contents, {
7788
8002
  defaultMediaResolution: request.mediaResolution,
@@ -7869,6 +8083,11 @@ async function runTextCall(params) {
7869
8083
  }
7870
8084
  }, modelForProvider);
7871
8085
  } else if (provider === "chatgpt") {
8086
+ if (isChatGptImageModelId(request.model)) {
8087
+ throw new Error(
8088
+ "chatgpt-gpt-image-2 is an image generation model; use generateImages()."
8089
+ );
8090
+ }
7872
8091
  const chatGptInput = toChatGptInput(contents, {
7873
8092
  defaultMediaResolution: request.mediaResolution,
7874
8093
  model: request.model
@@ -10148,7 +10367,318 @@ async function gradeGeneratedImage(params) {
10148
10367
  });
10149
10368
  return { grade: value.grade, result };
10150
10369
  }
10370
/**
 * Maps an image output format to its MIME type.
 *
 * @param {"png" | "jpeg" | "webp" | undefined} outputFormat - Requested
 *   format; an omitted format is treated as PNG.
 * @returns {string | undefined} The MIME type, or undefined for any other
 *   value.
 */
function resolveOpenAiImageMimeType(outputFormat) {
  if (outputFormat === "jpeg") {
    return "image/jpeg";
  }
  if (outputFormat === "webp") {
    return "image/webp";
  }
  if (outputFormat === "png" || outputFormat === void 0) {
    return "image/png";
  }
  return void 0;
}
10381
/**
 * Assembles the text prompt sent for one image.
 *
 * The result is newline-joined and contains no blank lines: empty lines
 * (including an empty trimmed style or image prompt) are dropped.
 *
 * @param {object} params
 * @param {string} params.stylePrompt - Style description; trimmed.
 * @param {string} params.imagePrompt - Image description; trimmed.
 * @param {boolean} params.hasStyleImages - When truthy, adds a sentence
 *   referring to the attached reference images.
 * @returns {string} The assembled prompt.
 */
function buildOpenAiImagePrompt(params) {
  const candidateLines = [
    "Follow the requested visual style.",
    "Style:",
    params.stylePrompt.trim()
  ];
  if (params.hasStyleImages) {
    candidateLines.push(
      "Use the attached reference image or images for palette, lighting, mood, composition, and material feel."
    );
  }
  candidateLines.push("Image:", params.imagePrompt.trim());
  const nonEmptyLines = candidateLines.filter((line) => line.length > 0);
  return nonEmptyLines.join("\n");
}
10396
/**
 * Validates the image options on a request and resolves their defaults.
 *
 * @param {object} request - Image generation request. Reads `partialImages`,
 *   `outputCompression`, `outputFormat`, `imageResolution`, `imageQuality`,
 *   `numImages`, `background` and `moderation`.
 * @returns {{ size: string, quality: string, outputFormat: (string|undefined),
 *   n: number, background: (string|undefined), moderation: (string|undefined) }}
 *   Resolved parameters; size and quality default to "auto", n defaults to 1.
 * @throws {Error} When `partialImages` is set, `outputCompression` is out of
 *   range or used without a jpeg/webp format, `imageResolution` fails
 *   validateOpenAiGptImage2Resolution, or `numImages` is not one of
 *   OPENAI_GPT_IMAGE_2_NUM_IMAGES.
 */
function resolveOpenAiImageRequestParams(request) {
  if (request.partialImages !== void 0) {
    throw new Error("partialImages is only supported for streaming image generation.");
  }
  if (request.outputCompression !== void 0 && (!Number.isInteger(request.outputCompression) || request.outputCompression < 0 || request.outputCompression > 100)) {
    throw new Error("outputCompression must be an integer from 0 to 100.");
  }
  if (request.outputCompression !== void 0 && request.outputFormat !== "jpeg" && request.outputFormat !== "webp") {
    throw new Error("outputCompression requires outputFormat to be jpeg or webp.");
  }
  const size = request.imageResolution ?? "auto";
  const sizeValidation = validateOpenAiGptImage2Resolution(size);
  if (!sizeValidation.valid) {
    throw new Error(
      `imageResolution ${JSON.stringify(size)} is not supported by gpt-image-2: ${sizeValidation.reason}`
    );
  }
  // Checked last so the pre-existing errors keep their precedence. Rejecting a
  // bad count here fails fast instead of sending it to the API as `n`.
  const n = request.numImages ?? 1;
  if (!OPENAI_GPT_IMAGE_2_NUM_IMAGES.includes(n)) {
    const minImages = OPENAI_GPT_IMAGE_2_NUM_IMAGES[0];
    const maxImages = OPENAI_GPT_IMAGE_2_NUM_IMAGES[OPENAI_GPT_IMAGE_2_NUM_IMAGES.length - 1];
    throw new Error(`numImages must be an integer from ${minImages} to ${maxImages}.`);
  }
  return {
    size,
    quality: request.imageQuality ?? "auto",
    outputFormat: request.outputFormat,
    n,
    background: request.background,
    moderation: request.moderation
  };
}
10422
/**
 * Converts style reference images into uploadable files via `toFile`.
 *
 * Files are named `style-<n>.<ext>` with n starting at 1; the extension comes
 * from resolveAttachmentExtension. A missing MIME type is treated as
 * "image/png".
 *
 * @param {Array<{ data: unknown, mimeType?: string }> | undefined} styleImages
 * @returns {Promise<Array<unknown> | undefined>} One file per image, or
 *   undefined when no images were supplied.
 */
async function createOpenAiStyleImageFiles(styleImages) {
  if (!styleImages) {
    return void 0;
  }
  if (styleImages.length === 0) {
    return void 0;
  }
  const toStyleFile = async (image, index) => {
    const mimeType = image.mimeType ?? "image/png";
    const extension = resolveAttachmentExtension(mimeType);
    const fileName = `style-${index + 1}.${extension}`;
    return await toFile(image.data, fileName, { type: mimeType });
  };
  const pendingFiles = styleImages.map((image, index) => toStyleFile(image, index));
  return await Promise.all(pendingFiles);
}
10434
/**
 * Generates images through the OpenAI client's `images` endpoints.
 *
 * One request is issued per prompt, sequentially. When style images are
 * supplied the request goes to `client.images.edit` with the images attached;
 * otherwise it goes to `client.images.generate`. Telemetry receives a
 * "llm.call.started" event, then a single "llm.call.completed" event
 * (success or failure), and is flushed in all cases once started.
 *
 * @param {object} request - Image generation request.
 * @returns {Promise<Array<{ mimeType: string, data: Buffer }>>} Decoded images
 *   in response order; an empty array when there are no prompts.
 * @throws {Error} When a prompt is blank, the options fail validation, or a
 *   call fails; non-Error throwables are wrapped in an Error.
 */
async function generateImagesWithOpenAiImageApi(request) {
  const promptEntries = Array.from(request.imagePrompts, (rawPrompt, index) => {
    const trimmedPrompt = rawPrompt.trim();
    if (trimmedPrompt) {
      return trimmedPrompt;
    }
    throw new Error(`imagePrompts[${index}] must be a non-empty string`);
  });
  if (promptEntries.length === 0) {
    return [];
  }
  const { provider } = resolveProvider(request.model);
  const telemetry = createLlmTelemetryEmitter({
    telemetry: request.telemetry,
    operation: "generateImages",
    provider,
    model: request.model
  });
  const startedAtMs = Date.now();
  const elapsedMs = () => Math.max(0, Date.now() - startedAtMs);
  const params = resolveOpenAiImageRequestParams(request);
  const styleImages = await createOpenAiStyleImageFiles(request.styleImages);
  const hasStyleImages = Boolean(styleImages && styleImages.length > 0);
  const outputMimeType = resolveOpenAiImageMimeType(params.outputFormat);

  // Optional fields are only added when set, in the same order every time.
  const buildPayload = (prompt) => {
    const payload = {
      model: request.model,
      prompt,
      n: params.n,
      size: params.size,
      quality: params.quality
    };
    if (params.outputFormat) {
      payload.output_format = params.outputFormat;
    }
    if (request.outputCompression !== void 0) {
      payload.output_compression = request.outputCompression;
    }
    if (params.background) {
      payload.background = params.background;
    }
    if (params.moderation) {
      payload.moderation = params.moderation;
    }
    return payload;
  };

  let totalUsage;
  let costUsd = 0;
  // Tracked outside the try block so a failure can still report progress.
  let outputImages = 0;
  telemetry.emit({
    type: "llm.call.started",
    imagePromptCount: promptEntries.length,
    styleImageCount: request.styleImages?.length ?? 0,
    numImagesPerPrompt: params.n
  });
  try {
    const images = [];
    for (const imagePrompt of promptEntries) {
      const prompt = buildOpenAiImagePrompt({
        stylePrompt: request.stylePrompt,
        imagePrompt,
        hasStyleImages
      });
      const response = await runOpenAiCall(async (client) => {
        const payload = buildPayload(prompt);
        const callOptions = { signal: request.signal };
        if (hasStyleImages) {
          return await client.images.edit({ ...payload, image: styleImages }, callOptions);
        }
        return await client.images.generate(payload, callOptions);
      }, request.model);
      const data = Array.isArray(response.data) ? response.data : [];
      for (const item of data) {
        const hasImageData = typeof item.b64_json === "string" && item.b64_json.length > 0;
        if (hasImageData) {
          images.push({
            mimeType: outputMimeType,
            data: Buffer5.from(item.b64_json, "base64")
          });
        }
      }
      outputImages = images.length;
      const usage = extractOpenAiUsageTokens(response.usage);
      totalUsage = sumUsageTokens(totalUsage, usage);
      // Cost is based on every returned item, including ones without data.
      costUsd += estimateCallCostUsd({
        modelId: request.model,
        tokens: usage,
        responseImages: data.length,
        imageSize: params.size,
        imageQuality: params.quality
      });
    }
    telemetry.emit({
      type: "llm.call.completed",
      success: true,
      durationMs: elapsedMs(),
      usage: totalUsage,
      costUsd,
      imageCount: images.length,
      attempts: promptEntries.length
    });
    return images;
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    telemetry.emit({
      type: "llm.call.completed",
      success: false,
      durationMs: elapsedMs(),
      usage: totalUsage,
      costUsd,
      imageCount: outputImages,
      error: err.message
    });
    throw err;
  } finally {
    await telemetry.flush();
  }
}
10544
/**
 * Builds the single user message sent for ChatGPT image generation.
 *
 * The message holds the prompt as a text part followed by one `inlineData`
 * part per style image (base64 data, MIME type defaulting to "image/png",
 * filename `style-<n>.<ext>` with n starting at 1).
 *
 * @param {object} params
 * @param {string} params.prompt - Prompt text.
 * @param {Array<{ data: Buffer, mimeType?: string }>} [params.styleImages]
 * @returns {Array<{ role: "user", parts: Array<object> }>} One-element list.
 */
function buildChatGptImageInputContent(params) {
  const textPart = {
    type: "text",
    text: params.prompt
  };
  const styleParts = Array.from(params.styleImages ?? [], (image, index) => {
    const mimeType = image.mimeType ?? "image/png";
    return {
      type: "inlineData",
      data: image.data.toString("base64"),
      mimeType,
      filename: `style-${index + 1}.${resolveAttachmentExtension(mimeType)}`
    };
  });
  return [{ role: "user", parts: [textPart, ...styleParts] }];
}
10562
/**
 * Generates images by asking the ChatGPT provider model to call its
 * `image_generation` tool.
 *
 * One streamed call is made per requested image (prompts × `numImages`,
 * default 1), sequentially. The tool is always asked for PNG output. Cost is
 * estimated with a fixed "1024x1024" size and "medium" quality. Telemetry
 * receives a "llm.call.started" event, then a single "llm.call.completed"
 * event (success or failure), and is flushed in all cases once started.
 *
 * @param {object} request - Image generation request.
 * @returns {Promise<Array<{ mimeType: "image/png", data: Buffer }>>} Decoded
 *   images in generation order; an empty array when there are no prompts.
 * @throws {Error} When a prompt is blank, a response reports a status other
 *   than "completed", a response has no image generation call, or a call
 *   fails; non-Error throwables are wrapped in an Error.
 */
async function generateImagesWithChatGptImageTool(request) {
  const promptEntries = Array.from(request.imagePrompts, (rawPrompt, index) => {
    const trimmedPrompt = rawPrompt.trim();
    if (trimmedPrompt) {
      return trimmedPrompt;
    }
    throw new Error(`imagePrompts[${index}] must be a non-empty string`);
  });
  if (promptEntries.length === 0) {
    return [];
  }
  const providerInfo = resolveProvider(request.model);
  const telemetry = createLlmTelemetryEmitter({
    telemetry: request.telemetry,
    operation: "generateImages",
    provider: providerInfo.provider,
    model: request.model
  });
  const startedAtMs = Date.now();
  const elapsedMs = () => Math.max(0, Date.now() - startedAtMs);
  const numImagesPerPrompt = request.numImages ?? 1;
  let totalUsage;
  let costUsd = 0;
  // Tracked outside the try block so a failure can still report progress.
  let outputImages = 0;
  telemetry.emit({
    type: "llm.call.started",
    imagePromptCount: promptEntries.length,
    styleImageCount: request.styleImages?.length ?? 0,
    numImagesPerPrompt
  });
  try {
    const images = [];
    for (const imagePrompt of promptEntries) {
      const prompt = buildOpenAiImagePrompt({
        stylePrompt: request.stylePrompt,
        imagePrompt,
        hasStyleImages: Boolean(request.styleImages && request.styleImages.length > 0)
      });
      for (let generated = 0; generated < numImagesPerPrompt; generated += 1) {
        const inputContent = buildChatGptImageInputContent({
          prompt,
          styleImages: request.styleImages
        });
        const chatGptInput = toChatGptInput(inputContent, { model: request.model });
        const preparedInput = await maybePrepareOpenAiPromptInput(chatGptInput.input, {
          model: request.model,
          provider: "chatgpt"
        });
        const codexRequest = {
          model: providerInfo.model,
          store: false,
          stream: true,
          instructions: chatGptInput.instructions ?? "Use the image_generation tool to generate exactly one PNG image. Do not return prose instead of the image.",
          input: preparedInput,
          tool_choice: "required",
          parallel_tool_calls: false,
          tools: [{ type: "image_generation", output_format: "png" }]
        };
        const result = await collectChatGptCodexResponseWithRetry({
          request: codexRequest,
          signal: request.signal
        });
        if (result.status && result.status !== "completed") {
          throw new Error(`ChatGPT image generation response status ${result.status}`);
        }
        const generationCalls = result.imageGenerationCalls;
        if (generationCalls.length === 0) {
          throw new Error("ChatGPT image generation returned no image_generation_call result.");
        }
        for (const call of generationCalls) {
          images.push({
            mimeType: "image/png",
            data: Buffer5.from(call.result, "base64")
          });
        }
        outputImages = images.length;
        const usage = extractChatGptUsageTokens(result.usage);
        totalUsage = sumUsageTokens(totalUsage, usage);
        costUsd += estimateCallCostUsd({
          modelId: request.model,
          tokens: usage,
          responseImages: generationCalls.length,
          imageSize: "1024x1024",
          imageQuality: "medium"
        });
      }
    }
    telemetry.emit({
      type: "llm.call.completed",
      success: true,
      durationMs: elapsedMs(),
      modelVersion: request.model,
      usage: totalUsage,
      costUsd,
      imageCount: images.length,
      attempts: promptEntries.length * numImagesPerPrompt
    });
    return images;
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    telemetry.emit({
      type: "llm.call.completed",
      success: false,
      durationMs: elapsedMs(),
      usage: totalUsage,
      costUsd,
      imageCount: outputImages,
      error: err.message
    });
    throw err;
  } finally {
    await telemetry.flush();
  }
}
10151
10675
  async function generateImages(request) {
10676
+ if (isOpenAiGenerateImagesRequest(request)) {
10677
+ return await generateImagesWithOpenAiImageApi(request);
10678
+ }
10679
+ if (isChatGptGenerateImagesRequest(request)) {
10680
+ return await generateImagesWithChatGptImageTool(request);
10681
+ }
10152
10682
  const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 4));
10153
10683
  const promptList = Array.from(request.imagePrompts);
10154
10684
  if (promptList.length === 0) {
@@ -10162,7 +10692,7 @@ async function generateImages(request) {
10162
10692
  }
10163
10693
  return { index: arrayIndex + 1, prompt: trimmedPrompt };
10164
10694
  });
10165
- const gradingPrompt = request.imageGradingPrompt.trim();
10695
+ const gradingPrompt = request.imageGradingPrompt?.trim() ?? "";
10166
10696
  if (!gradingPrompt) {
10167
10697
  throw new Error("imageGradingPrompt must be a non-empty string");
10168
10698
  }
@@ -13460,8 +13990,11 @@ function summarizeResultUsage(result) {
13460
13990
  }
13461
13991
  summary = {
13462
13992
  promptTokens: sumUsageValue2(summary?.promptTokens, usage.promptTokens),
13993
+ promptTextTokens: sumUsageValue2(summary?.promptTextTokens, usage.promptTextTokens),
13994
+ promptImageTokens: sumUsageValue2(summary?.promptImageTokens, usage.promptImageTokens),
13463
13995
  cachedTokens: sumUsageValue2(summary?.cachedTokens, usage.cachedTokens),
13464
13996
  responseTokens: sumUsageValue2(summary?.responseTokens, usage.responseTokens),
13997
+ responseTextTokens: sumUsageValue2(summary?.responseTextTokens, usage.responseTextTokens),
13465
13998
  responseImageTokens: sumUsageValue2(summary?.responseImageTokens, usage.responseImageTokens),
13466
13999
  thinkingTokens: sumUsageValue2(summary?.thinkingTokens, usage.thinkingTokens),
13467
14000
  totalTokens: sumUsageValue2(summary?.totalTokens, usage.totalTokens),
@@ -14172,6 +14705,7 @@ async function runCandidateEvolution(options) {
14172
14705
  };
14173
14706
  }
14174
14707
  export {
14708
+ CHATGPT_IMAGE_MODEL_IDS,
14175
14709
  CHATGPT_MODEL_IDS,
14176
14710
  CODEX_APPLY_PATCH_FREEFORM_TOOL_DESCRIPTION,
14177
14711
  CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION,
@@ -14192,6 +14726,17 @@ export {
14192
14726
  LLM_MODEL_IDS,
14193
14727
  LLM_TEXT_MODEL_IDS,
14194
14728
  LlmJsonCallError,
14729
+ OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION,
14730
+ OPENAI_GPT_IMAGE_2_BACKGROUNDS,
14731
+ OPENAI_GPT_IMAGE_2_MODERATION_LEVELS,
14732
+ OPENAI_GPT_IMAGE_2_NUM_IMAGES,
14733
+ OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS,
14734
+ OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS,
14735
+ OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS,
14736
+ OPENAI_GPT_IMAGE_2_QUALITY_LEVELS,
14737
+ OPENAI_GPT_IMAGE_2_RESOLUTIONS,
14738
+ OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS,
14739
+ OPENAI_IMAGE_MODEL_IDS,
14195
14740
  OPENAI_MODEL_IDS,
14196
14741
  appendMarkdownSourcesSection,
14197
14742
  applyPatch,
@@ -14232,6 +14777,7 @@ export {
14232
14777
  generateText,
14233
14778
  getChatGptAuthProfile,
14234
14779
  getCurrentToolCallContext,
14780
+ isChatGptImageModelId,
14235
14781
  isChatGptModelId,
14236
14782
  isExperimentalChatGptModelId,
14237
14783
  isFireworksModelId,
@@ -14241,6 +14787,7 @@ export {
14241
14787
  isLlmImageModelId,
14242
14788
  isLlmModelId,
14243
14789
  isLlmTextModelId,
14790
+ isOpenAiImageModelId,
14244
14791
  isOpenAiModelId,
14245
14792
  loadEnvFromFile,
14246
14793
  loadLocalEnv,
@@ -14248,6 +14795,7 @@ export {
14248
14795
  refreshChatGptOauthToken,
14249
14796
  resetModelConcurrencyConfig,
14250
14797
  resetTelemetry,
14798
+ resolveChatGptImageProviderModel,
14251
14799
  resolveFilesystemToolProfile,
14252
14800
  resolveFireworksModelId,
14253
14801
  runAgentLoop,
@@ -14260,6 +14808,7 @@ export {
14260
14808
  streamToolLoop,
14261
14809
  stripCodexCitationMarkers,
14262
14810
  toGeminiJsonSchema,
14263
- tool
14811
+ tool,
14812
+ validateOpenAiGptImage2Resolution
14264
14813
  };
14265
14814
  //# sourceMappingURL=index.js.map