@ljoukov/llm 7.0.12 → 7.0.14

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -30,6 +30,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ CHATGPT_IMAGE_MODEL_IDS: () => CHATGPT_IMAGE_MODEL_IDS,
33
34
  CHATGPT_MODEL_IDS: () => CHATGPT_MODEL_IDS,
34
35
  CODEX_APPLY_PATCH_FREEFORM_TOOL_DESCRIPTION: () => CODEX_APPLY_PATCH_FREEFORM_TOOL_DESCRIPTION,
35
36
  CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION: () => CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION,
@@ -50,6 +51,17 @@ __export(index_exports, {
50
51
  LLM_MODEL_IDS: () => LLM_MODEL_IDS,
51
52
  LLM_TEXT_MODEL_IDS: () => LLM_TEXT_MODEL_IDS,
52
53
  LlmJsonCallError: () => LlmJsonCallError,
54
+ OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION: () => OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION,
55
+ OPENAI_GPT_IMAGE_2_BACKGROUNDS: () => OPENAI_GPT_IMAGE_2_BACKGROUNDS,
56
+ OPENAI_GPT_IMAGE_2_MODERATION_LEVELS: () => OPENAI_GPT_IMAGE_2_MODERATION_LEVELS,
57
+ OPENAI_GPT_IMAGE_2_NUM_IMAGES: () => OPENAI_GPT_IMAGE_2_NUM_IMAGES,
58
+ OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS: () => OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS,
59
+ OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS: () => OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS,
60
+ OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS: () => OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS,
61
+ OPENAI_GPT_IMAGE_2_QUALITY_LEVELS: () => OPENAI_GPT_IMAGE_2_QUALITY_LEVELS,
62
+ OPENAI_GPT_IMAGE_2_RESOLUTIONS: () => OPENAI_GPT_IMAGE_2_RESOLUTIONS,
63
+ OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS: () => OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS,
64
+ OPENAI_IMAGE_MODEL_IDS: () => OPENAI_IMAGE_MODEL_IDS,
53
65
  OPENAI_MODEL_IDS: () => OPENAI_MODEL_IDS,
54
66
  appendMarkdownSourcesSection: () => appendMarkdownSourcesSection,
55
67
  applyPatch: () => applyPatch,
@@ -90,6 +102,7 @@ __export(index_exports, {
90
102
  generateText: () => generateText,
91
103
  getChatGptAuthProfile: () => getChatGptAuthProfile,
92
104
  getCurrentToolCallContext: () => getCurrentToolCallContext,
105
+ isChatGptImageModelId: () => isChatGptImageModelId,
93
106
  isChatGptModelId: () => isChatGptModelId,
94
107
  isExperimentalChatGptModelId: () => isExperimentalChatGptModelId,
95
108
  isFireworksModelId: () => isFireworksModelId,
@@ -99,6 +112,7 @@ __export(index_exports, {
99
112
  isLlmImageModelId: () => isLlmImageModelId,
100
113
  isLlmModelId: () => isLlmModelId,
101
114
  isLlmTextModelId: () => isLlmTextModelId,
115
+ isOpenAiImageModelId: () => isOpenAiImageModelId,
102
116
  isOpenAiModelId: () => isOpenAiModelId,
103
117
  loadEnvFromFile: () => loadEnvFromFile,
104
118
  loadLocalEnv: () => loadLocalEnv,
@@ -106,6 +120,7 @@ __export(index_exports, {
106
120
  refreshChatGptOauthToken: () => refreshChatGptOauthToken,
107
121
  resetModelConcurrencyConfig: () => resetModelConcurrencyConfig,
108
122
  resetTelemetry: () => resetTelemetry,
123
+ resolveChatGptImageProviderModel: () => resolveChatGptImageProviderModel,
109
124
  resolveFilesystemToolProfile: () => resolveFilesystemToolProfile,
110
125
  resolveFireworksModelId: () => resolveFireworksModelId,
111
126
  runAgentLoop: () => runAgentLoop,
@@ -118,7 +133,8 @@ __export(index_exports, {
118
133
  streamToolLoop: () => streamToolLoop,
119
134
  stripCodexCitationMarkers: () => stripCodexCitationMarkers,
120
135
  toGeminiJsonSchema: () => toGeminiJsonSchema,
121
- tool: () => tool
136
+ tool: () => tool,
137
+ validateOpenAiGptImage2Resolution: () => validateOpenAiGptImage2Resolution
122
138
  });
123
139
  module.exports = __toCommonJS(index_exports);
124
140
 
@@ -130,6 +146,7 @@ var import_node_path5 = __toESM(require("path"), 1);
130
146
  var import_genai2 = require("@google/genai");
131
147
  var import_zod_to_json_schema = require("@alcyone-labs/zod-to-json-schema");
132
148
  var import_zod3 = require("zod");
149
+ var import_openai3 = require("openai");
133
150
 
134
151
  // src/utils/asyncQueue.ts
135
152
  function createAsyncQueue() {
@@ -331,6 +348,85 @@ var OPENAI_MODEL_IDS = [
331
348
  function isOpenAiModelId(value) {
332
349
  return OPENAI_MODEL_IDS.includes(value);
333
350
  }
351
+ var OPENAI_IMAGE_MODEL_IDS = ["gpt-image-2"];
352
+ function isOpenAiImageModelId(value) {
353
+ return OPENAI_IMAGE_MODEL_IDS.includes(value);
354
+ }
355
+ var CHATGPT_IMAGE_MODEL_IDS = ["chatgpt-gpt-image-2"];
356
+ function isChatGptImageModelId(value) {
357
+ return CHATGPT_IMAGE_MODEL_IDS.includes(value);
358
+ }
359
+ var OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS = [
360
+ "1024x1024",
361
+ "1536x1024",
362
+ "1024x1536",
363
+ "2048x2048",
364
+ "2048x1152",
365
+ "3840x2160",
366
+ "2160x3840"
367
+ ];
368
+ var OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION = "auto";
369
+ var OPENAI_GPT_IMAGE_2_RESOLUTIONS = [
370
+ ...OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS,
371
+ OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION
372
+ ];
373
+ var OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS = {
374
+ maxEdgePixels: 3840,
375
+ edgeMultiplePixels: 16,
376
+ maxLongToShortEdgeRatio: 3,
377
+ minTotalPixels: 655360,
378
+ maxTotalPixels: 8294400,
379
+ experimentalTotalPixelsThreshold: 3686400
380
+ };
381
+ function validateOpenAiGptImage2Resolution(value) {
382
+ if (value === OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION) {
383
+ return { valid: true };
384
+ }
385
+ const match = /^([1-9]\d*)x([1-9]\d*)$/.exec(value);
386
+ if (!match) {
387
+ return { valid: false, reason: 'Expected "auto" or a WIDTHxHEIGHT pixel string.' };
388
+ }
389
+ const width = Number(match[1]);
390
+ const height = Number(match[2]);
391
+ if (!Number.isSafeInteger(width) || !Number.isSafeInteger(height)) {
392
+ return { valid: false, reason: "Width and height must be safe integer pixel counts." };
393
+ }
394
+ const constraints = OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS;
395
+ if (width > constraints.maxEdgePixels || height > constraints.maxEdgePixels) {
396
+ return {
397
+ valid: false,
398
+ reason: `Width and height must each be at most ${constraints.maxEdgePixels}px.`
399
+ };
400
+ }
401
+ if (width % constraints.edgeMultiplePixels !== 0 || height % constraints.edgeMultiplePixels !== 0) {
402
+ return {
403
+ valid: false,
404
+ reason: `Width and height must each be multiples of ${constraints.edgeMultiplePixels}px.`
405
+ };
406
+ }
407
+ const totalPixels = width * height;
408
+ if (totalPixels < constraints.minTotalPixels || totalPixels > constraints.maxTotalPixels) {
409
+ return {
410
+ valid: false,
411
+ reason: `Total pixels must be between ${constraints.minTotalPixels} and ${constraints.maxTotalPixels}.`
412
+ };
413
+ }
414
+ const longEdge = Math.max(width, height);
415
+ const shortEdge = Math.min(width, height);
416
+ if (longEdge / shortEdge > constraints.maxLongToShortEdgeRatio) {
417
+ return {
418
+ valid: false,
419
+ reason: `The long edge must be at most ${constraints.maxLongToShortEdgeRatio}:1 relative to the short edge.`
420
+ };
421
+ }
422
+ return { valid: true };
423
+ }
424
+ var OPENAI_GPT_IMAGE_2_QUALITY_LEVELS = ["low", "medium", "high", "auto"];
425
+ var OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS = ["png", "jpeg", "webp"];
426
+ var OPENAI_GPT_IMAGE_2_BACKGROUNDS = ["opaque", "auto"];
427
+ var OPENAI_GPT_IMAGE_2_MODERATION_LEVELS = ["low", "auto"];
428
+ var OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS = [0, 1, 2, 3];
429
+ var OPENAI_GPT_IMAGE_2_NUM_IMAGES = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
334
430
  var CHATGPT_MODEL_IDS = [
335
431
  "chatgpt-gpt-5.5",
336
432
  "chatgpt-gpt-5.5-fast",
@@ -342,6 +438,7 @@ var CHATGPT_MODEL_IDS = [
342
438
  var FAST_MODEL_SUFFIX = "-fast";
343
439
  var OPENAI_PRIORITY_MODEL_IDS = ["gpt-5.5-fast"];
344
440
  var CHATGPT_PRIORITY_MODEL_IDS = ["chatgpt-gpt-5.5-fast", "chatgpt-gpt-5.4-fast"];
441
+ var CHATGPT_IMAGE_GENERATION_PROVIDER_MODEL = "gpt-5.4";
345
442
  var EXPERIMENTAL_CHATGPT_MODEL_PREFIX = "experimental-chatgpt-";
346
443
  function isExperimentalChatGptModelId(value) {
347
444
  return value.startsWith(EXPERIMENTAL_CHATGPT_MODEL_PREFIX) && value.length > EXPERIMENTAL_CHATGPT_MODEL_PREFIX.length;
@@ -368,6 +465,9 @@ function resolveChatGptProviderModel(model) {
368
465
  const providerModel = stripChatGptPrefix(model);
369
466
  return CHATGPT_PRIORITY_MODEL_IDS.includes(model) ? stripFastSuffix(providerModel) : providerModel;
370
467
  }
468
+ function resolveChatGptImageProviderModel(_model) {
469
+ return CHATGPT_IMAGE_GENERATION_PROVIDER_MODEL;
470
+ }
371
471
  function resolveChatGptServiceTier(model) {
372
472
  return CHATGPT_PRIORITY_MODEL_IDS.includes(model) ? "priority" : void 0;
373
473
  }
@@ -413,6 +513,27 @@ var OPENAI_GPT_54_NANO_PRICING = {
413
513
  cachedRate: 5e-3 / 1e6,
414
514
  outputRate: 0.4 / 1e6
415
515
  };
516
+ var OPENAI_GPT_IMAGE_2_PRICING = {
517
+ defaultQuality: "medium",
518
+ defaultResolution: "1024x1024",
519
+ imagePrices: {
520
+ low: {
521
+ "1024x1024": 6e-3,
522
+ "1024x1536": 5e-3,
523
+ "1536x1024": 5e-3
524
+ },
525
+ medium: {
526
+ "1024x1024": 0.053,
527
+ "1024x1536": 0.041,
528
+ "1536x1024": 0.041
529
+ },
530
+ high: {
531
+ "1024x1024": 0.211,
532
+ "1024x1536": 0.165,
533
+ "1536x1024": 0.165
534
+ }
535
+ }
536
+ };
416
537
  function getOpenAiPricing(modelId) {
417
538
  if (isExperimentalChatGptModelId(modelId)) {
418
539
  return OPENAI_GPT_54_PRICING;
@@ -440,6 +561,9 @@ function getOpenAiPricing(modelId) {
440
561
  }
441
562
  return void 0;
442
563
  }
564
+ function getOpenAiImagePricing(modelId) {
565
+ return isOpenAiImageModelId(modelId) || isChatGptImageModelId(modelId) ? OPENAI_GPT_IMAGE_2_PRICING : void 0;
566
+ }
443
567
 
444
568
  // src/utils/cost.ts
445
569
  function resolveUsageNumber(value) {
@@ -452,8 +576,18 @@ function estimateCallCostUsd({
452
576
  modelId,
453
577
  tokens,
454
578
  responseImages,
455
- imageSize
579
+ imageSize,
580
+ imageQuality
456
581
  }) {
582
+ const openAiImagePricing = getOpenAiImagePricing(modelId);
583
+ if (openAiImagePricing) {
584
+ return estimateOpenAiImageCostUsd({
585
+ pricing: openAiImagePricing,
586
+ responseImages,
587
+ imageSize,
588
+ imageQuality
589
+ });
590
+ }
457
591
  if (!tokens) {
458
592
  return 0;
459
593
  }
@@ -515,6 +649,40 @@ function estimateCallCostUsd({
515
649
  }
516
650
  return 0;
517
651
  }
652
+ function estimateOpenAiImageCostUsd({
653
+ pricing,
654
+ responseImages,
655
+ imageSize,
656
+ imageQuality
657
+ }) {
658
+ if (responseImages <= 0) {
659
+ return 0;
660
+ }
661
+ const quality = imageQuality === "low" || imageQuality === "medium" || imageQuality === "high" ? imageQuality : pricing.defaultQuality;
662
+ const resolution = resolveOpenAiImagePriceResolution(imageSize) ?? pricing.defaultResolution;
663
+ return responseImages * pricing.imagePrices[quality][resolution];
664
+ }
665
+ function resolveOpenAiImagePriceResolution(imageSize) {
666
+ if (imageSize === "1024x1024" || imageSize === "1024x1536" || imageSize === "1536x1024") {
667
+ return imageSize;
668
+ }
669
+ if (!imageSize || imageSize === "auto") {
670
+ return void 0;
671
+ }
672
+ const match = /^(\d+)x(\d+)$/.exec(imageSize);
673
+ if (!match) {
674
+ return void 0;
675
+ }
676
+ const width = Number(match[1]);
677
+ const height = Number(match[2]);
678
+ if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
679
+ return void 0;
680
+ }
681
+ if (width === height) {
682
+ return "1024x1024";
683
+ }
684
+ return width > height ? "1536x1024" : "1024x1536";
685
+ }
518
686
 
519
687
  // src/openai/chatgpt-codex.ts
520
688
  var import_node_os2 = __toESM(require("os"), 1);
@@ -1669,6 +1837,8 @@ async function collectChatGptCodexStream(options) {
1669
1837
  const toolCallOrder = [];
1670
1838
  const webSearchCalls = /* @__PURE__ */ new Map();
1671
1839
  const webSearchCallOrder = [];
1840
+ const imageGenerationCalls = /* @__PURE__ */ new Map();
1841
+ const imageGenerationCallOrder = [];
1672
1842
  let text = "";
1673
1843
  const reasoningText = "";
1674
1844
  let reasoningSummaryText = "";
@@ -1739,6 +1909,20 @@ async function collectChatGptCodexStream(options) {
1739
1909
  action: item.action && typeof item.action === "object" ? item.action : void 0
1740
1910
  });
1741
1911
  }
1912
+ } else if (item.type === "image_generation_call") {
1913
+ const id = typeof item.id === "string" ? item.id : "";
1914
+ const result = typeof item.result === "string" ? item.result : "";
1915
+ if (id && result) {
1916
+ if (!imageGenerationCalls.has(id)) {
1917
+ imageGenerationCallOrder.push(id);
1918
+ }
1919
+ imageGenerationCalls.set(id, {
1920
+ id,
1921
+ status: typeof item.status === "string" ? item.status : void 0,
1922
+ revisedPrompt: typeof item.revised_prompt === "string" ? item.revised_prompt : void 0,
1923
+ result
1924
+ });
1925
+ }
1742
1926
  }
1743
1927
  }
1744
1928
  continue;
@@ -1778,12 +1962,14 @@ async function collectChatGptCodexStream(options) {
1778
1962
  }
1779
1963
  const orderedToolCalls = toolCallOrder.map((id) => toolCalls.get(id)).filter((call) => call !== void 0);
1780
1964
  const orderedWebSearchCalls = webSearchCallOrder.map((id) => webSearchCalls.get(id)).filter((call) => call !== void 0);
1965
+ const orderedImageGenerationCalls = imageGenerationCallOrder.map((id) => imageGenerationCalls.get(id)).filter((call) => call !== void 0);
1781
1966
  return {
1782
1967
  text,
1783
1968
  reasoningText,
1784
1969
  reasoningSummaryText,
1785
1970
  toolCalls: orderedToolCalls,
1786
1971
  webSearchCalls: orderedWebSearchCalls,
1972
+ imageGenerationCalls: orderedImageGenerationCalls,
1787
1973
  usage,
1788
1974
  id: responseId,
1789
1975
  model,
@@ -4495,13 +4681,17 @@ var LLM_TEXT_MODEL_IDS = [
4495
4681
  ...FIREWORKS_MODEL_IDS,
4496
4682
  ...GEMINI_TEXT_MODEL_IDS
4497
4683
  ];
4498
- var LLM_IMAGE_MODEL_IDS = [...GEMINI_IMAGE_MODEL_IDS];
4684
+ var LLM_IMAGE_MODEL_IDS = [
4685
+ ...OPENAI_IMAGE_MODEL_IDS,
4686
+ ...CHATGPT_IMAGE_MODEL_IDS,
4687
+ ...GEMINI_IMAGE_MODEL_IDS
4688
+ ];
4499
4689
  var LLM_MODEL_IDS = [...LLM_TEXT_MODEL_IDS, ...LLM_IMAGE_MODEL_IDS];
4500
4690
  function isLlmTextModelId(value) {
4501
4691
  return isOpenAiModelId(value) || isChatGptModelId(value) || isFireworksModelId(value) || isGeminiTextModelId(value);
4502
4692
  }
4503
4693
  function isLlmImageModelId(value) {
4504
- return isGeminiImageModelId(value);
4694
+ return isOpenAiImageModelId(value) || isChatGptImageModelId(value) || isGeminiImageModelId(value);
4505
4695
  }
4506
4696
  function isLlmModelId(value) {
4507
4697
  return isLlmTextModelId(value) || isLlmImageModelId(value);
@@ -4513,6 +4703,12 @@ var LlmJsonCallError = class extends Error {
4513
4703
  this.name = "LlmJsonCallError";
4514
4704
  }
4515
4705
  };
4706
+ function isOpenAiGenerateImagesRequest(request) {
4707
+ return isOpenAiImageModelId(request.model);
4708
+ }
4709
+ function isChatGptGenerateImagesRequest(request) {
4710
+ return isChatGptImageModelId(request.model);
4711
+ }
4516
4712
  function tool(options) {
4517
4713
  return {
4518
4714
  type: "function",
@@ -5103,6 +5299,15 @@ function resolveProvider(model) {
5103
5299
  return { provider: "fireworks", model: fireworksModel };
5104
5300
  }
5105
5301
  }
5302
+ if (isOpenAiImageModelId(model)) {
5303
+ return { provider: "openai", model };
5304
+ }
5305
+ if (isChatGptImageModelId(model)) {
5306
+ return {
5307
+ provider: "chatgpt",
5308
+ model: resolveChatGptImageProviderModel(model)
5309
+ };
5310
+ }
5106
5311
  if (isOpenAiModelId(model)) {
5107
5312
  return {
5108
5313
  provider: "openai",
@@ -5110,7 +5315,7 @@ function resolveProvider(model) {
5110
5315
  serviceTier: resolveOpenAiServiceTier(model)
5111
5316
  };
5112
5317
  }
5113
- throw new Error(`Unsupported text model: ${model}`);
5318
+ throw new Error(`Unsupported model: ${model}`);
5114
5319
  }
5115
5320
  function isOpenAiCodexModel(modelId) {
5116
5321
  return modelId.includes("codex");
@@ -6266,8 +6471,11 @@ function mergeTokenUpdates(current, next) {
6266
6471
  }
6267
6472
  return {
6268
6473
  promptTokens: next.promptTokens ?? current.promptTokens,
6474
+ promptTextTokens: next.promptTextTokens ?? current.promptTextTokens,
6475
+ promptImageTokens: next.promptImageTokens ?? current.promptImageTokens,
6269
6476
  cachedTokens: next.cachedTokens ?? current.cachedTokens,
6270
6477
  responseTokens: next.responseTokens ?? current.responseTokens,
6478
+ responseTextTokens: next.responseTextTokens ?? current.responseTextTokens,
6271
6479
  responseImageTokens: next.responseImageTokens ?? current.responseImageTokens,
6272
6480
  thinkingTokens: next.thinkingTokens ?? current.thinkingTokens,
6273
6481
  totalTokens: next.totalTokens ?? current.totalTokens,
@@ -6290,8 +6498,11 @@ function sumUsageTokens(current, next) {
6290
6498
  }
6291
6499
  return {
6292
6500
  promptTokens: sumUsageValue(current?.promptTokens, next.promptTokens),
6501
+ promptTextTokens: sumUsageValue(current?.promptTextTokens, next.promptTextTokens),
6502
+ promptImageTokens: sumUsageValue(current?.promptImageTokens, next.promptImageTokens),
6293
6503
  cachedTokens: sumUsageValue(current?.cachedTokens, next.cachedTokens),
6294
6504
  responseTokens: sumUsageValue(current?.responseTokens, next.responseTokens),
6505
+ responseTextTokens: sumUsageValue(current?.responseTextTokens, next.responseTextTokens),
6295
6506
  responseImageTokens: sumUsageValue(current?.responseImageTokens, next.responseImageTokens),
6296
6507
  thinkingTokens: sumUsageValue(current?.thinkingTokens, next.thinkingTokens),
6297
6508
  totalTokens: sumUsageValue(current?.totalTokens, next.totalTokens),
@@ -6406,10 +6617,22 @@ function extractOpenAiUsageTokens(usage) {
6406
6617
  const cachedTokens = toMaybeNumber(
6407
6618
  usage.input_tokens_details?.cached_tokens
6408
6619
  );
6620
+ const promptTextTokens = toMaybeNumber(
6621
+ usage.input_tokens_details?.text_tokens
6622
+ );
6623
+ const promptImageTokens = toMaybeNumber(
6624
+ usage.input_tokens_details?.image_tokens
6625
+ );
6409
6626
  const outputTokensRaw = toMaybeNumber(usage.output_tokens);
6410
6627
  const reasoningTokens = toMaybeNumber(
6411
6628
  usage.output_tokens_details?.reasoning_tokens
6412
6629
  );
6630
+ const responseTextTokens = toMaybeNumber(
6631
+ usage.output_tokens_details?.text_tokens
6632
+ );
6633
+ const responseImageTokens = toMaybeNumber(
6634
+ usage.output_tokens_details?.image_tokens
6635
+ );
6413
6636
  const totalTokens = toMaybeNumber(usage.total_tokens);
6414
6637
  let responseTokens;
6415
6638
  if (outputTokensRaw !== void 0) {
@@ -6421,8 +6644,12 @@ function extractOpenAiUsageTokens(usage) {
6421
6644
  }
6422
6645
  return {
6423
6646
  promptTokens,
6647
+ promptTextTokens,
6648
+ promptImageTokens,
6424
6649
  cachedTokens,
6425
6650
  responseTokens,
6651
+ responseTextTokens,
6652
+ responseImageTokens,
6426
6653
  thinkingTokens: reasoningTokens,
6427
6654
  totalTokens
6428
6655
  };
@@ -7898,6 +8125,9 @@ async function runTextCall(params) {
7898
8125
  const { result } = await collectFileUploadMetrics(async () => {
7899
8126
  try {
7900
8127
  if (provider === "openai") {
8128
+ if (isOpenAiImageModelId(request.model)) {
8129
+ throw new Error("gpt-image-2 is an image generation model; use generateImages().");
8130
+ }
7901
8131
  const openAiInput = await maybePrepareOpenAiPromptInput(
7902
8132
  toOpenAiInput(contents, {
7903
8133
  defaultMediaResolution: request.mediaResolution,
@@ -7984,6 +8214,11 @@ async function runTextCall(params) {
7984
8214
  }
7985
8215
  }, modelForProvider);
7986
8216
  } else if (provider === "chatgpt") {
8217
+ if (isChatGptImageModelId(request.model)) {
8218
+ throw new Error(
8219
+ "chatgpt-gpt-image-2 is an image generation model; use generateImages()."
8220
+ );
8221
+ }
7987
8222
  const chatGptInput = toChatGptInput(contents, {
7988
8223
  defaultMediaResolution: request.mediaResolution,
7989
8224
  model: request.model
@@ -10263,7 +10498,318 @@ async function gradeGeneratedImage(params) {
10263
10498
  });
10264
10499
  return { grade: value.grade, result };
10265
10500
  }
10501
+ function resolveOpenAiImageMimeType(outputFormat) {
10502
+ switch (outputFormat) {
10503
+ case "jpeg":
10504
+ return "image/jpeg";
10505
+ case "webp":
10506
+ return "image/webp";
10507
+ case "png":
10508
+ case void 0:
10509
+ return "image/png";
10510
+ }
10511
+ }
10512
+ function buildOpenAiImagePrompt(params) {
10513
+ return [
10514
+ "Follow the requested visual style.",
10515
+ "",
10516
+ "Style:",
10517
+ params.stylePrompt.trim(),
10518
+ ...params.hasStyleImages ? [
10519
+ "",
10520
+ "Use the attached reference image or images for palette, lighting, mood, composition, and material feel."
10521
+ ] : [],
10522
+ "",
10523
+ "Image:",
10524
+ params.imagePrompt.trim()
10525
+ ].filter((line) => line.length > 0).join("\n");
10526
+ }
10527
+ function resolveOpenAiImageRequestParams(request) {
10528
+ if (request.partialImages !== void 0) {
10529
+ throw new Error("partialImages is only supported for streaming image generation.");
10530
+ }
10531
+ if (request.outputCompression !== void 0 && (!Number.isInteger(request.outputCompression) || request.outputCompression < 0 || request.outputCompression > 100)) {
10532
+ throw new Error("outputCompression must be an integer from 0 to 100.");
10533
+ }
10534
+ if (request.outputCompression !== void 0 && request.outputFormat !== "jpeg" && request.outputFormat !== "webp") {
10535
+ throw new Error("outputCompression requires outputFormat to be jpeg or webp.");
10536
+ }
10537
+ const size = request.imageResolution ?? "auto";
10538
+ const sizeValidation = validateOpenAiGptImage2Resolution(size);
10539
+ if (!sizeValidation.valid) {
10540
+ throw new Error(
10541
+ `imageResolution ${JSON.stringify(size)} is not supported by gpt-image-2: ${sizeValidation.reason}`
10542
+ );
10543
+ }
10544
+ return {
10545
+ size,
10546
+ quality: request.imageQuality ?? "auto",
10547
+ outputFormat: request.outputFormat,
10548
+ n: request.numImages ?? 1,
10549
+ background: request.background,
10550
+ moderation: request.moderation
10551
+ };
10552
+ }
10553
+ async function createOpenAiStyleImageFiles(styleImages) {
10554
+ if (!styleImages || styleImages.length === 0) {
10555
+ return void 0;
10556
+ }
10557
+ return await Promise.all(
10558
+ styleImages.map(async (image, index) => {
10559
+ const mimeType = image.mimeType ?? "image/png";
10560
+ const extension = resolveAttachmentExtension(mimeType);
10561
+ return await (0, import_openai3.toFile)(image.data, `style-${index + 1}.${extension}`, { type: mimeType });
10562
+ })
10563
+ );
10564
+ }
10565
+ async function generateImagesWithOpenAiImageApi(request) {
10566
+ const promptEntries = Array.from(request.imagePrompts, (rawPrompt, index) => {
10567
+ const prompt = rawPrompt.trim();
10568
+ if (!prompt) {
10569
+ throw new Error(`imagePrompts[${index}] must be a non-empty string`);
10570
+ }
10571
+ return prompt;
10572
+ });
10573
+ if (promptEntries.length === 0) {
10574
+ return [];
10575
+ }
10576
+ const provider = resolveProvider(request.model).provider;
10577
+ const telemetry = createLlmTelemetryEmitter({
10578
+ telemetry: request.telemetry,
10579
+ operation: "generateImages",
10580
+ provider,
10581
+ model: request.model
10582
+ });
10583
+ const startedAtMs = Date.now();
10584
+ const params = resolveOpenAiImageRequestParams(request);
10585
+ const styleImages = await createOpenAiStyleImageFiles(request.styleImages);
10586
+ const hasStyleImages = Boolean(styleImages && styleImages.length > 0);
10587
+ const outputMimeType = resolveOpenAiImageMimeType(params.outputFormat);
10588
+ let totalUsage;
10589
+ let costUsd = 0;
10590
+ let outputImages = 0;
10591
+ telemetry.emit({
10592
+ type: "llm.call.started",
10593
+ imagePromptCount: promptEntries.length,
10594
+ styleImageCount: request.styleImages?.length ?? 0,
10595
+ numImagesPerPrompt: params.n
10596
+ });
10597
+ try {
10598
+ const images = [];
10599
+ for (const imagePrompt of promptEntries) {
10600
+ const prompt = buildOpenAiImagePrompt({
10601
+ stylePrompt: request.stylePrompt,
10602
+ imagePrompt,
10603
+ hasStyleImages
10604
+ });
10605
+ const response = await runOpenAiCall(async (client) => {
10606
+ const payload = {
10607
+ model: request.model,
10608
+ prompt,
10609
+ n: params.n,
10610
+ size: params.size,
10611
+ quality: params.quality,
10612
+ ...params.outputFormat ? { output_format: params.outputFormat } : {},
10613
+ ...request.outputCompression !== void 0 ? { output_compression: request.outputCompression } : {},
10614
+ ...params.background ? { background: params.background } : {},
10615
+ ...params.moderation ? { moderation: params.moderation } : {}
10616
+ };
10617
+ if (styleImages && styleImages.length > 0) {
10618
+ return await client.images.edit(
10619
+ {
10620
+ ...payload,
10621
+ image: styleImages
10622
+ },
10623
+ { signal: request.signal }
10624
+ );
10625
+ }
10626
+ return await client.images.generate(payload, { signal: request.signal });
10627
+ }, request.model);
10628
+ const data = Array.isArray(response.data) ? response.data ?? [] : [];
10629
+ for (const item of data) {
10630
+ if (typeof item.b64_json !== "string" || item.b64_json.length === 0) {
10631
+ continue;
10632
+ }
10633
+ images.push({
10634
+ mimeType: outputMimeType,
10635
+ data: import_node_buffer4.Buffer.from(item.b64_json, "base64")
10636
+ });
10637
+ }
10638
+ outputImages = images.length;
10639
+ const usage = extractOpenAiUsageTokens(response.usage);
10640
+ totalUsage = sumUsageTokens(totalUsage, usage);
10641
+ costUsd += estimateCallCostUsd({
10642
+ modelId: request.model,
10643
+ tokens: usage,
10644
+ responseImages: data.length,
10645
+ imageSize: params.size,
10646
+ imageQuality: params.quality
10647
+ });
10648
+ }
10649
+ telemetry.emit({
10650
+ type: "llm.call.completed",
10651
+ success: true,
10652
+ durationMs: Math.max(0, Date.now() - startedAtMs),
10653
+ usage: totalUsage,
10654
+ costUsd,
10655
+ imageCount: images.length,
10656
+ attempts: promptEntries.length
10657
+ });
10658
+ return images;
10659
+ } catch (error) {
10660
+ const err = error instanceof Error ? error : new Error(String(error));
10661
+ telemetry.emit({
10662
+ type: "llm.call.completed",
10663
+ success: false,
10664
+ durationMs: Math.max(0, Date.now() - startedAtMs),
10665
+ usage: totalUsage,
10666
+ costUsd,
10667
+ imageCount: outputImages,
10668
+ error: err.message
10669
+ });
10670
+ throw err;
10671
+ } finally {
10672
+ await telemetry.flush();
10673
+ }
10674
+ }
10675
+ function buildChatGptImageInputContent(params) {
10676
+ const parts = [
10677
+ {
10678
+ type: "text",
10679
+ text: params.prompt
10680
+ }
10681
+ ];
10682
+ for (const [index, image] of (params.styleImages ?? []).entries()) {
10683
+ const mimeType = image.mimeType ?? "image/png";
10684
+ parts.push({
10685
+ type: "inlineData",
10686
+ data: image.data.toString("base64"),
10687
+ mimeType,
10688
+ filename: `style-${index + 1}.${resolveAttachmentExtension(mimeType)}`
10689
+ });
10690
+ }
10691
+ return [{ role: "user", parts }];
10692
+ }
10693
+ async function generateImagesWithChatGptImageTool(request) {
10694
+ const promptEntries = Array.from(request.imagePrompts, (rawPrompt, index) => {
10695
+ const prompt = rawPrompt.trim();
10696
+ if (!prompt) {
10697
+ throw new Error(`imagePrompts[${index}] must be a non-empty string`);
10698
+ }
10699
+ return prompt;
10700
+ });
10701
+ if (promptEntries.length === 0) {
10702
+ return [];
10703
+ }
10704
+ const providerInfo = resolveProvider(request.model);
10705
+ const telemetry = createLlmTelemetryEmitter({
10706
+ telemetry: request.telemetry,
10707
+ operation: "generateImages",
10708
+ provider: providerInfo.provider,
10709
+ model: request.model
10710
+ });
10711
+ const startedAtMs = Date.now();
10712
+ const numImagesPerPrompt = request.numImages ?? 1;
10713
+ let totalUsage;
10714
+ let costUsd = 0;
10715
+ let outputImages = 0;
10716
+ telemetry.emit({
10717
+ type: "llm.call.started",
10718
+ imagePromptCount: promptEntries.length,
10719
+ styleImageCount: request.styleImages?.length ?? 0,
10720
+ numImagesPerPrompt
10721
+ });
10722
+ try {
10723
+ const images = [];
10724
+ for (const imagePrompt of promptEntries) {
10725
+ const prompt = buildOpenAiImagePrompt({
10726
+ stylePrompt: request.stylePrompt,
10727
+ imagePrompt,
10728
+ hasStyleImages: Boolean(request.styleImages && request.styleImages.length > 0)
10729
+ });
10730
+ for (let imageIndex = 0; imageIndex < numImagesPerPrompt; imageIndex += 1) {
10731
+ const chatGptInput = toChatGptInput(
10732
+ buildChatGptImageInputContent({
10733
+ prompt,
10734
+ styleImages: request.styleImages
10735
+ }),
10736
+ { model: request.model }
10737
+ );
10738
+ const preparedInput = await maybePrepareOpenAiPromptInput(chatGptInput.input, {
10739
+ model: request.model,
10740
+ provider: "chatgpt"
10741
+ });
10742
+ const result = await collectChatGptCodexResponseWithRetry({
10743
+ request: {
10744
+ model: providerInfo.model,
10745
+ store: false,
10746
+ stream: true,
10747
+ instructions: chatGptInput.instructions ?? "Use the image_generation tool to generate exactly one PNG image. Do not return prose instead of the image.",
10748
+ input: preparedInput,
10749
+ tool_choice: "required",
10750
+ parallel_tool_calls: false,
10751
+ tools: [{ type: "image_generation", output_format: "png" }]
10752
+ },
10753
+ signal: request.signal
10754
+ });
10755
+ if (result.status && result.status !== "completed") {
10756
+ throw new Error(`ChatGPT image generation response status ${result.status}`);
10757
+ }
10758
+ if (result.imageGenerationCalls.length === 0) {
10759
+ throw new Error("ChatGPT image generation returned no image_generation_call result.");
10760
+ }
10761
+ for (const call of result.imageGenerationCalls) {
10762
+ images.push({
10763
+ mimeType: "image/png",
10764
+ data: import_node_buffer4.Buffer.from(call.result, "base64")
10765
+ });
10766
+ }
10767
+ outputImages = images.length;
10768
+ const usage = extractChatGptUsageTokens(result.usage);
10769
+ totalUsage = sumUsageTokens(totalUsage, usage);
10770
+ costUsd += estimateCallCostUsd({
10771
+ modelId: request.model,
10772
+ tokens: usage,
10773
+ responseImages: result.imageGenerationCalls.length,
10774
+ imageSize: "1024x1024",
10775
+ imageQuality: "medium"
10776
+ });
10777
+ }
10778
+ }
10779
+ telemetry.emit({
10780
+ type: "llm.call.completed",
10781
+ success: true,
10782
+ durationMs: Math.max(0, Date.now() - startedAtMs),
10783
+ modelVersion: request.model,
10784
+ usage: totalUsage,
10785
+ costUsd,
10786
+ imageCount: images.length,
10787
+ attempts: promptEntries.length * numImagesPerPrompt
10788
+ });
10789
+ return images;
10790
+ } catch (error) {
10791
+ const err = error instanceof Error ? error : new Error(String(error));
10792
+ telemetry.emit({
10793
+ type: "llm.call.completed",
10794
+ success: false,
10795
+ durationMs: Math.max(0, Date.now() - startedAtMs),
10796
+ usage: totalUsage,
10797
+ costUsd,
10798
+ imageCount: outputImages,
10799
+ error: err.message
10800
+ });
10801
+ throw err;
10802
+ } finally {
10803
+ await telemetry.flush();
10804
+ }
10805
+ }
10266
10806
  async function generateImages(request) {
10807
+ if (isOpenAiGenerateImagesRequest(request)) {
10808
+ return await generateImagesWithOpenAiImageApi(request);
10809
+ }
10810
+ if (isChatGptGenerateImagesRequest(request)) {
10811
+ return await generateImagesWithChatGptImageTool(request);
10812
+ }
10267
10813
  const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 4));
10268
10814
  const promptList = Array.from(request.imagePrompts);
10269
10815
  if (promptList.length === 0) {
@@ -10277,7 +10823,7 @@ async function generateImages(request) {
10277
10823
  }
10278
10824
  return { index: arrayIndex + 1, prompt: trimmedPrompt };
10279
10825
  });
10280
- const gradingPrompt = request.imageGradingPrompt.trim();
10826
+ const gradingPrompt = request.imageGradingPrompt?.trim() ?? "";
10281
10827
  if (!gradingPrompt) {
10282
10828
  throw new Error("imageGradingPrompt must be a non-empty string");
10283
10829
  }
@@ -13575,8 +14121,11 @@ function summarizeResultUsage(result) {
13575
14121
  }
13576
14122
  summary = {
13577
14123
  promptTokens: sumUsageValue2(summary?.promptTokens, usage.promptTokens),
14124
+ promptTextTokens: sumUsageValue2(summary?.promptTextTokens, usage.promptTextTokens),
14125
+ promptImageTokens: sumUsageValue2(summary?.promptImageTokens, usage.promptImageTokens),
13578
14126
  cachedTokens: sumUsageValue2(summary?.cachedTokens, usage.cachedTokens),
13579
14127
  responseTokens: sumUsageValue2(summary?.responseTokens, usage.responseTokens),
14128
+ responseTextTokens: sumUsageValue2(summary?.responseTextTokens, usage.responseTextTokens),
13580
14129
  responseImageTokens: sumUsageValue2(summary?.responseImageTokens, usage.responseImageTokens),
13581
14130
  thinkingTokens: sumUsageValue2(summary?.thinkingTokens, usage.thinkingTokens),
13582
14131
  totalTokens: sumUsageValue2(summary?.totalTokens, usage.totalTokens),
@@ -14288,6 +14837,7 @@ async function runCandidateEvolution(options) {
14288
14837
  }
14289
14838
  // Annotate the CommonJS export names for ESM import in node:
14290
14839
  0 && (module.exports = {
14840
+ CHATGPT_IMAGE_MODEL_IDS,
14291
14841
  CHATGPT_MODEL_IDS,
14292
14842
  CODEX_APPLY_PATCH_FREEFORM_TOOL_DESCRIPTION,
14293
14843
  CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION,
@@ -14308,6 +14858,17 @@ async function runCandidateEvolution(options) {
14308
14858
  LLM_MODEL_IDS,
14309
14859
  LLM_TEXT_MODEL_IDS,
14310
14860
  LlmJsonCallError,
14861
+ OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION,
14862
+ OPENAI_GPT_IMAGE_2_BACKGROUNDS,
14863
+ OPENAI_GPT_IMAGE_2_MODERATION_LEVELS,
14864
+ OPENAI_GPT_IMAGE_2_NUM_IMAGES,
14865
+ OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS,
14866
+ OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS,
14867
+ OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS,
14868
+ OPENAI_GPT_IMAGE_2_QUALITY_LEVELS,
14869
+ OPENAI_GPT_IMAGE_2_RESOLUTIONS,
14870
+ OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS,
14871
+ OPENAI_IMAGE_MODEL_IDS,
14311
14872
  OPENAI_MODEL_IDS,
14312
14873
  appendMarkdownSourcesSection,
14313
14874
  applyPatch,
@@ -14348,6 +14909,7 @@ async function runCandidateEvolution(options) {
14348
14909
  generateText,
14349
14910
  getChatGptAuthProfile,
14350
14911
  getCurrentToolCallContext,
14912
+ isChatGptImageModelId,
14351
14913
  isChatGptModelId,
14352
14914
  isExperimentalChatGptModelId,
14353
14915
  isFireworksModelId,
@@ -14357,6 +14919,7 @@ async function runCandidateEvolution(options) {
14357
14919
  isLlmImageModelId,
14358
14920
  isLlmModelId,
14359
14921
  isLlmTextModelId,
14922
+ isOpenAiImageModelId,
14360
14923
  isOpenAiModelId,
14361
14924
  loadEnvFromFile,
14362
14925
  loadLocalEnv,
@@ -14364,6 +14927,7 @@ async function runCandidateEvolution(options) {
14364
14927
  refreshChatGptOauthToken,
14365
14928
  resetModelConcurrencyConfig,
14366
14929
  resetTelemetry,
14930
+ resolveChatGptImageProviderModel,
14367
14931
  resolveFilesystemToolProfile,
14368
14932
  resolveFireworksModelId,
14369
14933
  runAgentLoop,
@@ -14376,6 +14940,7 @@ async function runCandidateEvolution(options) {
14376
14940
  streamToolLoop,
14377
14941
  stripCodexCitationMarkers,
14378
14942
  toGeminiJsonSchema,
14379
- tool
14943
+ tool,
14944
+ validateOpenAiGptImage2Resolution
14380
14945
  });
14381
14946
  //# sourceMappingURL=index.cjs.map