@ljoukov/llm 7.0.11 → 7.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,6 +15,7 @@ import {
15
15
  } from "@google/genai";
16
16
  import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
17
17
  import { z as z3 } from "zod";
18
+ import { toFile } from "openai";
18
19
 
19
20
  // src/utils/asyncQueue.ts
20
21
  function createAsyncQueue() {
@@ -216,6 +217,81 @@ var OPENAI_MODEL_IDS = [
216
217
  function isOpenAiModelId(value) {
217
218
  return OPENAI_MODEL_IDS.includes(value);
218
219
  }
220
// Ids recognised as OpenAI image models (a single entry in this version).
+ var OPENAI_IMAGE_MODEL_IDS = ["gpt-image-2"];
221
// Returns true when `value` is one of the entries of OPENAI_IMAGE_MODEL_IDS.
+ function isOpenAiImageModelId(value) {
222
+ return OPENAI_IMAGE_MODEL_IDS.includes(value);
223
+ }
224
// Explicit WIDTHxHEIGHT sizes listed for gpt-image-2.
var OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS = [
  "1024x1024",
  "1536x1024",
  "1024x1536",
  "2048x2048",
  "2048x1152",
  "3840x2160",
  "2160x3840"
];
// Sentinel accepted by the validator in place of an explicit size.
var OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION = "auto";
// The listed sizes plus the "auto" sentinel.
var OPENAI_GPT_IMAGE_2_RESOLUTIONS = [
  ...OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS,
  OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION
];
// Numeric limits enforced by validateOpenAiGptImage2Resolution.
// `experimentalTotalPixelsThreshold` is exposed but is not consulted by the validator.
var OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS = {
  maxEdgePixels: 3840,
  edgeMultiplePixels: 16,
  maxLongToShortEdgeRatio: 3,
  minTotalPixels: 655360,
  maxTotalPixels: 8294400,
  experimentalTotalPixelsThreshold: 3686400
};
/**
 * Checks a size value against OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS.
 *
 * @param {unknown} value - "auto" or a "WIDTHxHEIGHT" string (no leading zeros).
 * @returns {{ valid: true } | { valid: false, reason: string }} `reason` names
 *   the first constraint that failed; checks run in the order: format, safe
 *   integer, max edge, edge multiple, total pixels, aspect ratio.
 */
function validateOpenAiGptImage2Resolution(value) {
  if (value === OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION) {
    return { valid: true };
  }
  const formatError = { valid: false, reason: 'Expected "auto" or a WIDTHxHEIGHT pixel string.' };
  // RegExp#exec stringifies its argument, so without this guard a value such
  // as ["1024x1024"] would be coerced to "1024x1024" and reported as valid.
  if (typeof value !== "string") {
    return formatError;
  }
  const match = /^([1-9]\d*)x([1-9]\d*)$/.exec(value);
  if (!match) {
    return formatError;
  }
  const width = Number(match[1]);
  const height = Number(match[2]);
  if (!Number.isSafeInteger(width) || !Number.isSafeInteger(height)) {
    return { valid: false, reason: "Width and height must be safe integer pixel counts." };
  }
  const constraints = OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS;
  if (width > constraints.maxEdgePixels || height > constraints.maxEdgePixels) {
    return {
      valid: false,
      reason: `Width and height must each be at most ${constraints.maxEdgePixels}px.`
    };
  }
  if (width % constraints.edgeMultiplePixels !== 0 || height % constraints.edgeMultiplePixels !== 0) {
    return {
      valid: false,
      reason: `Width and height must each be multiples of ${constraints.edgeMultiplePixels}px.`
    };
  }
  const totalPixels = width * height;
  if (totalPixels < constraints.minTotalPixels || totalPixels > constraints.maxTotalPixels) {
    return {
      valid: false,
      reason: `Total pixels must be between ${constraints.minTotalPixels} and ${constraints.maxTotalPixels}.`
    };
  }
  const longEdge = Math.max(width, height);
  const shortEdge = Math.min(width, height);
  if (longEdge / shortEdge > constraints.maxLongToShortEdgeRatio) {
    return {
      valid: false,
      reason: `The long edge must be at most ${constraints.maxLongToShortEdgeRatio}:1 relative to the short edge.`
    };
  }
  return { valid: true };
}
289
// Lists of option values published for gpt-image-2 (quality, output format,
// background, moderation, partial-image count, images per request). All six
// appear in the module's export list.
+ var OPENAI_GPT_IMAGE_2_QUALITY_LEVELS = ["low", "medium", "high", "auto"];
290
+ var OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS = ["png", "jpeg", "webp"];
291
+ var OPENAI_GPT_IMAGE_2_BACKGROUNDS = ["opaque", "auto"];
292
+ var OPENAI_GPT_IMAGE_2_MODERATION_LEVELS = ["low", "auto"];
293
+ var OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS = [0, 1, 2, 3];
294
+ var OPENAI_GPT_IMAGE_2_NUM_IMAGES = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
219
295
  var CHATGPT_MODEL_IDS = [
220
296
  "chatgpt-gpt-5.5",
221
297
  "chatgpt-gpt-5.5-fast",
@@ -298,6 +374,27 @@ var OPENAI_GPT_54_NANO_PRICING = {
298
374
  cachedRate: 5e-3 / 1e6,
299
375
  outputRate: 0.4 / 1e6
300
376
  };
377
// Per-image price table for gpt-image-2, keyed by quality and then by
// resolution. `defaultQuality` / `defaultResolution` are the fallbacks that
// estimateOpenAiImageCostUsd uses when the request values are not in the table.
// Values are presumably USD per image (the consumer is named ...CostUsd) — confirm.
// NOTE(review): in every tier the square 1024x1024 size is priced above the
// larger rectangular sizes — verify against the published price list.
+ var OPENAI_GPT_IMAGE_2_PRICING = {
378
+ defaultQuality: "medium",
379
+ defaultResolution: "1024x1024",
380
+ imagePrices: {
381
+ low: {
382
+ "1024x1024": 6e-3,
383
+ "1024x1536": 5e-3,
384
+ "1536x1024": 5e-3
385
+ },
386
+ medium: {
387
+ "1024x1024": 0.053,
388
+ "1024x1536": 0.041,
389
+ "1536x1024": 0.041
390
+ },
391
+ high: {
392
+ "1024x1024": 0.211,
393
+ "1024x1536": 0.165,
394
+ "1536x1024": 0.165
395
+ }
396
+ }
397
+ };
301
398
  function getOpenAiPricing(modelId) {
302
399
  if (isExperimentalChatGptModelId(modelId)) {
303
400
  return OPENAI_GPT_54_PRICING;
@@ -325,6 +422,9 @@ function getOpenAiPricing(modelId) {
325
422
  }
326
423
  return void 0;
327
424
  }
425
// Looks up the image price table for `modelId`. Ids that are not OpenAI image
// models yield undefined.
function getOpenAiImagePricing(modelId) {
  if (!isOpenAiImageModelId(modelId)) {
    return void 0;
  }
  return OPENAI_GPT_IMAGE_2_PRICING;
}
328
428
 
329
429
  // src/utils/cost.ts
330
430
  function resolveUsageNumber(value) {
@@ -337,8 +437,18 @@ function estimateCallCostUsd({
337
437
  modelId,
338
438
  tokens,
339
439
  responseImages,
340
- imageSize
440
+ imageSize,
441
+ imageQuality
341
442
  }) {
443
+ const openAiImagePricing = getOpenAiImagePricing(modelId);
444
+ if (openAiImagePricing) {
445
+ return estimateOpenAiImageCostUsd({
446
+ pricing: openAiImagePricing,
447
+ responseImages,
448
+ imageSize,
449
+ imageQuality
450
+ });
451
+ }
342
452
  if (!tokens) {
343
453
  return 0;
344
454
  }
@@ -400,6 +510,40 @@ function estimateCallCostUsd({
400
510
  }
401
511
  return 0;
402
512
  }
513
/**
 * Estimates the cost of generated images from a per-image price table.
 *
 * @param {object} args
 * @param {{ defaultQuality: string, defaultResolution: string, imagePrices: object }} args.pricing
 *   Table keyed by quality, then by resolution.
 * @param {number} [args.responseImages] - Number of images returned.
 * @param {string} [args.imageSize] - Requested size; mapped to a priced bucket.
 * @param {string} [args.imageQuality] - "low" | "medium" | "high"; anything else
 *   (including "auto") is priced at `pricing.defaultQuality`.
 * @returns {number} `count * unitPrice`, or 0 when there is nothing to bill.
 */
function estimateOpenAiImageCostUsd({
  pricing,
  responseImages,
  imageSize,
  imageQuality
}) {
  // A missing or non-numeric count used to slip past `<= 0` and return NaN,
  // which then poisoned every running cost total it was added to.
  const imageCount = Number(responseImages);
  if (!Number.isFinite(imageCount) || imageCount <= 0) {
    return 0;
  }
  const quality = imageQuality === "low" || imageQuality === "medium" || imageQuality === "high" ? imageQuality : pricing.defaultQuality;
  const resolution = resolveOpenAiImagePriceResolution(imageSize) ?? pricing.defaultResolution;
  return imageCount * pricing.imagePrices[quality][resolution];
}
/**
 * Maps a requested size onto one of the three priced resolutions.
 *
 * Sizes outside the table are bucketed by orientation only (square, landscape,
 * portrait), so the estimate does not scale with how large the image is.
 *
 * @param {string} [imageSize] - "WIDTHxHEIGHT", "auto", or nothing.
 * @returns {"1024x1024" | "1024x1536" | "1536x1024" | undefined} undefined when
 *   the size is absent, "auto", or not a positive WIDTHxHEIGHT string.
 */
function resolveOpenAiImagePriceResolution(imageSize) {
  if (imageSize === "1024x1024" || imageSize === "1024x1536" || imageSize === "1536x1024") {
    return imageSize;
  }
  // The typeof check stops RegExp#exec from stringifying arrays/objects.
  if (typeof imageSize !== "string" || imageSize === "" || imageSize === "auto") {
    return void 0;
  }
  const match = /^(\d+)x(\d+)$/.exec(imageSize);
  if (!match) {
    return void 0;
  }
  const width = Number(match[1]);
  const height = Number(match[2]);
  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
    return void 0;
  }
  if (width === height) {
    return "1024x1024";
  }
  return width > height ? "1536x1024" : "1024x1536";
}
403
547
 
404
548
  // src/openai/chatgpt-codex.ts
405
549
  import os2 from "os";
@@ -4380,13 +4524,13 @@ var LLM_TEXT_MODEL_IDS = [
4380
4524
  ...FIREWORKS_MODEL_IDS,
4381
4525
  ...GEMINI_TEXT_MODEL_IDS
4382
4526
  ];
4383
- var LLM_IMAGE_MODEL_IDS = [...GEMINI_IMAGE_MODEL_IDS];
4527
+ var LLM_IMAGE_MODEL_IDS = [...OPENAI_IMAGE_MODEL_IDS, ...GEMINI_IMAGE_MODEL_IDS];
4384
4528
  var LLM_MODEL_IDS = [...LLM_TEXT_MODEL_IDS, ...LLM_IMAGE_MODEL_IDS];
4385
4529
  function isLlmTextModelId(value) {
4386
4530
  return isOpenAiModelId(value) || isChatGptModelId(value) || isFireworksModelId(value) || isGeminiTextModelId(value);
4387
4531
  }
4388
4532
  // Returns true when `value` is an image model id. The removed row accepted
  // Gemini image ids only; the added row also accepts OpenAI image ids.
  function isLlmImageModelId(value) {
4389
- return isGeminiImageModelId(value);
4533
+ return isOpenAiImageModelId(value) || isGeminiImageModelId(value);
4390
4534
  }
4391
4535
  function isLlmModelId(value) {
4392
4536
  return isLlmTextModelId(value) || isLlmImageModelId(value);
@@ -4398,6 +4542,9 @@ var LlmJsonCallError = class extends Error {
4398
4542
  this.name = "LlmJsonCallError";
4399
4543
  }
4400
4544
  };
4545
// Tells whether a generateImages request targets an OpenAI image model,
// judged solely by `request.model`.
function isOpenAiGenerateImagesRequest(request) {
  const { model } = request;
  return isOpenAiImageModelId(model);
}
4401
4548
  function tool(options) {
4402
4549
  return {
4403
4550
  type: "function",
@@ -4988,6 +5135,9 @@ function resolveProvider(model) {
4988
5135
  return { provider: "fireworks", model: fireworksModel };
4989
5136
  }
4990
5137
  }
5138
+ if (isOpenAiImageModelId(model)) {
5139
+ return { provider: "openai", model };
5140
+ }
4991
5141
  if (isOpenAiModelId(model)) {
4992
5142
  return {
4993
5143
  provider: "openai",
@@ -4995,7 +5145,7 @@ function resolveProvider(model) {
4995
5145
  serviceTier: resolveOpenAiServiceTier(model)
4996
5146
  };
4997
5147
  }
4998
- throw new Error(`Unsupported text model: ${model}`);
5148
+ throw new Error(`Unsupported model: ${model}`);
4999
5149
  }
5000
5150
  function isOpenAiCodexModel(modelId) {
5001
5151
  return modelId.includes("codex");
@@ -6082,12 +6232,40 @@ function toGeminiTools(tools) {
6082
6232
  return { googleSearch: {} };
6083
6233
  case "code-execution":
6084
6234
  return { codeExecution: {} };
6235
+ case "shell":
6236
+ throw new Error("Gemini provider does not support the OpenAI shell tool.");
6085
6237
  default:
6086
6238
  throw new Error("Unsupported tool configuration");
6087
6239
  }
6088
6240
  });
6089
6241
  }
6090
- function toOpenAiTools(tools) {
6242
/**
 * Converts a camelCase shell-tool environment config into the snake_case
 * object placed under `environment` of an OpenAI `shell` tool entry.
 *
 * @param {object} [environment] - Optional config. `type: "container-reference"`
 *   selects an existing container by `containerId`; anything else (including
 *   no config at all) produces a `container_auto` environment.
 * @returns {object} Either `{ type: "container_reference", container_id }` or
 *   `{ type: "container_auto", file_ids?, memory_limit?, network_policy? }`.
 *   Optional keys are only present when the matching input field is set.
 */
function toOpenAiShellEnvironment(environment) {
  if (environment?.type === "container-reference") {
    return {
      type: "container_reference",
      container_id: environment.containerId
    };
  }
  const result = { type: "container_auto" };
  if (environment?.fileIds) {
    result.file_ids = Array.from(environment.fileIds);
  }
  if (environment?.memoryLimit !== void 0) {
    result.memory_limit = environment.memoryLimit;
  }
  const policy = environment?.networkPolicy;
  if (!policy) {
    return result;
  }
  // Any policy type other than "allowlist" is sent as disabled networking.
  if (policy.type !== "allowlist") {
    result.network_policy = { type: "disabled" };
    return result;
  }
  const allowlist = {
    type: "allowlist",
    // An allowlist without domains used to crash in Array.from(undefined)
    // with "undefined is not iterable"; treat it as an empty list instead.
    allowed_domains: Array.from(policy.allowedDomains ?? [])
  };
  if (policy.domainSecrets) {
    // Array.from (rather than .map) accepts any iterable, matching how
    // fileIds and allowedDomains are handled. Only the three known fields
    // are forwarded.
    allowlist.domain_secrets = Array.from(policy.domainSecrets, (secret) => ({
      domain: secret.domain,
      name: secret.name,
      value: secret.value
    }));
  }
  result.network_policy = allowlist;
  return result;
}
6268
+ function toOpenAiTools(tools, options) {
6091
6269
  if (!tools || tools.length === 0) {
6092
6270
  return void 0;
6093
6271
  }
@@ -6100,6 +6278,15 @@ function toOpenAiTools(tools) {
6100
6278
  case "code-execution": {
6101
6279
  return { type: "code_interpreter", container: { type: "auto" } };
6102
6280
  }
6281
+ case "shell": {
6282
+ if (options.provider !== "openai") {
6283
+ throw new Error("OpenAI shell tool is only supported for OpenAI API models.");
6284
+ }
6285
+ return {
6286
+ type: "shell",
6287
+ environment: toOpenAiShellEnvironment(tool2.environment)
6288
+ };
6289
+ }
6103
6290
  default:
6104
6291
  throw new Error("Unsupported tool configuration");
6105
6292
  }
@@ -6114,8 +6301,11 @@ function mergeTokenUpdates(current, next) {
6114
6301
  }
6115
6302
  return {
6116
6303
  promptTokens: next.promptTokens ?? current.promptTokens,
6304
+ promptTextTokens: next.promptTextTokens ?? current.promptTextTokens,
6305
+ promptImageTokens: next.promptImageTokens ?? current.promptImageTokens,
6117
6306
  cachedTokens: next.cachedTokens ?? current.cachedTokens,
6118
6307
  responseTokens: next.responseTokens ?? current.responseTokens,
6308
+ responseTextTokens: next.responseTextTokens ?? current.responseTextTokens,
6119
6309
  responseImageTokens: next.responseImageTokens ?? current.responseImageTokens,
6120
6310
  thinkingTokens: next.thinkingTokens ?? current.thinkingTokens,
6121
6311
  totalTokens: next.totalTokens ?? current.totalTokens,
@@ -6138,8 +6328,11 @@ function sumUsageTokens(current, next) {
6138
6328
  }
6139
6329
  return {
6140
6330
  promptTokens: sumUsageValue(current?.promptTokens, next.promptTokens),
6331
+ promptTextTokens: sumUsageValue(current?.promptTextTokens, next.promptTextTokens),
6332
+ promptImageTokens: sumUsageValue(current?.promptImageTokens, next.promptImageTokens),
6141
6333
  cachedTokens: sumUsageValue(current?.cachedTokens, next.cachedTokens),
6142
6334
  responseTokens: sumUsageValue(current?.responseTokens, next.responseTokens),
6335
+ responseTextTokens: sumUsageValue(current?.responseTextTokens, next.responseTextTokens),
6143
6336
  responseImageTokens: sumUsageValue(current?.responseImageTokens, next.responseImageTokens),
6144
6337
  thinkingTokens: sumUsageValue(current?.thinkingTokens, next.thinkingTokens),
6145
6338
  totalTokens: sumUsageValue(current?.totalTokens, next.totalTokens),
@@ -6254,10 +6447,22 @@ function extractOpenAiUsageTokens(usage) {
6254
6447
  const cachedTokens = toMaybeNumber(
6255
6448
  usage.input_tokens_details?.cached_tokens
6256
6449
  );
6450
+ const promptTextTokens = toMaybeNumber(
6451
+ usage.input_tokens_details?.text_tokens
6452
+ );
6453
+ const promptImageTokens = toMaybeNumber(
6454
+ usage.input_tokens_details?.image_tokens
6455
+ );
6257
6456
  const outputTokensRaw = toMaybeNumber(usage.output_tokens);
6258
6457
  const reasoningTokens = toMaybeNumber(
6259
6458
  usage.output_tokens_details?.reasoning_tokens
6260
6459
  );
6460
+ const responseTextTokens = toMaybeNumber(
6461
+ usage.output_tokens_details?.text_tokens
6462
+ );
6463
+ const responseImageTokens = toMaybeNumber(
6464
+ usage.output_tokens_details?.image_tokens
6465
+ );
6261
6466
  const totalTokens = toMaybeNumber(usage.total_tokens);
6262
6467
  let responseTokens;
6263
6468
  if (outputTokensRaw !== void 0) {
@@ -6269,8 +6474,12 @@ function extractOpenAiUsageTokens(usage) {
6269
6474
  }
6270
6475
  return {
6271
6476
  promptTokens,
6477
+ promptTextTokens,
6478
+ promptImageTokens,
6272
6479
  cachedTokens,
6273
6480
  responseTokens,
6481
+ responseTextTokens,
6482
+ responseImageTokens,
6274
6483
  thinkingTokens: reasoningTokens,
6275
6484
  totalTokens
6276
6485
  };
@@ -7698,6 +7907,8 @@ async function runTextCall(params) {
7698
7907
  let responseRole;
7699
7908
  let latestUsage;
7700
7909
  let responseImages = 0;
7910
+ let sawResponseDelta = false;
7911
+ let sawThoughtDelta = false;
7701
7912
  const pushEvent = (event) => {
7702
7913
  queue.push(event);
7703
7914
  params.onEvent?.(event);
@@ -7708,8 +7919,10 @@ async function runTextCall(params) {
7708
7919
  }
7709
7920
  responseParts.push({ type: "text", text, ...channel === "thought" ? { thought: true } : {} });
7710
7921
  if (channel === "thought") {
7922
+ sawThoughtDelta = true;
7711
7923
  callLogger?.appendThoughtDelta(text);
7712
7924
  } else {
7925
+ sawResponseDelta = true;
7713
7926
  callLogger?.appendResponseDelta(text);
7714
7927
  }
7715
7928
  pushEvent({ type: "delta", channel, text });
@@ -7742,6 +7955,9 @@ async function runTextCall(params) {
7742
7955
  const { result } = await collectFileUploadMetrics(async () => {
7743
7956
  try {
7744
7957
  if (provider === "openai") {
7958
+ if (isOpenAiImageModelId(request.model)) {
7959
+ throw new Error("gpt-image-2 is an image generation model; use generateImages().");
7960
+ }
7745
7961
  const openAiInput = await maybePrepareOpenAiPromptInput(
7746
7962
  toOpenAiInput(contents, {
7747
7963
  defaultMediaResolution: request.mediaResolution,
@@ -7749,7 +7965,7 @@ async function runTextCall(params) {
7749
7965
  }),
7750
7966
  { model: request.model, provider: "openai" }
7751
7967
  );
7752
- const openAiTools = toOpenAiTools(request.tools);
7968
+ const openAiTools = toOpenAiTools(request.tools, { provider: "openai" });
7753
7969
  const reasoningEffort = resolveOpenAiReasoningEffort(
7754
7970
  modelForProvider,
7755
7971
  request.thinkingLevel
@@ -7810,12 +8026,17 @@ async function runTextCall(params) {
7810
8026
  );
7811
8027
  }
7812
8028
  latestUsage = extractOpenAiUsageTokens(finalResponse.usage);
7813
- if (responseParts.length === 0) {
8029
+ if (!sawResponseDelta || !sawThoughtDelta) {
8030
+ const needsResponseFallback = !sawResponseDelta;
8031
+ const needsThoughtFallback = !sawThoughtDelta;
7814
8032
  const fallback = extractOpenAiResponseParts(finalResponse);
7815
8033
  blocked = blocked || fallback.blocked;
7816
8034
  for (const part of fallback.parts) {
7817
8035
  if (part.type === "text") {
7818
- pushDelta(part.thought === true ? "thought" : "response", part.text);
8036
+ const channel = part.thought === true ? "thought" : "response";
8037
+ if (channel === "response" && needsResponseFallback || channel === "thought" && needsThoughtFallback) {
8038
+ pushDelta(channel, part.text);
8039
+ }
7819
8040
  } else if (part.type === "inlineData") {
7820
8041
  pushInline(part.data, part.mimeType);
7821
8042
  }
@@ -7832,7 +8053,7 @@ async function runTextCall(params) {
7832
8053
  provider: "chatgpt"
7833
8054
  });
7834
8055
  const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
7835
- const openAiTools = toOpenAiTools(request.tools);
8056
+ const openAiTools = toOpenAiTools(request.tools, { provider: "chatgpt" });
7836
8057
  const requestPayload = {
7837
8058
  model: modelForProvider,
7838
8059
  store: false,
@@ -7851,18 +8072,18 @@ async function runTextCall(params) {
7851
8072
  },
7852
8073
  ...openAiTools ? { tools: openAiTools } : {}
7853
8074
  };
7854
- let sawResponseDelta = false;
7855
- let sawThoughtDelta = false;
8075
+ let sawResponseDelta2 = false;
8076
+ let sawThoughtDelta2 = false;
7856
8077
  const result2 = await collectChatGptCodexResponseWithRetry({
7857
8078
  request: requestPayload,
7858
8079
  signal,
7859
8080
  onDelta: (delta) => {
7860
8081
  if (delta.thoughtDelta) {
7861
- sawThoughtDelta = true;
8082
+ sawThoughtDelta2 = true;
7862
8083
  pushDelta("thought", delta.thoughtDelta);
7863
8084
  }
7864
8085
  if (delta.textDelta) {
7865
- sawResponseDelta = true;
8086
+ sawResponseDelta2 = true;
7866
8087
  pushDelta("response", delta.textDelta);
7867
8088
  }
7868
8089
  }
@@ -7878,10 +8099,10 @@ async function runTextCall(params) {
7878
8099
  latestUsage = extractChatGptUsageTokens(result2.usage);
7879
8100
  const fallbackText = typeof result2.text === "string" ? result2.text : "";
7880
8101
  const fallbackThoughts = typeof result2.reasoningSummaryText === "string" && result2.reasoningSummaryText.length > 0 ? result2.reasoningSummaryText : typeof result2.reasoningText === "string" ? result2.reasoningText : "";
7881
- if (!sawThoughtDelta && fallbackThoughts.length > 0) {
8102
+ if (!sawThoughtDelta2 && fallbackThoughts.length > 0) {
7882
8103
  pushDelta("thought", fallbackThoughts);
7883
8104
  }
7884
- if (!sawResponseDelta && fallbackText.length > 0) {
8105
+ if (!sawResponseDelta2 && fallbackText.length > 0) {
7885
8106
  pushDelta("response", fallbackText);
7886
8107
  }
7887
8108
  } else if (provider === "fireworks") {
@@ -8601,7 +8822,7 @@ async function runToolLoop(request) {
8601
8822
  try {
8602
8823
  if (providerInfo.provider === "openai") {
8603
8824
  const openAiAgentTools = buildOpenAiToolsFromToolSet(request.tools);
8604
- const openAiNativeTools = toOpenAiTools(request.modelTools);
8825
+ const openAiNativeTools = toOpenAiTools(request.modelTools, { provider: "openai" });
8605
8826
  const openAiTools = openAiNativeTools ? [...openAiNativeTools, ...openAiAgentTools] : [...openAiAgentTools];
8606
8827
  const reasoningEffort = resolveOpenAiReasoningEffort(
8607
8828
  providerInfo.model,
@@ -9004,7 +9225,7 @@ async function runToolLoop(request) {
9004
9225
  }
9005
9226
  if (providerInfo.provider === "chatgpt") {
9006
9227
  const openAiAgentTools = buildOpenAiToolsFromToolSet(request.tools);
9007
- const openAiNativeTools = toOpenAiTools(request.modelTools);
9228
+ const openAiNativeTools = toOpenAiTools(request.modelTools, { provider: "chatgpt" });
9008
9229
  const openAiTools = openAiNativeTools ? [...openAiNativeTools, ...openAiAgentTools] : [...openAiAgentTools];
9009
9230
  const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
9010
9231
  const toolLoopInput = toChatGptInput(contents, {
@@ -10102,7 +10323,184 @@ async function gradeGeneratedImage(params) {
10102
10323
  });
10103
10324
  return { grade: value.grade, result };
10104
10325
  }
10326
/**
 * Maps an image `outputFormat` option to the MIME type attached to returned images.
 *
 * @param {string | null | undefined} outputFormat - "png", "jpeg" or "webp".
 * @returns {string} "image/jpeg" or "image/webp" for those formats, otherwise
 *   "image/png".
 */
function resolveOpenAiImageMimeType(outputFormat) {
  switch (outputFormat) {
    case "jpeg":
      return "image/jpeg";
    case "webp":
      return "image/webp";
    default:
      // The original switch had no default, so null (for which the caller
      // sends no output_format at all) or any unexpected value produced an
      // undefined MIME type on every returned image.
      return "image/png";
  }
}
10337
/**
 * Assembles the newline-separated text prompt sent for one image.
 *
 * @param {object} params
 * @param {string} [params.stylePrompt] - Style description; trimmed. When it is
 *   missing or blank the "Style:" section is omitted rather than left empty.
 * @param {string} params.imagePrompt - Image description; trimmed.
 * @param {boolean} [params.hasStyleImages] - Adds the reference-image sentence.
 * @returns {string} Non-empty lines joined with "\n" (no blank separator lines,
 *   which matches what the original produced after its empty-line filter).
 */
function buildOpenAiImagePrompt(params) {
  // Tolerate a missing style prompt instead of throwing on `.trim()`.
  const styleText = typeof params.stylePrompt === "string" ? params.stylePrompt.trim() : "";
  const imageText = params.imagePrompt.trim();
  const lines = ["Follow the requested visual style."];
  if (styleText.length > 0) {
    // Emit the header only together with its content; a bare "Style:" line
    // followed directly by the next section reads as an empty instruction.
    lines.push("Style:", styleText);
  }
  if (params.hasStyleImages) {
    lines.push(
      "Use the attached reference image or images for palette, lighting, mood, composition, and material feel."
    );
  }
  lines.push("Image:");
  if (imageText.length > 0) {
    lines.push(imageText);
  }
  return lines.join("\n");
}
10352
/**
 * Validates the image options of a generateImages request and returns the
 * values used to build the API payload.
 *
 * @param {object} request - Reads `partialImages`, `outputCompression`,
 *   `outputFormat`, `numImages`, `imageResolution`, `imageQuality`,
 *   `background` and `moderation`.
 * @returns {{ size: string, quality: string, outputFormat: (string|undefined),
 *   n: number, background: (string|undefined), moderation: (string|undefined) }}
 *   `size` and `quality` default to "auto", `n` defaults to 1.
 * @throws {Error} When `partialImages` is set, `outputCompression` is not an
 *   integer in 0..100 or is used without a jpeg/webp format, `numImages` is not
 *   an integer in 1..10, or `imageResolution` fails validation.
 */
function resolveOpenAiImageRequestParams(request) {
  if (request.partialImages !== void 0) {
    throw new Error("partialImages is only supported for streaming image generation.");
  }
  const { outputCompression, outputFormat } = request;
  if (outputCompression !== void 0) {
    if (!Number.isInteger(outputCompression) || outputCompression < 0 || outputCompression > 100) {
      throw new Error("outputCompression must be an integer from 0 to 100.");
    }
    if (outputFormat !== "jpeg" && outputFormat !== "webp") {
      throw new Error("outputCompression requires outputFormat to be jpeg or webp.");
    }
  }
  const n = request.numImages ?? 1;
  // Every other option is checked locally; the image count was forwarded
  // as-is. The 1..10 range mirrors the file's OPENAI_GPT_IMAGE_2_NUM_IMAGES list.
  if (!Number.isInteger(n) || n < 1 || n > 10) {
    throw new Error("numImages must be an integer from 1 to 10.");
  }
  const size = request.imageResolution ?? "auto";
  const sizeValidation = validateOpenAiGptImage2Resolution(size);
  if (!sizeValidation.valid) {
    throw new Error(
      `imageResolution ${JSON.stringify(size)} is not supported by gpt-image-2: ${sizeValidation.reason}`
    );
  }
  return {
    size,
    quality: request.imageQuality ?? "auto",
    outputFormat,
    n,
    background: request.background,
    moderation: request.moderation
  };
}
10378
/**
 * Wraps each style reference image with `toFile`, naming the results
 * style-1.<ext>, style-2.<ext>, ... in input order.
 *
 * @param {Array<{ data: *, mimeType?: string }>} [styleImages]
 * @returns {Promise<Array|undefined>} undefined when there are no style images;
 *   otherwise the `toFile` results. A missing mimeType is treated as "image/png".
 */
async function createOpenAiStyleImageFiles(styleImages) {
  if (!styleImages) {
    return void 0;
  }
  if (styleImages.length === 0) {
    return void 0;
  }
  const pendingFiles = styleImages.map(async (image, index) => {
    const mimeType = image.mimeType ?? "image/png";
    const fileName = `style-${index + 1}.${resolveAttachmentExtension(mimeType)}`;
    return await toFile(image.data, fileName, { type: mimeType });
  });
  return await Promise.all(pendingFiles);
}
10390
/**
 * Generates images for every entry of `request.imagePrompts`, issuing one
 * call per prompt, one after another.
 *
 * With style reference images the call goes to `client.images.edit` (with the
 * references attached as `image`); without them it goes to
 * `client.images.generate`. Items lacking a non-empty `b64_json` are dropped
 * from the result, but cost is estimated from the number of items returned.
 *
 * Telemetry: one "llm.call.started" event before the first call, one
 * "llm.call.completed" event on success or failure, and a flush in all cases
 * once the started event has been emitted.
 *
 * @param {object} request - generateImages request for an OpenAI image model.
 * @returns {Promise<Array<{ mimeType: string, data: Buffer }>>} Decoded images
 *   in response order; an empty array when there are no prompts.
 * @throws {Error} For a blank prompt, invalid image options, or any failure of
 *   the underlying calls (non-Error throwables are wrapped in an Error).
 */
async function generateImagesWithOpenAiImageApi(request) {
  const prompts = Array.from(request.imagePrompts, (rawPrompt, index) => {
    const trimmed = rawPrompt.trim();
    if (trimmed) {
      return trimmed;
    }
    throw new Error(`imagePrompts[${index}] must be a non-empty string`);
  });
  if (prompts.length === 0) {
    return [];
  }
  const { provider } = resolveProvider(request.model);
  const telemetry = createLlmTelemetryEmitter({
    telemetry: request.telemetry,
    operation: "generateImages",
    provider,
    model: request.model
  });
  const startedAtMs = Date.now();
  const params = resolveOpenAiImageRequestParams(request);
  const styleImages = await createOpenAiStyleImageFiles(request.styleImages);
  const hasStyleImages = Boolean(styleImages && styleImages.length > 0);
  const outputMimeType = resolveOpenAiImageMimeType(params.outputFormat);
  let totalUsage;
  let costUsd = 0;
  let outputImages = 0;
  // Shared shape of the completion event; `extra` carries the success-only or
  // failure-only trailing field.
  const completedEvent = (success, extra) => ({
    type: "llm.call.completed",
    success,
    durationMs: Math.max(0, Date.now() - startedAtMs),
    usage: totalUsage,
    costUsd,
    imageCount: outputImages,
    ...extra
  });
  telemetry.emit({
    type: "llm.call.started",
    imagePromptCount: prompts.length,
    styleImageCount: request.styleImages?.length ?? 0,
    numImagesPerPrompt: params.n
  });
  try {
    const images = [];
    for (const imagePrompt of prompts) {
      const prompt = buildOpenAiImagePrompt({
        stylePrompt: request.stylePrompt,
        imagePrompt,
        hasStyleImages
      });
      const sendRequest = async (client) => {
        const payload = {
          model: request.model,
          prompt,
          n: params.n,
          size: params.size,
          quality: params.quality
        };
        if (params.outputFormat) {
          payload.output_format = params.outputFormat;
        }
        if (request.outputCompression !== void 0) {
          payload.output_compression = request.outputCompression;
        }
        if (params.background) {
          payload.background = params.background;
        }
        if (params.moderation) {
          payload.moderation = params.moderation;
        }
        const callOptions = { signal: request.signal };
        if (!hasStyleImages) {
          return await client.images.generate(payload, callOptions);
        }
        return await client.images.edit({ ...payload, image: styleImages }, callOptions);
      };
      const response = await runOpenAiCall(sendRequest, request.model);
      const items = Array.isArray(response.data) ? response.data : [];
      for (const item of items) {
        const hasPayload = typeof item.b64_json === "string" && item.b64_json.length > 0;
        if (hasPayload) {
          images.push({
            mimeType: outputMimeType,
            data: Buffer5.from(item.b64_json, "base64")
          });
        }
      }
      outputImages = images.length;
      const usage = extractOpenAiUsageTokens(response.usage);
      totalUsage = sumUsageTokens(totalUsage, usage);
      costUsd += estimateCallCostUsd({
        modelId: request.model,
        tokens: usage,
        responseImages: items.length,
        imageSize: params.size,
        imageQuality: params.quality
      });
    }
    telemetry.emit(completedEvent(true, { attempts: prompts.length }));
    return images;
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    telemetry.emit(completedEvent(false, { error: err.message }));
    throw err;
  } finally {
    await telemetry.flush();
  }
}
10105
10500
  async function generateImages(request) {
10501
+ if (isOpenAiGenerateImagesRequest(request)) {
10502
+ return await generateImagesWithOpenAiImageApi(request);
10503
+ }
10106
10504
  const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 4));
10107
10505
  const promptList = Array.from(request.imagePrompts);
10108
10506
  if (promptList.length === 0) {
@@ -10116,7 +10514,7 @@ async function generateImages(request) {
10116
10514
  }
10117
10515
  return { index: arrayIndex + 1, prompt: trimmedPrompt };
10118
10516
  });
10119
- const gradingPrompt = request.imageGradingPrompt.trim();
10517
+ const gradingPrompt = request.imageGradingPrompt?.trim() ?? "";
10120
10518
  if (!gradingPrompt) {
10121
10519
  throw new Error("imageGradingPrompt must be a non-empty string");
10122
10520
  }
@@ -13414,8 +13812,11 @@ function summarizeResultUsage(result) {
13414
13812
  }
13415
13813
  summary = {
13416
13814
  promptTokens: sumUsageValue2(summary?.promptTokens, usage.promptTokens),
13815
+ promptTextTokens: sumUsageValue2(summary?.promptTextTokens, usage.promptTextTokens),
13816
+ promptImageTokens: sumUsageValue2(summary?.promptImageTokens, usage.promptImageTokens),
13417
13817
  cachedTokens: sumUsageValue2(summary?.cachedTokens, usage.cachedTokens),
13418
13818
  responseTokens: sumUsageValue2(summary?.responseTokens, usage.responseTokens),
13819
+ responseTextTokens: sumUsageValue2(summary?.responseTextTokens, usage.responseTextTokens),
13419
13820
  responseImageTokens: sumUsageValue2(summary?.responseImageTokens, usage.responseImageTokens),
13420
13821
  thinkingTokens: sumUsageValue2(summary?.thinkingTokens, usage.thinkingTokens),
13421
13822
  totalTokens: sumUsageValue2(summary?.totalTokens, usage.totalTokens),
@@ -14146,6 +14547,17 @@ export {
14146
14547
  LLM_MODEL_IDS,
14147
14548
  LLM_TEXT_MODEL_IDS,
14148
14549
  LlmJsonCallError,
14550
+ OPENAI_GPT_IMAGE_2_AUTO_RESOLUTION,
14551
+ OPENAI_GPT_IMAGE_2_BACKGROUNDS,
14552
+ OPENAI_GPT_IMAGE_2_MODERATION_LEVELS,
14553
+ OPENAI_GPT_IMAGE_2_NUM_IMAGES,
14554
+ OPENAI_GPT_IMAGE_2_OUTPUT_FORMATS,
14555
+ OPENAI_GPT_IMAGE_2_PARTIAL_IMAGE_COUNTS,
14556
+ OPENAI_GPT_IMAGE_2_POPULAR_RESOLUTIONS,
14557
+ OPENAI_GPT_IMAGE_2_QUALITY_LEVELS,
14558
+ OPENAI_GPT_IMAGE_2_RESOLUTIONS,
14559
+ OPENAI_GPT_IMAGE_2_SIZE_CONSTRAINTS,
14560
+ OPENAI_IMAGE_MODEL_IDS,
14149
14561
  OPENAI_MODEL_IDS,
14150
14562
  appendMarkdownSourcesSection,
14151
14563
  applyPatch,
@@ -14195,6 +14607,7 @@ export {
14195
14607
  isLlmImageModelId,
14196
14608
  isLlmModelId,
14197
14609
  isLlmTextModelId,
14610
+ isOpenAiImageModelId,
14198
14611
  isOpenAiModelId,
14199
14612
  loadEnvFromFile,
14200
14613
  loadLocalEnv,
@@ -14214,6 +14627,7 @@ export {
14214
14627
  streamToolLoop,
14215
14628
  stripCodexCitationMarkers,
14216
14629
  toGeminiJsonSchema,
14217
- tool
14630
+ tool,
14631
+ validateOpenAiGptImage2Resolution
14218
14632
  };
14219
14633
  //# sourceMappingURL=index.js.map