@ai-sdk/openai 2.1.0-beta.10 → 2.1.0-beta.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # @ai-sdk/openai
2
2
 
3
+ ## 2.1.0-beta.12
4
+
5
+ ### Patch Changes
6
+
7
+ - 046aa3b: feat(provider): speech model v3 spec
8
+ - e8109d3: feat: tool execution approval
9
+ - 21e20c0: feat(provider): transcription model v3 spec
10
+ - Updated dependencies [046aa3b]
11
+ - Updated dependencies [e8109d3]
12
+ - @ai-sdk/provider@2.1.0-beta.5
13
+ - @ai-sdk/provider-utils@3.1.0-beta.7
14
+
15
+ ## 2.1.0-beta.11
16
+
17
+ ### Patch Changes
18
+
19
+ - 0adc679: feat(provider): shared spec v3
20
+ - 2b0caef: feat(provider/openai): preview image generation results
21
+ - Updated dependencies [0adc679]
22
+ - Updated dependencies [2b0caef]
23
+ - @ai-sdk/provider-utils@3.1.0-beta.6
24
+ - @ai-sdk/provider@2.1.0-beta.4
25
+
3
26
  ## 2.1.0-beta.10
4
27
 
5
28
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { ProviderV3, LanguageModelV3, EmbeddingModelV3, ImageModelV3, TranscriptionModelV2, SpeechModelV2 } from '@ai-sdk/provider';
1
+ import { ProviderV3, LanguageModelV3, EmbeddingModelV3, ImageModelV3, TranscriptionModelV3, SpeechModelV3 } from '@ai-sdk/provider';
2
2
  import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
3
3
  import { FetchFunction } from '@ai-sdk/provider-utils';
4
4
  import { z } from 'zod/v4';
@@ -182,11 +182,16 @@ declare const openaiTools: {
182
182
  *
183
183
  * Must have name `image_generation`.
184
184
  *
185
- * @param size - Image dimensions (e.g., 1024x1024, 1024x1536)
186
- * @param quality - Rendering quality (e.g. low, medium, high)
187
- * @param format - File output format
188
- * @param compression - Compression level (0-100%) for JPEG and WebP formats
189
- * @param background - Transparent or opaque
185
+ * @param background - Background type for the generated image. One of 'auto', 'opaque', or 'transparent'.
186
+ * @param inputFidelity - Input fidelity for the generated image. One of 'low' or 'high'.
187
+ * @param inputImageMask - Optional mask for inpainting. Contains fileId and/or imageUrl.
188
+ * @param model - The image generation model to use. Default: gpt-image-1.
189
+ * @param moderation - Moderation level for the generated image. Default: 'auto'.
190
+ * @param outputCompression - Compression level for the output image (0-100).
191
+ * @param outputFormat - The output format of the generated image. One of 'png', 'jpeg', or 'webp'.
192
+ * @param partialImages - Number of partial images to generate in streaming mode (0-3).
193
+ * @param quality - The quality of the generated image. One of 'auto', 'low', 'medium', or 'high'.
194
+ * @param size - The size of the generated image. One of 'auto', '1024x1024', '1024x1536', or '1536x1024'.
190
195
  */
191
196
  imageGeneration: (args?: {
192
197
  background?: "auto" | "opaque" | "transparent";
@@ -199,6 +204,7 @@ declare const openaiTools: {
199
204
  moderation?: "auto";
200
205
  outputCompression?: number;
201
206
  outputFormat?: "png" | "jpeg" | "webp";
207
+ partialImages?: number;
202
208
  quality?: "auto" | "low" | "medium" | "high";
203
209
  size?: "auto" | "1024x1024" | "1024x1536" | "1536x1024";
204
210
  }) => _ai_sdk_provider_utils.Tool<{}, {
@@ -305,11 +311,11 @@ interface OpenAIProvider extends ProviderV3 {
305
311
  /**
306
312
  Creates a model for transcription.
307
313
  */
308
- transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV2;
314
+ transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV3;
309
315
  /**
310
316
  Creates a model for speech generation.
311
317
  */
312
- speech(modelId: OpenAISpeechModelId): SpeechModelV2;
318
+ speech(modelId: OpenAISpeechModelId): SpeechModelV3;
313
319
  /**
314
320
  OpenAI-specific tools.
315
321
  */
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { ProviderV3, LanguageModelV3, EmbeddingModelV3, ImageModelV3, TranscriptionModelV2, SpeechModelV2 } from '@ai-sdk/provider';
1
+ import { ProviderV3, LanguageModelV3, EmbeddingModelV3, ImageModelV3, TranscriptionModelV3, SpeechModelV3 } from '@ai-sdk/provider';
2
2
  import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
3
3
  import { FetchFunction } from '@ai-sdk/provider-utils';
4
4
  import { z } from 'zod/v4';
@@ -182,11 +182,16 @@ declare const openaiTools: {
182
182
  *
183
183
  * Must have name `image_generation`.
184
184
  *
185
- * @param size - Image dimensions (e.g., 1024x1024, 1024x1536)
186
- * @param quality - Rendering quality (e.g. low, medium, high)
187
- * @param format - File output format
188
- * @param compression - Compression level (0-100%) for JPEG and WebP formats
189
- * @param background - Transparent or opaque
185
+ * @param background - Background type for the generated image. One of 'auto', 'opaque', or 'transparent'.
186
+ * @param inputFidelity - Input fidelity for the generated image. One of 'low' or 'high'.
187
+ * @param inputImageMask - Optional mask for inpainting. Contains fileId and/or imageUrl.
188
+ * @param model - The image generation model to use. Default: gpt-image-1.
189
+ * @param moderation - Moderation level for the generated image. Default: 'auto'.
190
+ * @param outputCompression - Compression level for the output image (0-100).
191
+ * @param outputFormat - The output format of the generated image. One of 'png', 'jpeg', or 'webp'.
192
+ * @param partialImages - Number of partial images to generate in streaming mode (0-3).
193
+ * @param quality - The quality of the generated image. One of 'auto', 'low', 'medium', or 'high'.
194
+ * @param size - The size of the generated image. One of 'auto', '1024x1024', '1024x1536', or '1536x1024'.
190
195
  */
191
196
  imageGeneration: (args?: {
192
197
  background?: "auto" | "opaque" | "transparent";
@@ -199,6 +204,7 @@ declare const openaiTools: {
199
204
  moderation?: "auto";
200
205
  outputCompression?: number;
201
206
  outputFormat?: "png" | "jpeg" | "webp";
207
+ partialImages?: number;
202
208
  quality?: "auto" | "low" | "medium" | "high";
203
209
  size?: "auto" | "1024x1024" | "1024x1536" | "1536x1024";
204
210
  }) => _ai_sdk_provider_utils.Tool<{}, {
@@ -305,11 +311,11 @@ interface OpenAIProvider extends ProviderV3 {
305
311
  /**
306
312
  Creates a model for transcription.
307
313
  */
308
- transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV2;
314
+ transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV3;
309
315
  /**
310
316
  Creates a model for speech generation.
311
317
  */
312
- speech(modelId: OpenAISpeechModelId): SpeechModelV2;
318
+ speech(modelId: OpenAISpeechModelId): SpeechModelV3;
313
319
  /**
314
320
  OpenAI-specific tools.
315
321
  */
package/dist/index.js CHANGED
@@ -60,6 +60,7 @@ function convertToOpenAIChatMessages({
60
60
  prompt,
61
61
  systemMessageMode = "system"
62
62
  }) {
63
+ var _a;
63
64
  const messages = [];
64
65
  const warnings = [];
65
66
  for (const { role, content } of prompt) {
@@ -98,7 +99,7 @@ function convertToOpenAIChatMessages({
98
99
  messages.push({
99
100
  role: "user",
100
101
  content: content.map((part, index) => {
101
- var _a, _b, _c;
102
+ var _a2, _b, _c;
102
103
  switch (part.type) {
103
104
  case "text": {
104
105
  return { type: "text", text: part.text };
@@ -111,7 +112,7 @@ function convertToOpenAIChatMessages({
111
112
  image_url: {
112
113
  url: part.data instanceof URL ? part.data.toString() : `data:${mediaType};base64,${(0, import_provider_utils2.convertToBase64)(part.data)}`,
113
114
  // OpenAI specific extension: image detail
114
- detail: (_b = (_a = part.providerOptions) == null ? void 0 : _a.openai) == null ? void 0 : _b.imageDetail
115
+ detail: (_b = (_a2 = part.providerOptions) == null ? void 0 : _a2.openai) == null ? void 0 : _b.imageDetail
115
116
  }
116
117
  };
117
118
  } else if (part.mediaType.startsWith("audio/")) {
@@ -208,6 +209,9 @@ function convertToOpenAIChatMessages({
208
209
  case "error-text":
209
210
  contentValue = output.value;
210
211
  break;
212
+ case "execution-denied":
213
+ contentValue = (_a = output.reason) != null ? _a : "Tool execution denied.";
214
+ break;
211
215
  case "content":
212
216
  case "json":
213
217
  case "error-json":
@@ -1767,6 +1771,7 @@ var imageGenerationArgsSchema = import_v411.z.object({
1767
1771
  moderation: import_v411.z.enum(["auto"]).optional(),
1768
1772
  outputCompression: import_v411.z.number().int().min(0).max(100).optional(),
1769
1773
  outputFormat: import_v411.z.enum(["png", "jpeg", "webp"]).optional(),
1774
+ partialImages: import_v411.z.number().int().min(0).max(3).optional(),
1770
1775
  quality: import_v411.z.enum(["auto", "low", "medium", "high"]).optional(),
1771
1776
  size: import_v411.z.enum(["1024x1024", "1024x1536", "1536x1024", "auto"]).optional()
1772
1777
  }).strict();
@@ -1938,11 +1943,16 @@ var openaiTools = {
1938
1943
  *
1939
1944
  * Must have name `image_generation`.
1940
1945
  *
1941
- * @param size - Image dimensions (e.g., 1024x1024, 1024x1536)
1942
- * @param quality - Rendering quality (e.g. low, medium, high)
1943
- * @param format - File output format
1944
- * @param compression - Compression level (0-100%) for JPEG and WebP formats
1945
- * @param background - Transparent or opaque
1946
+ * @param background - Background type for the generated image. One of 'auto', 'opaque', or 'transparent'.
1947
+ * @param inputFidelity - Input fidelity for the generated image. One of 'low' or 'high'.
1948
+ * @param inputImageMask - Optional mask for inpainting. Contains fileId and/or imageUrl.
1949
+ * @param model - The image generation model to use. Default: gpt-image-1.
1950
+ * @param moderation - Moderation level for the generated image. Default: 'auto'.
1951
+ * @param outputCompression - Compression level for the output image (0-100).
1952
+ * @param outputFormat - The output format of the generated image. One of 'png', 'jpeg', or 'webp'.
1953
+ * @param partialImages - Number of partial images to generate in streaming mode (0-3).
1954
+ * @param quality - The quality of the generated image. One of 'auto', 'low', 'medium', or 'high'.
1955
+ * @param size - The size of the generated image. One of 'auto', '1024x1024', '1024x1536', or '1536x1024'.
1946
1956
  */
1947
1957
  imageGeneration,
1948
1958
  /**
@@ -1999,7 +2009,7 @@ async function convertToOpenAIResponsesInput({
1999
2009
  store,
2000
2010
  hasLocalShellTool = false
2001
2011
  }) {
2002
- var _a, _b, _c, _d, _e, _f, _g, _h, _i;
2012
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j;
2003
2013
  const input = [];
2004
2014
  const warnings = [];
2005
2015
  for (const { role, content } of prompt) {
@@ -2202,6 +2212,9 @@ async function convertToOpenAIResponsesInput({
2202
2212
  case "error-text":
2203
2213
  contentValue = output.value;
2204
2214
  break;
2215
+ case "execution-denied":
2216
+ contentValue = (_j = output.reason) != null ? _j : "Tool execution denied.";
2217
+ break;
2205
2218
  case "content":
2206
2219
  case "json":
2207
2220
  case "error-json":
@@ -2331,11 +2344,12 @@ function prepareResponsesTools({
2331
2344
  image_url: args.inputImageMask.imageUrl
2332
2345
  } : void 0,
2333
2346
  model: args.model,
2334
- size: args.size,
2335
- quality: args.quality,
2336
2347
  moderation: args.moderation,
2348
+ partial_images: args.partialImages,
2349
+ quality: args.quality,
2350
+ output_compression: args.outputCompression,
2337
2351
  output_format: args.outputFormat,
2338
- output_compression: args.outputCompression
2352
+ size: args.size
2339
2353
  });
2340
2354
  break;
2341
2355
  }
@@ -3294,6 +3308,17 @@ var OpenAIResponsesLanguageModel = class {
3294
3308
  delta: value.delta
3295
3309
  });
3296
3310
  }
3311
+ } else if (isResponseImageGenerationCallPartialImageChunk(value)) {
3312
+ controller.enqueue({
3313
+ type: "tool-result",
3314
+ toolCallId: value.item_id,
3315
+ toolName: "image_generation",
3316
+ result: {
3317
+ result: value.partial_image_b64
3318
+ },
3319
+ providerExecuted: true,
3320
+ preliminary: true
3321
+ });
3297
3322
  } else if (isResponseCodeInterpreterCallCodeDeltaChunk(value)) {
3298
3323
  const toolCall = ongoingToolCalls[value.output_index];
3299
3324
  if (toolCall != null) {
@@ -3566,6 +3591,12 @@ var responseFunctionCallArgumentsDeltaSchema = import_v416.z.object({
3566
3591
  output_index: import_v416.z.number(),
3567
3592
  delta: import_v416.z.string()
3568
3593
  });
3594
+ var responseImageGenerationCallPartialImageSchema = import_v416.z.object({
3595
+ type: import_v416.z.literal("response.image_generation_call.partial_image"),
3596
+ item_id: import_v416.z.string(),
3597
+ output_index: import_v416.z.number(),
3598
+ partial_image_b64: import_v416.z.string()
3599
+ });
3569
3600
  var responseCodeInterpreterCallCodeDeltaSchema = import_v416.z.object({
3570
3601
  type: import_v416.z.literal("response.code_interpreter_call_code.delta"),
3571
3602
  item_id: import_v416.z.string(),
@@ -3615,6 +3646,7 @@ var openaiResponsesChunkSchema = import_v416.z.union([
3615
3646
  responseOutputItemAddedSchema,
3616
3647
  responseOutputItemDoneSchema,
3617
3648
  responseFunctionCallArgumentsDeltaSchema,
3649
+ responseImageGenerationCallPartialImageSchema,
3618
3650
  responseCodeInterpreterCallCodeDeltaSchema,
3619
3651
  responseCodeInterpreterCallCodeDoneSchema,
3620
3652
  responseAnnotationAddedSchema,
@@ -3642,6 +3674,9 @@ function isResponseCreatedChunk(chunk) {
3642
3674
  function isResponseFunctionCallArgumentsDeltaChunk(chunk) {
3643
3675
  return chunk.type === "response.function_call_arguments.delta";
3644
3676
  }
3677
+ function isResponseImageGenerationCallPartialImageChunk(chunk) {
3678
+ return chunk.type === "response.image_generation_call.partial_image";
3679
+ }
3645
3680
  function isResponseCodeInterpreterCallCodeDeltaChunk(chunk) {
3646
3681
  return chunk.type === "response.code_interpreter_call_code.delta";
3647
3682
  }
@@ -3753,7 +3788,7 @@ var OpenAISpeechModel = class {
3753
3788
  constructor(modelId, config) {
3754
3789
  this.modelId = modelId;
3755
3790
  this.config = config;
3756
- this.specificationVersion = "v2";
3791
+ this.specificationVersion = "v3";
3757
3792
  }
3758
3793
  get provider() {
3759
3794
  return this.config.provider;
@@ -3944,7 +3979,7 @@ var OpenAITranscriptionModel = class {
3944
3979
  constructor(modelId, config) {
3945
3980
  this.modelId = modelId;
3946
3981
  this.config = config;
3947
- this.specificationVersion = "v2";
3982
+ this.specificationVersion = "v3";
3948
3983
  }
3949
3984
  get provider() {
3950
3985
  return this.config.provider;
@@ -4074,7 +4109,7 @@ var openaiTranscriptionResponseSchema = import_v419.z.object({
4074
4109
  });
4075
4110
 
4076
4111
  // src/version.ts
4077
- var VERSION = true ? "2.1.0-beta.10" : "0.0.0-test";
4112
+ var VERSION = true ? "2.1.0-beta.12" : "0.0.0-test";
4078
4113
 
4079
4114
  // src/openai-provider.ts
4080
4115
  function createOpenAI(options = {}) {