@ai-sdk/openai 3.0.55 → 3.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2316,6 +2316,9 @@ const { images } = await generateImage({
 Remove the background from an image by setting `background` to `transparent`:
 
 ```ts
+import { openai, type OpenAIImageModelEditOptions } from '@ai-sdk/openai';
+import { generateImage } from 'ai';
+
 const imageBuffer = readFileSync('./input-image.png');
 
 const { images } = await generateImage({
@@ -2327,8 +2330,8 @@ const { images } = await generateImage({
   providerOptions: {
     openai: {
       background: 'transparent',
-      output_format: 'png',
-    },
+      outputFormat: 'png',
+    } satisfies OpenAIImageModelEditOptions,
   },
 });
 ```
@@ -2371,11 +2374,17 @@ const { images } = await generateImage({
 You can pass optional `providerOptions` to the image model. These are prone to change by OpenAI and are model dependent. For example, the `gpt-image-1` model supports the `quality` option:
 
 ```ts
+import {
+  openai,
+  type OpenAIImageModelGenerationOptions,
+} from '@ai-sdk/openai';
+import { generateImage } from 'ai';
+
 const { image, providerMetadata } = await generateImage({
   model: openai.image('gpt-image-1.5'),
   prompt: 'A salamander at sunrise in a forest pond in the Seychelles.',
   providerOptions: {
-    openai: { quality: 'high' },
+    openai: { quality: 'high' } satisfies OpenAIImageModelGenerationOptions,
   },
 });
 ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@ai-sdk/openai",
-  "version": "3.0.55",
+  "version": "3.0.57",
   "license": "Apache-2.0",
   "sideEffects": false,
   "main": "./dist/index.js",
@@ -37,7 +37,7 @@
   },
   "dependencies": {
     "@ai-sdk/provider": "3.0.10",
-    "@ai-sdk/provider-utils": "4.0.25"
+    "@ai-sdk/provider-utils": "4.0.26"
   },
   "devDependencies": {
     "@types/node": "20.17.24",
@@ -0,0 +1,123 @@
+import {
+  lazySchema,
+  zodSchema,
+  type InferSchema,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod/v4';
+
+export type OpenAIImageModelId =
+  | 'dall-e-3'
+  | 'dall-e-2'
+  | 'gpt-image-1'
+  | 'gpt-image-1-mini'
+  | 'gpt-image-1.5'
+  | 'gpt-image-2'
+  | 'chatgpt-image-latest'
+  | (string & {});
+
+// https://platform.openai.com/docs/guides/images
+export const modelMaxImagesPerCall: Record<OpenAIImageModelId, number> = {
+  'dall-e-3': 1,
+  'dall-e-2': 10,
+  'gpt-image-1': 10,
+  'gpt-image-1-mini': 10,
+  'gpt-image-1.5': 10,
+  'gpt-image-2': 10,
+  'chatgpt-image-latest': 10,
+};
+
+const defaultResponseFormatPrefixes = [
+  'chatgpt-image-',
+  'gpt-image-1-mini',
+  'gpt-image-1.5',
+  'gpt-image-1',
+  'gpt-image-2',
+];
+
+export function hasDefaultResponseFormat(modelId: string): boolean {
+  return defaultResponseFormatPrefixes.some(prefix =>
+    modelId.startsWith(prefix),
+  );
+}
+
+const baseImageModelOptionsObject = z.object({
+  /**
+   * Quality of the generated image(s).
+   *
+   * Valid values: `standard`, `hd`, `low`, `medium`, `high`, `auto`.
+   */
+  quality: z
+    .enum(['standard', 'hd', 'low', 'medium', 'high', 'auto'])
+    .optional(),
+
+  /**
+   * Background behavior for the generated image(s).
+   *
+   * If `transparent`, the output format must support transparency
+   * (i.e. `png` or `webp`).
+   */
+  background: z.enum(['transparent', 'opaque', 'auto']).optional(),
+
+  /**
+   * Format in which the generated image(s) are returned.
+   */
+  outputFormat: z.enum(['png', 'jpeg', 'webp']).optional(),
+
+  /**
+   * Compression level (0-100) for the generated image(s). Applies to the
+   * `jpeg` and `webp` output formats.
+   */
+  outputCompression: z.number().int().min(0).max(100).optional(),
+
+  /**
+   * A unique identifier representing your end-user, which can help OpenAI
+   * to monitor and detect abuse.
+   */
+  user: z.string().optional(),
+});
+
+export const openaiImageModelOptions = lazySchema(() =>
+  zodSchema(baseImageModelOptionsObject),
+);
+
+export type OpenAIImageModelOptions = InferSchema<
+  typeof openaiImageModelOptions
+>;
+
+export const openaiImageModelGenerationOptions = lazySchema(() =>
+  zodSchema(
+    baseImageModelOptionsObject.extend({
+      /**
+       * Style of the generated image. `vivid` produces hyper-real and
+       * dramatic images; `natural` produces more subdued, less hyper-real
+       * looking images.
+       */
+      style: z.enum(['vivid', 'natural']).optional(),
+
+      /**
+       * Content moderation level for the generated image(s). `low` applies
+       * less restrictive filtering.
+       */
+      moderation: z.enum(['auto', 'low']).optional(),
+    }),
+  ),
+);
+
+export type OpenAIImageModelGenerationOptions = InferSchema<
+  typeof openaiImageModelGenerationOptions
+>;
+
+export const openaiImageModelEditOptions = lazySchema(() =>
+  zodSchema(
+    baseImageModelOptionsObject.extend({
+      /**
+       * Fidelity of the output image(s) to the input image(s).
+       */
+      inputFidelity: z.enum(['high', 'low']).optional(),
+    }),
+  ),
+);
+
+export type OpenAIImageModelEditOptions = InferSchema<
+  typeof openaiImageModelEditOptions
+>;
@@ -9,6 +9,7 @@ import {
   convertToFormData,
   createJsonResponseHandler,
   downloadBlob,
+  parseProviderOptions,
   postFormDataToApi,
   postJsonToApi,
 } from '@ai-sdk/provider-utils';
@@ -18,8 +19,11 @@ import { openaiImageResponseSchema } from './openai-image-api';
 import {
   hasDefaultResponseFormat,
   modelMaxImagesPerCall,
+  openaiImageModelEditOptions,
+  openaiImageModelGenerationOptions,
+  type OpenAIImageModelEditOptions,
   type OpenAIImageModelId,
-} from './openai-image-options';
+} from './openai-image-model-options';
 
 interface OpenAIImageModelConfig extends OpenAIConfig {
   _internal?: {
@@ -75,6 +79,13 @@ export class OpenAIImageModel implements ImageModelV3 {
     const currentDate = this.config._internal?.currentDate?.() ?? new Date();
 
     if (files != null) {
+      const openaiOptions =
+        (await parseProviderOptions({
+          provider: 'openai',
+          providerOptions,
+          schema: openaiImageModelEditOptions,
+        })) ?? {};
+
       const { value: response, responseHeaders } = await postFormDataToApi({
         url: this.config.url({
           path: '/images/edits',
@@ -105,7 +116,12 @@ export class OpenAIImageModel implements ImageModelV3 {
           mask: mask != null ? await fileToBlob(mask) : undefined,
           n,
           size,
-          ...(providerOptions.openai ?? {}),
+          quality: openaiOptions.quality,
+          background: openaiOptions.background,
+          output_format: openaiOptions.outputFormat,
+          output_compression: openaiOptions.outputCompression,
+          input_fidelity: openaiOptions.inputFidelity,
+          user: openaiOptions.user,
         }),
         failedResponseHandler: openaiFailedResponseHandler,
         successfulResponseHandler: createJsonResponseHandler(
@@ -153,6 +169,13 @@ export class OpenAIImageModel implements ImageModelV3 {
       };
     }
 
+    const openaiOptions =
+      (await parseProviderOptions({
+        provider: 'openai',
+        providerOptions,
+        schema: openaiImageModelGenerationOptions,
+      })) ?? {};
+
     const { value: response, responseHeaders } = await postJsonToApi({
       url: this.config.url({
         path: '/images/generations',
@@ -164,7 +187,13 @@ export class OpenAIImageModel implements ImageModelV3 {
         prompt,
         n,
         size,
-        ...(providerOptions.openai ?? {}),
+        quality: openaiOptions.quality,
+        style: openaiOptions.style,
+        background: openaiOptions.background,
+        moderation: openaiOptions.moderation,
+        output_format: openaiOptions.outputFormat,
+        output_compression: openaiOptions.outputCompression,
+        user: openaiOptions.user,
         ...(!hasDefaultResponseFormat(this.modelId)
           ? { response_format: 'b64_json' }
           : {}),
@@ -251,84 +280,18 @@ function distributeTokenDetails(
 }
 
 type OpenAIImageEditInput = {
-  /**
-   * Allows to set transparency for the background of the generated image(s).
-   * This parameter is only supported for `gpt-image-1`. Must be one of
-   * `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
-   * model will automatically determine the best background for the image.
-   *
-   * If `transparent`, the output format needs to support transparency, so it
-   * should be set to either `png` (default value) or `webp`.
-   *
-   */
-  background?: 'transparent' | 'opaque' | 'auto';
-  /**
-   * The image(s) to edit. Must be a supported image file or an array of images.
-   *
-   * For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less
-   * than 50MB. You can provide up to 16 images.
-   *
-   * For `dall-e-2`, you can only provide one image, and it should be a square
-   * `png` file less than 4MB.
-   *
-   */
+  model: OpenAIImageModelId;
+  prompt?: string;
   image: Blob | Blob[];
-  input_fidelity?: ('high' | 'low') | null;
-  /**
-   * An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, the mask will be applied on the first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`.
-   */
   mask?: Blob;
-  /**
-   * The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` is used.
-   */
-  model?: 'dall-e-2' | 'gpt-image-1' | 'gpt-image-1-mini' | (string & {});
-  /**
-   * The number of images to generate. Must be between 1 and 10.
-   */
   n?: number;
-  /**
-   * The compression level (0-100%) for the generated images. This parameter
-   * is only supported for `gpt-image-1` with the `webp` or `jpeg` output
-   * formats, and defaults to 100.
-   *
-   */
-  output_compression?: number;
-  /**
-   * The format in which the generated images are returned. This parameter is
-   * only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.
-   * The default value is `png`.
-   *
-   */
-  output_format?: 'png' | 'jpeg' | 'webp';
-  partial_images?: number | null;
-  /**
-   * A text description of the desired image(s). The maximum length is 1000 characters for `dall-e-2`, and 32000 characters for `gpt-image-1`.
-   */
-  prompt?: string;
-  /**
-   * The quality of the image that will be generated. `high`, `medium` and `low` are only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. Defaults to `auto`.
-   *
-   */
-  quality?: 'standard' | 'low' | 'medium' | 'high' | 'auto';
-  /**
-   * The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` will always return base64-encoded images.
-   */
-  response_format?: 'url' | 'b64_json';
-  /**
-   * The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or `auto` (default value) for `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`.
-   */
   size?: `${number}x${number}`;
-  /**
-   * Edit the image in streaming mode. Defaults to `false`. See the
-   * [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more information.
-   *
-   */
-  stream?: boolean;
-  /**
-   * A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
-   *
-   */
-  user?: string;
+  quality?: OpenAIImageModelEditOptions['quality'];
+  background?: OpenAIImageModelEditOptions['background'];
+  output_format?: OpenAIImageModelEditOptions['outputFormat'];
+  output_compression?: OpenAIImageModelEditOptions['outputCompression'];
+  input_fidelity?: OpenAIImageModelEditOptions['inputFidelity'];
+  user?: OpenAIImageModelEditOptions['user'];
 };
 
 async function fileToBlob(
package/src/index.ts CHANGED
@@ -10,6 +10,11 @@ export type {
   /** @deprecated Use `OpenAILanguageModelChatOptions` instead. */
   OpenAILanguageModelChatOptions as OpenAIChatLanguageModelOptions,
 } from './chat/openai-chat-options';
+export type {
+  OpenAIImageModelOptions,
+  OpenAIImageModelGenerationOptions,
+  OpenAIImageModelEditOptions,
+} from './image/openai-image-model-options';
 export type { OpenAILanguageModelCompletionOptions } from './completion/openai-completion-options';
 export type { OpenAIEmbeddingModelOptions } from './embedding/openai-embedding-options';
 export type { OpenAISpeechModelOptions } from './speech/openai-speech-options';
@@ -5,7 +5,7 @@ export * from '../completion/openai-completion-options';
 export * from '../embedding/openai-embedding-model';
 export * from '../embedding/openai-embedding-options';
 export * from '../image/openai-image-model';
-export * from '../image/openai-image-options';
+export * from '../image/openai-image-model-options';
 export * from '../transcription/openai-transcription-model';
 export * from '../transcription/openai-transcription-options';
 export * from '../speech/openai-speech-model';
@@ -20,7 +20,7 @@ import type { OpenAICompletionModelId } from './completion/openai-completion-opt
 import { OpenAIEmbeddingModel } from './embedding/openai-embedding-model';
 import type { OpenAIEmbeddingModelId } from './embedding/openai-embedding-options';
 import { OpenAIImageModel } from './image/openai-image-model';
-import type { OpenAIImageModelId } from './image/openai-image-options';
+import type { OpenAIImageModelId } from './image/openai-image-model-options';
 import { openaiTools } from './openai-tools';
 import { OpenAIResponsesLanguageModel } from './responses/openai-responses-language-model';
 import type { OpenAIResponsesModelId } from './responses/openai-responses-options';
@@ -1,34 +0,0 @@
-export type OpenAIImageModelId =
-  | 'dall-e-3'
-  | 'dall-e-2'
-  | 'gpt-image-1'
-  | 'gpt-image-1-mini'
-  | 'gpt-image-1.5'
-  | 'gpt-image-2'
-  | 'chatgpt-image-latest'
-  | (string & {});
-
-// https://platform.openai.com/docs/guides/images
-export const modelMaxImagesPerCall: Record<OpenAIImageModelId, number> = {
-  'dall-e-3': 1,
-  'dall-e-2': 10,
-  'gpt-image-1': 10,
-  'gpt-image-1-mini': 10,
-  'gpt-image-1.5': 10,
-  'gpt-image-2': 10,
-  'chatgpt-image-latest': 10,
-};
-
-const defaultResponseFormatPrefixes = [
-  'chatgpt-image-',
-  'gpt-image-1-mini',
-  'gpt-image-1.5',
-  'gpt-image-1',
-  'gpt-image-2',
-];
-
-export function hasDefaultResponseFormat(modelId: string): boolean {
-  return defaultResponseFormatPrefixes.some(prefix =>
-    modelId.startsWith(prefix),
-  );
-}
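
For consumers of the package, the practical effect of this diff is that image provider options are now validated against a schema and mapped to OpenAI's snake_case request fields internally, so `providerOptions.openai` should use the camelCase keys (`outputFormat`, `outputCompression`, `inputFidelity`). Below is a minimal generation-side sketch based on the updated docs above; the model id, prompt, and option values are placeholders:

```ts
import { openai, type OpenAIImageModelGenerationOptions } from '@ai-sdk/openai';
import { generateImage } from 'ai';

// camelCase option keys are expected as of 3.0.57; the provider maps them to
// the snake_case API fields (output_format, output_compression) internally.
const { image } = await generateImage({
  model: openai.image('gpt-image-1'), // placeholder model id
  prompt: 'A lighthouse on a cliff at dusk', // placeholder prompt
  providerOptions: {
    openai: {
      quality: 'high',
      outputFormat: 'webp',
      outputCompression: 80,
    } satisfies OpenAIImageModelGenerationOptions,
  },
});
```

The `satisfies OpenAIImageModelGenerationOptions` check is optional, but because it applies excess-property checking to the literal, it should flag leftover snake_case keys such as `output_format` at compile time.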