@ai-sdk/google 3.0.24 → 3.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -108,6 +108,8 @@ Google Generative AI also supports some model specific settings that are not par
108
108
  You can pass them as an options argument:
109
109
 
110
110
  ```ts
111
+ import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
112
+
111
113
  const model = google('gemini-2.5-flash');
112
114
 
113
115
  await generateText({
@@ -120,7 +122,7 @@ await generateText({
120
122
  threshold: 'BLOCK_LOW_AND_ABOVE',
121
123
  },
122
124
  ],
123
- },
125
+ } satisfies GoogleLanguageModelOptions,
124
126
  },
125
127
  });
126
128
  ```
@@ -256,7 +258,7 @@ The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" tha
256
258
  For Gemini 3 models, use the `thinkingLevel` parameter to control the depth of reasoning:
257
259
 
258
260
  ```ts
259
- import { google, GoogleGenerativeAIProviderOptions } from '@ai-sdk/google';
261
+ import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
260
262
  import { generateText } from 'ai';
261
263
 
262
264
  const model = google('gemini-3-pro-preview');
@@ -270,7 +272,7 @@ const { text, reasoning } = await generateText({
270
272
  thinkingLevel: 'high',
271
273
  includeThoughts: true,
272
274
  },
273
- } satisfies GoogleGenerativeAIProviderOptions,
275
+ } satisfies GoogleLanguageModelOptions,
274
276
  },
275
277
  });
276
278
 
@@ -284,7 +286,7 @@ console.log(reasoning); // Reasoning summary
284
286
  For Gemini 2.5 models, use the `thinkingBudget` parameter to control the number of thinking tokens:
285
287
 
286
288
  ```ts
287
- import { google, GoogleGenerativeAIProviderOptions } from '@ai-sdk/google';
289
+ import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
288
290
  import { generateText } from 'ai';
289
291
 
290
292
  const model = google('gemini-2.5-flash');
@@ -298,7 +300,7 @@ const { text, reasoning } = await generateText({
298
300
  thinkingBudget: 8192,
299
301
  includeThoughts: true,
300
302
  },
301
- } satisfies GoogleGenerativeAIProviderOptions,
303
+ } satisfies GoogleLanguageModelOptions,
302
304
  },
303
305
  });
304
306
 
@@ -435,7 +437,7 @@ console.log('Cached tokens:', providerMetadata.google);
435
437
  For guaranteed cost savings, you can still use explicit caching with Gemini 2.5 and 2.0 models. See the [models page](https://ai.google.dev/gemini-api/docs/models) to check if caching is supported for the used model:
436
438
 
437
439
  ```ts
438
- import { google } from '@ai-sdk/google';
440
+ import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
439
441
  import { GoogleGenAI } from '@google/genai';
440
442
  import { generateText } from 'ai';
441
443
 
@@ -465,7 +467,7 @@ const { text: veggieLasagnaRecipe } = await generateText({
465
467
  providerOptions: {
466
468
  google: {
467
469
  cachedContent: cache.name,
468
- },
470
+ } satisfies GoogleLanguageModelOptions,
469
471
  },
470
472
  });
471
473
 
@@ -475,7 +477,7 @@ const { text: meatLasagnaRecipe } = await generateText({
475
477
  providerOptions: {
476
478
  google: {
477
479
  cachedContent: cache.name,
478
- },
480
+ } satisfies GoogleLanguageModelOptions,
479
481
  },
480
482
  });
481
483
  ```
@@ -766,7 +768,7 @@ With [Google Maps grounding](https://ai.google.dev/gemini-api/docs/maps-groundin
766
768
  the model has access to Google Maps data for location-aware responses. This enables providing local data and geospatial context, such as finding nearby restaurants.
767
769
 
768
770
  ```ts highlight="7-16"
769
- import { google } from '@ai-sdk/google';
771
+ import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
770
772
  import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
771
773
  import { generateText } from 'ai';
772
774
 
@@ -780,7 +782,7 @@ const { text, sources, providerMetadata } = await generateText({
780
782
  retrievalConfig: {
781
783
  latLng: { latitude: 34.090199, longitude: -117.881081 },
782
784
  },
783
- },
785
+ } satisfies GoogleLanguageModelOptions,
784
786
  },
785
787
  prompt:
786
788
  'What are the best Italian restaurants within a 15-minute walk from here?',
@@ -927,7 +929,7 @@ The `vertexRagStore` tool accepts the following configuration options:
927
929
 
928
930
  ### Image Outputs
929
931
 
930
- Gemini models with image generation capabilities (`gemini-2.5-flash-image`) support image generation. Images are exposed as files in the response.
932
+ Gemini models with image generation capabilities (e.g. `gemini-2.5-flash-image`) support generating images as part of a multimodal response. Images are exposed as files in the response.
931
933
 
932
934
  ```ts
933
935
  import { google } from '@ai-sdk/google';
@@ -946,6 +948,12 @@ for (const file of result.files) {
946
948
  }
947
949
  ```
948
950
 
951
+ <Note>
952
+ If you primarily want to generate images without text output, you can also use
953
+ Gemini image models with the `generateImage()` function. See [Gemini Image
954
+ Models](#gemini-image-models) for details.
955
+ </Note>
956
+
949
957
  ### Safety Ratings
950
958
 
951
959
  The safety ratings provide insight into the safety of the model's response.
@@ -1008,7 +1016,7 @@ const { object } = await generateObject({
1008
1016
  providerOptions: {
1009
1017
  google: {
1010
1018
  structuredOutputs: false,
1011
- },
1019
+ } satisfies GoogleLanguageModelOptions,
1012
1020
  },
1013
1021
  schema: z.object({
1014
1022
  name: z.string(),
@@ -1099,7 +1107,7 @@ The Google Generative AI provider sends API calls to the right endpoint based on
1099
1107
  Google Generative AI embedding models support additional settings. You can pass them as an options argument:
1100
1108
 
1101
1109
  ```ts
1102
- import { google } from '@ai-sdk/google';
1110
+ import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
1103
1111
  import { embed } from 'ai';
1104
1112
 
1105
1113
  const model = google.embedding('gemini-embedding-001');
@@ -1111,7 +1119,7 @@ const { embedding } = await embed({
1111
1119
  google: {
1112
1120
  outputDimensionality: 512, // optional, number of dimensions for the embedding
1113
1121
  taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
1114
- },
1122
+ } satisfies GoogleEmbeddingModelOptions,
1115
1123
  },
1116
1124
  });
1117
1125
  ```
@@ -1144,9 +1152,18 @@ The following optional provider options are available for Google Generative AI e
1144
1152
 
1145
1153
  ## Image Models
1146
1154
 
1147
- You can create [Imagen](https://ai.google.dev/gemini-api/docs/imagen) models that call the Google Generative AI API using the `.image()` factory method.
1155
+ You can create image models that call the Google Generative AI API using the `.image()` factory method.
1148
1156
  For more on image generation with the AI SDK see [generateImage()](/docs/reference/ai-sdk-core/generate-image).
1149
1157
 
1158
+ The Google provider supports two types of image models:
1159
+
1160
+ - **Imagen models**: Dedicated image generation models using the `:predict` API
1161
+ - **Gemini image models**: Multimodal language models with image output capabilities using the `:generateContent` API
1162
+
1163
+ ### Imagen Models
1164
+
1165
+ [Imagen](https://ai.google.dev/gemini-api/docs/imagen) models are dedicated image generation models.
1166
+
1150
1167
  ```ts
1151
1168
  import { google } from '@ai-sdk/google';
1152
1169
  import { generateImage } from 'ai';
@@ -1158,11 +1175,11 @@ const { image } = await generateImage({
1158
1175
  });
1159
1176
  ```
1160
1177
 
1161
- Further configuration can be done using Google provider options. You can validate the provider options using the `GoogleGenerativeAIImageProviderOptions` type.
1178
+ Further configuration can be done using Google provider options. You can validate the provider options using the `GoogleImageModelOptions` type.
1162
1179
 
1163
1180
  ```ts
1164
1181
  import { google } from '@ai-sdk/google';
1165
- import { GoogleGenerativeAIImageProviderOptions } from '@ai-sdk/google';
1182
+ import { GoogleImageModelOptions } from '@ai-sdk/google';
1166
1183
  import { generateImage } from 'ai';
1167
1184
 
1168
1185
  const { image } = await generateImage({
@@ -1170,13 +1187,13 @@ const { image } = await generateImage({
1170
1187
  providerOptions: {
1171
1188
  google: {
1172
1189
  personGeneration: 'dont_allow',
1173
- } satisfies GoogleGenerativeAIImageProviderOptions,
1190
+ } satisfies GoogleImageModelOptions,
1174
1191
  },
1175
1192
  // ...
1176
1193
  });
1177
1194
  ```
1178
1195
 
1179
- The following provider options are available:
1196
+ The following provider options are available for Imagen models:
1180
1197
 
1181
1198
  - **personGeneration** `allow_adult` | `allow_all` | `dont_allow`
1182
1199
  Whether to allow person generation. Defaults to `allow_adult`.
@@ -1186,10 +1203,84 @@ The following provider options are available:
1186
1203
  parameter instead.
1187
1204
  </Note>
1188
1205
 
1189
- #### Model Capabilities
1206
+ #### Imagen Model Capabilities
1190
1207
 
1191
1208
  | Model | Aspect Ratios |
1192
1209
  | ------------------------------- | ------------------------- |
1193
1210
  | `imagen-4.0-generate-001` | 1:1, 3:4, 4:3, 9:16, 16:9 |
1194
1211
  | `imagen-4.0-ultra-generate-001` | 1:1, 3:4, 4:3, 9:16, 16:9 |
1195
1212
  | `imagen-4.0-fast-generate-001` | 1:1, 3:4, 4:3, 9:16, 16:9 |
1213
+
1214
+ ### Gemini Image Models
1215
+
1216
+ [Gemini image models](https://ai.google.dev/gemini-api/docs/image-generation) (e.g. `gemini-2.5-flash-image`) are technically multimodal output language models, but they can be used with the `generateImage()` function for a simpler image generation experience. Internally, the provider calls the language model API with `responseModalities: ['IMAGE']`.
1217
+
1218
+ ```ts
1219
+ import { google } from '@ai-sdk/google';
1220
+ import { generateImage } from 'ai';
1221
+
1222
+ const { image } = await generateImage({
1223
+ model: google.image('gemini-2.5-flash-image'),
1224
+ prompt: 'A photorealistic image of a cat wearing a wizard hat',
1225
+ aspectRatio: '1:1',
1226
+ });
1227
+ ```
1228
+
1229
+ Gemini image models also support image editing by providing input images:
1230
+
1231
+ ```ts
1232
+ import { google } from '@ai-sdk/google';
1233
+ import { generateImage } from 'ai';
1234
+ import fs from 'node:fs';
1235
+
1236
+ const sourceImage = fs.readFileSync('./cat.png');
1237
+
1238
+ const { image } = await generateImage({
1239
+ model: google.image('gemini-2.5-flash-image'),
1240
+ prompt: {
1241
+ text: 'Add a small wizard hat to this cat',
1242
+ images: [sourceImage],
1243
+ },
1244
+ });
1245
+ ```
1246
+
1247
+ You can also use URLs for input images:
1248
+
1249
+ ```ts
1250
+ import { google } from '@ai-sdk/google';
1251
+ import { generateImage } from 'ai';
1252
+
1253
+ const { image } = await generateImage({
1254
+ model: google.image('gemini-2.5-flash-image'),
1255
+ prompt: {
1256
+ text: 'Add a small wizard hat to this cat',
1257
+ images: ['https://example.com/cat.png'],
1258
+ },
1259
+ });
1260
+ ```
1261
+
1262
+ <Note>
1263
+ Gemini image models do not support the `size` or `n` parameters. Use
1264
+ `aspectRatio` instead of `size`. Mask-based inpainting is also not supported.
1265
+ </Note>
1266
+
1267
+ <Note>
1268
+ For more advanced use cases where you need both text and image outputs, or
1269
+ want more control over the generation process, you can use Gemini image models
1270
+ directly with `generateText()`. See [Image Outputs](#image-outputs) for
1271
+ details.
1272
+ </Note>
1273
+
1274
+ #### Gemini Image Model Capabilities
1275
+
1276
+ | Model | Image Generation | Image Editing | Aspect Ratios |
1277
+ | ---------------------------- | ------------------- | ------------------- | --------------------------------------------------- |
1278
+ | `gemini-2.5-flash-image` | <Check size={18} /> | <Check size={18} /> | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
1279
+ | `gemini-3-pro-image-preview` | <Check size={18} /> | <Check size={18} /> | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
1280
+
1281
+ <Note>
1282
+ `gemini-3-pro-image-preview` supports additional features including up to 14
1283
+ reference images for editing (6 objects, 5 humans), resolution options (1K,
1284
+ 2K, 4K via `providerOptions.google.imageConfig.imageSize`), and Google Search
1285
+ grounding.
1286
+ </Note>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/google",
3
- "version": "3.0.24",
3
+ "version": "3.0.26",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -36,8 +36,8 @@
36
36
  }
37
37
  },
38
38
  "dependencies": {
39
- "@ai-sdk/provider": "3.0.8",
40
- "@ai-sdk/provider-utils": "4.0.14"
39
+ "@ai-sdk/provider-utils": "4.0.14",
40
+ "@ai-sdk/provider": "3.0.8"
41
41
  },
42
42
  "devDependencies": {
43
43
  "@types/node": "20.17.24",
@@ -16,7 +16,7 @@ import { z } from 'zod/v4';
16
16
  import { googleFailedResponseHandler } from './google-error';
17
17
  import {
18
18
  GoogleGenerativeAIEmbeddingModelId,
19
- googleGenerativeAIEmbeddingProviderOptions,
19
+ googleEmbeddingModelOptions,
20
20
  } from './google-generative-ai-embedding-options';
21
21
 
22
22
  type GoogleGenerativeAIEmbeddingConfig = {
@@ -57,7 +57,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
57
57
  const googleOptions = await parseProviderOptions({
58
58
  provider: 'google',
59
59
  providerOptions,
60
- schema: googleGenerativeAIEmbeddingProviderOptions,
60
+ schema: googleEmbeddingModelOptions,
61
61
  });
62
62
 
63
63
  if (values.length > this.maxEmbeddingsPerCall) {
@@ -10,7 +10,7 @@ export type GoogleGenerativeAIEmbeddingModelId =
10
10
  | 'text-embedding-004'
11
11
  | (string & {});
12
12
 
13
- export const googleGenerativeAIEmbeddingProviderOptions = lazySchema(() =>
13
+ export const googleEmbeddingModelOptions = lazySchema(() =>
14
14
  zodSchema(
15
15
  z.object({
16
16
  /**
@@ -47,6 +47,6 @@ export const googleGenerativeAIEmbeddingProviderOptions = lazySchema(() =>
47
47
  ),
48
48
  );
49
49
 
50
- export type GoogleGenerativeAIEmbeddingProviderOptions = InferSchema<
51
- typeof googleGenerativeAIEmbeddingProviderOptions
50
+ export type GoogleEmbeddingModelOptions = InferSchema<
51
+ typeof googleEmbeddingModelOptions
52
52
  >;
@@ -1,11 +1,19 @@
1
- import { ImageModelV3, SharedV3Warning } from '@ai-sdk/provider';
1
+ import {
2
+ ImageModelV3,
3
+ LanguageModelV3Prompt,
4
+ SharedV3Warning,
5
+ } from '@ai-sdk/provider';
2
6
  import {
3
7
  combineHeaders,
8
+ convertToBase64,
4
9
  createJsonResponseHandler,
10
+ FetchFunction,
11
+ generateId as defaultGenerateId,
5
12
  type InferSchema,
6
13
  lazySchema,
7
14
  parseProviderOptions,
8
15
  postJsonToApi,
16
+ Resolvable,
9
17
  resolve,
10
18
  zodSchema,
11
19
  } from '@ai-sdk/provider-utils';
@@ -15,7 +23,7 @@ import {
15
23
  GoogleGenerativeAIImageModelId,
16
24
  GoogleGenerativeAIImageSettings,
17
25
  } from './google-generative-ai-image-settings';
18
- import { FetchFunction, Resolvable } from '@ai-sdk/provider-utils';
26
+ import { GoogleGenerativeAILanguageModel } from './google-generative-ai-language-model';
19
27
 
20
28
  interface GoogleGenerativeAIImageModelConfig {
21
29
  provider: string;
@@ -32,8 +40,15 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
32
40
  readonly specificationVersion = 'v3';
33
41
 
34
42
  get maxImagesPerCall(): number {
43
+ if (this.settings.maxImagesPerCall != null) {
44
+ return this.settings.maxImagesPerCall;
45
+ }
46
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-image
47
+ if (isGeminiModel(this.modelId)) {
48
+ return 10;
49
+ }
35
50
  // https://ai.google.dev/gemini-api/docs/imagen#imagen-model
36
- return this.settings.maxImagesPerCall ?? 4;
51
+ return 4;
37
52
  }
38
53
 
39
54
  get provider(): string {
@@ -48,6 +63,16 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
48
63
 
49
64
  async doGenerate(
50
65
  options: Parameters<ImageModelV3['doGenerate']>[0],
66
+ ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
67
+ // Gemini image models use the language model API internally
68
+ if (isGeminiModel(this.modelId)) {
69
+ return this.doGenerateGemini(options);
70
+ }
71
+ return this.doGenerateImagen(options);
72
+ }
73
+
74
+ private async doGenerateImagen(
75
+ options: Parameters<ImageModelV3['doGenerate']>[0],
51
76
  ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
52
77
  const {
53
78
  prompt,
@@ -63,10 +88,10 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
63
88
  } = options;
64
89
  const warnings: Array<SharedV3Warning> = [];
65
90
 
66
- // Google Generative AI does not support image editing
91
+ // Imagen API endpoints do not support image editing
67
92
  if (files != null && files.length > 0) {
68
93
  throw new Error(
69
- 'Google Generative AI does not support image editing. ' +
94
+ 'Google Generative AI does not support image editing with Imagen models. ' +
70
95
  'Use Google Vertex AI (@ai-sdk/google-vertex) for image editing capabilities.',
71
96
  );
72
97
  }
@@ -99,7 +124,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
99
124
  const googleOptions = await parseProviderOptions({
100
125
  provider: 'google',
101
126
  providerOptions,
102
- schema: googleImageProviderOptionsSchema,
127
+ schema: googleImageModelOptionsSchema,
103
128
  });
104
129
 
105
130
  const currentDate = this.config._internal?.currentDate?.() ?? new Date();
@@ -138,10 +163,10 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
138
163
  images: response.predictions.map(
139
164
  (p: { bytesBase64Encoded: string }) => p.bytesBase64Encoded,
140
165
  ),
141
- warnings: warnings ?? [],
166
+ warnings,
142
167
  providerMetadata: {
143
168
  google: {
144
- images: response.predictions.map(prediction => ({
169
+ images: response.predictions.map(() => ({
145
170
  // Add any prediction-specific metadata here
146
171
  })),
147
172
  },
@@ -153,6 +178,146 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
153
178
  },
154
179
  };
155
180
  }
181
+
182
+ private async doGenerateGemini(
183
+ options: Parameters<ImageModelV3['doGenerate']>[0],
184
+ ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
185
+ const {
186
+ prompt,
187
+ n,
188
+ size,
189
+ aspectRatio,
190
+ seed,
191
+ providerOptions,
192
+ headers,
193
+ abortSignal,
194
+ files,
195
+ mask,
196
+ } = options;
197
+ const warnings: Array<SharedV3Warning> = [];
198
+
199
+ // Gemini does not support mask-based inpainting
200
+ if (mask != null) {
201
+ throw new Error(
202
+ 'Gemini image models do not support mask-based image editing.',
203
+ );
204
+ }
205
+
206
+ // Gemini does not support generating multiple images per call via n parameter
207
+ if (n != null && n > 1) {
208
+ throw new Error(
209
+ 'Gemini image models do not support generating a set number of images per call. Use n=1 or omit the n parameter.',
210
+ );
211
+ }
212
+
213
+ if (size != null) {
214
+ warnings.push({
215
+ type: 'unsupported',
216
+ feature: 'size',
217
+ details:
218
+ 'This model does not support the `size` option. Use `aspectRatio` instead.',
219
+ });
220
+ }
221
+
222
+ // Build user message content for language model
223
+ const userContent: Array<
224
+ | { type: 'text'; text: string }
225
+ | { type: 'file'; data: string | Uint8Array | URL; mediaType: string }
226
+ > = [];
227
+
228
+ // Add text prompt
229
+ if (prompt != null) {
230
+ userContent.push({ type: 'text', text: prompt });
231
+ }
232
+
233
+ // Add input images for editing
234
+ if (files != null && files.length > 0) {
235
+ for (const file of files) {
236
+ if (file.type === 'url') {
237
+ userContent.push({
238
+ type: 'file',
239
+ data: new URL(file.url),
240
+ mediaType: 'image/*',
241
+ });
242
+ } else {
243
+ userContent.push({
244
+ type: 'file',
245
+ data:
246
+ typeof file.data === 'string'
247
+ ? file.data
248
+ : new Uint8Array(file.data),
249
+ mediaType: file.mediaType,
250
+ });
251
+ }
252
+ }
253
+ }
254
+
255
+ const languageModelPrompt: LanguageModelV3Prompt = [
256
+ { role: 'user', content: userContent },
257
+ ];
258
+
259
+ // Instantiate language model
260
+ const languageModel = new GoogleGenerativeAILanguageModel(this.modelId, {
261
+ provider: this.config.provider,
262
+ baseURL: this.config.baseURL,
263
+ headers: this.config.headers ?? {},
264
+ fetch: this.config.fetch,
265
+ generateId: this.config.generateId ?? defaultGenerateId,
266
+ });
267
+
268
+ // Call language model with image-only response modality
269
+ const result = await languageModel.doGenerate({
270
+ prompt: languageModelPrompt,
271
+ seed,
272
+ providerOptions: {
273
+ google: {
274
+ responseModalities: ['IMAGE'],
275
+ imageConfig: aspectRatio ? { aspectRatio } : undefined,
276
+ ...((providerOptions?.google as Record<string, unknown>) ?? {}),
277
+ },
278
+ },
279
+ headers,
280
+ abortSignal,
281
+ });
282
+
283
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
284
+
285
+ // Extract images from language model response
286
+ const images: string[] = [];
287
+ for (const part of result.content) {
288
+ if (part.type === 'file' && part.mediaType.startsWith('image/')) {
289
+ images.push(convertToBase64(part.data));
290
+ }
291
+ }
292
+
293
+ return {
294
+ images,
295
+ warnings,
296
+ providerMetadata: {
297
+ google: {
298
+ images: images.map(() => ({})),
299
+ },
300
+ },
301
+ response: {
302
+ timestamp: currentDate,
303
+ modelId: this.modelId,
304
+ headers: result.response?.headers,
305
+ },
306
+ usage: result.usage
307
+ ? {
308
+ inputTokens: result.usage.inputTokens.total,
309
+ outputTokens: result.usage.outputTokens.total,
310
+ totalTokens:
311
+ (result.usage.inputTokens.total ?? 0) +
312
+ (result.usage.outputTokens.total ?? 0),
313
+ }
314
+ : undefined,
315
+ };
316
+ }
317
+ }
318
+
319
+ function isGeminiModel(modelId: string): boolean {
320
+ return modelId.startsWith('gemini-');
156
321
  }
157
322
 
158
323
  // minimal version of the schema
@@ -168,7 +333,7 @@ const googleImageResponseSchema = lazySchema(() =>
168
333
 
169
334
  // Note: For the initial GA launch of Imagen 3, safety filters are not configurable.
170
335
  // https://ai.google.dev/gemini-api/docs/imagen#imagen-model
171
- const googleImageProviderOptionsSchema = lazySchema(() =>
336
+ const googleImageModelOptionsSchema = lazySchema(() =>
172
337
  zodSchema(
173
338
  z.object({
174
339
  personGeneration: z
@@ -179,6 +344,6 @@ const googleImageProviderOptionsSchema = lazySchema(() =>
179
344
  ),
180
345
  );
181
346
 
182
- export type GoogleGenerativeAIImageProviderOptions = InferSchema<
183
- typeof googleImageProviderOptionsSchema
347
+ export type GoogleImageModelOptions = InferSchema<
348
+ typeof googleImageModelOptionsSchema
184
349
  >;
@@ -1,7 +1,11 @@
1
1
  export type GoogleGenerativeAIImageModelId =
2
+ // Imagen models (use :predict API)
2
3
  | 'imagen-4.0-generate-001'
3
4
  | 'imagen-4.0-ultra-generate-001'
4
5
  | 'imagen-4.0-fast-generate-001'
6
+ // Gemini image models (technically multimodal output language models, use :generateContent API)
7
+ | 'gemini-2.5-flash-image'
8
+ | 'gemini-3-pro-image-preview'
5
9
  | (string & {});
6
10
 
7
11
  export interface GoogleGenerativeAIImageSettings {
@@ -36,7 +36,7 @@ import { getModelPath } from './get-model-path';
36
36
  import { googleFailedResponseHandler } from './google-error';
37
37
  import {
38
38
  GoogleGenerativeAIModelId,
39
- googleGenerativeAIProviderOptions,
39
+ googleLanguageModelOptions,
40
40
  } from './google-generative-ai-options';
41
41
  import { GoogleGenerativeAIContentPart } from './google-generative-ai-prompt';
42
42
  import { prepareTools } from './google-prepare-tools';
@@ -103,14 +103,14 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
103
103
  let googleOptions = await parseProviderOptions({
104
104
  provider: providerOptionsName,
105
105
  providerOptions,
106
- schema: googleGenerativeAIProviderOptions,
106
+ schema: googleLanguageModelOptions,
107
107
  });
108
108
 
109
109
  if (googleOptions == null && providerOptionsName !== 'google') {
110
110
  googleOptions = await parseProviderOptions({
111
111
  provider: 'google',
112
112
  providerOptions,
113
- schema: googleGenerativeAIProviderOptions,
113
+ schema: googleLanguageModelOptions,
114
114
  });
115
115
  }
116
116
 
@@ -45,7 +45,7 @@ export type GoogleGenerativeAIModelId =
45
45
  | 'gemma-3-27b-it'
46
46
  | (string & {});
47
47
 
48
- export const googleGenerativeAIProviderOptions = lazySchema(() =>
48
+ export const googleLanguageModelOptions = lazySchema(() =>
49
49
  zodSchema(
50
50
  z.object({
51
51
  responseModalities: z.array(z.enum(['TEXT', 'IMAGE'])).optional(),
@@ -188,6 +188,6 @@ export const googleGenerativeAIProviderOptions = lazySchema(() =>
188
188
  ),
189
189
  );
190
190
 
191
- export type GoogleGenerativeAIProviderOptions = InferSchema<
192
- typeof googleGenerativeAIProviderOptions
191
+ export type GoogleLanguageModelOptions = InferSchema<
192
+ typeof googleLanguageModelOptions
193
193
  >;
@@ -21,7 +21,7 @@ import { z } from 'zod/v4';
21
21
  import { googleFailedResponseHandler } from './google-error';
22
22
  import type { GoogleGenerativeAIVideoModelId } from './google-generative-ai-video-settings';
23
23
 
24
- export type GoogleGenerativeAIVideoProviderOptions = {
24
+ export type GoogleVideoModelOptions = {
25
25
  // Polling configuration
26
26
  pollIntervalMs?: number | null;
27
27
  pollTimeoutMs?: number | null;
@@ -76,8 +76,8 @@ export class GoogleGenerativeAIVideoModel implements Experimental_VideoModelV3 {
76
76
  const googleOptions = (await parseProviderOptions({
77
77
  provider: 'google',
78
78
  providerOptions: options.providerOptions,
79
- schema: googleVideoProviderOptionsSchema,
80
- })) as GoogleGenerativeAIVideoProviderOptions | undefined;
79
+ schema: googleVideoModelOptionsSchema,
80
+ })) as GoogleVideoModelOptions | undefined;
81
81
 
82
82
  const instances: Array<Record<string, unknown>> = [{}];
83
83
  const instance = instances[0];
@@ -155,7 +155,7 @@ export class GoogleGenerativeAIVideoModel implements Experimental_VideoModelV3 {
155
155
  }
156
156
 
157
157
  if (googleOptions != null) {
158
- const opts = googleOptions as GoogleGenerativeAIVideoProviderOptions;
158
+ const opts = googleOptions as GoogleVideoModelOptions;
159
159
 
160
160
  if (
161
161
  opts.personGeneration !== undefined &&
@@ -350,7 +350,7 @@ const googleOperationSchema = z.object({
350
350
  .nullish(),
351
351
  });
352
352
 
353
- const googleVideoProviderOptionsSchema = lazySchema(() =>
353
+ const googleVideoModelOptionsSchema = lazySchema(() =>
354
354
  zodSchema(
355
355
  z
356
356
  .object({