@ai-sdk/google 3.0.24 → 3.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/index.d.mts +9 -9
- package/dist/index.d.ts +9 -9
- package/dist/index.js +132 -15
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +134 -15
- package/dist/index.mjs.map +1 -1
- package/dist/internal/index.js +3 -3
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/index.mjs +3 -3
- package/dist/internal/index.mjs.map +1 -1
- package/docs/15-google-generative-ai.mdx +111 -20
- package/package.json +3 -3
- package/src/google-generative-ai-embedding-model.ts +2 -2
- package/src/google-generative-ai-embedding-options.ts +3 -3
- package/src/google-generative-ai-image-model.ts +176 -11
- package/src/google-generative-ai-image-settings.ts +4 -0
- package/src/google-generative-ai-language-model.ts +3 -3
- package/src/google-generative-ai-options.ts +3 -3
- package/src/google-generative-ai-video-model.ts +5 -5
- package/src/index.ts +20 -4
|
@@ -108,6 +108,8 @@ Google Generative AI also supports some model specific settings that are not par
|
|
|
108
108
|
You can pass them as an options argument:
|
|
109
109
|
|
|
110
110
|
```ts
|
|
111
|
+
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
|
|
112
|
+
|
|
111
113
|
const model = google('gemini-2.5-flash');
|
|
112
114
|
|
|
113
115
|
await generateText({
|
|
@@ -120,7 +122,7 @@ await generateText({
|
|
|
120
122
|
threshold: 'BLOCK_LOW_AND_ABOVE',
|
|
121
123
|
},
|
|
122
124
|
],
|
|
123
|
-
},
|
|
125
|
+
} satisfies GoogleLanguageModelOptions,
|
|
124
126
|
},
|
|
125
127
|
});
|
|
126
128
|
```
|
|
@@ -256,7 +258,7 @@ The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" tha
|
|
|
256
258
|
For Gemini 3 models, use the `thinkingLevel` parameter to control the depth of reasoning:
|
|
257
259
|
|
|
258
260
|
```ts
|
|
259
|
-
import { google,
|
|
261
|
+
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
|
|
260
262
|
import { generateText } from 'ai';
|
|
261
263
|
|
|
262
264
|
const model = google('gemini-3-pro-preview');
|
|
@@ -270,7 +272,7 @@ const { text, reasoning } = await generateText({
|
|
|
270
272
|
thinkingLevel: 'high',
|
|
271
273
|
includeThoughts: true,
|
|
272
274
|
},
|
|
273
|
-
} satisfies
|
|
275
|
+
} satisfies GoogleLanguageModelOptions,
|
|
274
276
|
},
|
|
275
277
|
});
|
|
276
278
|
|
|
@@ -284,7 +286,7 @@ console.log(reasoning); // Reasoning summary
|
|
|
284
286
|
For Gemini 2.5 models, use the `thinkingBudget` parameter to control the number of thinking tokens:
|
|
285
287
|
|
|
286
288
|
```ts
|
|
287
|
-
import { google,
|
|
289
|
+
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
|
|
288
290
|
import { generateText } from 'ai';
|
|
289
291
|
|
|
290
292
|
const model = google('gemini-2.5-flash');
|
|
@@ -298,7 +300,7 @@ const { text, reasoning } = await generateText({
|
|
|
298
300
|
thinkingBudget: 8192,
|
|
299
301
|
includeThoughts: true,
|
|
300
302
|
},
|
|
301
|
-
} satisfies
|
|
303
|
+
} satisfies GoogleLanguageModelOptions,
|
|
302
304
|
},
|
|
303
305
|
});
|
|
304
306
|
|
|
@@ -435,7 +437,7 @@ console.log('Cached tokens:', providerMetadata.google);
|
|
|
435
437
|
For guaranteed cost savings, you can still use explicit caching with Gemini 2.5 and 2.0 models. See the [models page](https://ai.google.dev/gemini-api/docs/models) to check whether caching is supported by the model you are using:
|
|
436
438
|
|
|
437
439
|
```ts
|
|
438
|
-
import { google } from '@ai-sdk/google';
|
|
440
|
+
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
|
|
439
441
|
import { GoogleGenAI } from '@google/genai';
|
|
440
442
|
import { generateText } from 'ai';
|
|
441
443
|
|
|
@@ -465,7 +467,7 @@ const { text: veggieLasagnaRecipe } = await generateText({
|
|
|
465
467
|
providerOptions: {
|
|
466
468
|
google: {
|
|
467
469
|
cachedContent: cache.name,
|
|
468
|
-
},
|
|
470
|
+
} satisfies GoogleLanguageModelOptions,
|
|
469
471
|
},
|
|
470
472
|
});
|
|
471
473
|
|
|
@@ -475,7 +477,7 @@ const { text: meatLasagnaRecipe } = await generateText({
|
|
|
475
477
|
providerOptions: {
|
|
476
478
|
google: {
|
|
477
479
|
cachedContent: cache.name,
|
|
478
|
-
},
|
|
480
|
+
} satisfies GoogleLanguageModelOptions,
|
|
479
481
|
},
|
|
480
482
|
});
|
|
481
483
|
```
|
|
@@ -766,7 +768,7 @@ With [Google Maps grounding](https://ai.google.dev/gemini-api/docs/maps-groundin
|
|
|
766
768
|
the model has access to Google Maps data for location-aware responses. This enables providing local data and geospatial context, such as finding nearby restaurants.
|
|
767
769
|
|
|
768
770
|
```ts highlight="7-16"
|
|
769
|
-
import { google } from '@ai-sdk/google';
|
|
771
|
+
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
|
|
770
772
|
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
|
|
771
773
|
import { generateText } from 'ai';
|
|
772
774
|
|
|
@@ -780,7 +782,7 @@ const { text, sources, providerMetadata } = await generateText({
|
|
|
780
782
|
retrievalConfig: {
|
|
781
783
|
latLng: { latitude: 34.090199, longitude: -117.881081 },
|
|
782
784
|
},
|
|
783
|
-
},
|
|
785
|
+
} satisfies GoogleLanguageModelOptions,
|
|
784
786
|
},
|
|
785
787
|
prompt:
|
|
786
788
|
'What are the best Italian restaurants within a 15-minute walk from here?',
|
|
@@ -927,7 +929,7 @@ The `vertexRagStore` tool accepts the following configuration options:
|
|
|
927
929
|
|
|
928
930
|
### Image Outputs
|
|
929
931
|
|
|
930
|
-
Gemini models with image generation capabilities (`gemini-2.5-flash-image`) support
|
|
932
|
+
Gemini models with image generation capabilities (e.g. `gemini-2.5-flash-image`) support generating images as part of a multimodal response. Images are exposed as files in the response.
|
|
931
933
|
|
|
932
934
|
```ts
|
|
933
935
|
import { google } from '@ai-sdk/google';
|
|
@@ -946,6 +948,12 @@ for (const file of result.files) {
|
|
|
946
948
|
}
|
|
947
949
|
```
|
|
948
950
|
|
|
951
|
+
<Note>
|
|
952
|
+
If you primarily want to generate images without text output, you can also use
|
|
953
|
+
Gemini image models with the `generateImage()` function. See [Gemini Image
|
|
954
|
+
Models](#gemini-image-models) for details.
|
|
955
|
+
</Note>
|
|
956
|
+
|
|
949
957
|
### Safety Ratings
|
|
950
958
|
|
|
951
959
|
The safety ratings provide insight into the safety of the model's response.
|
|
@@ -1008,7 +1016,7 @@ const { object } = await generateObject({
|
|
|
1008
1016
|
providerOptions: {
|
|
1009
1017
|
google: {
|
|
1010
1018
|
structuredOutputs: false,
|
|
1011
|
-
},
|
|
1019
|
+
} satisfies GoogleLanguageModelOptions,
|
|
1012
1020
|
},
|
|
1013
1021
|
schema: z.object({
|
|
1014
1022
|
name: z.string(),
|
|
@@ -1099,7 +1107,7 @@ The Google Generative AI provider sends API calls to the right endpoint based on
|
|
|
1099
1107
|
Google Generative AI embedding models support additional settings. You can pass them as an options argument:
|
|
1100
1108
|
|
|
1101
1109
|
```ts
|
|
1102
|
-
import { google } from '@ai-sdk/google';
|
|
1110
|
+
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
|
|
1103
1111
|
import { embed } from 'ai';
|
|
1104
1112
|
|
|
1105
1113
|
const model = google.embedding('gemini-embedding-001');
|
|
@@ -1111,7 +1119,7 @@ const { embedding } = await embed({
|
|
|
1111
1119
|
google: {
|
|
1112
1120
|
outputDimensionality: 512, // optional, number of dimensions for the embedding
|
|
1113
1121
|
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
|
|
1114
|
-
},
|
|
1122
|
+
} satisfies GoogleEmbeddingModelOptions,
|
|
1115
1123
|
},
|
|
1116
1124
|
});
|
|
1117
1125
|
```
|
|
@@ -1144,9 +1152,18 @@ The following optional provider options are available for Google Generative AI e
|
|
|
1144
1152
|
|
|
1145
1153
|
## Image Models
|
|
1146
1154
|
|
|
1147
|
-
You can create
|
|
1155
|
+
You can create image models that call the Google Generative AI API using the `.image()` factory method.
|
|
1148
1156
|
For more on image generation with the AI SDK see [generateImage()](/docs/reference/ai-sdk-core/generate-image).
|
|
1149
1157
|
|
|
1158
|
+
The Google provider supports two types of image models:
|
|
1159
|
+
|
|
1160
|
+
- **Imagen models**: Dedicated image generation models using the `:predict` API
|
|
1161
|
+
- **Gemini image models**: Multimodal language models with image output capabilities using the `:generateContent` API
|
|
1162
|
+
|
|
1163
|
+
### Imagen Models
|
|
1164
|
+
|
|
1165
|
+
[Imagen](https://ai.google.dev/gemini-api/docs/imagen) models are dedicated image generation models.
|
|
1166
|
+
|
|
1150
1167
|
```ts
|
|
1151
1168
|
import { google } from '@ai-sdk/google';
|
|
1152
1169
|
import { generateImage } from 'ai';
|
|
@@ -1158,11 +1175,11 @@ const { image } = await generateImage({
|
|
|
1158
1175
|
});
|
|
1159
1176
|
```
|
|
1160
1177
|
|
|
1161
|
-
Further configuration can be done using Google provider options. You can validate the provider options using the `
|
|
1178
|
+
Further configuration can be done using Google provider options. You can validate the provider options using the `GoogleImageModelOptions` type.
|
|
1162
1179
|
|
|
1163
1180
|
```ts
|
|
1164
1181
|
import { google } from '@ai-sdk/google';
|
|
1165
|
-
import {
|
|
1182
|
+
import { GoogleImageModelOptions } from '@ai-sdk/google';
|
|
1166
1183
|
import { generateImage } from 'ai';
|
|
1167
1184
|
|
|
1168
1185
|
const { image } = await generateImage({
|
|
@@ -1170,13 +1187,13 @@ const { image } = await generateImage({
|
|
|
1170
1187
|
providerOptions: {
|
|
1171
1188
|
google: {
|
|
1172
1189
|
personGeneration: 'dont_allow',
|
|
1173
|
-
} satisfies
|
|
1190
|
+
} satisfies GoogleImageModelOptions,
|
|
1174
1191
|
},
|
|
1175
1192
|
// ...
|
|
1176
1193
|
});
|
|
1177
1194
|
```
|
|
1178
1195
|
|
|
1179
|
-
The following provider options are available:
|
|
1196
|
+
The following provider options are available for Imagen models:
|
|
1180
1197
|
|
|
1181
1198
|
- **personGeneration** `allow_adult` | `allow_all` | `dont_allow`
|
|
1182
1199
|
Whether to allow person generation. Defaults to `allow_adult`.
|
|
@@ -1186,10 +1203,84 @@ The following provider options are available:
|
|
|
1186
1203
|
parameter instead.
|
|
1187
1204
|
</Note>
|
|
1188
1205
|
|
|
1189
|
-
#### Model Capabilities
|
|
1206
|
+
#### Imagen Model Capabilities
|
|
1190
1207
|
|
|
1191
1208
|
| Model | Aspect Ratios |
|
|
1192
1209
|
| ------------------------------- | ------------------------- |
|
|
1193
1210
|
| `imagen-4.0-generate-001` | 1:1, 3:4, 4:3, 9:16, 16:9 |
|
|
1194
1211
|
| `imagen-4.0-ultra-generate-001` | 1:1, 3:4, 4:3, 9:16, 16:9 |
|
|
1195
1212
|
| `imagen-4.0-fast-generate-001` | 1:1, 3:4, 4:3, 9:16, 16:9 |
|
|
1213
|
+
|
|
1214
|
+
### Gemini Image Models
|
|
1215
|
+
|
|
1216
|
+
[Gemini image models](https://ai.google.dev/gemini-api/docs/image-generation) (e.g. `gemini-2.5-flash-image`) are technically multimodal output language models, but they can be used with the `generateImage()` function for a simpler image generation experience. Internally, the provider calls the language model API with `responseModalities: ['IMAGE']`.
|
|
1217
|
+
|
|
1218
|
+
```ts
|
|
1219
|
+
import { google } from '@ai-sdk/google';
|
|
1220
|
+
import { generateImage } from 'ai';
|
|
1221
|
+
|
|
1222
|
+
const { image } = await generateImage({
|
|
1223
|
+
model: google.image('gemini-2.5-flash-image'),
|
|
1224
|
+
prompt: 'A photorealistic image of a cat wearing a wizard hat',
|
|
1225
|
+
aspectRatio: '1:1',
|
|
1226
|
+
});
|
|
1227
|
+
```
|
|
1228
|
+
|
|
1229
|
+
Gemini image models also support image editing by providing input images:
|
|
1230
|
+
|
|
1231
|
+
```ts
|
|
1232
|
+
import { google } from '@ai-sdk/google';
|
|
1233
|
+
import { generateImage } from 'ai';
|
|
1234
|
+
import fs from 'node:fs';
|
|
1235
|
+
|
|
1236
|
+
const sourceImage = fs.readFileSync('./cat.png');
|
|
1237
|
+
|
|
1238
|
+
const { image } = await generateImage({
|
|
1239
|
+
model: google.image('gemini-2.5-flash-image'),
|
|
1240
|
+
prompt: {
|
|
1241
|
+
text: 'Add a small wizard hat to this cat',
|
|
1242
|
+
images: [sourceImage],
|
|
1243
|
+
},
|
|
1244
|
+
});
|
|
1245
|
+
```
|
|
1246
|
+
|
|
1247
|
+
You can also use URLs for input images:
|
|
1248
|
+
|
|
1249
|
+
```ts
|
|
1250
|
+
import { google } from '@ai-sdk/google';
|
|
1251
|
+
import { generateImage } from 'ai';
|
|
1252
|
+
|
|
1253
|
+
const { image } = await generateImage({
|
|
1254
|
+
model: google.image('gemini-2.5-flash-image'),
|
|
1255
|
+
prompt: {
|
|
1256
|
+
text: 'Add a small wizard hat to this cat',
|
|
1257
|
+
images: ['https://example.com/cat.png'],
|
|
1258
|
+
},
|
|
1259
|
+
});
|
|
1260
|
+
```
|
|
1261
|
+
|
|
1262
|
+
<Note>
|
|
1263
|
+
Gemini image models do not support the `size` or `n` parameters. Use
|
|
1264
|
+
`aspectRatio` instead of `size`. Mask-based inpainting is also not supported.
|
|
1265
|
+
</Note>
|
|
1266
|
+
|
|
1267
|
+
<Note>
|
|
1268
|
+
For more advanced use cases where you need both text and image outputs, or
|
|
1269
|
+
want more control over the generation process, you can use Gemini image models
|
|
1270
|
+
directly with `generateText()`. See [Image Outputs](#image-outputs) for
|
|
1271
|
+
details.
|
|
1272
|
+
</Note>
|
|
1273
|
+
|
|
1274
|
+
#### Gemini Image Model Capabilities
|
|
1275
|
+
|
|
1276
|
+
| Model | Image Generation | Image Editing | Aspect Ratios |
|
|
1277
|
+
| ---------------------------- | ------------------- | ------------------- | --------------------------------------------------- |
|
|
1278
|
+
| `gemini-2.5-flash-image` | <Check size={18} /> | <Check size={18} /> | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
|
|
1279
|
+
| `gemini-3-pro-image-preview` | <Check size={18} /> | <Check size={18} /> | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
|
|
1280
|
+
|
|
1281
|
+
<Note>
|
|
1282
|
+
`gemini-3-pro-image-preview` supports additional features including up to 14
|
|
1283
|
+
reference images for editing (6 objects, 5 humans), resolution options (1K,
|
|
1284
|
+
2K, 4K via `providerOptions.google.imageConfig.imageSize`), and Google Search
|
|
1285
|
+
grounding.
|
|
1286
|
+
</Note>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/google",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.26",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"sideEffects": false,
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -36,8 +36,8 @@
|
|
|
36
36
|
}
|
|
37
37
|
},
|
|
38
38
|
"dependencies": {
|
|
39
|
-
"@ai-sdk/provider": "
|
|
40
|
-
"@ai-sdk/provider
|
|
39
|
+
"@ai-sdk/provider-utils": "4.0.14",
|
|
40
|
+
"@ai-sdk/provider": "3.0.8"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "20.17.24",
|
|
@@ -16,7 +16,7 @@ import { z } from 'zod/v4';
|
|
|
16
16
|
import { googleFailedResponseHandler } from './google-error';
|
|
17
17
|
import {
|
|
18
18
|
GoogleGenerativeAIEmbeddingModelId,
|
|
19
|
-
|
|
19
|
+
googleEmbeddingModelOptions,
|
|
20
20
|
} from './google-generative-ai-embedding-options';
|
|
21
21
|
|
|
22
22
|
type GoogleGenerativeAIEmbeddingConfig = {
|
|
@@ -57,7 +57,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
57
57
|
const googleOptions = await parseProviderOptions({
|
|
58
58
|
provider: 'google',
|
|
59
59
|
providerOptions,
|
|
60
|
-
schema:
|
|
60
|
+
schema: googleEmbeddingModelOptions,
|
|
61
61
|
});
|
|
62
62
|
|
|
63
63
|
if (values.length > this.maxEmbeddingsPerCall) {
|
|
@@ -10,7 +10,7 @@ export type GoogleGenerativeAIEmbeddingModelId =
|
|
|
10
10
|
| 'text-embedding-004'
|
|
11
11
|
| (string & {});
|
|
12
12
|
|
|
13
|
-
export const
|
|
13
|
+
export const googleEmbeddingModelOptions = lazySchema(() =>
|
|
14
14
|
zodSchema(
|
|
15
15
|
z.object({
|
|
16
16
|
/**
|
|
@@ -47,6 +47,6 @@ export const googleGenerativeAIEmbeddingProviderOptions = lazySchema(() =>
|
|
|
47
47
|
),
|
|
48
48
|
);
|
|
49
49
|
|
|
50
|
-
export type
|
|
51
|
-
typeof
|
|
50
|
+
export type GoogleEmbeddingModelOptions = InferSchema<
|
|
51
|
+
typeof googleEmbeddingModelOptions
|
|
52
52
|
>;
|
|
@@ -1,11 +1,19 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
ImageModelV3,
|
|
3
|
+
LanguageModelV3Prompt,
|
|
4
|
+
SharedV3Warning,
|
|
5
|
+
} from '@ai-sdk/provider';
|
|
2
6
|
import {
|
|
3
7
|
combineHeaders,
|
|
8
|
+
convertToBase64,
|
|
4
9
|
createJsonResponseHandler,
|
|
10
|
+
FetchFunction,
|
|
11
|
+
generateId as defaultGenerateId,
|
|
5
12
|
type InferSchema,
|
|
6
13
|
lazySchema,
|
|
7
14
|
parseProviderOptions,
|
|
8
15
|
postJsonToApi,
|
|
16
|
+
Resolvable,
|
|
9
17
|
resolve,
|
|
10
18
|
zodSchema,
|
|
11
19
|
} from '@ai-sdk/provider-utils';
|
|
@@ -15,7 +23,7 @@ import {
|
|
|
15
23
|
GoogleGenerativeAIImageModelId,
|
|
16
24
|
GoogleGenerativeAIImageSettings,
|
|
17
25
|
} from './google-generative-ai-image-settings';
|
|
18
|
-
import {
|
|
26
|
+
import { GoogleGenerativeAILanguageModel } from './google-generative-ai-language-model';
|
|
19
27
|
|
|
20
28
|
interface GoogleGenerativeAIImageModelConfig {
|
|
21
29
|
provider: string;
|
|
@@ -32,8 +40,15 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
32
40
|
readonly specificationVersion = 'v3';
|
|
33
41
|
|
|
34
42
|
get maxImagesPerCall(): number {
|
|
43
|
+
if (this.settings.maxImagesPerCall != null) {
|
|
44
|
+
return this.settings.maxImagesPerCall;
|
|
45
|
+
}
|
|
46
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-image
|
|
47
|
+
if (isGeminiModel(this.modelId)) {
|
|
48
|
+
return 10;
|
|
49
|
+
}
|
|
35
50
|
// https://ai.google.dev/gemini-api/docs/imagen#imagen-model
|
|
36
|
-
return
|
|
51
|
+
return 4;
|
|
37
52
|
}
|
|
38
53
|
|
|
39
54
|
get provider(): string {
|
|
@@ -48,6 +63,16 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
48
63
|
|
|
49
64
|
async doGenerate(
|
|
50
65
|
options: Parameters<ImageModelV3['doGenerate']>[0],
|
|
66
|
+
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
|
|
67
|
+
// Gemini image models use the language model API internally
|
|
68
|
+
if (isGeminiModel(this.modelId)) {
|
|
69
|
+
return this.doGenerateGemini(options);
|
|
70
|
+
}
|
|
71
|
+
return this.doGenerateImagen(options);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
private async doGenerateImagen(
|
|
75
|
+
options: Parameters<ImageModelV3['doGenerate']>[0],
|
|
51
76
|
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
|
|
52
77
|
const {
|
|
53
78
|
prompt,
|
|
@@ -63,10 +88,10 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
63
88
|
} = options;
|
|
64
89
|
const warnings: Array<SharedV3Warning> = [];
|
|
65
90
|
|
|
66
|
-
//
|
|
91
|
+
// Imagen API endpoints do not support image editing
|
|
67
92
|
if (files != null && files.length > 0) {
|
|
68
93
|
throw new Error(
|
|
69
|
-
'Google Generative AI does not support image editing. ' +
|
|
94
|
+
'Google Generative AI does not support image editing with Imagen models. ' +
|
|
70
95
|
'Use Google Vertex AI (@ai-sdk/google-vertex) for image editing capabilities.',
|
|
71
96
|
);
|
|
72
97
|
}
|
|
@@ -99,7 +124,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
99
124
|
const googleOptions = await parseProviderOptions({
|
|
100
125
|
provider: 'google',
|
|
101
126
|
providerOptions,
|
|
102
|
-
schema:
|
|
127
|
+
schema: googleImageModelOptionsSchema,
|
|
103
128
|
});
|
|
104
129
|
|
|
105
130
|
const currentDate = this.config._internal?.currentDate?.() ?? new Date();
|
|
@@ -138,10 +163,10 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
138
163
|
images: response.predictions.map(
|
|
139
164
|
(p: { bytesBase64Encoded: string }) => p.bytesBase64Encoded,
|
|
140
165
|
),
|
|
141
|
-
warnings
|
|
166
|
+
warnings,
|
|
142
167
|
providerMetadata: {
|
|
143
168
|
google: {
|
|
144
|
-
images: response.predictions.map(
|
|
169
|
+
images: response.predictions.map(() => ({
|
|
145
170
|
// Add any prediction-specific metadata here
|
|
146
171
|
})),
|
|
147
172
|
},
|
|
@@ -153,6 +178,146 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
153
178
|
},
|
|
154
179
|
};
|
|
155
180
|
}
|
|
181
|
+
|
|
182
|
+
private async doGenerateGemini(
|
|
183
|
+
options: Parameters<ImageModelV3['doGenerate']>[0],
|
|
184
|
+
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
|
|
185
|
+
const {
|
|
186
|
+
prompt,
|
|
187
|
+
n,
|
|
188
|
+
size,
|
|
189
|
+
aspectRatio,
|
|
190
|
+
seed,
|
|
191
|
+
providerOptions,
|
|
192
|
+
headers,
|
|
193
|
+
abortSignal,
|
|
194
|
+
files,
|
|
195
|
+
mask,
|
|
196
|
+
} = options;
|
|
197
|
+
const warnings: Array<SharedV3Warning> = [];
|
|
198
|
+
|
|
199
|
+
// Gemini does not support mask-based inpainting
|
|
200
|
+
if (mask != null) {
|
|
201
|
+
throw new Error(
|
|
202
|
+
'Gemini image models do not support mask-based image editing.',
|
|
203
|
+
);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// Gemini does not support generating multiple images per call via n parameter
|
|
207
|
+
if (n != null && n > 1) {
|
|
208
|
+
throw new Error(
|
|
209
|
+
'Gemini image models do not support generating a set number of images per call. Use n=1 or omit the n parameter.',
|
|
210
|
+
);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
if (size != null) {
|
|
214
|
+
warnings.push({
|
|
215
|
+
type: 'unsupported',
|
|
216
|
+
feature: 'size',
|
|
217
|
+
details:
|
|
218
|
+
'This model does not support the `size` option. Use `aspectRatio` instead.',
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// Build user message content for language model
|
|
223
|
+
const userContent: Array<
|
|
224
|
+
| { type: 'text'; text: string }
|
|
225
|
+
| { type: 'file'; data: string | Uint8Array | URL; mediaType: string }
|
|
226
|
+
> = [];
|
|
227
|
+
|
|
228
|
+
// Add text prompt
|
|
229
|
+
if (prompt != null) {
|
|
230
|
+
userContent.push({ type: 'text', text: prompt });
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// Add input images for editing
|
|
234
|
+
if (files != null && files.length > 0) {
|
|
235
|
+
for (const file of files) {
|
|
236
|
+
if (file.type === 'url') {
|
|
237
|
+
userContent.push({
|
|
238
|
+
type: 'file',
|
|
239
|
+
data: new URL(file.url),
|
|
240
|
+
mediaType: 'image/*',
|
|
241
|
+
});
|
|
242
|
+
} else {
|
|
243
|
+
userContent.push({
|
|
244
|
+
type: 'file',
|
|
245
|
+
data:
|
|
246
|
+
typeof file.data === 'string'
|
|
247
|
+
? file.data
|
|
248
|
+
: new Uint8Array(file.data),
|
|
249
|
+
mediaType: file.mediaType,
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
const languageModelPrompt: LanguageModelV3Prompt = [
|
|
256
|
+
{ role: 'user', content: userContent },
|
|
257
|
+
];
|
|
258
|
+
|
|
259
|
+
// Instantiate language model
|
|
260
|
+
const languageModel = new GoogleGenerativeAILanguageModel(this.modelId, {
|
|
261
|
+
provider: this.config.provider,
|
|
262
|
+
baseURL: this.config.baseURL,
|
|
263
|
+
headers: this.config.headers ?? {},
|
|
264
|
+
fetch: this.config.fetch,
|
|
265
|
+
generateId: this.config.generateId ?? defaultGenerateId,
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
// Call language model with image-only response modality
|
|
269
|
+
const result = await languageModel.doGenerate({
|
|
270
|
+
prompt: languageModelPrompt,
|
|
271
|
+
seed,
|
|
272
|
+
providerOptions: {
|
|
273
|
+
google: {
|
|
274
|
+
responseModalities: ['IMAGE'],
|
|
275
|
+
imageConfig: aspectRatio ? { aspectRatio } : undefined,
|
|
276
|
+
...((providerOptions?.google as Record<string, unknown>) ?? {}),
|
|
277
|
+
},
|
|
278
|
+
},
|
|
279
|
+
headers,
|
|
280
|
+
abortSignal,
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
const currentDate = this.config._internal?.currentDate?.() ?? new Date();
|
|
284
|
+
|
|
285
|
+
// Extract images from language model response
|
|
286
|
+
const images: string[] = [];
|
|
287
|
+
for (const part of result.content) {
|
|
288
|
+
if (part.type === 'file' && part.mediaType.startsWith('image/')) {
|
|
289
|
+
images.push(convertToBase64(part.data));
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
return {
|
|
294
|
+
images,
|
|
295
|
+
warnings,
|
|
296
|
+
providerMetadata: {
|
|
297
|
+
google: {
|
|
298
|
+
images: images.map(() => ({})),
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
response: {
|
|
302
|
+
timestamp: currentDate,
|
|
303
|
+
modelId: this.modelId,
|
|
304
|
+
headers: result.response?.headers,
|
|
305
|
+
},
|
|
306
|
+
usage: result.usage
|
|
307
|
+
? {
|
|
308
|
+
inputTokens: result.usage.inputTokens.total,
|
|
309
|
+
outputTokens: result.usage.outputTokens.total,
|
|
310
|
+
totalTokens:
|
|
311
|
+
(result.usage.inputTokens.total ?? 0) +
|
|
312
|
+
(result.usage.outputTokens.total ?? 0),
|
|
313
|
+
}
|
|
314
|
+
: undefined,
|
|
315
|
+
};
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
function isGeminiModel(modelId: string): boolean {
|
|
320
|
+
return modelId.startsWith('gemini-');
|
|
156
321
|
}
|
|
157
322
|
|
|
158
323
|
// minimal version of the schema
|
|
@@ -168,7 +333,7 @@ const googleImageResponseSchema = lazySchema(() =>
|
|
|
168
333
|
|
|
169
334
|
// Note: For the initial GA launch of Imagen 3, safety filters are not configurable.
|
|
170
335
|
// https://ai.google.dev/gemini-api/docs/imagen#imagen-model
|
|
171
|
-
const
|
|
336
|
+
const googleImageModelOptionsSchema = lazySchema(() =>
|
|
172
337
|
zodSchema(
|
|
173
338
|
z.object({
|
|
174
339
|
personGeneration: z
|
|
@@ -179,6 +344,6 @@ const googleImageProviderOptionsSchema = lazySchema(() =>
|
|
|
179
344
|
),
|
|
180
345
|
);
|
|
181
346
|
|
|
182
|
-
export type
|
|
183
|
-
typeof
|
|
347
|
+
export type GoogleImageModelOptions = InferSchema<
|
|
348
|
+
typeof googleImageModelOptionsSchema
|
|
184
349
|
>;
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
export type GoogleGenerativeAIImageModelId =
|
|
2
|
+
// Imagen models (use :predict API)
|
|
2
3
|
| 'imagen-4.0-generate-001'
|
|
3
4
|
| 'imagen-4.0-ultra-generate-001'
|
|
4
5
|
| 'imagen-4.0-fast-generate-001'
|
|
6
|
+
// Gemini image models (technically multimodal output language models, use :generateContent API)
|
|
7
|
+
| 'gemini-2.5-flash-image'
|
|
8
|
+
| 'gemini-3-pro-image-preview'
|
|
5
9
|
| (string & {});
|
|
6
10
|
|
|
7
11
|
export interface GoogleGenerativeAIImageSettings {
|
|
@@ -36,7 +36,7 @@ import { getModelPath } from './get-model-path';
|
|
|
36
36
|
import { googleFailedResponseHandler } from './google-error';
|
|
37
37
|
import {
|
|
38
38
|
GoogleGenerativeAIModelId,
|
|
39
|
-
|
|
39
|
+
googleLanguageModelOptions,
|
|
40
40
|
} from './google-generative-ai-options';
|
|
41
41
|
import { GoogleGenerativeAIContentPart } from './google-generative-ai-prompt';
|
|
42
42
|
import { prepareTools } from './google-prepare-tools';
|
|
@@ -103,14 +103,14 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
|
|
|
103
103
|
let googleOptions = await parseProviderOptions({
|
|
104
104
|
provider: providerOptionsName,
|
|
105
105
|
providerOptions,
|
|
106
|
-
schema:
|
|
106
|
+
schema: googleLanguageModelOptions,
|
|
107
107
|
});
|
|
108
108
|
|
|
109
109
|
if (googleOptions == null && providerOptionsName !== 'google') {
|
|
110
110
|
googleOptions = await parseProviderOptions({
|
|
111
111
|
provider: 'google',
|
|
112
112
|
providerOptions,
|
|
113
|
-
schema:
|
|
113
|
+
schema: googleLanguageModelOptions,
|
|
114
114
|
});
|
|
115
115
|
}
|
|
116
116
|
|
|
@@ -45,7 +45,7 @@ export type GoogleGenerativeAIModelId =
|
|
|
45
45
|
| 'gemma-3-27b-it'
|
|
46
46
|
| (string & {});
|
|
47
47
|
|
|
48
|
-
export const
|
|
48
|
+
export const googleLanguageModelOptions = lazySchema(() =>
|
|
49
49
|
zodSchema(
|
|
50
50
|
z.object({
|
|
51
51
|
responseModalities: z.array(z.enum(['TEXT', 'IMAGE'])).optional(),
|
|
@@ -188,6 +188,6 @@ export const googleGenerativeAIProviderOptions = lazySchema(() =>
|
|
|
188
188
|
),
|
|
189
189
|
);
|
|
190
190
|
|
|
191
|
-
export type
|
|
192
|
-
typeof
|
|
191
|
+
export type GoogleLanguageModelOptions = InferSchema<
|
|
192
|
+
typeof googleLanguageModelOptions
|
|
193
193
|
>;
|
|
@@ -21,7 +21,7 @@ import { z } from 'zod/v4';
|
|
|
21
21
|
import { googleFailedResponseHandler } from './google-error';
|
|
22
22
|
import type { GoogleGenerativeAIVideoModelId } from './google-generative-ai-video-settings';
|
|
23
23
|
|
|
24
|
-
export type
|
|
24
|
+
export type GoogleVideoModelOptions = {
|
|
25
25
|
// Polling configuration
|
|
26
26
|
pollIntervalMs?: number | null;
|
|
27
27
|
pollTimeoutMs?: number | null;
|
|
@@ -76,8 +76,8 @@ export class GoogleGenerativeAIVideoModel implements Experimental_VideoModelV3 {
|
|
|
76
76
|
const googleOptions = (await parseProviderOptions({
|
|
77
77
|
provider: 'google',
|
|
78
78
|
providerOptions: options.providerOptions,
|
|
79
|
-
schema:
|
|
80
|
-
})) as
|
|
79
|
+
schema: googleVideoModelOptionsSchema,
|
|
80
|
+
})) as GoogleVideoModelOptions | undefined;
|
|
81
81
|
|
|
82
82
|
const instances: Array<Record<string, unknown>> = [{}];
|
|
83
83
|
const instance = instances[0];
|
|
@@ -155,7 +155,7 @@ export class GoogleGenerativeAIVideoModel implements Experimental_VideoModelV3 {
|
|
|
155
155
|
}
|
|
156
156
|
|
|
157
157
|
if (googleOptions != null) {
|
|
158
|
-
const opts = googleOptions as
|
|
158
|
+
const opts = googleOptions as GoogleVideoModelOptions;
|
|
159
159
|
|
|
160
160
|
if (
|
|
161
161
|
opts.personGeneration !== undefined &&
|
|
@@ -350,7 +350,7 @@ const googleOperationSchema = z.object({
|
|
|
350
350
|
.nullish(),
|
|
351
351
|
});
|
|
352
352
|
|
|
353
|
-
const
|
|
353
|
+
const googleVideoModelOptionsSchema = lazySchema(() =>
|
|
354
354
|
zodSchema(
|
|
355
355
|
z
|
|
356
356
|
.object({
|