@ai-sdk/google 3.0.74 → 3.0.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/index.d.mts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +478 -312
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +478 -312
- package/dist/index.mjs.map +1 -1
- package/docs/15-google-generative-ai.mdx +72 -16
- package/package.json +1 -1
- package/src/interactions/build-google-interactions-stream-transform.ts +285 -154
- package/src/interactions/convert-to-google-interactions-input.ts +57 -133
- package/src/interactions/extract-google-interactions-sources.ts +3 -3
- package/src/interactions/google-interactions-api.ts +179 -115
- package/src/interactions/google-interactions-language-model-options.ts +61 -0
- package/src/interactions/google-interactions-language-model.ts +100 -38
- package/src/interactions/google-interactions-prompt.ts +189 -114
- package/src/interactions/map-google-interactions-finish-reason.ts +3 -5
- package/src/interactions/parse-google-interactions-outputs.ts +80 -74
- package/src/interactions/prepare-google-interactions-tools.ts +1 -1
- package/src/interactions/stream-google-interactions.ts +1 -1
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +1 -1
|
@@ -1179,21 +1179,28 @@ The following optional provider options are available:
|
|
|
1179
1179
|
Whether the model returns synthesized thought summaries on reasoning
|
|
1180
1180
|
parts. Defaults to the API default.
|
|
1181
1181
|
|
|
1182
|
-
- **
|
|
1182
|
+
- **responseFormat** _Array\<\{ type: 'text' | 'image' | 'audio'; mimeType?: string; schema?: unknown; aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}\>_
|
|
1183
1183
|
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1184
|
+
Output-format entries that map directly to the API's `response_format`
|
|
1185
|
+
array. Use this for fine-grained control over image, audio, or non-JSON
|
|
1186
|
+
text outputs (e.g. `aspectRatio` and `imageSize` for image generation).
|
|
1187
|
+
The AI SDK call-level `responseFormat: { type: 'json', schema }` still
|
|
1188
|
+
drives JSON-mode automatically and prepends a matching text entry;
|
|
1189
|
+
entries listed here are appended.
|
|
1187
1190
|
|
|
1188
|
-
|
|
1191
|
+
`aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`,
|
|
1192
|
+
`9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
|
|
1189
1193
|
|
|
1190
|
-
|
|
1194
|
+
- **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_ (deprecated)
|
|
1191
1195
|
|
|
1192
|
-
|
|
1196
|
+
Use **responseFormat** with a `{ type: 'image', ... }` entry instead.
|
|
1197
|
+
Retained for backwards compatibility; the SDK translates `imageConfig`
|
|
1198
|
+
into a matching `response_format` image entry and emits a warning when
|
|
1199
|
+
set. Ignored when `responseFormat` already supplies an image entry.
|
|
1193
1200
|
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1201
|
+
- **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
|
|
1202
|
+
|
|
1203
|
+
Media resolution applied to image inputs and outputs.
|
|
1197
1204
|
|
|
1198
1205
|
- **serviceTier** _'flex' | 'standard' | 'priority'_
|
|
1199
1206
|
|
|
@@ -1346,9 +1353,10 @@ const { text, toolCalls } = await generateText({
|
|
|
1346
1353
|
|
|
1347
1354
|
### Image output via Interactions
|
|
1348
1355
|
|
|
1349
|
-
|
|
1350
|
-
images as `LanguageModelV4FilePart` files in
|
|
1351
|
-
is required
|
|
1356
|
+
Add a `{ type: 'image' }` entry to `responseFormat` on a Gemini
|
|
1357
|
+
image-capable model to get images as `LanguageModelV4FilePart` files in
|
|
1358
|
+
the response. No tool wrapping is required, and the entry doubles as the
|
|
1359
|
+
place to set `aspectRatio`, `imageSize`, and `mimeType`.
|
|
1352
1360
|
|
|
1353
1361
|
```ts
|
|
1354
1362
|
import { google } from '@ai-sdk/google';
|
|
@@ -1359,7 +1367,7 @@ const result = await generateText({
|
|
|
1359
1367
|
prompt: 'Generate an image of a comic cat in a spaceship.',
|
|
1360
1368
|
providerOptions: {
|
|
1361
1369
|
google: {
|
|
1362
|
-
|
|
1370
|
+
responseFormat: [{ type: 'image' }],
|
|
1363
1371
|
},
|
|
1364
1372
|
},
|
|
1365
1373
|
});
|
|
@@ -1371,6 +1379,54 @@ for (const file of result.files) {
|
|
|
1371
1379
|
}
|
|
1372
1380
|
```
|
|
1373
1381
|
|
|
1382
|
+
To control aspect ratio, image size, or output mime type, add those
|
|
1383
|
+
fields to the same image entry:
|
|
1384
|
+
|
|
1385
|
+
```ts
|
|
1386
|
+
const result = await generateText({
|
|
1387
|
+
model: google.interactions('gemini-3-pro-image-preview'),
|
|
1388
|
+
prompt: 'Generate a high-quality landscape photo of mountains at sunset.',
|
|
1389
|
+
providerOptions: {
|
|
1390
|
+
google: {
|
|
1391
|
+
responseFormat: [
|
|
1392
|
+
{
|
|
1393
|
+
type: 'image',
|
|
1394
|
+
aspectRatio: '16:9',
|
|
1395
|
+
imageSize: '4K',
|
|
1396
|
+
},
|
|
1397
|
+
],
|
|
1398
|
+
},
|
|
1399
|
+
},
|
|
1400
|
+
});
|
|
1401
|
+
```
|
|
1402
|
+
|
|
1403
|
+
For multimodal output, list one entry per modality. The model returns
|
|
1404
|
+
text in `result.text` and the accompanying image(s) in `result.files`:
|
|
1405
|
+
|
|
1406
|
+
```ts
|
|
1407
|
+
import { google } from '@ai-sdk/google';
|
|
1408
|
+
import { generateText } from 'ai';
|
|
1409
|
+
|
|
1410
|
+
const result = await generateText({
|
|
1411
|
+
model: google.interactions('gemini-2.5-flash-image'),
|
|
1412
|
+
prompt:
|
|
1413
|
+
'Tell me a three sentence bedtime story about a unicorn, accompanied by a suitable illustration.',
|
|
1414
|
+
providerOptions: {
|
|
1415
|
+
google: {
|
|
1416
|
+
responseFormat: [
|
|
1417
|
+
{ type: 'text' },
|
|
1418
|
+
{ type: 'image', aspectRatio: '16:9' },
|
|
1419
|
+
],
|
|
1420
|
+
},
|
|
1421
|
+
},
|
|
1422
|
+
});
|
|
1423
|
+
|
|
1424
|
+
console.log(result.text);
|
|
1425
|
+
|
|
1426
|
+
const images = result.files.filter(file => file.mediaType.startsWith('image/'));
|
|
1427
|
+
// images[0].uint8Array | images[0].base64 | images[0].mediaType
|
|
1428
|
+
```
|
|
1429
|
+
|
|
1374
1430
|
Iterative image editing pairs naturally with stateful chaining — keep
|
|
1375
1431
|
`previousInteractionId` set across turns and the model edits its prior
|
|
1376
1432
|
output:
|
|
@@ -1384,7 +1440,7 @@ const model = google.interactions('gemini-3-pro-image-preview');
|
|
|
1384
1440
|
const turn1 = await generateText({
|
|
1385
1441
|
model,
|
|
1386
1442
|
prompt: 'Generate an image of a comic cat in a spaceship.',
|
|
1387
|
-
providerOptions: { google: {
|
|
1443
|
+
providerOptions: { google: { responseFormat: [{ type: 'image' }] } },
|
|
1388
1444
|
});
|
|
1389
1445
|
|
|
1390
1446
|
const interactionId = turn1.providerMetadata?.google?.interactionId as
|
|
@@ -1396,7 +1452,7 @@ const turn2 = await generateText({
|
|
|
1396
1452
|
prompt: 'now make the cat red',
|
|
1397
1453
|
providerOptions: {
|
|
1398
1454
|
google: {
|
|
1399
|
-
|
|
1455
|
+
responseFormat: [{ type: 'image' }],
|
|
1400
1456
|
previousInteractionId: interactionId,
|
|
1401
1457
|
},
|
|
1402
1458
|
},
|