@ai-sdk/google 3.0.73 → 3.0.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/index.d.mts +17 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +521 -340
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +521 -340
- package/dist/index.mjs.map +1 -1
- package/dist/internal/index.d.mts +1 -0
- package/dist/internal/index.d.ts +1 -0
- package/dist/internal/index.js +43 -28
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/index.mjs +43 -28
- package/dist/internal/index.mjs.map +1 -1
- package/docs/15-google-generative-ai.mdx +72 -16
- package/package.json +1 -1
- package/src/convert-to-google-generative-ai-messages.ts +20 -2
- package/src/google-generative-ai-language-model.ts +5 -4
- package/src/google-generative-ai-prompt.ts +5 -1
- package/src/interactions/build-google-interactions-stream-transform.ts +285 -154
- package/src/interactions/convert-to-google-interactions-input.ts +57 -133
- package/src/interactions/extract-google-interactions-sources.ts +3 -3
- package/src/interactions/google-interactions-api.ts +179 -115
- package/src/interactions/google-interactions-language-model-options.ts +61 -0
- package/src/interactions/google-interactions-language-model.ts +100 -38
- package/src/interactions/google-interactions-prompt.ts +189 -114
- package/src/interactions/map-google-interactions-finish-reason.ts +3 -5
- package/src/interactions/parse-google-interactions-outputs.ts +80 -74
- package/src/interactions/prepare-google-interactions-tools.ts +1 -1
- package/src/interactions/stream-google-interactions.ts +1 -1
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +1 -1
|
@@ -1179,21 +1179,28 @@ The following optional provider options are available:
|
|
|
1179
1179
|
Whether the model returns synthesized thought summaries on reasoning
|
|
1180
1180
|
parts. Defaults to the API default.
|
|
1181
1181
|
|
|
1182
|
-
- **
|
|
1182
|
+
- **responseFormat** _Array\<\{ type: 'text' | 'image' | 'audio'; mimeType?: string; schema?: unknown; aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}\>_
|
|
1183
1183
|
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1184
|
+
Output-format entries that map directly to the API's `response_format`
|
|
1185
|
+
array. Use this for fine-grained control over image, audio, or non-JSON
|
|
1186
|
+
text outputs (e.g. `aspectRatio` and `imageSize` for image generation).
|
|
1187
|
+
The AI SDK call-level `responseFormat: { type: 'json', schema }` still
|
|
1188
|
+
drives JSON-mode automatically and prepends a matching text entry;
|
|
1189
|
+
entries listed here are appended.
|
|
1187
1190
|
|
|
1188
|
-
|
|
1191
|
+
`aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`,
|
|
1192
|
+
`9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
|
|
1189
1193
|
|
|
1190
|
-
|
|
1194
|
+
- **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_ (deprecated)
|
|
1191
1195
|
|
|
1192
|
-
|
|
1196
|
+
Use **responseFormat** with a `{ type: 'image', ... }` entry instead.
|
|
1197
|
+
Retained for backwards compatibility; the SDK translates `imageConfig`
|
|
1198
|
+
into a matching `response_format` image entry and emits a warning when
|
|
1199
|
+
set. Ignored when `responseFormat` already supplies an image entry.
|
|
1193
1200
|
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1201
|
+
- **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
|
|
1202
|
+
|
|
1203
|
+
Media resolution applied to image inputs / outputs.
|
|
1197
1204
|
|
|
1198
1205
|
- **serviceTier** _'flex' | 'standard' | 'priority'_
|
|
1199
1206
|
|
|
@@ -1346,9 +1353,10 @@ const { text, toolCalls } = await generateText({
|
|
|
1346
1353
|
|
|
1347
1354
|
### Image output via Interactions
|
|
1348
1355
|
|
|
1349
|
-
|
|
1350
|
-
images as `LanguageModelV4FilePart` files in
|
|
1351
|
-
is required
|
|
1356
|
+
Add a `{ type: 'image' }` entry to `responseFormat` on a Gemini
|
|
1357
|
+
image-capable model to get images as `LanguageModelV4FilePart` files in
|
|
1358
|
+
the response. No tool wrapping is required, and the entry doubles as the
|
|
1359
|
+
place to set `aspectRatio`, `imageSize`, and `mimeType`.
|
|
1352
1360
|
|
|
1353
1361
|
```ts
|
|
1354
1362
|
import { google } from '@ai-sdk/google';
|
|
@@ -1359,7 +1367,7 @@ const result = await generateText({
|
|
|
1359
1367
|
prompt: 'Generate an image of a comic cat in a spaceship.',
|
|
1360
1368
|
providerOptions: {
|
|
1361
1369
|
google: {
|
|
1362
|
-
|
|
1370
|
+
responseFormat: [{ type: 'image' }],
|
|
1363
1371
|
},
|
|
1364
1372
|
},
|
|
1365
1373
|
});
|
|
@@ -1371,6 +1379,54 @@ for (const file of result.files) {
|
|
|
1371
1379
|
}
|
|
1372
1380
|
```
|
|
1373
1381
|
|
|
1382
|
+
To control aspect ratio, image size, or output mime type, add those
|
|
1383
|
+
fields to the same image entry:
|
|
1384
|
+
|
|
1385
|
+
```ts
|
|
1386
|
+
const result = await generateText({
|
|
1387
|
+
model: google.interactions('gemini-3-pro-image-preview'),
|
|
1388
|
+
prompt: 'Generate a high-quality landscape photo of mountains at sunset.',
|
|
1389
|
+
providerOptions: {
|
|
1390
|
+
google: {
|
|
1391
|
+
responseFormat: [
|
|
1392
|
+
{
|
|
1393
|
+
type: 'image',
|
|
1394
|
+
aspectRatio: '16:9',
|
|
1395
|
+
imageSize: '4K',
|
|
1396
|
+
},
|
|
1397
|
+
],
|
|
1398
|
+
},
|
|
1399
|
+
},
|
|
1400
|
+
});
|
|
1401
|
+
```
|
|
1402
|
+
|
|
1403
|
+
For multimodal output, list one entry per modality. The model returns
|
|
1404
|
+
text in `result.text` and the accompanying image(s) in `result.files`:
|
|
1405
|
+
|
|
1406
|
+
```ts
|
|
1407
|
+
import { google } from '@ai-sdk/google';
|
|
1408
|
+
import { generateText } from 'ai';
|
|
1409
|
+
|
|
1410
|
+
const result = await generateText({
|
|
1411
|
+
model: google.interactions('gemini-2.5-flash-image'),
|
|
1412
|
+
prompt:
|
|
1413
|
+
'Tell me a three sentence bedtime story about a unicorn, accompanied by a suitable illustration.',
|
|
1414
|
+
providerOptions: {
|
|
1415
|
+
google: {
|
|
1416
|
+
responseFormat: [
|
|
1417
|
+
{ type: 'text' },
|
|
1418
|
+
{ type: 'image', aspectRatio: '16:9' },
|
|
1419
|
+
],
|
|
1420
|
+
},
|
|
1421
|
+
},
|
|
1422
|
+
});
|
|
1423
|
+
|
|
1424
|
+
console.log(result.text);
|
|
1425
|
+
|
|
1426
|
+
const images = result.files.filter(file => file.mediaType.startsWith('image/'));
|
|
1427
|
+
// images[0].uint8Array | images[0].base64 | images[0].mediaType
|
|
1428
|
+
```
|
|
1429
|
+
|
|
1374
1430
|
Iterative image editing pairs naturally with stateful chaining — keep
|
|
1375
1431
|
`previousInteractionId` set across turns and the model edits its prior
|
|
1376
1432
|
output:
|
|
@@ -1384,7 +1440,7 @@ const model = google.interactions('gemini-3-pro-image-preview');
|
|
|
1384
1440
|
const turn1 = await generateText({
|
|
1385
1441
|
model,
|
|
1386
1442
|
prompt: 'Generate an image of a comic cat in a spaceship.',
|
|
1387
|
-
providerOptions: { google: {
|
|
1443
|
+
providerOptions: { google: { responseFormat: [{ type: 'image' }] } },
|
|
1388
1444
|
});
|
|
1389
1445
|
|
|
1390
1446
|
const interactionId = turn1.providerMetadata?.google?.interactionId as
|
|
@@ -1396,7 +1452,7 @@ const turn2 = await generateText({
|
|
|
1396
1452
|
prompt: 'now make the cat red',
|
|
1397
1453
|
providerOptions: {
|
|
1398
1454
|
google: {
|
|
1399
|
-
|
|
1455
|
+
responseFormat: [{ type: 'image' }],
|
|
1400
1456
|
previousInteractionId: interactionId,
|
|
1401
1457
|
},
|
|
1402
1458
|
},
|
package/package.json
CHANGED
package/src/convert-to-google-generative-ai-messages.ts
CHANGED
|
@@ -57,6 +57,7 @@ function appendToolResultParts(
|
|
|
57
57
|
type: string;
|
|
58
58
|
[key: string]: unknown;
|
|
59
59
|
}>,
|
|
60
|
+
toolCallId?: string,
|
|
60
61
|
): void {
|
|
61
62
|
const functionResponseParts: GoogleGenerativeAIFunctionResponsePart[] = [];
|
|
62
63
|
const responseTextParts: string[] = [];
|
|
@@ -99,6 +100,7 @@ function appendToolResultParts(
|
|
|
99
100
|
|
|
100
101
|
parts.push({
|
|
101
102
|
functionResponse: {
|
|
103
|
+
...(toolCallId != null ? { id: toolCallId } : {}),
|
|
102
104
|
name: toolName,
|
|
103
105
|
response: {
|
|
104
106
|
name: toolName,
|
|
@@ -126,12 +128,14 @@ function appendLegacyToolResultParts(
|
|
|
126
128
|
type: string;
|
|
127
129
|
[key: string]: unknown;
|
|
128
130
|
}>,
|
|
131
|
+
toolCallId?: string,
|
|
129
132
|
): void {
|
|
130
133
|
for (const contentPart of outputValue) {
|
|
131
134
|
switch (contentPart.type) {
|
|
132
135
|
case 'text':
|
|
133
136
|
parts.push({
|
|
134
137
|
functionResponse: {
|
|
138
|
+
...(toolCallId != null ? { id: toolCallId } : {}),
|
|
135
139
|
name: toolName,
|
|
136
140
|
response: {
|
|
137
141
|
name: toolName,
|
|
@@ -315,6 +319,9 @@ export function convertToGoogleGenerativeAIMessages(
|
|
|
315
319
|
|
|
316
320
|
return {
|
|
317
321
|
functionCall: {
|
|
322
|
+
...(part.toolCallId != null
|
|
323
|
+
? { id: part.toolCallId }
|
|
324
|
+
: {}),
|
|
318
325
|
name: part.toolName,
|
|
319
326
|
args: part.input,
|
|
320
327
|
},
|
|
@@ -405,13 +412,24 @@ export function convertToGoogleGenerativeAIMessages(
|
|
|
405
412
|
|
|
406
413
|
if (output.type === 'content') {
|
|
407
414
|
if (supportsFunctionResponseParts) {
|
|
408
|
-
appendToolResultParts(
|
|
415
|
+
appendToolResultParts(
|
|
416
|
+
parts,
|
|
417
|
+
part.toolName,
|
|
418
|
+
output.value,
|
|
419
|
+
part.toolCallId,
|
|
420
|
+
);
|
|
409
421
|
} else {
|
|
410
|
-
appendLegacyToolResultParts(
|
|
422
|
+
appendLegacyToolResultParts(
|
|
423
|
+
parts,
|
|
424
|
+
part.toolName,
|
|
425
|
+
output.value,
|
|
426
|
+
part.toolCallId,
|
|
427
|
+
);
|
|
411
428
|
}
|
|
412
429
|
} else {
|
|
413
430
|
parts.push({
|
|
414
431
|
functionResponse: {
|
|
432
|
+
...(part.toolCallId != null ? { id: part.toolCallId } : {}),
|
|
415
433
|
name: part.toolName,
|
|
416
434
|
response: {
|
|
417
435
|
name: part.toolName,
|
|
@@ -349,7 +349,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
|
|
|
349
349
|
} else if ('functionCall' in part && part.functionCall.name != null) {
|
|
350
350
|
content.push({
|
|
351
351
|
type: 'tool-call' as const,
|
|
352
|
-
toolCallId: this.config.generateId(),
|
|
352
|
+
toolCallId: part.functionCall.id ?? this.config.generateId(),
|
|
353
353
|
toolName: part.functionCall.name,
|
|
354
354
|
input: JSON.stringify(part.functionCall.args ?? {}),
|
|
355
355
|
providerMetadata: part.thoughtSignature
|
|
@@ -828,7 +828,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
|
|
|
828
828
|
part.functionCall.name != null &&
|
|
829
829
|
part.functionCall.willContinue === true
|
|
830
830
|
) {
|
|
831
|
-
const toolCallId = generateId();
|
|
831
|
+
const toolCallId = part.functionCall.id ?? generateId();
|
|
832
832
|
const accumulator = new GoogleJSONAccumulator();
|
|
833
833
|
activeStreamingToolCalls.push({
|
|
834
834
|
toolCallId,
|
|
@@ -910,7 +910,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
|
|
|
910
910
|
|
|
911
911
|
hasToolCalls = true;
|
|
912
912
|
} else if (isCompleteCall) {
|
|
913
|
-
const toolCallId = generateId();
|
|
913
|
+
const toolCallId = part.functionCall.id ?? generateId();
|
|
914
914
|
const toolName = part.functionCall.name!;
|
|
915
915
|
const args =
|
|
916
916
|
typeof part.functionCall.args === 'string'
|
|
@@ -947,7 +947,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
|
|
|
947
947
|
|
|
948
948
|
hasToolCalls = true;
|
|
949
949
|
} else if (isNoArgsCompleteCall) {
|
|
950
|
-
const toolCallId = generateId();
|
|
950
|
+
const toolCallId = part.functionCall.id ?? generateId();
|
|
951
951
|
const toolName = part.functionCall.name!;
|
|
952
952
|
|
|
953
953
|
controller.enqueue({
|
|
@@ -1257,6 +1257,7 @@ const getContentSchema = () =>
|
|
|
1257
1257
|
// note: order matters since text can be fully empty
|
|
1258
1258
|
z.object({
|
|
1259
1259
|
functionCall: z.object({
|
|
1260
|
+
id: z.string().nullish(),
|
|
1260
1261
|
name: z.string().nullish(),
|
|
1261
1262
|
args: z.unknown().nullish(),
|
|
1262
1263
|
partialArgs: z.array(partialArgSchema).nullish(),
|
|
@@ -23,9 +23,13 @@ export type GoogleGenerativeAIContent = {
|
|
|
23
23
|
export type GoogleGenerativeAIContentPart =
|
|
24
24
|
| { text: string; thought?: boolean; thoughtSignature?: string }
|
|
25
25
|
| { inlineData: { mimeType: string; data: string } }
|
|
26
|
-
| {
|
|
26
|
+
| {
|
|
27
|
+
functionCall: { id?: string; name: string; args: unknown };
|
|
28
|
+
thoughtSignature?: string;
|
|
29
|
+
}
|
|
27
30
|
| {
|
|
28
31
|
functionResponse: {
|
|
32
|
+
id?: string;
|
|
29
33
|
name: string;
|
|
30
34
|
response: unknown;
|
|
31
35
|
parts?: Array<GoogleGenerativeAIFunctionResponsePart>;
|