@ai-sdk/google 3.0.73 → 3.0.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/index.d.mts +17 -0
  3. package/dist/index.d.ts +17 -0
  4. package/dist/index.js +521 -340
  5. package/dist/index.js.map +1 -1
  6. package/dist/index.mjs +521 -340
  7. package/dist/index.mjs.map +1 -1
  8. package/dist/internal/index.d.mts +1 -0
  9. package/dist/internal/index.d.ts +1 -0
  10. package/dist/internal/index.js +43 -28
  11. package/dist/internal/index.js.map +1 -1
  12. package/dist/internal/index.mjs +43 -28
  13. package/dist/internal/index.mjs.map +1 -1
  14. package/docs/15-google-generative-ai.mdx +72 -16
  15. package/package.json +1 -1
  16. package/src/convert-to-google-generative-ai-messages.ts +20 -2
  17. package/src/google-generative-ai-language-model.ts +5 -4
  18. package/src/google-generative-ai-prompt.ts +5 -1
  19. package/src/interactions/build-google-interactions-stream-transform.ts +285 -154
  20. package/src/interactions/convert-to-google-interactions-input.ts +57 -133
  21. package/src/interactions/extract-google-interactions-sources.ts +3 -3
  22. package/src/interactions/google-interactions-api.ts +179 -115
  23. package/src/interactions/google-interactions-language-model-options.ts +61 -0
  24. package/src/interactions/google-interactions-language-model.ts +100 -38
  25. package/src/interactions/google-interactions-prompt.ts +189 -114
  26. package/src/interactions/map-google-interactions-finish-reason.ts +3 -5
  27. package/src/interactions/parse-google-interactions-outputs.ts +80 -74
  28. package/src/interactions/prepare-google-interactions-tools.ts +1 -1
  29. package/src/interactions/stream-google-interactions.ts +1 -1
  30. package/src/interactions/synthesize-google-interactions-agent-stream.ts +1 -1
@@ -1179,21 +1179,28 @@ The following optional provider options are available:
1179
1179
  Whether the model returns synthesized thought summaries on reasoning
1180
1180
  parts. Defaults to the API default.
1181
1181
 
1182
- - **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_
1182
+ - **responseFormat** _Array\<\{ type: 'text' | 'image' | 'audio'; mimeType?: string; schema?: unknown; aspectRatio?: string; imageSize?: '1K' \| '2K' \| '4K' \| '512' \}\>_
1183
1183
 
1184
- Image generation configuration when `responseModalities` includes
1185
- `'image'`. `aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`,
1186
- `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
1184
+ Output-format entries that map directly to the API's `response_format`
1185
+ array. Use this for fine-grained control over image, audio, or non-JSON
1186
+ text outputs (e.g. `aspectRatio` and `imageSize` for image generation).
1187
+ The AI SDK call-level `responseFormat: { type: 'json', schema }` still
1188
+ drives JSON-mode automatically and prepends a matching text entry;
1189
+ entries listed here are appended.
1187
1190
 
1188
- - **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
1191
+ `aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`,
1192
+ `9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
1189
1193
 
1190
- Media resolution applied to image inputs / outputs.
1194
+ - **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_ (deprecated)
1191
1195
 
1192
- - **responseModalities** _Array\<'text' | 'image' | 'audio' | 'video' | 'document'\>_
1196
+ Use **responseFormat** with a `{ type: 'image', ... }` entry instead.
1197
+ Retained for backwards compatibility; the SDK translates `imageConfig`
1198
+ into a matching `response_format` image entry and emits a warning when
1199
+ set. `imageConfig` is ignored when `responseFormat` already supplies an image entry.
1193
1200
 
1194
- The modalities the model may emit. Defaults to text-only. Pass
1195
- `['image']` (or `['text', 'image']`) to enable native image output. See
1196
- [Image output](#image-output-via-interactions).
1201
+ - **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
1202
+
1203
+ Media resolution applied to image inputs / outputs.
1197
1204
 
1198
1205
  - **serviceTier** _'flex' | 'standard' | 'priority'_
1199
1206
 
@@ -1346,9 +1353,10 @@ const { text, toolCalls } = await generateText({
1346
1353
 
1347
1354
  ### Image output via Interactions
1348
1355
 
1349
- Set `responseModalities: ['image']` on a Gemini image-capable model to get
1350
- images as `LanguageModelV4FilePart` files in the response. No tool wrapping
1351
- is required.
1356
+ Add a `{ type: 'image' }` entry to `responseFormat` on a Gemini
1357
+ image-capable model to get images as `LanguageModelV4FilePart` files in
1358
+ the response. No tool wrapping is required, and the entry doubles as the
1359
+ place to set `aspectRatio`, `imageSize`, and `mimeType`.
1352
1360
 
1353
1361
  ```ts
1354
1362
  import { google } from '@ai-sdk/google';
@@ -1359,7 +1367,7 @@ const result = await generateText({
1359
1367
  prompt: 'Generate an image of a comic cat in a spaceship.',
1360
1368
  providerOptions: {
1361
1369
  google: {
1362
- responseModalities: ['image'],
1370
+ responseFormat: [{ type: 'image' }],
1363
1371
  },
1364
1372
  },
1365
1373
  });
@@ -1371,6 +1379,54 @@ for (const file of result.files) {
1371
1379
  }
1372
1380
  ```
1373
1381
 
1382
+ To control aspect ratio, image size, or output MIME type, add those
1383
+ fields to the same image entry:
1384
+
1385
+ ```ts
1386
+ const result = await generateText({
1387
+ model: google.interactions('gemini-3-pro-image-preview'),
1388
+ prompt: 'Generate a high-quality landscape photo of mountains at sunset.',
1389
+ providerOptions: {
1390
+ google: {
1391
+ responseFormat: [
1392
+ {
1393
+ type: 'image',
1394
+ aspectRatio: '16:9',
1395
+ imageSize: '4K',
1396
+ },
1397
+ ],
1398
+ },
1399
+ },
1400
+ });
1401
+ ```
1402
+
1403
+ For multimodal output, list one entry per modality. The model returns
1404
+ text in `result.text` and the accompanying image(s) in `result.files`:
1405
+
1406
+ ```ts
1407
+ import { google } from '@ai-sdk/google';
1408
+ import { generateText } from 'ai';
1409
+
1410
+ const result = await generateText({
1411
+ model: google.interactions('gemini-2.5-flash-image'),
1412
+ prompt:
1413
+ 'Tell me a three sentence bedtime story about a unicorn, accompanied by a suitable illustration.',
1414
+ providerOptions: {
1415
+ google: {
1416
+ responseFormat: [
1417
+ { type: 'text' },
1418
+ { type: 'image', aspectRatio: '16:9' },
1419
+ ],
1420
+ },
1421
+ },
1422
+ });
1423
+
1424
+ console.log(result.text);
1425
+
1426
+ const images = result.files.filter(file => file.mediaType.startsWith('image/'));
1427
+ // images[0].uint8Array | images[0].base64 | images[0].mediaType
1428
+ ```
1429
+
1374
1430
  Iterative image editing pairs naturally with stateful chaining — keep
1375
1431
  `previousInteractionId` set across turns and the model edits its prior
1376
1432
  output:
@@ -1384,7 +1440,7 @@ const model = google.interactions('gemini-3-pro-image-preview');
1384
1440
  const turn1 = await generateText({
1385
1441
  model,
1386
1442
  prompt: 'Generate an image of a comic cat in a spaceship.',
1387
- providerOptions: { google: { responseModalities: ['image'] } },
1443
+ providerOptions: { google: { responseFormat: [{ type: 'image' }] } },
1388
1444
  });
1389
1445
 
1390
1446
  const interactionId = turn1.providerMetadata?.google?.interactionId as
@@ -1396,7 +1452,7 @@ const turn2 = await generateText({
1396
1452
  prompt: 'now make the cat red',
1397
1453
  providerOptions: {
1398
1454
  google: {
1399
- responseModalities: ['image'],
1455
+ responseFormat: [{ type: 'image' }],
1400
1456
  previousInteractionId: interactionId,
1401
1457
  },
1402
1458
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/google",
3
- "version": "3.0.73",
3
+ "version": "3.0.75",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -57,6 +57,7 @@ function appendToolResultParts(
57
57
  type: string;
58
58
  [key: string]: unknown;
59
59
  }>,
60
+ toolCallId?: string,
60
61
  ): void {
61
62
  const functionResponseParts: GoogleGenerativeAIFunctionResponsePart[] = [];
62
63
  const responseTextParts: string[] = [];
@@ -99,6 +100,7 @@ function appendToolResultParts(
99
100
 
100
101
  parts.push({
101
102
  functionResponse: {
103
+ ...(toolCallId != null ? { id: toolCallId } : {}),
102
104
  name: toolName,
103
105
  response: {
104
106
  name: toolName,
@@ -126,12 +128,14 @@ function appendLegacyToolResultParts(
126
128
  type: string;
127
129
  [key: string]: unknown;
128
130
  }>,
131
+ toolCallId?: string,
129
132
  ): void {
130
133
  for (const contentPart of outputValue) {
131
134
  switch (contentPart.type) {
132
135
  case 'text':
133
136
  parts.push({
134
137
  functionResponse: {
138
+ ...(toolCallId != null ? { id: toolCallId } : {}),
135
139
  name: toolName,
136
140
  response: {
137
141
  name: toolName,
@@ -315,6 +319,9 @@ export function convertToGoogleGenerativeAIMessages(
315
319
 
316
320
  return {
317
321
  functionCall: {
322
+ ...(part.toolCallId != null
323
+ ? { id: part.toolCallId }
324
+ : {}),
318
325
  name: part.toolName,
319
326
  args: part.input,
320
327
  },
@@ -405,13 +412,24 @@ export function convertToGoogleGenerativeAIMessages(
405
412
 
406
413
  if (output.type === 'content') {
407
414
  if (supportsFunctionResponseParts) {
408
- appendToolResultParts(parts, part.toolName, output.value);
415
+ appendToolResultParts(
416
+ parts,
417
+ part.toolName,
418
+ output.value,
419
+ part.toolCallId,
420
+ );
409
421
  } else {
410
- appendLegacyToolResultParts(parts, part.toolName, output.value);
422
+ appendLegacyToolResultParts(
423
+ parts,
424
+ part.toolName,
425
+ output.value,
426
+ part.toolCallId,
427
+ );
411
428
  }
412
429
  } else {
413
430
  parts.push({
414
431
  functionResponse: {
432
+ ...(part.toolCallId != null ? { id: part.toolCallId } : {}),
415
433
  name: part.toolName,
416
434
  response: {
417
435
  name: part.toolName,
@@ -349,7 +349,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
349
349
  } else if ('functionCall' in part && part.functionCall.name != null) {
350
350
  content.push({
351
351
  type: 'tool-call' as const,
352
- toolCallId: this.config.generateId(),
352
+ toolCallId: part.functionCall.id ?? this.config.generateId(),
353
353
  toolName: part.functionCall.name,
354
354
  input: JSON.stringify(part.functionCall.args ?? {}),
355
355
  providerMetadata: part.thoughtSignature
@@ -828,7 +828,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
828
828
  part.functionCall.name != null &&
829
829
  part.functionCall.willContinue === true
830
830
  ) {
831
- const toolCallId = generateId();
831
+ const toolCallId = part.functionCall.id ?? generateId();
832
832
  const accumulator = new GoogleJSONAccumulator();
833
833
  activeStreamingToolCalls.push({
834
834
  toolCallId,
@@ -910,7 +910,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
910
910
 
911
911
  hasToolCalls = true;
912
912
  } else if (isCompleteCall) {
913
- const toolCallId = generateId();
913
+ const toolCallId = part.functionCall.id ?? generateId();
914
914
  const toolName = part.functionCall.name!;
915
915
  const args =
916
916
  typeof part.functionCall.args === 'string'
@@ -947,7 +947,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
947
947
 
948
948
  hasToolCalls = true;
949
949
  } else if (isNoArgsCompleteCall) {
950
- const toolCallId = generateId();
950
+ const toolCallId = part.functionCall.id ?? generateId();
951
951
  const toolName = part.functionCall.name!;
952
952
 
953
953
  controller.enqueue({
@@ -1257,6 +1257,7 @@ const getContentSchema = () =>
1257
1257
  // note: order matters since text can be fully empty
1258
1258
  z.object({
1259
1259
  functionCall: z.object({
1260
+ id: z.string().nullish(),
1260
1261
  name: z.string().nullish(),
1261
1262
  args: z.unknown().nullish(),
1262
1263
  partialArgs: z.array(partialArgSchema).nullish(),
@@ -23,9 +23,13 @@ export type GoogleGenerativeAIContent = {
23
23
  export type GoogleGenerativeAIContentPart =
24
24
  | { text: string; thought?: boolean; thoughtSignature?: string }
25
25
  | { inlineData: { mimeType: string; data: string } }
26
- | { functionCall: { name: string; args: unknown }; thoughtSignature?: string }
26
+ | {
27
+ functionCall: { id?: string; name: string; args: unknown };
28
+ thoughtSignature?: string;
29
+ }
27
30
  | {
28
31
  functionResponse: {
32
+ id?: string;
29
33
  name: string;
30
34
  response: unknown;
31
35
  parts?: Array<GoogleGenerativeAIFunctionResponsePart>;