npm - @ai-sdk/google - Versions diffs - 3.0.73 → 3.0.75 - Mend

@ai-sdk/google 3.0.73 → 3.0.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/CHANGELOG.md +12 -0
package/dist/index.d.mts +17 -0
package/dist/index.d.ts +17 -0
package/dist/index.js +521 -340
package/dist/index.js.map +1 -1
package/dist/index.mjs +521 -340
package/dist/index.mjs.map +1 -1
package/dist/internal/index.d.mts +1 -0
package/dist/internal/index.d.ts +1 -0
package/dist/internal/index.js +43 -28
package/dist/internal/index.js.map +1 -1
package/dist/internal/index.mjs +43 -28
package/dist/internal/index.mjs.map +1 -1
package/docs/15-google-generative-ai.mdx +72 -16
package/package.json +1 -1
package/src/convert-to-google-generative-ai-messages.ts +20 -2
package/src/google-generative-ai-language-model.ts +5 -4
package/src/google-generative-ai-prompt.ts +5 -1
package/src/interactions/build-google-interactions-stream-transform.ts +285 -154
package/src/interactions/convert-to-google-interactions-input.ts +57 -133
package/src/interactions/extract-google-interactions-sources.ts +3 -3
package/src/interactions/google-interactions-api.ts +179 -115
package/src/interactions/google-interactions-language-model-options.ts +61 -0
package/src/interactions/google-interactions-language-model.ts +100 -38
package/src/interactions/google-interactions-prompt.ts +189 -114
package/src/interactions/map-google-interactions-finish-reason.ts +3 -5
package/src/interactions/parse-google-interactions-outputs.ts +80 -74
package/src/interactions/prepare-google-interactions-tools.ts +1 -1
package/src/interactions/stream-google-interactions.ts +1 -1
package/src/interactions/synthesize-google-interactions-agent-stream.ts +1 -1

package/docs/15-google-generative-ai.mdx CHANGED Viewed

@@ -1179,21 +1179,28 @@ The following optional provider options are available:
   Whether the model returns synthesized thought summaries on reasoning
   parts. Defaults to the API default.
-- **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_
+- **responseFormat** _Array\<\{ type: 'text' | 'image' | 'audio'; mimeType?: string; schema?: unknown; aspectRatio?: string; imageSize?: '1K' \| '2K' \| '4K' \| '512' \}\>_
-  Image generation configuration when `responseModalities` includes
-  `'image'`. `aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`,
-  `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
+  Output-format entries that map directly to the API's `response_format`
+  array. Use this for fine-grained control over image, audio, or non-JSON
+  text outputs (e.g. `aspectRatio` and `imageSize` for image generation).
+  The AI SDK call-level `responseFormat: { type: 'json', schema }` still
+  drives JSON-mode automatically and prepends a matching text entry;
+  entries listed here are appended.
-- **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
+  `aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`,
+  `9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
-  Media resolution applied to image inputs / outputs.
+- **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_ (deprecated)
-- **responseModalities** _Array\<'text' | 'image' | 'audio' | 'video' | 'document'\>_
+  Use **responseFormat** with a `{ type: 'image', ... }` entry instead.
+  Retained for backwards compatibility; the SDK translates `imageConfig`
+  into a matching `response_format` image entry and emits a warning when
+  set. Ignored when `responseFormat` already supplies an image entry.
-  The modalities the model may emit. Defaults to text-only. Pass
-  `['image']` (or `['text', 'image']`) to enable native image output. See
-  [Image output](#image-output-via-interactions).
+- **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
+  Media resolution applied to image inputs / outputs.
 - **serviceTier** _'flex' | 'standard' | 'priority'_
@@ -1346,9 +1353,10 @@ const { text, toolCalls } = await generateText({
 ### Image output via Interactions
-Set `responseModalities: ['image']` on a Gemini image-capable model to get
-images as `LanguageModelV4FilePart` files in the response. No tool wrapping
-is required.
+Add a `{ type: 'image' }` entry to `responseFormat` on a Gemini
+image-capable model to get images as `LanguageModelV4FilePart` files in
+the response. No tool wrapping is required, and the entry doubles as the
+place to set `aspectRatio`, `imageSize`, and `mimeType`.
 ```ts
 import { google } from '@ai-sdk/google';
@@ -1359,7 +1367,7 @@ const result = await generateText({
   prompt: 'Generate an image of a comic cat in a spaceship.',
   providerOptions: {
     google: {
-      responseModalities: ['image'],
+      responseFormat: [{ type: 'image' }],
     },
   },
 });
@@ -1371,6 +1379,54 @@ for (const file of result.files) {
 }
 ```
+To control aspect ratio, image size, or output mime type, add those
+fields to the same image entry:
+```ts
+const result = await generateText({
+  model: google.interactions('gemini-3-pro-image-preview'),
+  prompt: 'Generate a high-quality landscape photo of mountains at sunset.',
+  providerOptions: {
+    google: {
+      responseFormat: [
+        {
+          type: 'image',
+          aspectRatio: '16:9',
+          imageSize: '4K',
+        },
+      ],
+    },
+  },
+});
+```
+For multimodal output, list one entry per modality. The model returns
+text in `result.text` and the accompanying image(s) in `result.files`:
+```ts
+import { google } from '@ai-sdk/google';
+import { generateText } from 'ai';
+const result = await generateText({
+  model: google.interactions('gemini-2.5-flash-image'),
+  prompt:
+    'Tell me a three sentence bedtime story about a unicorn, accompanied by a suitable illustration.',
+  providerOptions: {
+    google: {
+      responseFormat: [
+        { type: 'text' },
+        { type: 'image', aspectRatio: '16:9' },
+      ],
+    },
+  },
+});
+console.log(result.text);
+const images = result.files.filter(file => file.mediaType.startsWith('image/'));
+// images[0].uint8Array | images[0].base64 | images[0].mediaType
+```
 Iterative image editing pairs naturally with stateful chaining — keep
 `previousInteractionId` set across turns and the model edits its prior
 output:
@@ -1384,7 +1440,7 @@ const model = google.interactions('gemini-3-pro-image-preview');
 const turn1 = await generateText({
   model,
   prompt: 'Generate an image of a comic cat in a spaceship.',
-  providerOptions: { google: { responseModalities: ['image'] } },
+  providerOptions: { google: { responseFormat: [{ type: 'image' }] } },
 });
 const interactionId = turn1.providerMetadata?.google?.interactionId as
@@ -1396,7 +1452,7 @@ const turn2 = await generateText({
   prompt: 'now make the cat red',
   providerOptions: {
     google: {
-      responseModalities: ['image'],
+      responseFormat: [{ type: 'image' }],
       previousInteractionId: interactionId,
     },
   },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ai-sdk/google",
-  "version": "3.0.73",
+  "version": "3.0.75",
   "license": "Apache-2.0",
   "sideEffects": false,
   "main": "./dist/index.js",

package/src/convert-to-google-generative-ai-messages.ts CHANGED Viewed

@@ -57,6 +57,7 @@ function appendToolResultParts(
     type: string;
     [key: string]: unknown;
   }>,
+  toolCallId?: string,
 ): void {
   const functionResponseParts: GoogleGenerativeAIFunctionResponsePart[] = [];
   const responseTextParts: string[] = [];
@@ -99,6 +100,7 @@ function appendToolResultParts(
   parts.push({
     functionResponse: {
+      ...(toolCallId != null ? { id: toolCallId } : {}),
       name: toolName,
       response: {
         name: toolName,
@@ -126,12 +128,14 @@ function appendLegacyToolResultParts(
     type: string;
     [key: string]: unknown;
   }>,
+  toolCallId?: string,
 ): void {
   for (const contentPart of outputValue) {
     switch (contentPart.type) {
       case 'text':
         parts.push({
           functionResponse: {
+            ...(toolCallId != null ? { id: toolCallId } : {}),
             name: toolName,
             response: {
               name: toolName,
@@ -315,6 +319,9 @@ export function convertToGoogleGenerativeAIMessages(
                   return {
                     functionCall: {
+                      ...(part.toolCallId != null
+                        ? { id: part.toolCallId }
+                        : {}),
                       name: part.toolName,
                       args: part.input,
                     },
@@ -405,13 +412,24 @@ export function convertToGoogleGenerativeAIMessages(
           if (output.type === 'content') {
             if (supportsFunctionResponseParts) {
-              appendToolResultParts(parts, part.toolName, output.value);
+              appendToolResultParts(
+                parts,
+                part.toolName,
+                output.value,
+                part.toolCallId,
+              );
             } else {
-              appendLegacyToolResultParts(parts, part.toolName, output.value);
+              appendLegacyToolResultParts(
+                parts,
+                part.toolName,
+                output.value,
+                part.toolCallId,
+              );
             }
           } else {
             parts.push({
               functionResponse: {
+                ...(part.toolCallId != null ? { id: part.toolCallId } : {}),
                 name: part.toolName,
                 response: {
                   name: part.toolName,

package/src/google-generative-ai-language-model.ts CHANGED Viewed

@@ -349,7 +349,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
       } else if ('functionCall' in part && part.functionCall.name != null) {
         content.push({
           type: 'tool-call' as const,
-          toolCallId: this.config.generateId(),
+          toolCallId: part.functionCall.id ?? this.config.generateId(),
           toolName: part.functionCall.name,
           input: JSON.stringify(part.functionCall.args ?? {}),
           providerMetadata: part.thoughtSignature
@@ -828,7 +828,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
                     part.functionCall.name != null &&
                     part.functionCall.willContinue === true
                   ) {
-                    const toolCallId = generateId();
+                    const toolCallId = part.functionCall.id ?? generateId();
                     const accumulator = new GoogleJSONAccumulator();
                     activeStreamingToolCalls.push({
                       toolCallId,
@@ -910,7 +910,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
                   hasToolCalls = true;
                 } else if (isCompleteCall) {
-                  const toolCallId = generateId();
+                  const toolCallId = part.functionCall.id ?? generateId();
                   const toolName = part.functionCall.name!;
                   const args =
                     typeof part.functionCall.args === 'string'
@@ -947,7 +947,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV3 {
                   hasToolCalls = true;
                 } else if (isNoArgsCompleteCall) {
-                  const toolCallId = generateId();
+                  const toolCallId = part.functionCall.id ?? generateId();
                   const toolName = part.functionCall.name!;
                   controller.enqueue({
@@ -1257,6 +1257,7 @@ const getContentSchema = () =>
           // note: order matters since text can be fully empty
           z.object({
             functionCall: z.object({
+              id: z.string().nullish(),
               name: z.string().nullish(),
               args: z.unknown().nullish(),
               partialArgs: z.array(partialArgSchema).nullish(),

package/src/google-generative-ai-prompt.ts CHANGED Viewed

@@ -23,9 +23,13 @@ export type GoogleGenerativeAIContent = {
 export type GoogleGenerativeAIContentPart =
   | { text: string; thought?: boolean; thoughtSignature?: string }
   | { inlineData: { mimeType: string; data: string } }
-  | { functionCall: { name: string; args: unknown }; thoughtSignature?: string }
+  | {
+      functionCall: { id?: string; name: string; args: unknown };
+      thoughtSignature?: string;
+    }
   | {
       functionResponse: {
+        id?: string;
         name: string;
         response: unknown;
         parts?: Array<GoogleGenerativeAIFunctionResponsePart>;