@ai-sdk/google 4.0.0-beta.8 → 4.0.0-beta.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/CHANGELOG.md +608 -5
  2. package/README.md +6 -4
  3. package/dist/index.d.ts +297 -54
  4. package/dist/index.js +5409 -640
  5. package/dist/index.js.map +1 -1
  6. package/dist/internal/index.d.ts +97 -26
  7. package/dist/internal/index.js +1653 -453
  8. package/dist/internal/index.js.map +1 -1
  9. package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
  10. package/package.json +16 -17
  11. package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
  12. package/src/convert-json-schema-to-openapi-schema.ts +1 -1
  13. package/src/convert-to-google-messages.ts +647 -0
  14. package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
  15. package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
  16. package/src/google-error.ts +1 -1
  17. package/src/google-files.ts +225 -0
  18. package/src/google-image-model-options.ts +35 -0
  19. package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
  20. package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
  21. package/src/google-json-accumulator.ts +371 -0
  22. package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
  23. package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +691 -217
  24. package/src/google-prepare-tools.ts +72 -12
  25. package/src/google-prompt.ts +86 -0
  26. package/src/google-provider.ts +157 -53
  27. package/src/google-speech-api.ts +36 -0
  28. package/src/google-speech-model-options.ts +48 -0
  29. package/src/google-speech-model.ts +311 -0
  30. package/src/google-video-model-options.ts +43 -0
  31. package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
  32. package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
  33. package/src/index.ts +40 -9
  34. package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
  35. package/src/interactions/cancel-google-interaction.ts +60 -0
  36. package/src/interactions/convert-google-interactions-usage.ts +47 -0
  37. package/src/interactions/convert-to-google-interactions-input.ts +557 -0
  38. package/src/interactions/extract-google-interactions-sources.ts +252 -0
  39. package/src/interactions/google-interactions-agent.ts +15 -0
  40. package/src/interactions/google-interactions-api.ts +530 -0
  41. package/src/interactions/google-interactions-language-model-options.ts +262 -0
  42. package/src/interactions/google-interactions-language-model.ts +776 -0
  43. package/src/interactions/google-interactions-prompt.ts +582 -0
  44. package/src/interactions/google-interactions-provider-metadata.ts +23 -0
  45. package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
  46. package/src/interactions/parse-google-interactions-outputs.ts +252 -0
  47. package/src/interactions/poll-google-interactions.ts +129 -0
  48. package/src/interactions/prepare-google-interactions-tools.ts +245 -0
  49. package/src/interactions/stream-google-interactions.ts +242 -0
  50. package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
  51. package/src/internal/index.ts +3 -2
  52. package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
  53. package/src/realtime/google-realtime-event-mapper.ts +383 -0
  54. package/src/realtime/google-realtime-model-options.ts +3 -0
  55. package/src/realtime/google-realtime-model.ts +160 -0
  56. package/src/realtime/index.ts +2 -0
  57. package/src/tool/code-execution.ts +2 -2
  58. package/src/tool/enterprise-web-search.ts +9 -3
  59. package/src/tool/file-search.ts +5 -7
  60. package/src/tool/google-maps.ts +3 -2
  61. package/src/tool/google-search.ts +11 -12
  62. package/src/tool/url-context.ts +4 -2
  63. package/src/tool/vertex-rag-store.ts +9 -6
  64. package/dist/index.d.mts +0 -384
  65. package/dist/index.mjs +0 -2519
  66. package/dist/index.mjs.map +0 -1
  67. package/dist/internal/index.d.mts +0 -287
  68. package/dist/internal/index.mjs +0 -1708
  69. package/dist/internal/index.mjs.map +0 -1
  70. package/src/convert-to-google-generative-ai-messages.ts +0 -239
  71. package/src/google-generative-ai-prompt.ts +0 -47
@@ -1,12 +1,12 @@
1
1
  ---
2
- title: Google Generative AI
3
- description: Learn how to use Google Generative AI Provider.
2
+ title: Google
3
+ description: Learn how to use Google Provider.
4
4
  ---
5
5
 
6
- # Google Generative AI Provider
6
+ # Google Provider
7
7
 
8
- The [Google Generative AI](https://ai.google.dev) provider contains language and embedding model support for
9
- the [Google Generative AI](https://ai.google.dev/api/rest) APIs.
8
+ The [Google](https://ai.google.dev) provider contains language and embedding model support for
9
+ the [Google](https://ai.google.dev/api/rest) APIs.
10
10
 
11
11
  ## Setup
12
12
 
@@ -36,17 +36,17 @@ You can import the default provider instance `google` from `@ai-sdk/google`:
36
36
  import { google } from '@ai-sdk/google';
37
37
  ```
38
38
 
39
- If you need a customized setup, you can import `createGoogleGenerativeAI` from `@ai-sdk/google` and create a provider instance with your settings:
39
+ If you need a customized setup, you can import `createGoogle` from `@ai-sdk/google` and create a provider instance with your settings:
40
40
 
41
41
  ```ts
42
- import { createGoogleGenerativeAI } from '@ai-sdk/google';
42
+ import { createGoogle } from '@ai-sdk/google';
43
43
 
44
- const google = createGoogleGenerativeAI({
44
+ const google = createGoogle({
45
45
  // custom settings
46
46
  });
47
47
  ```
48
48
 
49
- You can use the following optional settings to customize the Google Generative AI provider instance:
49
+ You can use the following optional settings to customize the Google provider instance:
50
50
 
51
51
  - **baseURL** _string_
52
52
 
@@ -89,7 +89,7 @@ The models support tool calls and some have multi-modal capabilities.
89
89
  const model = google('gemini-2.5-flash');
90
90
  ```
91
91
 
92
- You can use Google Generative AI language models to generate text with the `generateText` function:
92
+ You can use Google language models to generate text with the `generateText` function:
93
93
 
94
94
  ```ts
95
95
  import { google } from '@ai-sdk/google';
@@ -101,11 +101,11 @@ const { text } = await generateText({
101
101
  });
102
102
  ```
103
103
 
104
- Google Generative AI language models can also be used in the `streamText` function
104
+ Google language models can also be used in the `streamText` function
105
105
  and support structured data generation with [`Output`](/docs/reference/ai-sdk-core/output)
106
106
  (see [AI SDK Core](/docs/ai-sdk-core)).
107
107
 
108
- Google Generative AI also supports some model specific settings that are not part of the [standard call settings](/docs/ai-sdk-core/settings).
108
+ Google also supports some model-specific settings that are not part of the [standard call settings](/docs/ai-sdk-core/settings).
109
109
  You can pass them as an options argument:
110
110
 
111
111
  ```ts
@@ -128,7 +128,7 @@ await generateText({
128
128
  });
129
129
  ```
130
130
 
131
- The following optional provider options are available for Google Generative AI models:
131
+ The following optional provider options are available for Google models:
132
132
 
133
133
  - **cachedContent** _string_
134
134
 
@@ -141,7 +141,7 @@ The following optional provider options are available for Google Generative AI m
141
141
 
142
142
  This is useful when the JSON Schema contains elements that are
143
143
  not supported by the OpenAPI schema version that
144
- Google Generative AI uses. You can use this to disable
144
+ Google uses. You can use this to disable
145
145
  structured outputs if you need to.
146
146
 
147
147
  See [Troubleshooting: Schema Limitations](#schema-limitations) for more details.
@@ -149,11 +149,9 @@ The following optional provider options are available for Google Generative AI m
149
149
  - **safetySettings** _Array\<\{ category: string; threshold: string \}\>_
150
150
 
151
151
  Optional. Safety settings for the model.
152
-
153
152
  - **category** _string_
154
153
 
155
154
  The category of the safety setting. Can be one of the following:
156
-
157
155
  - `HARM_CATEGORY_UNSPECIFIED`
158
156
  - `HARM_CATEGORY_HATE_SPEECH`
159
157
  - `HARM_CATEGORY_DANGEROUS_CONTENT`
@@ -164,7 +162,6 @@ The following optional provider options are available for Google Generative AI m
164
162
  - **threshold** _string_
165
163
 
166
164
  The threshold of the safety setting. Can be one of the following:
167
-
168
165
  - `HARM_BLOCK_THRESHOLD_UNSPECIFIED`
169
166
  - `BLOCK_LOW_AND_ABOVE`
170
167
  - `BLOCK_MEDIUM_AND_ABOVE`
@@ -177,8 +174,7 @@ The following optional provider options are available for Google Generative AI m
177
174
 
178
175
  - **thinkingConfig** _\{ thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high'; thinkingBudget?: number; includeThoughts?: boolean \}_
179
176
 
180
- Optional. Configuration for the model's thinking process. Only supported by specific [Google Generative AI models](https://ai.google.dev/gemini-api/docs/thinking).
181
-
177
+ Optional. Configuration for the model's thinking process. Only supported by specific [Google models](https://ai.google.dev/gemini-api/docs/thinking).
182
178
  - **thinkingLevel** _'minimal' | 'low' | 'medium' | 'high'_
183
179
 
184
180
  Optional. Controls the thinking depth for Gemini 3 models. Gemini 3.1 Pro supports 'low', 'medium', and 'high', Gemini 3 Pro supports 'low' and 'high', while Gemini 3 Flash supports all four levels: 'minimal', 'low', 'medium', and 'high'. Only supported by Gemini 3 models.
@@ -186,7 +182,7 @@ The following optional provider options are available for Google Generative AI m
186
182
  - **thinkingBudget** _number_
187
183
 
188
184
  Optional. Gives the model guidance on the number of thinking tokens it can use when generating a response. Setting it to 0 disables thinking, if the model supports it.
189
- For more information about the possible value ranges for each model see [Google Generative AI thinking documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget).
185
+ For more information about the possible value ranges for each model see [Google thinking documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget).
190
186
 
191
187
  <Note>
192
188
  This option is for Gemini 2.5 models. Gemini 3 models should use
@@ -199,12 +195,10 @@ The following optional provider options are available for Google Generative AI m
199
195
 
200
196
  - **imageConfig** _\{ aspectRatio?: string, imageSize?: string \}_
201
197
 
202
- Optional. Configuration for the models image generation. Only supported by specific [Google Generative AI models](https://ai.google.dev/gemini-api/docs/image-generation).
203
-
198
+ Optional. Configuration for the model's image generation. Only supported by specific [Google models](https://ai.google.dev/gemini-api/docs/image-generation).
204
199
  - **aspectRatio** _string_
205
200
 
206
201
  Model defaults to generate 1:1 squares, or to matching the output image size to that of your input image. Can be one of the following:
207
-
208
202
  - 1:1
209
203
  - 2:3
210
204
  - 3:2
@@ -219,7 +213,6 @@ The following optional provider options are available for Google Generative AI m
219
213
  - **imageSize** _string_
220
214
 
221
215
  Controls the output image resolution. Defaults to 1K. Can be one of the following:
222
-
223
216
  - 1K
224
217
  - 2K
225
218
  - 4K
@@ -232,7 +225,6 @@ The following optional provider options are available for Google Generative AI m
232
225
  - **mediaResolution** _string_
233
226
 
234
227
  Optional. If specified, the media resolution specified will be used. Can be one of the following:
235
-
236
228
  - `MEDIA_RESOLUTION_UNSPECIFIED`
237
229
  - `MEDIA_RESOLUTION_LOW`
238
230
  - `MEDIA_RESOLUTION_MEDIUM`
@@ -245,6 +237,18 @@ The following optional provider options are available for Google Generative AI m
245
237
  Optional. Defines labels used in billing reports. Available on Vertex AI only.
246
238
  See [Google Cloud labels documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls).
247
239
 
240
+ - **serviceTier** _'standard' | 'flex' | 'priority'_
241
+
242
+ Optional. The service tier to use for the request.
243
+ Set to `'flex'` for 50% cheaper processing at the cost of increased latency.
244
+ Set to `'priority'` for ultra-low latency at a 75-100% price premium over `'standard'`.
245
+
246
+ Because Priority can be gracefully downgraded to Standard under load, the
247
+ tier the request actually ran on is surfaced on
248
+ `result.providerMetadata.google.serviceTier`. See
249
+ [Priority inference](https://ai.google.dev/gemini-api/docs/priority-inference)
250
+ and [Flex inference](https://ai.google.dev/gemini-api/docs/flex-inference).
251
+
248
252
  - **threshold** _string_
249
253
 
250
254
  Optional. Standalone threshold setting that can be used independently of `safetySettings`.
@@ -252,7 +256,7 @@ The following optional provider options are available for Google Generative AI m
252
256
 
253
257
  ### Thinking
254
258
 
255
- The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" that significantly improves their reasoning and multi-step planning abilities, making them highly effective for complex tasks such as coding, advanced mathematics, and data analysis. For more information see [Google Generative AI thinking documentation](https://ai.google.dev/gemini-api/docs/thinking).
259
+ The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" that significantly improves their reasoning and multi-step planning abilities, making them highly effective for complex tasks such as coding, advanced mathematics, and data analysis. For more information see [Google thinking documentation](https://ai.google.dev/gemini-api/docs/thinking).
256
260
 
257
261
  #### Gemini 3 Models
258
262
 
@@ -312,7 +316,7 @@ console.log(reasoning); // Reasoning summary
312
316
 
313
317
  ### File Inputs
314
318
 
315
- The Google Generative AI provider supports file inputs, e.g. PDF files.
319
+ The Google provider supports file inputs, e.g. PDF files.
316
320
 
317
321
  ```ts
318
322
  import { google } from '@ai-sdk/google';
@@ -378,7 +382,7 @@ See [File Parts](/docs/foundations/prompts#file-parts) for details on how to use
378
382
 
379
383
  ### Cached Content
380
384
 
381
- Google Generative AI supports both explicit and implicit caching to help reduce costs on repetitive content.
385
+ Google supports both explicit and implicit caching to help reduce costs on repetitive content.
382
386
 
383
387
  #### Implicit Caching
384
388
 
@@ -510,7 +514,7 @@ the model has access to the latest information using Google Search.
510
514
 
511
515
  ```ts highlight="8,17-20"
512
516
  import { google } from '@ai-sdk/google';
513
- import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
517
+ import { GoogleProviderMetadata } from '@ai-sdk/google';
514
518
  import { generateText } from 'ai';
515
519
 
516
520
  const { text, sources, providerMetadata } = await generateText({
@@ -525,9 +529,7 @@ const { text, sources, providerMetadata } = await generateText({
525
529
 
526
530
  // access the grounding metadata. Casting to the provider metadata type
527
531
  // is optional but provides autocomplete and type safety.
528
- const metadata = providerMetadata?.google as
529
- | GoogleGenerativeAIProviderMetadata
530
- | undefined;
532
+ const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
531
533
  const groundingMetadata = metadata?.groundingMetadata;
532
534
  const safetyRatings = metadata?.safetyRatings;
533
535
  ```
@@ -537,14 +539,12 @@ The `googleSearch` tool accepts the following optional configuration options:
537
539
  - **searchTypes** _object_
538
540
 
539
541
  Enables specific search types. Both can be combined.
540
-
541
542
  - `webSearch`: Enable web search grounding (pass `{}` to enable). This is the default.
542
543
  - `imageSearch`: Enable [image search grounding](https://ai.google.dev/gemini-api/docs/image-generation#image-search) (pass `{}` to enable).
543
544
 
544
545
  - **timeRangeFilter** _object_
545
546
 
546
547
  Restricts search results to a specific time range. Both `startTime` and `endTime` are required.
547
-
548
548
  - `startTime`: Start time in ISO 8601 format (e.g. `'2025-01-01T00:00:00Z'`).
549
549
  - `endTime`: End time in ISO 8601 format (e.g. `'2025-12-31T23:59:59Z'`).
550
550
 
@@ -563,12 +563,10 @@ When Google Search grounding is enabled, the model will include sources in the r
563
563
  Additionally, the grounding metadata includes detailed information about how search results were used to ground the model's response. Here are the available fields:
564
564
 
565
565
  - **`webSearchQueries`** (`string[] | null`)
566
-
567
566
  - Array of search queries used to retrieve information
568
567
  - Example: `["What's the weather in Chicago this weekend?"]`
569
568
 
570
569
  - **`searchEntryPoint`** (`{ renderedContent: string } | null`)
571
-
572
570
  - Contains the main search result content used as an entry point
573
571
  - The `renderedContent` field contains the formatted content
574
572
 
@@ -619,10 +617,10 @@ the model has access to a compliance-focused web index designed for highly-regul
619
617
  </Note>
620
618
 
621
619
  ```ts
622
- import { createVertex } from '@ai-sdk/google-vertex';
620
+ import { createGoogleVertex } from '@ai-sdk/google-vertex';
623
621
  import { generateText } from 'ai';
624
622
 
625
- const vertex = createVertex({
623
+ const vertex = createGoogleVertex({
626
624
  project: 'my-project',
627
625
  location: 'us-central1',
628
626
  });
@@ -686,9 +684,7 @@ const { text, sources, providerMetadata } = await generateText({
686
684
  },
687
685
  });
688
686
 
689
- const metadata = providerMetadata?.google as
690
- | GoogleGenerativeAIProviderMetadata
691
- | undefined;
687
+ const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
692
688
  const groundingMetadata = metadata?.groundingMetadata;
693
689
  const urlContextMetadata = metadata?.urlContextMetadata;
694
690
  ```
@@ -696,7 +692,6 @@ const urlContextMetadata = metadata?.urlContextMetadata;
696
692
  The URL context metadata includes detailed information about how the model used the URL context to generate the response. Here are the available fields:
697
693
 
698
694
  - **`urlMetadata`** (`{ retrievedUrl: string; urlRetrievalStatus: string; }[] | null`)
699
-
700
695
  - Array of URL context metadata
701
696
  - Each object includes:
702
697
  - **`retrievedUrl`**: The URL of the context
@@ -708,7 +703,7 @@ Example response:
708
703
  {
709
704
  "urlMetadata": [
710
705
  {
711
- "retrievedUrl": "https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai",
706
+ "retrievedUrl": "https://ai-sdk.dev/providers/ai-sdk-providers/google",
712
707
  "urlRetrievalStatus": "URL_RETRIEVAL_STATUS_SUCCESS"
713
708
  }
714
709
  ]
@@ -722,8 +717,8 @@ With the URL context tool, you will also get the `groundingMetadata`.
722
717
  "groundingChunks": [
723
718
  {
724
719
  "web": {
725
- "uri": "https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai",
726
- "title": "Google Generative AI - AI SDK Providers"
720
+ "uri": "https://ai-sdk.dev/providers/ai-sdk-providers/google",
721
+ "title": "Google - AI SDK Providers"
727
722
  }
728
723
  }
729
724
  ],
@@ -760,7 +755,7 @@ import { generateText } from 'ai';
760
755
 
761
756
  const { text, sources, providerMetadata } = await generateText({
762
757
  model: google('gemini-2.5-flash'),
763
- prompt: `Based on this context: https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai, tell me how to use Gemini with AI SDK.
758
+ prompt: `Based on this context: https://ai-sdk.dev/providers/ai-sdk-providers/google, tell me how to use Gemini with AI SDK.
764
759
  Also, provide the latest news about AI SDK V5.`,
765
760
  tools: {
766
761
  google_search: google.tools.googleSearch({}),
@@ -768,9 +763,7 @@ const { text, sources, providerMetadata } = await generateText({
768
763
  },
769
764
  });
770
765
 
771
- const metadata = providerMetadata?.google as
772
- | GoogleGenerativeAIProviderMetadata
773
- | undefined;
766
+ const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
774
767
  const groundingMetadata = metadata?.groundingMetadata;
775
768
  const urlContextMetadata = metadata?.urlContextMetadata;
776
769
  ```
@@ -782,7 +775,7 @@ the model has access to Google Maps data for location-aware responses. This enab
782
775
 
783
776
  ```ts highlight="7-16"
784
777
  import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
785
- import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
778
+ import { GoogleProviderMetadata } from '@ai-sdk/google';
786
779
  import { generateText } from 'ai';
787
780
 
788
781
  const { text, sources, providerMetadata } = await generateText({
@@ -801,9 +794,7 @@ const { text, sources, providerMetadata } = await generateText({
801
794
  'What are the best Italian restaurants within a 15-minute walk from here?',
802
795
  });
803
796
 
804
- const metadata = providerMetadata?.google as
805
- | GoogleGenerativeAIProviderMetadata
806
- | undefined;
797
+ const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
807
798
  const groundingMetadata = metadata?.groundingMetadata;
808
799
  ```
809
800
 
@@ -842,11 +833,11 @@ This enables the model to provide answers based on your specific data sources an
842
833
  </Note>
843
834
 
844
835
  ```ts highlight="8,17-20"
845
- import { createVertex } from '@ai-sdk/google-vertex';
846
- import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
836
+ import { createGoogleVertex } from '@ai-sdk/google-vertex';
837
+ import { GoogleProviderMetadata } from '@ai-sdk/google';
847
838
  import { generateText } from 'ai';
848
839
 
849
- const vertex = createVertex({
840
+ const vertex = createGoogleVertex({
850
841
  project: 'my-project',
851
842
  location: 'us-central1',
852
843
  });
@@ -866,9 +857,7 @@ const { text, sources, providerMetadata } = await generateText({
866
857
 
867
858
  // access the grounding metadata. Casting to the provider metadata type
868
859
  // is optional but provides autocomplete and type safety.
869
- const metadata = providerMetadata?.google as
870
- | GoogleGenerativeAIProviderMetadata
871
- | undefined;
860
+ const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
872
861
  const groundingMetadata = metadata?.groundingMetadata;
873
862
  const safetyRatings = metadata?.safetyRatings;
874
863
  ```
@@ -878,7 +867,6 @@ When RAG Engine Grounding is enabled, the model will include sources from your R
878
867
  Additionally, the grounding metadata includes detailed information about how RAG results were used to ground the model's response. Here are the available fields:
879
868
 
880
869
  - **`groundingChunks`** (Array of chunk objects | null)
881
-
882
870
  - Contains the retrieved context chunks from your RAG corpus
883
871
  - Each chunk includes:
884
872
  - **`retrievedContext`**: Information about the retrieved context
@@ -887,7 +875,6 @@ Additionally, the grounding metadata includes detailed information about how RAG
887
875
  - `text`: The actual text content of the chunk
888
876
 
889
877
  - **`groundingSupports`** (Array of support objects | null)
890
-
891
878
  - Contains details about how specific response parts are supported by RAG results
892
879
  - Each support object includes:
893
880
  - **`segment`**: Information about the grounded text segment
@@ -931,12 +918,10 @@ Example response:
931
918
  The `vertexRagStore` tool accepts the following configuration options:
932
919
 
933
920
  - **`ragCorpus`** (`string`, required)
934
-
935
921
  - The RagCorpus resource name in the format: `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}`
936
922
  - This identifies your specific RAG corpus to search against
937
923
 
938
924
  - **`topK`** (`number`, optional)
939
-
940
925
  - The number of top contexts to retrieve from your RAG corpus
941
926
  - Defaults to the corpus configuration if not specified
942
927
 
@@ -1051,7 +1036,7 @@ const { output } = await generateText({
1051
1036
  });
1052
1037
  ```
1053
1038
 
1054
- The following Zod features are known to not work with Google Generative AI:
1039
+ The following Zod features are known to not work with Google:
1055
1040
 
1056
1041
  - `z.union`
1057
1042
  - `z.record`
@@ -1060,6 +1045,7 @@ The following Zod features are known to not work with Google Generative AI:
1060
1045
 
1061
1046
  | Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Google Search | URL Context |
1062
1047
  | ------------------------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
1048
+ | `gemini-3.5-flash` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
1063
1049
  | `gemini-3.1-pro-preview` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
1064
1050
  | `gemini-3.1-flash-image-preview` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
1065
1051
  | `gemini-3.1-flash-lite-preview` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
@@ -1079,6 +1065,635 @@ The following Zod features are known to not work with Google Generative AI:
1079
1065
  available provider model ID as a string if needed.
1080
1066
  </Note>
1081
1067
 
1068
+ ## Realtime Models
1069
+
1070
+ <Note type="warning">Realtime is an experimental feature.</Note>
1071
+
1072
+ You can create models that call the [Gemini Live API](https://ai.google.dev/gemini-api/docs/live)
1073
+ using the `.experimental_realtime()` factory method.
1074
+
1075
+ ```ts
1076
+ import { google } from '@ai-sdk/google';
1077
+
1078
+ const model = google.experimental_realtime('gemini-3.1-flash-live-preview');
1079
+ ```
1080
+
1081
+ Realtime sessions run in the browser and require a short-lived token created on
1082
+ your server with `google.experimental_realtime.getToken()`:
1083
+
1084
+ ```ts
1085
+ const token = await google.experimental_realtime.getToken({
1086
+ model: 'gemini-3.1-flash-live-preview',
1087
+ });
1088
+ ```
1089
+
1090
+ Google realtime models may require provider-specific audio formats, depending
1091
+ on the model and modality. See [Realtime](/docs/ai-sdk-core/realtime) for the
1092
+ complete setup and tool calling pattern.
1093
+
1094
+ ## Interactions API
1095
+
1096
+ The [Gemini Interactions API](https://ai.google.dev/gemini-api/docs/interactions)
1097
+ (`POST /v1beta/interactions`) is a separate Google endpoint with server-side
1098
+ state, unified content blocks, first-class built-in tools, agent presets,
1099
+ managed agents that run in a sandboxed Linux environment, and native
1100
+ multimodal image output. It is reached via the `google.interactions(...)`
1101
+ factory:
1102
+
1103
+ ```ts
1104
+ import { google } from '@ai-sdk/google';
1105
+ import { generateText } from 'ai';
1106
+
1107
+ const { text } = await generateText({
1108
+ model: google.interactions('gemini-2.5-flash'),
1109
+ prompt: 'Hello, how are you?',
1110
+ });
1111
+ ```
1112
+
1113
+ `google.interactions(...)` accepts a model ID string (e.g.
1114
+ `'gemini-2.5-flash'`, `'gemini-3-pro-preview'`), `{ agent: <name> }` to use
1115
+ a Gemini [agent preset](#agent-presets), or `{ managedAgent: <name> }` to
1116
+ invoke a [managed agent](#managed-agents) you created on Google's side.
1117
+ The returned model can be passed to `generateText` and `streamText` like
1118
+ any other AI SDK language model.
1119
+
1120
+ <Note>
1121
+ Use `google(...)` for the standard `:generateContent` /
1122
+ `:streamGenerateContent` endpoints, and `google.interactions(...)` for the new
1123
+ Interactions endpoint. Pick one per model instance — they target different
1124
+ request bodies and SSE event vocabularies.
1125
+ </Note>
1126
+
1127
+ ### Provider Options
1128
+
1129
+ The Interactions model reads its options from the shared
1130
+ `providerOptions.google.*` namespace. Validate them with the
1131
+ `GoogleLanguageModelInteractionsOptions` type:
1132
+
1133
+ ```ts
1134
+ import {
1135
+ google,
1136
+ type GoogleLanguageModelInteractionsOptions,
1137
+ } from '@ai-sdk/google';
1138
+ import { generateText } from 'ai';
1139
+
1140
+ await generateText({
1141
+ model: google.interactions('gemini-2.5-flash'),
1142
+ prompt: 'What color is the sky in one word?',
1143
+ providerOptions: {
1144
+ google: {
1145
+ serviceTier: 'priority',
1146
+ } satisfies GoogleLanguageModelInteractionsOptions,
1147
+ },
1148
+ });
1149
+ ```
1150
+
1151
+ The following optional provider options are available:
1152
+
1153
+ - **previousInteractionId** _string_
1154
+
1155
+ Server-side interaction id from a prior turn. When set, the server pulls
1156
+ prior context from its own state and only the new user message is sent on
1157
+ the wire. Pair with the default `store: true` to chain stateful
1158
+ conversations. See [Stateful chaining](#stateful-chaining).
1159
+
1160
+ - **store** _boolean_
1161
+
1162
+ Whether the server should persist the interaction. Defaults to `true`.
1163
+ Set to `false` for stateless multi-turn conversations where the full
1164
+ message history is re-sent on every turn.
1165
+
1166
+ - **agent** _string_
1167
+
1168
+ Name of a Gemini agent preset (e.g. `'deep-research-pro-preview-12-2025'`).
1169
+
1170
+ <Note>
1171
+
1172
+ Prefer the factory form `google.interactions({ agent: '...' })` over
1173
+ setting `agent` in provider options — the factory is type-checked
1174
+ against the supported agent names.
1175
+
1176
+ </Note>
1177
+
1178
+ - **agentConfig** _object_
1179
+
1180
+ Per-agent configuration. Currently supports `{ type: 'dynamic' }` and
1181
+ `{ type: 'deep-research', thinkingSummaries?, visualization?, collaborativePlanning? }`.
1182
+
1183
+ - **thinkingLevel** _'minimal' | 'low' | 'medium' | 'high'_
1184
+
1185
+ Controls reasoning depth for thinking-enabled models. Mapped onto the
1186
+ Interactions request's `thinking_level`.
1187
+
1188
+ - **thinkingSummaries** _'auto' | 'none'_
1189
+
1190
+ Whether the model returns synthesized thought summaries on reasoning
1191
+ parts. Defaults to the API default.
1192
+
1193
+ - **responseFormat** _Array\<\{ type: 'text' | 'image' | 'audio'; mimeType?: string; schema?: unknown; aspectRatio?: string; imageSize?: '1K' \| '2K' \| '4K' \| '512' \}\>_
1194
+
1195
+ Output-format entries that map directly to the API's `response_format`
1196
+ array. Use this for fine-grained control over image, audio, or non-JSON
1197
+ text outputs (e.g. `aspectRatio` and `imageSize` for image generation).
1198
+ The AI SDK call-level `responseFormat: { type: 'json', schema }` still
1199
+ drives JSON-mode automatically and prepends a matching text entry;
1200
+ entries listed here are appended.
1201
+
1202
+ `aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`,
1203
+ `9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
1204
+
1205
+ - **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_ (deprecated)
1206
+
1207
+ Use **responseFormat** with a `{ type: 'image', ... }` entry instead.
1208
+ Retained for backwards compatibility; the SDK translates `imageConfig`
1209
+ into a matching `response_format` image entry and emits a warning when
1210
+ set. Ignored when `responseFormat` already supplies an image entry.
1211
+
1212
+ - **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
1213
+
1214
+ Media resolution applied to image inputs / outputs.
1215
+
1216
+ - **serviceTier** _'flex' | 'standard' | 'priority'_
1217
+
1218
+ Service tier for the request. Mirrored back on
1219
+ `result.providerMetadata.google.serviceTier` for observability.
1220
+
1221
+ - **systemInstruction** _string_
1222
+
1223
+ Alternative to the AI SDK `system` message. If both are set, the AI SDK
1224
+ `system` message wins and a warning is emitted.
1225
+
1226
+ - **background** _boolean_
1227
+
1228
+ Run the interaction in the background. Required for agents whose
1229
+ server-side workflow cannot complete within a single request/response;
1230
+ rejected by agents that only support synchronous calls. When `true`,
1231
+ the POST returns a non-terminal status and the SDK polls
1232
+ `GET /interactions/{id}` until the work completes.
1233
+
1234
+ - **environment** _string \| object_
1235
+
1236
+ Sandbox environment configuration for [managed agents](#managed-agents).
1237
+ Pass `'remote'` to provision a fresh sandbox, an `environment_id`
1238
+ string to reuse an existing one, or an object of the form
1239
+ `{ type: 'remote', sources?, network? }` to preload files and/or
1240
+ constrain outbound traffic. Only applies to agent calls.
1241
+
1242
+ - **pollingTimeoutMs** _number_
1243
+
1244
+ Maximum time, in milliseconds, to poll a background interaction before
1245
+ giving up. Defaults to 30 minutes (1,800,000 ms). Long-running agents
1246
+ may need longer.
1247
+
1248
+ ### Provider Metadata
1249
+
1250
+ `result.providerMetadata.google` (typed via `GoogleInteractionsProviderMetadata`)
1251
+ exposes:
1252
+
1253
+ - **interactionId** _string_
1254
+
1255
+ Server-side interaction id. Pass this back as `previousInteractionId` on
1256
+ the next turn to chain.
1257
+
1258
+ - **serviceTier** _string_
1259
+
1260
+ Service tier the request actually ran on.
1261
+
1262
+ - **signature** _string_
1263
+
1264
+ Per-block signature hash, set by the SDK on output reasoning and
1265
+ tool-call parts. Round-tripped automatically on the next turn.
1266
+
1267
+ ### Stateful chaining
1268
+
1269
+ With the default `store: true`, the server retains the prior turn so the
1270
+ next request only needs to send the new user message and the
1271
+ `previousInteractionId`:
1272
+
1273
+ ```ts
1274
+ import {
1275
+ google,
1276
+ type GoogleLanguageModelInteractionsOptions,
1277
+ } from '@ai-sdk/google';
1278
+ import { generateText } from 'ai';
1279
+
1280
+ const turn1 = await generateText({
1281
+ model: google.interactions('gemini-2.5-flash'),
1282
+ prompt: 'What are the three largest cities in Spain?',
1283
+ });
1284
+
1285
+ const interactionId = turn1.providerMetadata?.google?.interactionId as
1286
+ | string
1287
+ | undefined;
1288
+
1289
+ const turn2 = await generateText({
1290
+ model: google.interactions('gemini-2.5-flash'),
1291
+ prompt: 'What is the most famous landmark in the second one?',
1292
+ providerOptions: {
1293
+ google: {
1294
+ previousInteractionId: interactionId,
1295
+ } satisfies GoogleLanguageModelInteractionsOptions,
1296
+ },
1297
+ });
1298
+ ```
1299
+
1300
+ For stateless multi-turn conversations, set `store: false` and re-send the
1301
+ full message history on every turn (no `previousInteractionId`):
1302
+
1303
+ ```ts
1304
+ import {
1305
+ google,
1306
+ type GoogleLanguageModelInteractionsOptions,
1307
+ } from '@ai-sdk/google';
1308
+ import { generateText, type ModelMessage } from 'ai';
1309
+
1310
+ const messages: Array<ModelMessage> = [
1311
+ { role: 'user', content: 'What are the three largest cities in Spain?' },
1312
+ ];
1313
+
1314
+ const turn1 = await generateText({
1315
+ model: google.interactions('gemini-2.5-flash'),
1316
+ messages,
1317
+ providerOptions: {
1318
+ google: { store: false } satisfies GoogleLanguageModelInteractionsOptions,
1319
+ },
1320
+ });
1321
+
1322
+ messages.push(...turn1.responseMessages);
1323
+ messages.push({
1324
+ role: 'user',
1325
+ content: 'What is the most famous landmark in the second one?',
1326
+ });
1327
+
1328
+ const turn2 = await generateText({
1329
+ model: google.interactions('gemini-2.5-flash'),
1330
+ messages,
1331
+ providerOptions: {
1332
+ google: { store: false } satisfies GoogleLanguageModelInteractionsOptions,
1333
+ },
1334
+ });
1335
+ ```
1336
+
1337
+ ### Built-in Tools
1338
+
1339
+ The Interactions API ships a built-in tool catalog. The provider-defined
1340
+ tools under `google.tools.*` map onto Interactions tool descriptors:
1341
+
1342
+ | AI SDK tool | Interactions tool type | Notes |
1343
+ | ------------------------------------- | ---------------------- | ----------------------------------------- |
1344
+ | `google.tools.googleSearch` | `google_search` | Web / image search grounding. |
1345
+ | `google.tools.codeExecution` | `code_execution` | Server-side Python execution. |
1346
+ | `google.tools.urlContext` | `url_context` | Fetch URLs referenced in the prompt. |
1347
+ | `google.tools.fileSearch` | `file_search` | Retrieval from File Search stores. |
1348
+ | `google.tools.googleMaps` | `google_maps` | Maps grounding for nearby-places queries. |
1349
+ | _provider tool_ `google.computer_use` | `computer_use` | Computer use (browser environment). |
1350
+ | _provider tool_ `google.mcp_server` | `mcp_server` | Remote MCP server passthrough. |
1351
+ | _provider tool_ `google.retrieval` | `retrieval` | Vertex AI Search retrieval. |
1352
+
1353
+ Function tools (`type: 'function'`) defined with the AI SDK `tool(...)`
1354
+ helper are translated to Interactions `function` tool descriptors. Other
1355
+ tool kinds emit a warning and are dropped.
1356
+
1357
+ ```ts
1358
+ import { google } from '@ai-sdk/google';
1359
+ import { generateText } from 'ai';
1360
+
1361
+ const { text, sources } = await generateText({
1362
+ model: google.interactions('gemini-2.5-flash'),
1363
+ tools: {
1364
+ google_search: google.tools.googleSearch({}),
1365
+ },
1366
+ prompt:
1367
+ "What's a notable AI development from this past week? " +
1368
+ 'Include the date for each item you mention.',
1369
+ });
1370
+ ```
1371
+
1372
+ Function tools work the same way as on the standard provider:
1373
+
1374
+ ```ts
1375
+ import { google } from '@ai-sdk/google';
1376
+ import { generateText, stepCountIs, tool } from 'ai';
1377
+ import { z } from 'zod';
1378
+
1379
+ const weatherTool = tool({
1380
+ description: 'Get the weather for a city.',
1381
+ inputSchema: z.object({ city: z.string() }),
1382
+ execute: async ({ city }) => `It is sunny in ${city}.`,
1383
+ });
1384
+
1385
+ const { text, toolCalls } = await generateText({
1386
+ model: google.interactions('gemini-2.5-flash'),
1387
+ tools: { getWeather: weatherTool },
1388
+ stopWhen: stepCountIs(5),
1389
+ prompt: 'What is the weather in San Francisco right now?',
1390
+ });
1391
+ ```
1392
+
1393
+ ### Image output via Interactions
1394
+
1395
+ Add a `{ type: 'image' }` entry to `responseFormat` on a Gemini
1396
+ image-capable model to get images as `LanguageModelV4FilePart` files in
1397
+ the response. No tool wrapping is required, and the entry doubles as the
1398
+ place to set `aspectRatio`, `imageSize`, and `mimeType`.
1399
+
1400
+ ```ts
1401
+ import {
1402
+ google,
1403
+ type GoogleLanguageModelInteractionsOptions,
1404
+ } from '@ai-sdk/google';
1405
+ import { generateText } from 'ai';
1406
+
1407
+ const result = await generateText({
1408
+ model: google.interactions('gemini-3-pro-image-preview'),
1409
+ prompt: 'Generate an image of a comic cat in a spaceship.',
1410
+ providerOptions: {
1411
+ google: {
1412
+ responseFormat: [{ type: 'image' }],
1413
+ } satisfies GoogleLanguageModelInteractionsOptions,
1414
+ },
1415
+ });
1416
+
1417
+ for (const file of result.files) {
1418
+ if (file.mediaType.startsWith('image/')) {
1419
+ // file.uint8Array | file.base64 | file.mediaType
1420
+ }
1421
+ }
1422
+ ```
1423
+
1424
+ To control aspect ratio, image size, or output mime type, add those
1425
+ fields to the same image entry:
1426
+
1427
+ ```ts
1428
+ import {
1429
+ google,
1430
+ type GoogleLanguageModelInteractionsOptions,
1431
+ } from '@ai-sdk/google';
1432
+ import { generateText } from 'ai';
1433
+
1434
+ const result = await generateText({
1435
+ model: google.interactions('gemini-3-pro-image-preview'),
1436
+ prompt: 'Generate a high-quality landscape photo of mountains at sunset.',
1437
+ providerOptions: {
1438
+ google: {
1439
+ responseFormat: [
1440
+ {
1441
+ type: 'image',
1442
+ aspectRatio: '16:9',
1443
+ imageSize: '4K',
1444
+ },
1445
+ ],
1446
+ } satisfies GoogleLanguageModelInteractionsOptions,
1447
+ },
1448
+ });
1449
+ ```
1450
+
1451
+ For multimodal output, list one entry per modality. The model returns
1452
+ text in `result.text` and the accompanying image(s) in `result.files`:
1453
+
1454
+ ```ts
1455
+ import {
1456
+ google,
1457
+ type GoogleLanguageModelInteractionsOptions,
1458
+ } from '@ai-sdk/google';
1459
+ import { generateText } from 'ai';
1460
+
1461
+ const result = await generateText({
1462
+ model: google.interactions('gemini-2.5-flash-image'),
1463
+ prompt:
1464
+ 'Tell me a three sentence bedtime story about a unicorn, accompanied by a suitable illustration.',
1465
+ providerOptions: {
1466
+ google: {
1467
+ responseFormat: [
1468
+ { type: 'text' },
1469
+ { type: 'image', aspectRatio: '16:9' },
1470
+ ],
1471
+ } satisfies GoogleLanguageModelInteractionsOptions,
1472
+ },
1473
+ });
1474
+
1475
+ console.log(result.text);
1476
+
1477
+ const images = result.files.filter(file => file.mediaType.startsWith('image/'));
1478
+ // images[0].uint8Array | images[0].base64 | images[0].mediaType
1479
+ ```
1480
+
1481
+ Iterative image editing pairs naturally with stateful chaining — keep
1482
+ `previousInteractionId` set across turns and the model edits its prior
1483
+ output:
1484
+
1485
+ ```ts
1486
+ import {
1487
+ google,
1488
+ type GoogleLanguageModelInteractionsOptions,
1489
+ } from '@ai-sdk/google';
1490
+ import { generateText } from 'ai';
1491
+
1492
+ const model = google.interactions('gemini-3-pro-image-preview');
1493
+
1494
+ const turn1 = await generateText({
1495
+ model,
1496
+ prompt: 'Generate an image of a comic cat in a spaceship.',
1497
+ providerOptions: {
1498
+ google: {
1499
+ responseFormat: [{ type: 'image' }],
1500
+ } satisfies GoogleLanguageModelInteractionsOptions,
1501
+ },
1502
+ });
1503
+
1504
+ const interactionId = turn1.providerMetadata?.google?.interactionId as
1505
+ | string
1506
+ | undefined;
1507
+
1508
+ const turn2 = await generateText({
1509
+ model,
1510
+ prompt: 'now make the cat red',
1511
+ providerOptions: {
1512
+ google: {
1513
+ responseFormat: [{ type: 'image' }],
1514
+ previousInteractionId: interactionId,
1515
+ } satisfies GoogleLanguageModelInteractionsOptions,
1516
+ },
1517
+ });
1518
+ ```
1519
+
1520
+ ### Agent presets
1521
+
1522
+ Pass `{ agent: <name> }` to target a Gemini agent preset. The factory
1523
+ type-checks the agent name against the supported set:
1524
+
1525
+ ```ts
1526
+ import {
1527
+ google,
1528
+ type GoogleLanguageModelInteractionsOptions,
1529
+ } from '@ai-sdk/google';
1530
+ import { generateText } from 'ai';
1531
+
1532
+ const result = await generateText({
1533
+ model: google.interactions({
1534
+ agent: 'deep-research-pro-preview-12-2025',
1535
+ }),
1536
+ prompt:
1537
+ 'Briefly summarize the most-cited papers on retrieval-augmented generation since 2024 (2-3 sentences).',
1538
+ providerOptions: {
1539
+ google: {
1540
+ background: true,
1541
+ } satisfies GoogleLanguageModelInteractionsOptions,
1542
+ },
1543
+ });
1544
+ ```
1545
+
1546
+ Whether an agent runs synchronously or in the background depends on the
1547
+ agent. Long-running presets (such as the `deep-research-*` family)
1548
+ require `background: true` — the POST returns a non-terminal status and
1549
+ the SDK polls `GET /interactions/{id}` internally until the interaction
1550
+ completes. Other agents accept synchronous calls only and will reject
1551
+ `background: true`. Set the flag explicitly via
1552
+ `providerOptions.google.background`.
1553
+
1554
+ The default polling timeout is 30 minutes; raise it via
1555
+ `pollingTimeoutMs` for slower agents:
1556
+
1557
+ ```ts
1558
+ import {
1559
+ google,
1560
+ type GoogleLanguageModelInteractionsOptions,
1561
+ } from '@ai-sdk/google';
1562
+ import { generateText } from 'ai';
1563
+
1564
+ await generateText({
1565
+ model: google.interactions({ agent: 'deep-research-max-preview-04-2026' }),
1566
+ prompt: 'Produce a long-form research brief on ...',
1567
+ providerOptions: {
1568
+ google: {
1569
+ background: true,
1570
+ pollingTimeoutMs: 60 * 60 * 1000, // 1 hour
1571
+ } satisfies GoogleLanguageModelInteractionsOptions,
1572
+ },
1573
+ });
1574
+ ```
1575
+
1576
+ Agent calls can also be chained through `previousInteractionId`, just like model-ID calls.
1577
+
1578
+ ### Managed Agents
1579
+
1580
+ [Managed agents](https://ai.google.dev/gemini-api/docs/agents) run inside a
1581
+ sandboxed Linux environment provisioned per interaction. Pass the `environment`
1582
+ provider option to control how the sandbox is set up; the option is only
1583
+ accepted on agent calls.
1584
+
1585
+ The simplest form provisions a fresh sandbox:
1586
+
1587
+ ```ts
1588
+ import {
1589
+ google,
1590
+ type GoogleLanguageModelInteractionsOptions,
1591
+ } from '@ai-sdk/google';
1592
+ import { generateText } from 'ai';
1593
+
1594
+ const result = await generateText({
1595
+ model: google.interactions({ agent: 'antigravity-preview-05-2026' }),
1596
+ prompt: 'What is 2 + 2?',
1597
+ providerOptions: {
1598
+ google: {
1599
+ environment: 'remote',
1600
+ } satisfies GoogleLanguageModelInteractionsOptions,
1601
+ },
1602
+ });
1603
+ ```
1604
+
1605
+ `environment` accepts three shapes:
1606
+
1607
+ - `'remote'` — provision a fresh sandbox for this call.
1608
+ - any other string — an `environment_id` to reuse, forking the previous
1609
+ sandbox so its filesystem and installed packages persist.
1610
+ - an object — provision a fresh sandbox and optionally preload `sources`
1611
+ and/or constrain outbound traffic via `network`:
1612
+
1613
+ ```ts
1614
+ import {
1615
+ google,
1616
+ type GoogleLanguageModelInteractionsOptions,
1617
+ } from '@ai-sdk/google';
1618
+ import { generateText } from 'ai';
1619
+
1620
+ await generateText({
1621
+ model: google.interactions({ agent: 'antigravity-preview-05-2026' }),
1622
+ prompt:
1623
+ 'Read the file at /data/note.txt and tell me exactly what it contains.',
1624
+ providerOptions: {
1625
+ google: {
1626
+ environment: {
1627
+ type: 'remote',
1628
+ sources: [
1629
+ {
1630
+ type: 'inline',
1631
+ content: 'hello from the AI SDK example\n',
1632
+ target: '/data/note.txt',
1633
+ },
1634
+ ],
1635
+ },
1636
+ } satisfies GoogleLanguageModelInteractionsOptions,
1637
+ },
1638
+ });
1639
+ ```
1640
+
1641
+ Three source types are supported: `inline` (write a string into the
1642
+ sandbox at `target`), `repository` (clone a Git repository — pass the
1643
+ URL as `source`), and `gcs` (mount a Google Cloud Storage prefix).
1644
+
1645
+ The `network` field accepts the string `'disabled'` to block all
1646
+ outbound traffic, or an object with an `allowlist` array whose entries
1647
+ each carry a `domain` plus an optional `transform` array of header
1648
+ objects to inject into matching requests.
1649
+
1650
+ #### Custom managed agents
1651
+
1652
+ For user-defined agents that you created on Google's side via the
1653
+ Gemini API's `/v1beta/agents` endpoint, pass the agent's name through the dedicated
1654
+ `managedAgent` factory shape instead of `agent` (which only accepts
1655
+ known preset names):
1656
+
1657
+ ```ts
1658
+ import {
1659
+ google,
1660
+ type GoogleLanguageModelInteractionsOptions,
1661
+ } from '@ai-sdk/google';
1662
+ import { generateText } from 'ai';
1663
+
1664
+ const result = await generateText({
1665
+ model: google.interactions({ managedAgent: 'my-custom-agent' }),
1666
+ prompt: 'Hello!',
1667
+ providerOptions: {
1668
+ google: {
1669
+ environment: 'remote',
1670
+ } satisfies GoogleLanguageModelInteractionsOptions,
1671
+ },
1672
+ });
1673
+ ```
1674
+
1675
+ ### Streaming
1676
+
1677
+ `streamText` is supported. The stream's `finish` part exposes
1678
+ `interactionId` on `providerMetadata.google` so callers can chain.
1679
+
1680
+ ```ts
1681
+ import { google } from '@ai-sdk/google';
1682
+ import { streamText } from 'ai';
1683
+
1684
+ const result = streamText({
1685
+ model: google.interactions('gemini-2.5-flash'),
1686
+ prompt: 'Hello, how are you?',
1687
+ });
1688
+
1689
+ for await (const textPart of result.textStream) {
1690
+ process.stdout.write(textPart);
1691
+ }
1692
+
1693
+ const googleMetadata = (await result.providerMetadata)?.google;
1694
+ console.log('Interaction id:', googleMetadata?.interactionId);
1695
+ ```
1696
+
1082
1697
  ## Gemma Models
1083
1698
 
1084
1699
  You can use [Gemma models](https://deepmind.google/models/gemma/) with the Google Generative AI API.
@@ -1111,12 +1726,12 @@ using the `.embedding()` factory method.
1111
1726
  const model = google.embedding('gemini-embedding-001');
1112
1727
  ```
1113
1728
 
1114
- The Google Generative AI provider sends API calls to the right endpoint based on the type of embedding:
1729
+ The Google provider sends API calls to the right endpoint based on the type of embedding:
1115
1730
 
1116
1731
  - **Single embeddings**: When embedding a single value with `embed()`, the provider uses the single `:embedContent` endpoint, which typically has higher rate limits compared to the batch endpoint.
1117
1732
  - **Batch embeddings**: When embedding multiple values with `embedMany()` or multiple values in `embed()`, the provider uses the `:batchEmbedContents` endpoint.
1118
1733
 
1119
- Google Generative AI embedding models support additional settings. You can pass them as an options argument:
1734
+ Google embedding models support additional settings. You can pass them as an options argument:
1120
1735
 
1121
1736
  ```ts
1122
1737
  import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
@@ -1158,7 +1773,7 @@ const { embeddings } = await embedMany({
1158
1773
  });
1159
1774
  ```
1160
1775
 
1161
- The following optional provider options are available for Google Generative AI embedding models:
1776
+ The following optional provider options are available for Google embedding models:
1162
1777
 
1163
1778
  - **outputDimensionality**: _number_
1164
1779
 
@@ -1167,7 +1782,6 @@ The following optional provider options are available for Google Generative AI e
1167
1782
  - **taskType**: _string_
1168
1783
 
1169
1784
  Optional. Specifies the task type for generating embeddings. Supported task types include:
1170
-
1171
1785
  - `SEMANTIC_SIMILARITY`: Optimized for text similarity.
1172
1786
  - `CLASSIFICATION`: Optimized for text classification.
1173
1787
  - `CLUSTERING`: Optimized for clustering texts based on similarity.
@@ -1179,13 +1793,14 @@ The following optional provider options are available for Google Generative AI e
1179
1793
 
1180
1794
  - **content**: _array_
1181
1795
 
1182
- Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either `{ text: string }` or `{ inlineData: { mimeType: string, data: string } }`. Supported by `gemini-embedding-2-preview`.
1796
+ Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be `{ text: string }`, `{ inlineData: { mimeType: string, data: string } }` for inline base64 data, or `{ fileData: { fileUri: string, mimeType: string } }` to reference remote content via HTTP URL or Google Cloud Storage URI (`gs://...`). Supported by `gemini-embedding-2` and `gemini-embedding-2-preview`.
1183
1797
 
1184
1798
  ### Model Capabilities
1185
1799
 
1186
1800
  | Model | Default Dimensions | Custom Dimensions | Multimodal |
1187
1801
  | ---------------------------- | ------------------ | ------------------- | ------------------- |
1188
1802
  | `gemini-embedding-001` | 3072 | <Check size={18} /> | <Cross size={18} /> |
1803
+ | `gemini-embedding-2` | 3072 | <Check size={18} /> | <Check size={18} /> |
1189
1804
  | `gemini-embedding-2-preview` | 3072 | <Check size={18} /> | <Check size={18} /> |
1190
1805
 
1191
1806
  ## Image Models
@@ -1309,6 +1924,29 @@ const { image } = await generateImage({
1309
1924
  details.
1310
1925
  </Note>
1311
1926
 
1927
+ #### Google Search Grounding
1928
+
1929
+ Gemini image models support [Google Search grounding](#google-search) through `providerOptions.google.googleSearch`. The value matches the args of `google.tools.googleSearch(...)`; pass `{}` to enable with defaults, or `{ searchTypes: { imageSearch: {} } }` to ground on reference photos.
1930
+
1931
+ ```ts
1932
+ import { google } from '@ai-sdk/google';
1933
+ import { generateImage } from 'ai';
1934
+
1935
+ const result = await generateImage({
1936
+ model: google.image('gemini-3.1-flash-image-preview'),
1937
+ prompt:
1938
+ 'Search for live footage of the 2026 Super Bowl halftime show artist, then generate a close-up in space.',
1939
+ providerOptions: {
1940
+ google: {
1941
+ googleSearch: { searchTypes: { imageSearch: {} } },
1942
+ },
1943
+ },
1944
+ });
1945
+
1946
+ // Grounding metadata is forwarded onto the image result:
1947
+ console.log(result.providerMetadata?.google?.groundingMetadata);
1948
+ ```
1949
+
1312
1950
  #### Gemini Image Model Capabilities
1313
1951
 
1314
1952
  | Model | Image Generation | Image Editing | Aspect Ratios |
@@ -1323,3 +1961,80 @@ const { image } = await generateImage({
1323
1961
  2K, 4K via `providerOptions.google.imageConfig.imageSize`), and Google Search
1324
1962
  grounding.
1325
1963
  </Note>
1964
+
1965
+ ## Speech Models
1966
+
1967
+ You can create models that call the [Gemini text-to-speech API](https://ai.google.dev/gemini-api/docs/speech-generation)
1968
+ using the `.speech()` factory method.
1969
+
1970
+ The first argument is the model ID, e.g. `gemini-2.5-flash-preview-tts`.
1971
+
1972
+ ```ts
1973
+ const model = google.speech('gemini-2.5-flash-preview-tts');
1974
+ ```
1975
+
1976
+ The `voice` argument can be set to one of Gemini's [30 prebuilt voices](https://ai.google.dev/gemini-api/docs/speech-generation#voices)
1977
+ e.g. `Kore`, `Puck`, `Zephyr`, or `Charon`. Voice names are case-sensitive. It defaults to `Kore`.
1978
+
1979
+ ```ts highlight="7"
1980
+ import { generateSpeech } from 'ai';
1981
+ import { google } from '@ai-sdk/google';
1982
+
1983
+ const result = await generateSpeech({
1984
+ model: google.speech('gemini-2.5-flash-preview-tts'),
1985
+ text: 'Hello, world!',
1986
+ voice: 'Kore', // Gemini voice name
1987
+ });
1988
+ ```
1989
+
1990
+ By default the generated audio is returned as a playable WAV file (`result.audio.mediaType` is
1991
+ `audio/wav`). Set `outputFormat: 'pcm'` to receive the raw signed 16-bit little-endian mono PCM
1992
+ bytes instead; the sample rate is reported in `result.providerMetadata.google.sampleRate`.
1993
+
1994
+ Gemini honors natural-language style direction. The `instructions` argument is prepended to the
1995
+ spoken text, so `instructions: 'Say cheerfully'` with `text: 'Hello'` sends `Say cheerfully: Hello` as the prompt, and the model speaks `Hello` in that style.
1996
+
1997
+ ### Multi-speaker audio
1998
+
1999
+ For multi-speaker dialogue, pass a `multiSpeakerVoiceConfig` through `providerOptions`. Each speaker
2000
+ name must match a name used in the input text. When set, it overrides the top-level `voice`.
2001
+
2002
+ ```ts highlight="8-23"
2003
+ import { generateSpeech } from 'ai';
2004
+ import { google, type GoogleSpeechModelOptions } from '@ai-sdk/google';
2005
+
2006
+ const result = await generateSpeech({
2007
+ model: google.speech('gemini-2.5-flash-preview-tts'),
2008
+ text: 'Joe: How are you? Jane: Doing great, thanks!',
2009
+ providerOptions: {
2010
+ google: {
2011
+ multiSpeakerVoiceConfig: {
2012
+ speakerVoiceConfigs: [
2013
+ {
2014
+ speaker: 'Joe',
2015
+ voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } },
2016
+ },
2017
+ {
2018
+ speaker: 'Jane',
2019
+ voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Puck' } },
2020
+ },
2021
+ ],
2022
+ },
2023
+ } satisfies GoogleSpeechModelOptions,
2024
+ },
2025
+ });
2026
+ ```
2027
+
2028
+ <Note>
2029
+ Gemini TTS models do not support the `speed` or `language` options; passing
2030
+ them adds a warning to `result.warnings`. Language is detected automatically
2031
+ from the input text.
2032
+ </Note>
2033
+
2034
+ ### Model Capabilities
2035
+
2036
+ | Model | Multi-speaker | Style via instructions |
2037
+ | ------------------------------ | ------------------- | ---------------------- |
2038
+ | `gemini-2.5-flash-preview-tts` | <Check size={18} /> | <Check size={18} /> |
2039
+ | `gemini-2.5-pro-preview-tts` | <Check size={18} /> | <Check size={18} /> |
2040
+ | `gemini-3.1-flash-tts-preview` | <Check size={18} /> | <Check size={18} /> |