@ai-sdk/google 4.0.0-beta.7 → 4.0.0-beta.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +614 -5
- package/README.md +6 -4
- package/dist/index.d.ts +301 -50
- package/dist/index.js +5410 -639
- package/dist/index.js.map +1 -1
- package/dist/internal/index.d.ts +100 -26
- package/dist/internal/index.js +1653 -451
- package/dist/internal/index.js.map +1 -1
- package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
- package/package.json +16 -17
- package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
- package/src/convert-json-schema-to-openapi-schema.ts +1 -1
- package/src/convert-to-google-messages.ts +647 -0
- package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
- package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
- package/src/google-error.ts +1 -1
- package/src/google-files.ts +225 -0
- package/src/google-image-model-options.ts +35 -0
- package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
- package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
- package/src/google-json-accumulator.ts +371 -0
- package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
- package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +701 -219
- package/src/google-prepare-tools.ts +72 -12
- package/src/google-prompt.ts +86 -0
- package/src/google-provider.ts +157 -53
- package/src/google-speech-api.ts +36 -0
- package/src/google-speech-model-options.ts +48 -0
- package/src/google-speech-model.ts +311 -0
- package/src/google-video-model-options.ts +43 -0
- package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
- package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
- package/src/index.ts +40 -9
- package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
- package/src/interactions/cancel-google-interaction.ts +60 -0
- package/src/interactions/convert-google-interactions-usage.ts +47 -0
- package/src/interactions/convert-to-google-interactions-input.ts +557 -0
- package/src/interactions/extract-google-interactions-sources.ts +252 -0
- package/src/interactions/google-interactions-agent.ts +15 -0
- package/src/interactions/google-interactions-api.ts +530 -0
- package/src/interactions/google-interactions-language-model-options.ts +262 -0
- package/src/interactions/google-interactions-language-model.ts +776 -0
- package/src/interactions/google-interactions-prompt.ts +582 -0
- package/src/interactions/google-interactions-provider-metadata.ts +23 -0
- package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
- package/src/interactions/parse-google-interactions-outputs.ts +252 -0
- package/src/interactions/poll-google-interactions.ts +129 -0
- package/src/interactions/prepare-google-interactions-tools.ts +245 -0
- package/src/interactions/stream-google-interactions.ts +242 -0
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
- package/src/internal/index.ts +3 -2
- package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
- package/src/realtime/google-realtime-event-mapper.ts +383 -0
- package/src/realtime/google-realtime-model-options.ts +3 -0
- package/src/realtime/google-realtime-model.ts +160 -0
- package/src/realtime/index.ts +2 -0
- package/src/tool/code-execution.ts +2 -2
- package/src/tool/enterprise-web-search.ts +9 -3
- package/src/tool/file-search.ts +5 -7
- package/src/tool/google-maps.ts +3 -2
- package/src/tool/google-search.ts +11 -12
- package/src/tool/url-context.ts +4 -2
- package/src/tool/vertex-rag-store.ts +9 -6
- package/dist/index.d.mts +0 -376
- package/dist/index.mjs +0 -2517
- package/dist/index.mjs.map +0 -1
- package/dist/internal/index.d.mts +0 -284
- package/dist/internal/index.mjs +0 -1706
- package/dist/internal/index.mjs.map +0 -1
- package/src/convert-to-google-generative-ai-messages.ts +0 -239
- package/src/google-generative-ai-prompt.ts +0 -38
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: Google
|
|
3
|
-
description: Learn how to use Google
|
|
2
|
+
title: Google
|
|
3
|
+
description: Learn how to use Google Provider.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# Google
|
|
6
|
+
# Google Provider
|
|
7
7
|
|
|
8
|
-
The [Google
|
|
9
|
-
the [Google
|
|
8
|
+
The [Google](https://ai.google.dev) provider contains language and embedding model support for
|
|
9
|
+
the [Google](https://ai.google.dev/api/rest) APIs.
|
|
10
10
|
|
|
11
11
|
## Setup
|
|
12
12
|
|
|
@@ -36,17 +36,17 @@ You can import the default provider instance `google` from `@ai-sdk/google`:
|
|
|
36
36
|
import { google } from '@ai-sdk/google';
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
-
If you need a customized setup, you can import `
|
|
39
|
+
If you need a customized setup, you can import `createGoogle` from `@ai-sdk/google` and create a provider instance with your settings:
|
|
40
40
|
|
|
41
41
|
```ts
|
|
42
|
-
import {
|
|
42
|
+
import { createGoogle } from '@ai-sdk/google';
|
|
43
43
|
|
|
44
|
-
const google =
|
|
44
|
+
const google = createGoogle({
|
|
45
45
|
// custom settings
|
|
46
46
|
});
|
|
47
47
|
```
|
|
48
48
|
|
|
49
|
-
You can use the following optional settings to customize the Google
|
|
49
|
+
You can use the following optional settings to customize the Google provider instance:
|
|
50
50
|
|
|
51
51
|
- **baseURL** _string_
|
|
52
52
|
|
|
@@ -89,7 +89,7 @@ The models support tool calls and some have multi-modal capabilities.
|
|
|
89
89
|
const model = google('gemini-2.5-flash');
|
|
90
90
|
```
|
|
91
91
|
|
|
92
|
-
You can use Google
|
|
92
|
+
You can use Google language models to generate text with the `generateText` function:
|
|
93
93
|
|
|
94
94
|
```ts
|
|
95
95
|
import { google } from '@ai-sdk/google';
|
|
@@ -101,11 +101,11 @@ const { text } = await generateText({
|
|
|
101
101
|
});
|
|
102
102
|
```
|
|
103
103
|
|
|
104
|
-
Google
|
|
104
|
+
Google language models can also be used in the `streamText` function
|
|
105
105
|
and support structured data generation with [`Output`](/docs/reference/ai-sdk-core/output)
|
|
106
106
|
(see [AI SDK Core](/docs/ai-sdk-core)).
|
|
107
107
|
|
|
108
|
-
Google
|
|
108
|
+
Google also supports some model-specific settings that are not part of the [standard call settings](/docs/ai-sdk-core/settings).
|
|
109
109
|
You can pass them as an options argument:
|
|
110
110
|
|
|
111
111
|
```ts
|
|
@@ -128,7 +128,7 @@ await generateText({
|
|
|
128
128
|
});
|
|
129
129
|
```
|
|
130
130
|
|
|
131
|
-
The following optional provider options are available for Google
|
|
131
|
+
The following optional provider options are available for Google models:
|
|
132
132
|
|
|
133
133
|
- **cachedContent** _string_
|
|
134
134
|
|
|
@@ -141,7 +141,7 @@ The following optional provider options are available for Google Generative AI m
|
|
|
141
141
|
|
|
142
142
|
This is useful when the JSON Schema contains elements that are
|
|
143
143
|
not supported by the OpenAPI schema version that
|
|
144
|
-
Google
|
|
144
|
+
Google uses. You can use this to disable
|
|
145
145
|
structured outputs if you need to.
|
|
146
146
|
|
|
147
147
|
See [Troubleshooting: Schema Limitations](#schema-limitations) for more details.
|
|
@@ -149,11 +149,9 @@ The following optional provider options are available for Google Generative AI m
|
|
|
149
149
|
- **safetySettings** _Array\<\{ category: string; threshold: string \}\>_
|
|
150
150
|
|
|
151
151
|
Optional. Safety settings for the model.
|
|
152
|
-
|
|
153
152
|
- **category** _string_
|
|
154
153
|
|
|
155
154
|
The category of the safety setting. Can be one of the following:
|
|
156
|
-
|
|
157
155
|
- `HARM_CATEGORY_UNSPECIFIED`
|
|
158
156
|
- `HARM_CATEGORY_HATE_SPEECH`
|
|
159
157
|
- `HARM_CATEGORY_DANGEROUS_CONTENT`
|
|
@@ -164,7 +162,6 @@ The following optional provider options are available for Google Generative AI m
|
|
|
164
162
|
- **threshold** _string_
|
|
165
163
|
|
|
166
164
|
The threshold of the safety setting. Can be one of the following:
|
|
167
|
-
|
|
168
165
|
- `HARM_BLOCK_THRESHOLD_UNSPECIFIED`
|
|
169
166
|
- `BLOCK_LOW_AND_ABOVE`
|
|
170
167
|
- `BLOCK_MEDIUM_AND_ABOVE`
|
|
@@ -177,8 +174,7 @@ The following optional provider options are available for Google Generative AI m
|
|
|
177
174
|
|
|
178
175
|
- **thinkingConfig** _\{ thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high'; thinkingBudget?: number; includeThoughts?: boolean \}_
|
|
179
176
|
|
|
180
|
-
Optional. Configuration for the model's thinking process. Only supported by specific [Google
|
|
181
|
-
|
|
177
|
+
Optional. Configuration for the model's thinking process. Only supported by specific [Google models](https://ai.google.dev/gemini-api/docs/thinking).
|
|
182
178
|
- **thinkingLevel** _'minimal' | 'low' | 'medium' | 'high'_
|
|
183
179
|
|
|
184
180
|
Optional. Controls the thinking depth for Gemini 3 models. Gemini 3.1 Pro supports 'low', 'medium', and 'high', Gemini 3 Pro supports 'low' and 'high', while Gemini 3 Flash supports all four levels: 'minimal', 'low', 'medium', and 'high'. Only supported by Gemini 3 models.
|
|
@@ -186,7 +182,7 @@ The following optional provider options are available for Google Generative AI m
|
|
|
186
182
|
- **thinkingBudget** _number_
|
|
187
183
|
|
|
188
184
|
Optional. Gives the model guidance on the number of thinking tokens it can use when generating a response. Setting it to 0 disables thinking, if the model supports it.
|
|
189
|
-
For more information about the possible value ranges for each model see [Google
|
|
185
|
+
For more information about the possible value ranges for each model see [Google thinking documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget).
|
|
190
186
|
|
|
191
187
|
<Note>
|
|
192
188
|
This option is for Gemini 2.5 models. Gemini 3 models should use
|
|
@@ -199,12 +195,10 @@ The following optional provider options are available for Google Generative AI m
|
|
|
199
195
|
|
|
200
196
|
- **imageConfig** _\{ aspectRatio?: string, imageSize?: string \}_
|
|
201
197
|
|
|
202
|
-
Optional. Configuration for the models image generation. Only supported by specific [Google
|
|
203
|
-
|
|
198
|
+
Optional. Configuration for the model's image generation. Only supported by specific [Google models](https://ai.google.dev/gemini-api/docs/image-generation).
|
|
204
199
|
- **aspectRatio** _string_
|
|
205
200
|
|
|
206
201
|
By default, the model generates 1:1 squares or matches the output image size to that of your input image. Can be one of the following:
|
|
207
|
-
|
|
208
202
|
- 1:1
|
|
209
203
|
- 2:3
|
|
210
204
|
- 3:2
|
|
@@ -219,7 +213,6 @@ The following optional provider options are available for Google Generative AI m
|
|
|
219
213
|
- **imageSize** _string_
|
|
220
214
|
|
|
221
215
|
Controls the output image resolution. Defaults to 1K. Can be one of the following:
|
|
222
|
-
|
|
223
216
|
- 1K
|
|
224
217
|
- 2K
|
|
225
218
|
- 4K
|
|
@@ -232,7 +225,6 @@ The following optional provider options are available for Google Generative AI m
|
|
|
232
225
|
- **mediaResolution** _string_
|
|
233
226
|
|
|
234
227
|
Optional. If specified, this media resolution will be used. Can be one of the following:
|
|
235
|
-
|
|
236
228
|
- `MEDIA_RESOLUTION_UNSPECIFIED`
|
|
237
229
|
- `MEDIA_RESOLUTION_LOW`
|
|
238
230
|
- `MEDIA_RESOLUTION_MEDIUM`
|
|
@@ -245,6 +237,18 @@ The following optional provider options are available for Google Generative AI m
|
|
|
245
237
|
Optional. Defines labels used in billing reports. Available on Vertex AI only.
|
|
246
238
|
See [Google Cloud labels documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls).
|
|
247
239
|
|
|
240
|
+
- **serviceTier** _'standard' | 'flex' | 'priority'_
|
|
241
|
+
|
|
242
|
+
Optional. The service tier to use for the request.
|
|
243
|
+
Set to `'flex'` for 50% cheaper processing at the cost of increased latency.
|
|
244
|
+
Set to `'priority'` for ultra-low latency at a 75-100% price premium over `'standard'`.
|
|
245
|
+
|
|
246
|
+
Because Priority can be gracefully downgraded to Standard under load, the
|
|
247
|
+
tier the request actually ran on is surfaced on
|
|
248
|
+
`result.providerMetadata.google.serviceTier`. See
|
|
249
|
+
[Priority inference](https://ai.google.dev/gemini-api/docs/priority-inference)
|
|
250
|
+
and [Flex inference](https://ai.google.dev/gemini-api/docs/flex-inference).
|
|
251
|
+
|
|
248
252
|
- **threshold** _string_
|
|
249
253
|
|
|
250
254
|
Optional. Standalone threshold setting that can be used independently of `safetySettings`.
|
|
@@ -252,7 +256,7 @@ The following optional provider options are available for Google Generative AI m
|
|
|
252
256
|
|
|
253
257
|
### Thinking
|
|
254
258
|
|
|
255
|
-
The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" that significantly improves their reasoning and multi-step planning abilities, making them highly effective for complex tasks such as coding, advanced mathematics, and data analysis. For more information see [Google
|
|
259
|
+
The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" that significantly improves their reasoning and multi-step planning abilities, making them highly effective for complex tasks such as coding, advanced mathematics, and data analysis. For more information see [Google thinking documentation](https://ai.google.dev/gemini-api/docs/thinking).
|
|
256
260
|
|
|
257
261
|
#### Gemini 3 Models
|
|
258
262
|
|
|
@@ -312,7 +316,7 @@ console.log(reasoning); // Reasoning summary
|
|
|
312
316
|
|
|
313
317
|
### File Inputs
|
|
314
318
|
|
|
315
|
-
The Google
|
|
319
|
+
The Google provider supports file inputs, e.g. PDF files.
|
|
316
320
|
|
|
317
321
|
```ts
|
|
318
322
|
import { google } from '@ai-sdk/google';
|
|
@@ -378,7 +382,7 @@ See [File Parts](/docs/foundations/prompts#file-parts) for details on how to use
|
|
|
378
382
|
|
|
379
383
|
### Cached Content
|
|
380
384
|
|
|
381
|
-
Google
|
|
385
|
+
Google supports both explicit and implicit caching to help reduce costs on repetitive content.
|
|
382
386
|
|
|
383
387
|
#### Implicit Caching
|
|
384
388
|
|
|
@@ -510,7 +514,7 @@ the model has access to the latest information using Google Search.
|
|
|
510
514
|
|
|
511
515
|
```ts highlight="8,17-20"
|
|
512
516
|
import { google } from '@ai-sdk/google';
|
|
513
|
-
import {
|
|
517
|
+
import { GoogleProviderMetadata } from '@ai-sdk/google';
|
|
514
518
|
import { generateText } from 'ai';
|
|
515
519
|
|
|
516
520
|
const { text, sources, providerMetadata } = await generateText({
|
|
@@ -525,9 +529,7 @@ const { text, sources, providerMetadata } = await generateText({
|
|
|
525
529
|
|
|
526
530
|
// access the grounding metadata. Casting to the provider metadata type
|
|
527
531
|
// is optional but provides autocomplete and type safety.
|
|
528
|
-
const metadata = providerMetadata?.google as
|
|
529
|
-
| GoogleGenerativeAIProviderMetadata
|
|
530
|
-
| undefined;
|
|
532
|
+
const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
|
|
531
533
|
const groundingMetadata = metadata?.groundingMetadata;
|
|
532
534
|
const safetyRatings = metadata?.safetyRatings;
|
|
533
535
|
```
|
|
@@ -537,14 +539,12 @@ The `googleSearch` tool accepts the following optional configuration options:
|
|
|
537
539
|
- **searchTypes** _object_
|
|
538
540
|
|
|
539
541
|
Enables specific search types. Both can be combined.
|
|
540
|
-
|
|
541
542
|
- `webSearch`: Enable web search grounding (pass `{}` to enable). This is the default.
|
|
542
543
|
- `imageSearch`: Enable [image search grounding](https://ai.google.dev/gemini-api/docs/image-generation#image-search) (pass `{}` to enable).
|
|
543
544
|
|
|
544
545
|
- **timeRangeFilter** _object_
|
|
545
546
|
|
|
546
547
|
Restricts search results to a specific time range. Both `startTime` and `endTime` are required.
|
|
547
|
-
|
|
548
548
|
- `startTime`: Start time in ISO 8601 format (e.g. `'2025-01-01T00:00:00Z'`).
|
|
549
549
|
- `endTime`: End time in ISO 8601 format (e.g. `'2025-12-31T23:59:59Z'`).
|
|
550
550
|
|
|
@@ -563,12 +563,10 @@ When Google Search grounding is enabled, the model will include sources in the r
|
|
|
563
563
|
Additionally, the grounding metadata includes detailed information about how search results were used to ground the model's response. Here are the available fields:
|
|
564
564
|
|
|
565
565
|
- **`webSearchQueries`** (`string[] | null`)
|
|
566
|
-
|
|
567
566
|
- Array of search queries used to retrieve information
|
|
568
567
|
- Example: `["What's the weather in Chicago this weekend?"]`
|
|
569
568
|
|
|
570
569
|
- **`searchEntryPoint`** (`{ renderedContent: string } | null`)
|
|
571
|
-
|
|
572
570
|
- Contains the main search result content used as an entry point
|
|
573
571
|
- The `renderedContent` field contains the formatted content
|
|
574
572
|
|
|
@@ -619,10 +617,10 @@ the model has access to a compliance-focused web index designed for highly-regul
|
|
|
619
617
|
</Note>
|
|
620
618
|
|
|
621
619
|
```ts
|
|
622
|
-
import {
|
|
620
|
+
import { createGoogleVertex } from '@ai-sdk/google-vertex';
|
|
623
621
|
import { generateText } from 'ai';
|
|
624
622
|
|
|
625
|
-
const vertex =
|
|
623
|
+
const vertex = createGoogleVertex({
|
|
626
624
|
project: 'my-project',
|
|
627
625
|
location: 'us-central1',
|
|
628
626
|
});
|
|
@@ -686,9 +684,7 @@ const { text, sources, providerMetadata } = await generateText({
|
|
|
686
684
|
},
|
|
687
685
|
});
|
|
688
686
|
|
|
689
|
-
const metadata = providerMetadata?.google as
|
|
690
|
-
| GoogleGenerativeAIProviderMetadata
|
|
691
|
-
| undefined;
|
|
687
|
+
const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
|
|
692
688
|
const groundingMetadata = metadata?.groundingMetadata;
|
|
693
689
|
const urlContextMetadata = metadata?.urlContextMetadata;
|
|
694
690
|
```
|
|
@@ -696,7 +692,6 @@ const urlContextMetadata = metadata?.urlContextMetadata;
|
|
|
696
692
|
The URL context metadata includes detailed information about how the model used the URL context to generate the response. Here are the available fields:
|
|
697
693
|
|
|
698
694
|
- **`urlMetadata`** (`{ retrievedUrl: string; urlRetrievalStatus: string; }[] | null`)
|
|
699
|
-
|
|
700
695
|
- Array of URL context metadata
|
|
701
696
|
- Each object includes:
|
|
702
697
|
- **`retrievedUrl`**: The URL of the context
|
|
@@ -708,7 +703,7 @@ Example response:
|
|
|
708
703
|
{
|
|
709
704
|
"urlMetadata": [
|
|
710
705
|
{
|
|
711
|
-
"retrievedUrl": "https://ai-sdk.dev/providers/ai-sdk-providers/google
|
|
706
|
+
"retrievedUrl": "https://ai-sdk.dev/providers/ai-sdk-providers/google",
|
|
712
707
|
"urlRetrievalStatus": "URL_RETRIEVAL_STATUS_SUCCESS"
|
|
713
708
|
}
|
|
714
709
|
]
|
|
@@ -722,8 +717,8 @@ With the URL context tool, you will also get the `groundingMetadata`.
|
|
|
722
717
|
"groundingChunks": [
|
|
723
718
|
{
|
|
724
719
|
"web": {
|
|
725
|
-
"uri": "https://ai-sdk.dev/providers/ai-sdk-providers/google
|
|
726
|
-
"title": "Google
|
|
720
|
+
"uri": "https://ai-sdk.dev/providers/ai-sdk-providers/google",
|
|
721
|
+
"title": "Google - AI SDK Providers"
|
|
727
722
|
}
|
|
728
723
|
}
|
|
729
724
|
],
|
|
@@ -760,7 +755,7 @@ import { generateText } from 'ai';
|
|
|
760
755
|
|
|
761
756
|
const { text, sources, providerMetadata } = await generateText({
|
|
762
757
|
model: google('gemini-2.5-flash'),
|
|
763
|
-
prompt: `Based on this context: https://ai-sdk.dev/providers/ai-sdk-providers/google
|
|
758
|
+
prompt: `Based on this context: https://ai-sdk.dev/providers/ai-sdk-providers/google, tell me how to use Gemini with AI SDK.
|
|
764
759
|
Also, provide the latest news about AI SDK V5.`,
|
|
765
760
|
tools: {
|
|
766
761
|
google_search: google.tools.googleSearch({}),
|
|
@@ -768,9 +763,7 @@ const { text, sources, providerMetadata } = await generateText({
|
|
|
768
763
|
},
|
|
769
764
|
});
|
|
770
765
|
|
|
771
|
-
const metadata = providerMetadata?.google as
|
|
772
|
-
| GoogleGenerativeAIProviderMetadata
|
|
773
|
-
| undefined;
|
|
766
|
+
const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
|
|
774
767
|
const groundingMetadata = metadata?.groundingMetadata;
|
|
775
768
|
const urlContextMetadata = metadata?.urlContextMetadata;
|
|
776
769
|
```
|
|
@@ -782,7 +775,7 @@ the model has access to Google Maps data for location-aware responses. This enab
|
|
|
782
775
|
|
|
783
776
|
```ts highlight="7-16"
|
|
784
777
|
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
|
|
785
|
-
import {
|
|
778
|
+
import { GoogleProviderMetadata } from '@ai-sdk/google';
|
|
786
779
|
import { generateText } from 'ai';
|
|
787
780
|
|
|
788
781
|
const { text, sources, providerMetadata } = await generateText({
|
|
@@ -801,9 +794,7 @@ const { text, sources, providerMetadata } = await generateText({
|
|
|
801
794
|
'What are the best Italian restaurants within a 15-minute walk from here?',
|
|
802
795
|
});
|
|
803
796
|
|
|
804
|
-
const metadata = providerMetadata?.google as
|
|
805
|
-
| GoogleGenerativeAIProviderMetadata
|
|
806
|
-
| undefined;
|
|
797
|
+
const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
|
|
807
798
|
const groundingMetadata = metadata?.groundingMetadata;
|
|
808
799
|
```
|
|
809
800
|
|
|
@@ -842,11 +833,11 @@ This enables the model to provide answers based on your specific data sources an
|
|
|
842
833
|
</Note>
|
|
843
834
|
|
|
844
835
|
```ts highlight="8,17-20"
|
|
845
|
-
import {
|
|
846
|
-
import {
|
|
836
|
+
import { createGoogleVertex } from '@ai-sdk/google-vertex';
|
|
837
|
+
import { GoogleProviderMetadata } from '@ai-sdk/google';
|
|
847
838
|
import { generateText } from 'ai';
|
|
848
839
|
|
|
849
|
-
const vertex =
|
|
840
|
+
const vertex = createGoogleVertex({
|
|
850
841
|
project: 'my-project',
|
|
851
842
|
location: 'us-central1',
|
|
852
843
|
});
|
|
@@ -866,9 +857,7 @@ const { text, sources, providerMetadata } = await generateText({
|
|
|
866
857
|
|
|
867
858
|
// access the grounding metadata. Casting to the provider metadata type
|
|
868
859
|
// is optional but provides autocomplete and type safety.
|
|
869
|
-
const metadata = providerMetadata?.google as
|
|
870
|
-
| GoogleGenerativeAIProviderMetadata
|
|
871
|
-
| undefined;
|
|
860
|
+
const metadata = providerMetadata?.google as GoogleProviderMetadata | undefined;
|
|
872
861
|
const groundingMetadata = metadata?.groundingMetadata;
|
|
873
862
|
const safetyRatings = metadata?.safetyRatings;
|
|
874
863
|
```
|
|
@@ -878,7 +867,6 @@ When RAG Engine Grounding is enabled, the model will include sources from your R
|
|
|
878
867
|
Additionally, the grounding metadata includes detailed information about how RAG results were used to ground the model's response. Here are the available fields:
|
|
879
868
|
|
|
880
869
|
- **`groundingChunks`** (Array of chunk objects | null)
|
|
881
|
-
|
|
882
870
|
- Contains the retrieved context chunks from your RAG corpus
|
|
883
871
|
- Each chunk includes:
|
|
884
872
|
- **`retrievedContext`**: Information about the retrieved context
|
|
@@ -887,7 +875,6 @@ Additionally, the grounding metadata includes detailed information about how RAG
|
|
|
887
875
|
- `text`: The actual text content of the chunk
|
|
888
876
|
|
|
889
877
|
- **`groundingSupports`** (Array of support objects | null)
|
|
890
|
-
|
|
891
878
|
- Contains details about how specific response parts are supported by RAG results
|
|
892
879
|
- Each support object includes:
|
|
893
880
|
- **`segment`**: Information about the grounded text segment
|
|
@@ -931,12 +918,10 @@ Example response:
|
|
|
931
918
|
The `vertexRagStore` tool accepts the following configuration options:
|
|
932
919
|
|
|
933
920
|
- **`ragCorpus`** (`string`, required)
|
|
934
|
-
|
|
935
921
|
- The RagCorpus resource name in the format: `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}`
|
|
936
922
|
- This identifies your specific RAG corpus to search against
|
|
937
923
|
|
|
938
924
|
- **`topK`** (`number`, optional)
|
|
939
|
-
|
|
940
925
|
- The number of top contexts to retrieve from your RAG corpus
|
|
941
926
|
- Defaults to the corpus configuration if not specified
|
|
942
927
|
|
|
@@ -1051,7 +1036,7 @@ const { output } = await generateText({
|
|
|
1051
1036
|
});
|
|
1052
1037
|
```
|
|
1053
1038
|
|
|
1054
|
-
The following Zod features are known to not work with Google
|
|
1039
|
+
The following Zod features are known to not work with Google:
|
|
1055
1040
|
|
|
1056
1041
|
- `z.union`
|
|
1057
1042
|
- `z.record`
|
|
@@ -1060,6 +1045,7 @@ The following Zod features are known to not work with Google Generative AI:
|
|
|
1060
1045
|
|
|
1061
1046
|
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Google Search | URL Context |
|
|
1062
1047
|
| ------------------------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
|
|
1048
|
+
| `gemini-3.5-flash` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
1063
1049
|
| `gemini-3.1-pro-preview` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
1064
1050
|
| `gemini-3.1-flash-image-preview` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
1065
1051
|
| `gemini-3.1-flash-lite-preview` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
@@ -1079,6 +1065,635 @@ The following Zod features are known to not work with Google Generative AI:
|
|
|
1079
1065
|
available provider model ID as a string if needed.
|
|
1080
1066
|
</Note>
|
|
1081
1067
|
|
|
1068
|
+
## Realtime Models
|
|
1069
|
+
|
|
1070
|
+
<Note type="warning">Realtime is an experimental feature.</Note>
|
|
1071
|
+
|
|
1072
|
+
You can create models that call the [Gemini Live API](https://ai.google.dev/gemini-api/docs/live)
|
|
1073
|
+
using the `.experimental_realtime()` factory method.
|
|
1074
|
+
|
|
1075
|
+
```ts
|
|
1076
|
+
import { google } from '@ai-sdk/google';
|
|
1077
|
+
|
|
1078
|
+
const model = google.experimental_realtime('gemini-3.1-flash-live-preview');
|
|
1079
|
+
```
|
|
1080
|
+
|
|
1081
|
+
Realtime sessions run in the browser and require a short-lived token created on
|
|
1082
|
+
your server with `google.experimental_realtime.getToken()`:
|
|
1083
|
+
|
|
1084
|
+
```ts
|
|
1085
|
+
const token = await google.experimental_realtime.getToken({
|
|
1086
|
+
model: 'gemini-3.1-flash-live-preview',
|
|
1087
|
+
});
|
|
1088
|
+
```
|
|
1089
|
+
|
|
1090
|
+
Google realtime models may require provider-specific audio formats, depending
|
|
1091
|
+
on the model and modality. See [Realtime](/docs/ai-sdk-core/realtime) for the
|
|
1092
|
+
complete setup and tool calling pattern.
|
|
1093
|
+
|
|
1094
|
+
## Interactions API
|
|
1095
|
+
|
|
1096
|
+
The [Gemini Interactions API](https://ai.google.dev/gemini-api/docs/interactions)
|
|
1097
|
+
(`POST /v1beta/interactions`) is a separate Google endpoint with server-side
|
|
1098
|
+
state, unified content blocks, first-class built-in tools, agent presets,
|
|
1099
|
+
managed agents that run in a sandboxed Linux environment, and native
|
|
1100
|
+
multimodal image output. It is reached via the `google.interactions(...)`
|
|
1101
|
+
factory:
|
|
1102
|
+
|
|
1103
|
+
```ts
|
|
1104
|
+
import { google } from '@ai-sdk/google';
|
|
1105
|
+
import { generateText } from 'ai';
|
|
1106
|
+
|
|
1107
|
+
const { text } = await generateText({
|
|
1108
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1109
|
+
prompt: 'Hello, how are you?',
|
|
1110
|
+
});
|
|
1111
|
+
```
|
|
1112
|
+
|
|
1113
|
+
`google.interactions(...)` accepts a model ID string (e.g.
|
|
1114
|
+
`'gemini-2.5-flash'`, `'gemini-3-pro-preview'`), `{ agent: <name> }` to use
|
|
1115
|
+
a Gemini [agent preset](#agent-presets), or `{ managedAgent: <name> }` to
|
|
1116
|
+
invoke a [managed agent](#managed-agents) you created on Google's side.
|
|
1117
|
+
The returned model can be passed to `generateText` and `streamText` like
|
|
1118
|
+
any other AI SDK language model.
|
|
1119
|
+
|
|
1120
|
+
<Note>
|
|
1121
|
+
Use `google(...)` for the standard `:generateContent` /
|
|
1122
|
+
`:streamGenerateContent` endpoints, and `google.interactions(...)` for the new
|
|
1123
|
+
Interactions endpoint. Pick one per model instance — they target different
|
|
1124
|
+
request bodies and SSE event vocabularies.
|
|
1125
|
+
</Note>
|
|
1126
|
+
|
|
1127
|
+
### Provider Options
|
|
1128
|
+
|
|
1129
|
+
The Interactions model reads its options from the shared
|
|
1130
|
+
`providerOptions.google.*` namespace. Validate them with the
|
|
1131
|
+
`GoogleLanguageModelInteractionsOptions` type:
|
|
1132
|
+
|
|
1133
|
+
```ts
|
|
1134
|
+
import {
|
|
1135
|
+
google,
|
|
1136
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1137
|
+
} from '@ai-sdk/google';
|
|
1138
|
+
import { generateText } from 'ai';
|
|
1139
|
+
|
|
1140
|
+
await generateText({
|
|
1141
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1142
|
+
prompt: 'What color is the sky in one word?',
|
|
1143
|
+
providerOptions: {
|
|
1144
|
+
google: {
|
|
1145
|
+
serviceTier: 'priority',
|
|
1146
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1147
|
+
},
|
|
1148
|
+
});
|
|
1149
|
+
```
|
|
1150
|
+
|
|
1151
|
+
The following optional provider options are available:
|
|
1152
|
+
|
|
1153
|
+
- **previousInteractionId** _string_
|
|
1154
|
+
|
|
1155
|
+
Server-side interaction id from a prior turn. When set, the server pulls
|
|
1156
|
+
prior context from its own state and only the new user message is sent on
|
|
1157
|
+
the wire. Pair with the default `store: true` to chain stateful
|
|
1158
|
+
conversations. See [Stateful chaining](#stateful-chaining).
|
|
1159
|
+
|
|
1160
|
+
- **store** _boolean_
|
|
1161
|
+
|
|
1162
|
+
Whether the server should persist the interaction. Defaults to `true`.
|
|
1163
|
+
Set to `false` for stateless multi-turn conversations where the full
|
|
1164
|
+
message history is re-sent on every turn.
|
|
1165
|
+
|
|
1166
|
+
- **agent** _string_
|
|
1167
|
+
|
|
1168
|
+
Name of a Gemini agent preset (e.g. `'deep-research-pro-preview-12-2025'`).
|
|
1169
|
+
|
|
1170
|
+
<Note>
|
|
1171
|
+
|
|
1172
|
+
Prefer the factory form `google.interactions({ agent: '...' })` over
|
|
1173
|
+
setting `agent` in provider options — the factory is type-checked
|
|
1174
|
+
against the supported agent names.
|
|
1175
|
+
|
|
1176
|
+
</Note>
|
|
1177
|
+
|
|
1178
|
+
- **agentConfig** _object_
|
|
1179
|
+
|
|
1180
|
+
Per-agent configuration. Currently supports `{ type: 'dynamic' }` and
|
|
1181
|
+
`{ type: 'deep-research', thinkingSummaries?, visualization?, collaborativePlanning? }`.
|
|
1182
|
+
|
|
1183
|
+
- **thinkingLevel** _'minimal' | 'low' | 'medium' | 'high'_
|
|
1184
|
+
|
|
1185
|
+
Controls reasoning depth for thinking-enabled models. Mapped onto the
|
|
1186
|
+
Interactions request's `thinking_level`.
|
|
1187
|
+
|
|
1188
|
+
- **thinkingSummaries** _'auto' | 'none'_
|
|
1189
|
+
|
|
1190
|
+
Whether the model returns synthesized thought summaries on reasoning
|
|
1191
|
+
parts. Defaults to the API default.
|
|
1192
|
+
|
|
1193
|
+
- **responseFormat** _Array\<\{ type: 'text' | 'image' | 'audio'; mimeType?: string; schema?: unknown; aspectRatio?: string; imageSize?: '1K' \| '2K' \| '4K' \| '512' \}\>_
|
|
1194
|
+
|
|
1195
|
+
Output-format entries that map directly to the API's `response_format`
|
|
1196
|
+
array. Use this for fine-grained control over image, audio, or non-JSON
|
|
1197
|
+
text outputs (e.g. `aspectRatio` and `imageSize` for image generation).
|
|
1198
|
+
The AI SDK call-level `responseFormat: { type: 'json', schema }` still
|
|
1199
|
+
drives JSON-mode automatically and prepends a matching text entry;
|
|
1200
|
+
entries listed here are appended.
|
|
1201
|
+
|
|
1202
|
+
`aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`,
|
|
1203
|
+
`9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
|
|
1204
|
+
|
|
1205
|
+
- **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_ (deprecated)
|
|
1206
|
+
|
|
1207
|
+
Use **responseFormat** with a `{ type: 'image', ... }` entry instead.
|
|
1208
|
+
Retained for backwards compatibility; the SDK translates `imageConfig`
|
|
1209
|
+
into a matching `response_format` image entry and emits a warning when
|
|
1210
|
+
set. Ignored when `responseFormat` already supplies an image entry.
|
|
1211
|
+
|
|
1212
|
+
- **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
|
|
1213
|
+
|
|
1214
|
+
Media resolution applied to image inputs / outputs.
|
|
1215
|
+
|
|
1216
|
+
- **serviceTier** _'flex' | 'standard' | 'priority'_
|
|
1217
|
+
|
|
1218
|
+
Service tier for the request. Mirrored back on
|
|
1219
|
+
`result.providerMetadata.google.serviceTier` for observability.
|
|
1220
|
+
|
|
1221
|
+
- **systemInstruction** _string_
|
|
1222
|
+
|
|
1223
|
+
Alternative to the AI SDK `system` message. If both are set, the AI SDK
|
|
1224
|
+
`system` message wins and a warning is emitted.
|
|
1225
|
+
|
|
1226
|
+
- **background** _boolean_
|
|
1227
|
+
|
|
1228
|
+
Run the interaction in the background. Required for agents whose
|
|
1229
|
+
server-side workflow cannot complete within a single request/response;
|
|
1230
|
+
rejected by agents that only support synchronous calls. When `true`,
|
|
1231
|
+
the POST returns a non-terminal status and the SDK polls
|
|
1232
|
+
`GET /interactions/{id}` until the work completes.
|
|
1233
|
+
|
|
1234
|
+
- **environment** _string \| object_
|
|
1235
|
+
|
|
1236
|
+
Sandbox environment configuration for [managed agents](#managed-agents).
|
|
1237
|
+
Pass `'remote'` to provision a fresh sandbox, an `environment_id`
|
|
1238
|
+
string to reuse an existing one, or an object of the form
|
|
1239
|
+
`{ type: 'remote', sources?, network? }` to preload files and/or
|
|
1240
|
+
constrain outbound traffic. Only applies to agent calls.
|
|
1241
|
+
|
|
1242
|
+
- **pollingTimeoutMs** _number_
|
|
1243
|
+
|
|
1244
|
+
Maximum time, in milliseconds, to poll a background interaction before
|
|
1245
|
+
giving up. Defaults to 30 minutes (1,800,000 ms). Long-running agents
|
|
1246
|
+
may need longer.
|
|
1247
|
+
|
|
1248
|
+
### Provider Metadata
|
|
1249
|
+
|
|
1250
|
+
`result.providerMetadata.google` (typed via `GoogleInteractionsProviderMetadata`)
|
|
1251
|
+
exposes:
|
|
1252
|
+
|
|
1253
|
+
- **interactionId** _string_
|
|
1254
|
+
|
|
1255
|
+
Server-side interaction id. Pass this back as `previousInteractionId` on
|
|
1256
|
+
the next turn to chain.
|
|
1257
|
+
|
|
1258
|
+
- **serviceTier** _string_
|
|
1259
|
+
|
|
1260
|
+
Service tier the request actually ran on.
|
|
1261
|
+
|
|
1262
|
+
- **signature** _string_
|
|
1263
|
+
|
|
1264
|
+
Per-block signature hash, set by the SDK on output reasoning and
|
|
1265
|
+
tool-call parts. Round-tripped automatically on the next turn.
|
|
1266
|
+
|
|
1267
|
+
### Stateful chaining
|
|
1268
|
+
|
|
1269
|
+
With the default `store: true`, the server retains the prior turn so the
|
|
1270
|
+
next request only needs to send the new user message and the
|
|
1271
|
+
`previousInteractionId`:
|
|
1272
|
+
|
|
1273
|
+
```ts
|
|
1274
|
+
import {
|
|
1275
|
+
google,
|
|
1276
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1277
|
+
} from '@ai-sdk/google';
|
|
1278
|
+
import { generateText } from 'ai';
|
|
1279
|
+
|
|
1280
|
+
const turn1 = await generateText({
|
|
1281
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1282
|
+
prompt: 'What are the three largest cities in Spain?',
|
|
1283
|
+
});
|
|
1284
|
+
|
|
1285
|
+
const interactionId = turn1.providerMetadata?.google?.interactionId as
|
|
1286
|
+
| string
|
|
1287
|
+
| undefined;
|
|
1288
|
+
|
|
1289
|
+
const turn2 = await generateText({
|
|
1290
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1291
|
+
prompt: 'What is the most famous landmark in the second one?',
|
|
1292
|
+
providerOptions: {
|
|
1293
|
+
google: {
|
|
1294
|
+
previousInteractionId: interactionId,
|
|
1295
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1296
|
+
},
|
|
1297
|
+
});
|
|
1298
|
+
```
|
|
1299
|
+
|
|
1300
|
+
For stateless multi-turn conversations, set `store: false` and re-send the
|
|
1301
|
+
full message history on every turn (no `previousInteractionId`):
|
|
1302
|
+
|
|
1303
|
+
```ts
|
|
1304
|
+
import {
|
|
1305
|
+
google,
|
|
1306
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1307
|
+
} from '@ai-sdk/google';
|
|
1308
|
+
import { generateText, type ModelMessage } from 'ai';
|
|
1309
|
+
|
|
1310
|
+
const messages: Array<ModelMessage> = [
|
|
1311
|
+
{ role: 'user', content: 'What are the three largest cities in Spain?' },
|
|
1312
|
+
];
|
|
1313
|
+
|
|
1314
|
+
const turn1 = await generateText({
|
|
1315
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1316
|
+
messages,
|
|
1317
|
+
providerOptions: {
|
|
1318
|
+
google: { store: false } satisfies GoogleLanguageModelInteractionsOptions,
|
|
1319
|
+
},
|
|
1320
|
+
});
|
|
1321
|
+
|
|
1322
|
+
messages.push(...turn1.responseMessages);
|
|
1323
|
+
messages.push({
|
|
1324
|
+
role: 'user',
|
|
1325
|
+
content: 'What is the most famous landmark in the second one?',
|
|
1326
|
+
});
|
|
1327
|
+
|
|
1328
|
+
const turn2 = await generateText({
|
|
1329
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1330
|
+
messages,
|
|
1331
|
+
providerOptions: {
|
|
1332
|
+
google: { store: false } satisfies GoogleLanguageModelInteractionsOptions,
|
|
1333
|
+
},
|
|
1334
|
+
});
|
|
1335
|
+
```
|
|
1336
|
+
|
|
1337
|
+
### Built-in Tools
|
|
1338
|
+
|
|
1339
|
+
The Interactions API ships a built-in tool catalog. The provider-defined
|
|
1340
|
+
tools under `google.tools.*` map onto Interactions tool descriptors:
|
|
1341
|
+
|
|
1342
|
+
| AI SDK tool | Interactions tool type | Notes |
|
|
1343
|
+
| ------------------------------------- | ---------------------- | ----------------------------------------- |
|
|
1344
|
+
| `google.tools.googleSearch` | `google_search` | Web / image search grounding. |
|
|
1345
|
+
| `google.tools.codeExecution` | `code_execution` | Server-side Python execution. |
|
|
1346
|
+
| `google.tools.urlContext` | `url_context` | Fetch URLs referenced in the prompt. |
|
|
1347
|
+
| `google.tools.fileSearch` | `file_search` | Retrieval from File Search stores. |
|
|
1348
|
+
| `google.tools.googleMaps` | `google_maps` | Maps grounding for nearby-places queries. |
|
|
1349
|
+
| _provider tool_ `google.computer_use` | `computer_use` | Computer use (browser environment). |
|
|
1350
|
+
| _provider tool_ `google.mcp_server` | `mcp_server` | Remote MCP server passthrough. |
|
|
1351
|
+
| _provider tool_ `google.retrieval` | `retrieval` | Vertex AI Search retrieval. |
|
|
1352
|
+
|
|
1353
|
+
Function tools (`type: 'function'`) defined with the AI SDK `tool(...)`
|
|
1354
|
+
helper are translated to Interactions `function` tool descriptors. Other
|
|
1355
|
+
tool kinds emit a warning and are dropped.
|
|
1356
|
+
|
|
1357
|
+
```ts
|
|
1358
|
+
import { google } from '@ai-sdk/google';
|
|
1359
|
+
import { generateText } from 'ai';
|
|
1360
|
+
|
|
1361
|
+
const { text, sources } = await generateText({
|
|
1362
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1363
|
+
tools: {
|
|
1364
|
+
google_search: google.tools.googleSearch({}),
|
|
1365
|
+
},
|
|
1366
|
+
prompt:
|
|
1367
|
+
"What's a notable AI development from this past week? " +
|
|
1368
|
+
'Include the date for each item you mention.',
|
|
1369
|
+
});
|
|
1370
|
+
```
|
|
1371
|
+
|
|
1372
|
+
Function tools work the same way as on the standard provider:
|
|
1373
|
+
|
|
1374
|
+
```ts
|
|
1375
|
+
import { google } from '@ai-sdk/google';
|
|
1376
|
+
import { generateText, stepCountIs, tool } from 'ai';
|
|
1377
|
+
import { z } from 'zod';
|
|
1378
|
+
|
|
1379
|
+
const weatherTool = tool({
|
|
1380
|
+
description: 'Get the weather for a city.',
|
|
1381
|
+
inputSchema: z.object({ city: z.string() }),
|
|
1382
|
+
execute: async ({ city }) => `It is sunny in ${city}.`,
|
|
1383
|
+
});
|
|
1384
|
+
|
|
1385
|
+
const { text, toolCalls } = await generateText({
|
|
1386
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1387
|
+
tools: { getWeather: weatherTool },
|
|
1388
|
+
stopWhen: stepCountIs(5),
|
|
1389
|
+
prompt: 'What is the weather in San Francisco right now?',
|
|
1390
|
+
});
|
|
1391
|
+
```
|
|
1392
|
+
|
|
1393
|
+
### Image output via Interactions
|
|
1394
|
+
|
|
1395
|
+
Add a `{ type: 'image' }` entry to `responseFormat` on a Gemini
|
|
1396
|
+
image-capable model to get images as `LanguageModelV4FilePart` files in
|
|
1397
|
+
the response. No tool wrapping is required, and the entry doubles as the
|
|
1398
|
+
place to set `aspectRatio`, `imageSize`, and `mimeType`.
|
|
1399
|
+
|
|
1400
|
+
```ts
|
|
1401
|
+
import {
|
|
1402
|
+
google,
|
|
1403
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1404
|
+
} from '@ai-sdk/google';
|
|
1405
|
+
import { generateText } from 'ai';
|
|
1406
|
+
|
|
1407
|
+
const result = await generateText({
|
|
1408
|
+
model: google.interactions('gemini-3-pro-image-preview'),
|
|
1409
|
+
prompt: 'Generate an image of a comic cat in a spaceship.',
|
|
1410
|
+
providerOptions: {
|
|
1411
|
+
google: {
|
|
1412
|
+
responseFormat: [{ type: 'image' }],
|
|
1413
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1414
|
+
},
|
|
1415
|
+
});
|
|
1416
|
+
|
|
1417
|
+
for (const file of result.files) {
|
|
1418
|
+
if (file.mediaType.startsWith('image/')) {
|
|
1419
|
+
// file.uint8Array | file.base64 | file.mediaType
|
|
1420
|
+
}
|
|
1421
|
+
}
|
|
1422
|
+
```
|
|
1423
|
+
|
|
1424
|
+
To control aspect ratio, image size, or output mime type, add those
|
|
1425
|
+
fields to the same image entry:
|
|
1426
|
+
|
|
1427
|
+
```ts
|
|
1428
|
+
import {
|
|
1429
|
+
google,
|
|
1430
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1431
|
+
} from '@ai-sdk/google';
|
|
1432
|
+
import { generateText } from 'ai';
|
|
1433
|
+
|
|
1434
|
+
const result = await generateText({
|
|
1435
|
+
model: google.interactions('gemini-3-pro-image-preview'),
|
|
1436
|
+
prompt: 'Generate a high-quality landscape photo of mountains at sunset.',
|
|
1437
|
+
providerOptions: {
|
|
1438
|
+
google: {
|
|
1439
|
+
responseFormat: [
|
|
1440
|
+
{
|
|
1441
|
+
type: 'image',
|
|
1442
|
+
aspectRatio: '16:9',
|
|
1443
|
+
imageSize: '4K',
|
|
1444
|
+
},
|
|
1445
|
+
],
|
|
1446
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1447
|
+
},
|
|
1448
|
+
});
|
|
1449
|
+
```
|
|
1450
|
+
|
|
1451
|
+
For multimodal output, list one entry per modality. The model returns
|
|
1452
|
+
text in `result.text` and the accompanying image(s) in `result.files`:
|
|
1453
|
+
|
|
1454
|
+
```ts
|
|
1455
|
+
import {
|
|
1456
|
+
google,
|
|
1457
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1458
|
+
} from '@ai-sdk/google';
|
|
1459
|
+
import { generateText } from 'ai';
|
|
1460
|
+
|
|
1461
|
+
const result = await generateText({
|
|
1462
|
+
model: google.interactions('gemini-2.5-flash-image'),
|
|
1463
|
+
prompt:
|
|
1464
|
+
'Tell me a three sentence bedtime story about a unicorn, accompanied by a suitable illustration.',
|
|
1465
|
+
providerOptions: {
|
|
1466
|
+
google: {
|
|
1467
|
+
responseFormat: [
|
|
1468
|
+
{ type: 'text' },
|
|
1469
|
+
{ type: 'image', aspectRatio: '16:9' },
|
|
1470
|
+
],
|
|
1471
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1472
|
+
},
|
|
1473
|
+
});
|
|
1474
|
+
|
|
1475
|
+
console.log(result.text);
|
|
1476
|
+
|
|
1477
|
+
const images = result.files.filter(file => file.mediaType.startsWith('image/'));
|
|
1478
|
+
// images[0].uint8Array | images[0].base64 | images[0].mediaType
|
|
1479
|
+
```
|
|
1480
|
+
|
|
1481
|
+
Iterative image editing pairs naturally with stateful chaining — keep
|
|
1482
|
+
`previousInteractionId` set across turns and the model edits its prior
|
|
1483
|
+
output:
|
|
1484
|
+
|
|
1485
|
+
```ts
|
|
1486
|
+
import {
|
|
1487
|
+
google,
|
|
1488
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1489
|
+
} from '@ai-sdk/google';
|
|
1490
|
+
import { generateText } from 'ai';
|
|
1491
|
+
|
|
1492
|
+
const model = google.interactions('gemini-3-pro-image-preview');
|
|
1493
|
+
|
|
1494
|
+
const turn1 = await generateText({
|
|
1495
|
+
model,
|
|
1496
|
+
prompt: 'Generate an image of a comic cat in a spaceship.',
|
|
1497
|
+
providerOptions: {
|
|
1498
|
+
google: {
|
|
1499
|
+
responseFormat: [{ type: 'image' }],
|
|
1500
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1501
|
+
},
|
|
1502
|
+
});
|
|
1503
|
+
|
|
1504
|
+
const interactionId = turn1.providerMetadata?.google?.interactionId as
|
|
1505
|
+
| string
|
|
1506
|
+
| undefined;
|
|
1507
|
+
|
|
1508
|
+
const turn2 = await generateText({
|
|
1509
|
+
model,
|
|
1510
|
+
prompt: 'now make the cat red',
|
|
1511
|
+
providerOptions: {
|
|
1512
|
+
google: {
|
|
1513
|
+
responseFormat: [{ type: 'image' }],
|
|
1514
|
+
previousInteractionId: interactionId,
|
|
1515
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1516
|
+
},
|
|
1517
|
+
});
|
|
1518
|
+
```
|
|
1519
|
+
|
|
1520
|
+
### Agent presets
|
|
1521
|
+
|
|
1522
|
+
Pass `{ agent: <name> }` to target a Gemini agent preset. The factory
|
|
1523
|
+
type-checks the agent name against the supported set:
|
|
1524
|
+
|
|
1525
|
+
```ts
|
|
1526
|
+
import {
|
|
1527
|
+
google,
|
|
1528
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1529
|
+
} from '@ai-sdk/google';
|
|
1530
|
+
import { generateText } from 'ai';
|
|
1531
|
+
|
|
1532
|
+
const result = await generateText({
|
|
1533
|
+
model: google.interactions({
|
|
1534
|
+
agent: 'deep-research-pro-preview-12-2025',
|
|
1535
|
+
}),
|
|
1536
|
+
prompt:
|
|
1537
|
+
'Briefly summarize the most-cited papers on retrieval-augmented generation since 2024 (2-3 sentences).',
|
|
1538
|
+
providerOptions: {
|
|
1539
|
+
google: {
|
|
1540
|
+
background: true,
|
|
1541
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1542
|
+
},
|
|
1543
|
+
});
|
|
1544
|
+
```
|
|
1545
|
+
|
|
1546
|
+
Whether an agent runs synchronously or in the background depends on the
|
|
1547
|
+
agent. Long-running presets (such as the `deep-research-*` family)
|
|
1548
|
+
require `background: true` — the POST returns a non-terminal status and
|
|
1549
|
+
the SDK polls `GET /interactions/{id}` internally until the interaction
|
|
1550
|
+
completes. Other agents accept synchronous calls only and will reject
|
|
1551
|
+
`background: true`. Set the flag explicitly via
|
|
1552
|
+
`providerOptions.google.background`.
|
|
1553
|
+
|
|
1554
|
+
The default polling timeout is 30 minutes; raise it via
|
|
1555
|
+
`pollingTimeoutMs` for slower agents:
|
|
1556
|
+
|
|
1557
|
+
```ts
|
|
1558
|
+
import {
|
|
1559
|
+
google,
|
|
1560
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1561
|
+
} from '@ai-sdk/google';
|
|
1562
|
+
import { generateText } from 'ai';
|
|
1563
|
+
|
|
1564
|
+
await generateText({
|
|
1565
|
+
model: google.interactions({ agent: 'deep-research-max-preview-04-2026' }),
|
|
1566
|
+
prompt: 'Produce a long-form research brief on ...',
|
|
1567
|
+
providerOptions: {
|
|
1568
|
+
google: {
|
|
1569
|
+
background: true,
|
|
1570
|
+
pollingTimeoutMs: 60 * 60 * 1000, // 1 hour
|
|
1571
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1572
|
+
},
|
|
1573
|
+
});
|
|
1574
|
+
```
|
|
1575
|
+
|
|
1576
|
+
Agent calls chain through `previousInteractionId` the same way model-ID calls do.
|
|
1577
|
+
|
|
1578
|
+
### Managed Agents
|
|
1579
|
+
|
|
1580
|
+
[Managed agents](https://ai.google.dev/gemini-api/docs/agents) run inside a
|
|
1581
|
+
sandboxed Linux environment provisioned per interaction. Pass the `environment`
|
|
1582
|
+
provider option to control how the sandbox is set up; the option is only
|
|
1583
|
+
accepted on agent calls.
|
|
1584
|
+
|
|
1585
|
+
The simplest form provisions a fresh sandbox:
|
|
1586
|
+
|
|
1587
|
+
```ts
|
|
1588
|
+
import {
|
|
1589
|
+
google,
|
|
1590
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1591
|
+
} from '@ai-sdk/google';
|
|
1592
|
+
import { generateText } from 'ai';
|
|
1593
|
+
|
|
1594
|
+
const result = await generateText({
|
|
1595
|
+
model: google.interactions({ agent: 'antigravity-preview-05-2026' }),
|
|
1596
|
+
prompt: 'What is 2 + 2?',
|
|
1597
|
+
providerOptions: {
|
|
1598
|
+
google: {
|
|
1599
|
+
environment: 'remote',
|
|
1600
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1601
|
+
},
|
|
1602
|
+
});
|
|
1603
|
+
```
|
|
1604
|
+
|
|
1605
|
+
`environment` accepts three shapes:
|
|
1606
|
+
|
|
1607
|
+
- `'remote'` — provision a fresh sandbox for this call.
|
|
1608
|
+
- any other string — an `environment_id` to reuse, forking the previous
|
|
1609
|
+
sandbox so its filesystem and installed packages persist.
|
|
1610
|
+
- an object — provision a fresh sandbox and optionally preload `sources`
|
|
1611
|
+
and/or constrain outbound traffic via `network`:
|
|
1612
|
+
|
|
1613
|
+
```ts
|
|
1614
|
+
import {
|
|
1615
|
+
google,
|
|
1616
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1617
|
+
} from '@ai-sdk/google';
|
|
1618
|
+
import { generateText } from 'ai';
|
|
1619
|
+
|
|
1620
|
+
await generateText({
|
|
1621
|
+
model: google.interactions({ agent: 'antigravity-preview-05-2026' }),
|
|
1622
|
+
prompt:
|
|
1623
|
+
'Read the file at /data/note.txt and tell me exactly what it contains.',
|
|
1624
|
+
providerOptions: {
|
|
1625
|
+
google: {
|
|
1626
|
+
environment: {
|
|
1627
|
+
type: 'remote',
|
|
1628
|
+
sources: [
|
|
1629
|
+
{
|
|
1630
|
+
type: 'inline',
|
|
1631
|
+
content: 'hello from the AI SDK example\n',
|
|
1632
|
+
target: '/data/note.txt',
|
|
1633
|
+
},
|
|
1634
|
+
],
|
|
1635
|
+
},
|
|
1636
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1637
|
+
},
|
|
1638
|
+
});
|
|
1639
|
+
```
|
|
1640
|
+
|
|
1641
|
+
Three source types are supported: `inline` (write a string into the
|
|
1642
|
+
sandbox at `target`), `repository` (clone a git repository — pass the
|
|
1643
|
+
URL as `source`), and `gcs` (mount a Google Cloud Storage prefix).
|
|
1644
|
+
|
|
1645
|
+
The `network` field accepts the string `'disabled'` to block all
|
|
1646
|
+
outbound traffic, or an object with an `allowlist` array whose entries
|
|
1647
|
+
each carry a `domain` plus an optional `transform` array of header
|
|
1648
|
+
objects to inject into matching requests.
|
|
1649
|
+
|
|
1650
|
+
#### Custom managed agents
|
|
1651
|
+
|
|
1652
|
+
For user-defined agents that you created on Google's side via the
|
|
1653
|
+
Gemini API's `/v1beta/agents` endpoint, pass the agent's name through the dedicated
|
|
1654
|
+
`managedAgent` factory shape instead of `agent` (which only accepts
|
|
1655
|
+
known preset names):
|
|
1656
|
+
|
|
1657
|
+
```ts
|
|
1658
|
+
import {
|
|
1659
|
+
google,
|
|
1660
|
+
type GoogleLanguageModelInteractionsOptions,
|
|
1661
|
+
} from '@ai-sdk/google';
|
|
1662
|
+
import { generateText } from 'ai';
|
|
1663
|
+
|
|
1664
|
+
const result = await generateText({
|
|
1665
|
+
model: google.interactions({ managedAgent: 'my-custom-agent' }),
|
|
1666
|
+
prompt: 'Hello!',
|
|
1667
|
+
providerOptions: {
|
|
1668
|
+
google: {
|
|
1669
|
+
environment: 'remote',
|
|
1670
|
+
} satisfies GoogleLanguageModelInteractionsOptions,
|
|
1671
|
+
},
|
|
1672
|
+
});
|
|
1673
|
+
```
|
|
1674
|
+
|
|
1675
|
+
### Streaming
|
|
1676
|
+
|
|
1677
|
+
`streamText` is supported. The stream's `finish` part exposes
|
|
1678
|
+
`interactionId` on `providerMetadata.google` so callers can chain.
|
|
1679
|
+
|
|
1680
|
+
```ts
|
|
1681
|
+
import { google } from '@ai-sdk/google';
|
|
1682
|
+
import { streamText } from 'ai';
|
|
1683
|
+
|
|
1684
|
+
const result = streamText({
|
|
1685
|
+
model: google.interactions('gemini-2.5-flash'),
|
|
1686
|
+
prompt: 'Hello, how are you?',
|
|
1687
|
+
});
|
|
1688
|
+
|
|
1689
|
+
for await (const textPart of result.textStream) {
|
|
1690
|
+
process.stdout.write(textPart);
|
|
1691
|
+
}
|
|
1692
|
+
|
|
1693
|
+
const googleMetadata = (await result.providerMetadata)?.google;
|
|
1694
|
+
console.log('Interaction id:', googleMetadata?.interactionId);
|
|
1695
|
+
```
|
|
1696
|
+
|
|
1082
1697
|
## Gemma Models
|
|
1083
1698
|
|
|
1084
1699
|
You can use [Gemma models](https://deepmind.google/models/gemma/) with the Google Generative AI API.
|
|
@@ -1111,12 +1726,12 @@ using the `.embedding()` factory method.
|
|
|
1111
1726
|
const model = google.embedding('gemini-embedding-001');
|
|
1112
1727
|
```
|
|
1113
1728
|
|
|
1114
|
-
The Google
|
|
1729
|
+
The Google provider sends API calls to the right endpoint based on the type of embedding:
|
|
1115
1730
|
|
|
1116
1731
|
- **Single embeddings**: When embedding a single value with `embed()`, the provider uses the single `:embedContent` endpoint, which typically has higher rate limits compared to the batch endpoint.
|
|
1117
1732
|
- **Batch embeddings**: When embedding multiple values with `embedMany()` or multiple values in `embed()`, the provider uses the `:batchEmbedContents` endpoint.
|
|
1118
1733
|
|
|
1119
|
-
Google
|
|
1734
|
+
Google embedding models support additional settings. You can pass them as an options argument:
|
|
1120
1735
|
|
|
1121
1736
|
```ts
|
|
1122
1737
|
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
|
|
@@ -1158,7 +1773,7 @@ const { embeddings } = await embedMany({
|
|
|
1158
1773
|
});
|
|
1159
1774
|
```
|
|
1160
1775
|
|
|
1161
|
-
The following optional provider options are available for Google
|
|
1776
|
+
The following optional provider options are available for Google embedding models:
|
|
1162
1777
|
|
|
1163
1778
|
- **outputDimensionality**: _number_
|
|
1164
1779
|
|
|
@@ -1167,7 +1782,6 @@ The following optional provider options are available for Google Generative AI e
|
|
|
1167
1782
|
- **taskType**: _string_
|
|
1168
1783
|
|
|
1169
1784
|
Optional. Specifies the task type for generating embeddings. Supported task types include:
|
|
1170
|
-
|
|
1171
1785
|
- `SEMANTIC_SIMILARITY`: Optimized for text similarity.
|
|
1172
1786
|
- `CLASSIFICATION`: Optimized for text classification.
|
|
1173
1787
|
- `CLUSTERING`: Optimized for clustering texts based on similarity.
|
|
@@ -1179,13 +1793,14 @@ The following optional provider options are available for Google Generative AI e
|
|
|
1179
1793
|
|
|
1180
1794
|
- **content**: _array_
|
|
1181
1795
|
|
|
1182
|
-
Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be
|
|
1796
|
+
Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be `{ text: string }`, `{ inlineData: { mimeType: string, data: string } }` for inline base64 data, or `{ fileData: { fileUri: string, mimeType: string } }` to reference remote content via HTTP URL or Google Cloud Storage URI (`gs://...`). Supported by `gemini-embedding-2` and `gemini-embedding-2-preview`.
|
|
1183
1797
|
|
|
1184
1798
|
### Model Capabilities
|
|
1185
1799
|
|
|
1186
1800
|
| Model | Default Dimensions | Custom Dimensions | Multimodal |
|
|
1187
1801
|
| ---------------------------- | ------------------ | ------------------- | ------------------- |
|
|
1188
1802
|
| `gemini-embedding-001` | 3072 | <Check size={18} /> | <Cross size={18} /> |
|
|
1803
|
+
| `gemini-embedding-2` | 3072 | <Check size={18} /> | <Check size={18} /> |
|
|
1189
1804
|
| `gemini-embedding-2-preview` | 3072 | <Check size={18} /> | <Check size={18} /> |
|
|
1190
1805
|
|
|
1191
1806
|
## Image Models
|
|
@@ -1309,6 +1924,29 @@ const { image } = await generateImage({
|
|
|
1309
1924
|
details.
|
|
1310
1925
|
</Note>
|
|
1311
1926
|
|
|
1927
|
+
#### Google Search Grounding
|
|
1928
|
+
|
|
1929
|
+
Gemini image models support [Google Search grounding](#google-search) through `providerOptions.google.googleSearch`. The value matches the args of `google.tools.googleSearch(...)`; pass `{}` to enable with defaults, or `{ searchTypes: { imageSearch: {} } }` to ground on reference photos.
|
|
1930
|
+
|
|
1931
|
+
```ts
|
|
1932
|
+
import { google } from '@ai-sdk/google';
|
|
1933
|
+
import { generateImage } from 'ai';
|
|
1934
|
+
|
|
1935
|
+
const result = await generateImage({
|
|
1936
|
+
model: google.image('gemini-3.1-flash-image-preview'),
|
|
1937
|
+
prompt:
|
|
1938
|
+
'Search for live footage of the 2026 Super Bowl halftime show artist, then generate a close-up in space.',
|
|
1939
|
+
providerOptions: {
|
|
1940
|
+
google: {
|
|
1941
|
+
googleSearch: { searchTypes: { imageSearch: {} } },
|
|
1942
|
+
},
|
|
1943
|
+
},
|
|
1944
|
+
});
|
|
1945
|
+
|
|
1946
|
+
// Grounding metadata is forwarded onto the image result:
|
|
1947
|
+
console.log(result.providerMetadata?.google?.groundingMetadata);
|
|
1948
|
+
```
|
|
1949
|
+
|
|
1312
1950
|
#### Gemini Image Model Capabilities
|
|
1313
1951
|
|
|
1314
1952
|
| Model | Image Generation | Image Editing | Aspect Ratios |
|
|
@@ -1323,3 +1961,80 @@ const { image } = await generateImage({
|
|
|
1323
1961
|
2K, 4K via `providerOptions.google.imageConfig.imageSize`), and Google Search
|
|
1324
1962
|
grounding.
|
|
1325
1963
|
</Note>
|
|
1964
|
+
|
|
1965
|
+
## Speech Models
|
|
1966
|
+
|
|
1967
|
+
You can create models that call the [Gemini text-to-speech API](https://ai.google.dev/gemini-api/docs/speech-generation)
|
|
1968
|
+
using the `.speech()` factory method.
|
|
1969
|
+
|
|
1970
|
+
The first argument is the model id, e.g. `gemini-2.5-flash-preview-tts`.
|
|
1971
|
+
|
|
1972
|
+
```ts
|
|
1973
|
+
const model = google.speech('gemini-2.5-flash-preview-tts');
|
|
1974
|
+
```
|
|
1975
|
+
|
|
1976
|
+
The `voice` argument can be set to one of Gemini's [30 prebuilt voices](https://ai.google.dev/gemini-api/docs/speech-generation#voices)
|
|
1977
|
+
e.g. `Kore`, `Puck`, `Zephyr`, or `Charon`. Voice names are case-sensitive. The default voice is `Kore`.
|
|
1978
|
+
|
|
1979
|
+
```ts highlight="7"
|
|
1980
|
+
import { generateSpeech } from 'ai';
|
|
1981
|
+
import { google } from '@ai-sdk/google';
|
|
1982
|
+
|
|
1983
|
+
const result = await generateSpeech({
|
|
1984
|
+
model: google.speech('gemini-2.5-flash-preview-tts'),
|
|
1985
|
+
text: 'Hello, world!',
|
|
1986
|
+
voice: 'Kore', // Gemini voice name
|
|
1987
|
+
});
|
|
1988
|
+
```
|
|
1989
|
+
|
|
1990
|
+
By default the generated audio is returned as a playable WAV file (`result.audio.mediaType` is
|
|
1991
|
+
`audio/wav`). Set `outputFormat: 'pcm'` to receive the raw signed 16-bit little-endian mono PCM
|
|
1992
|
+
bytes instead; the sample rate is reported in `result.providerMetadata.google.sampleRate`.
|
|
1993
|
+
|
|
1994
|
+
Gemini honors natural-language style direction. The `instructions` argument is prepended to the
|
|
1995
|
+
spoken text, so `instructions: 'Say cheerfully'` with `text: 'Hello'` is sent to the model as `Say cheerfully: Hello`.
|
|
1996
|
+
|
|
1997
|
+
### Multi-speaker audio
|
|
1998
|
+
|
|
1999
|
+
For multi-speaker dialogue, pass a `multiSpeakerVoiceConfig` through `providerOptions`. Each speaker
|
|
2000
|
+
name must match a name used in the input text. When set, it overrides the top-level `voice`.
|
|
2001
|
+
|
|
2002
|
+
```ts highlight="7-22"
|
|
2003
|
+
import { generateSpeech } from 'ai';
|
|
2004
|
+
import { google, type GoogleSpeechModelOptions } from '@ai-sdk/google';
|
|
2005
|
+
|
|
2006
|
+
const result = await generateSpeech({
|
|
2007
|
+
model: google.speech('gemini-2.5-flash-preview-tts'),
|
|
2008
|
+
text: 'Joe: How are you? Jane: Doing great, thanks!',
|
|
2009
|
+
providerOptions: {
|
|
2010
|
+
google: {
|
|
2011
|
+
multiSpeakerVoiceConfig: {
|
|
2012
|
+
speakerVoiceConfigs: [
|
|
2013
|
+
{
|
|
2014
|
+
speaker: 'Joe',
|
|
2015
|
+
voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } },
|
|
2016
|
+
},
|
|
2017
|
+
{
|
|
2018
|
+
speaker: 'Jane',
|
|
2019
|
+
voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Puck' } },
|
|
2020
|
+
},
|
|
2021
|
+
],
|
|
2022
|
+
},
|
|
2023
|
+
} satisfies GoogleSpeechModelOptions,
|
|
2024
|
+
},
|
|
2025
|
+
});
|
|
2026
|
+
```
|
|
2027
|
+
|
|
2028
|
+
<Note>
|
|
2029
|
+
Gemini TTS models do not support the `speed` or `language` options; passing
|
|
2030
|
+
them adds a warning to `result.warnings`. Language is detected automatically
|
|
2031
|
+
from the input text.
|
|
2032
|
+
</Note>
|
|
2033
|
+
|
|
2034
|
+
### Model Capabilities
|
|
2035
|
+
|
|
2036
|
+
| Model | Multi-speaker | Style via instructions |
|
|
2037
|
+
| ------------------------------ | ------------------- | ---------------------- |
|
|
2038
|
+
| `gemini-2.5-flash-preview-tts` | <Check size={18} /> | <Check size={18} /> |
|
|
2039
|
+
| `gemini-2.5-pro-preview-tts` | <Check size={18} /> | <Check size={18} /> |
|
|
2040
|
+
| `gemini-3.1-flash-tts-preview` | <Check size={18} /> | <Check size={18} /> |
|