@ai-sdk/google-vertex 5.0.0-beta.10 → 5.0.0-beta.108

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +930 -8
  2. package/README.md +65 -2
  3. package/dist/anthropic/edge/index.d.ts +18 -151
  4. package/dist/anthropic/edge/index.js +88 -70
  5. package/dist/anthropic/edge/index.js.map +1 -1
  6. package/dist/anthropic/index.d.ts +18 -151
  7. package/dist/anthropic/index.js +88 -81
  8. package/dist/anthropic/index.js.map +1 -1
  9. package/dist/edge/index.d.ts +58 -29
  10. package/dist/edge/index.js +581 -278
  11. package/dist/edge/index.js.map +1 -1
  12. package/dist/index.d.ts +70 -32
  13. package/dist/index.js +590 -296
  14. package/dist/index.js.map +1 -1
  15. package/dist/maas/edge/index.d.ts +76 -0
  16. package/dist/maas/edge/index.js +209 -0
  17. package/dist/maas/edge/index.js.map +1 -0
  18. package/dist/maas/index.d.ts +60 -0
  19. package/dist/maas/index.js +109 -0
  20. package/dist/maas/index.js.map +1 -0
  21. package/dist/xai/edge/index.d.ts +92 -0
  22. package/dist/xai/edge/index.js +259 -0
  23. package/dist/xai/edge/index.js.map +1 -0
  24. package/dist/xai/index.d.ts +76 -0
  25. package/dist/xai/index.js +159 -0
  26. package/dist/xai/index.js.map +1 -0
  27. package/docs/16-google-vertex.mdx +640 -102
  28. package/maas/edge.d.ts +1 -0
  29. package/maas/index.d.ts +1 -0
  30. package/package.json +49 -24
  31. package/src/anthropic/edge/google-vertex-anthropic-provider-edge.ts +17 -13
  32. package/src/anthropic/edge/index.ts +6 -2
  33. package/src/anthropic/{google-vertex-anthropic-messages-options.ts → google-vertex-anthropic-options.ts} +4 -1
  34. package/src/anthropic/google-vertex-anthropic-provider-node.ts +18 -13
  35. package/src/anthropic/google-vertex-anthropic-provider.ts +68 -19
  36. package/src/anthropic/index.ts +6 -2
  37. package/src/edge/google-vertex-provider-edge.ts +10 -12
  38. package/src/edge/index.ts +8 -1
  39. package/src/google-vertex-auth-google-auth-library.ts +13 -26
  40. package/src/google-vertex-config.ts +2 -2
  41. package/src/{google-vertex-embedding-options.ts → google-vertex-embedding-model-options.ts} +1 -0
  42. package/src/google-vertex-embedding-model.ts +35 -10
  43. package/src/google-vertex-image-model-options.ts +74 -0
  44. package/src/google-vertex-image-model.ts +106 -133
  45. package/src/google-vertex-options.ts +1 -1
  46. package/src/google-vertex-provider-base.ts +311 -0
  47. package/src/google-vertex-provider.ts +43 -233
  48. package/src/google-vertex-speech-model-options.ts +11 -0
  49. package/src/google-vertex-transcription-model-options.ts +46 -0
  50. package/src/google-vertex-transcription-model.ts +231 -0
  51. package/src/google-vertex-video-model-options.ts +49 -0
  52. package/src/google-vertex-video-model.ts +39 -75
  53. package/src/index.ts +20 -5
  54. package/src/maas/edge/google-vertex-maas-provider-edge.ts +64 -0
  55. package/src/maas/edge/index.ts +13 -0
  56. package/src/maas/google-vertex-maas-options.ts +15 -0
  57. package/src/maas/google-vertex-maas-provider-node.ts +65 -0
  58. package/src/maas/google-vertex-maas-provider.ts +122 -0
  59. package/src/maas/index.ts +13 -0
  60. package/src/xai/edge/google-vertex-xai-provider-edge.ts +61 -0
  61. package/src/xai/edge/index.ts +9 -0
  62. package/src/xai/google-vertex-xai-options.ts +7 -0
  63. package/src/xai/google-vertex-xai-provider-node.ts +62 -0
  64. package/src/xai/google-vertex-xai-provider.ts +212 -0
  65. package/src/xai/index.ts +9 -0
  66. package/xai/edge.d.ts +1 -0
  67. package/xai/index.d.ts +1 -0
  68. package/dist/anthropic/edge/index.d.mts +0 -231
  69. package/dist/anthropic/edge/index.mjs +0 -259
  70. package/dist/anthropic/edge/index.mjs.map +0 -1
  71. package/dist/anthropic/index.d.mts +0 -215
  72. package/dist/anthropic/index.mjs +0 -164
  73. package/dist/anthropic/index.mjs.map +0 -1
  74. package/dist/edge/index.d.mts +0 -160
  75. package/dist/edge/index.mjs +0 -1049
  76. package/dist/edge/index.mjs.map +0 -1
  77. package/dist/index.d.mts +0 -219
  78. package/dist/index.mjs +0 -960
  79. package/dist/index.mjs.map +0 -1
  80. package/src/google-vertex-provider-node.ts +0 -49
@@ -5,19 +5,20 @@ description: Learn how to use the Google Vertex AI provider.
5
5
 
6
6
  # Google Vertex Provider
7
7
 
8
- The Google Vertex provider for the [AI SDK](/docs) contains language model support for the [Google Vertex AI](https://cloud.google.com/vertex-ai) APIs. This includes support for [Google's Gemini models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models) and [Anthropic's Claude partner models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude).
8
+ The Google Vertex provider for the [AI SDK](/docs) contains language model support for the [Google Vertex AI](https://cloud.google.com/vertex-ai) APIs. This includes support for [Google's Gemini models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models), [Anthropic's Claude partner models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude), [xAI's Grok partner models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/grok), and [MaaS (Model as a Service) open models](https://cloud.google.com/vertex-ai/generative-ai/docs/maas/use-open-models).
9
9
 
10
10
  <Note>
11
11
  The Google Vertex provider is compatible with both Node.js and Edge runtimes.
12
12
  The Edge runtime is supported through the `@ai-sdk/google-vertex/edge`
13
13
  sub-module. More details can be found in the [Google Vertex Edge
14
- Runtime](#google-vertex-edge-runtime) and [Google Vertex Anthropic Edge
15
- Runtime](#google-vertex-anthropic-edge-runtime) sections below.
14
+ Runtime](#google-vertex-edge-runtime), [Google Vertex Anthropic Edge
15
+ Runtime](#google-vertex-anthropic-edge-runtime), and [Google Vertex MaaS Edge
16
+ Runtime](#google-vertex-maas-edge-runtime) sections below.
16
17
  </Note>
17
18
 
18
19
  ## Setup
19
20
 
20
- The Google Vertex and Google Vertex Anthropic providers are both available in the `@ai-sdk/google-vertex` module. You can install it with
21
+ The Google Vertex, Google Vertex Anthropic, Google Vertex xAI, and Google Vertex MaaS providers are available in the `@ai-sdk/google-vertex` module. You can install it with
21
22
 
22
23
  <Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
23
24
  <Tab>
@@ -44,18 +45,18 @@ The Google Vertex provider instance is used to create model instances that call
44
45
 
45
46
  ### Provider Instance
46
47
 
47
- You can import the default provider instance `vertex` from `@ai-sdk/google-vertex`:
48
+ You can import the default provider instance `googleVertex` from `@ai-sdk/google-vertex`:
48
49
 
49
50
  ```ts
50
- import { vertex } from '@ai-sdk/google-vertex';
51
+ import { googleVertex } from '@ai-sdk/google-vertex';
51
52
  ```
52
53
 
53
- If you need a customized setup, you can import `createVertex` from `@ai-sdk/google-vertex` and create a provider instance with your settings:
54
+ If you need a customized setup, you can import `createGoogleVertex` from `@ai-sdk/google-vertex` and create a provider instance with your settings:
54
55
 
55
56
  ```ts
56
- import { createVertex } from '@ai-sdk/google-vertex';
57
+ import { createGoogleVertex } from '@ai-sdk/google-vertex';
57
58
 
58
- const vertex = createVertex({
59
+ const googleVertex = createGoogleVertex({
59
60
  project: 'my-project', // optional
60
61
  location: 'us-central1', // optional
61
62
  });
@@ -67,12 +68,12 @@ Google Vertex supports multiple authentication methods depending on your runtime
67
68
 
68
69
  The Node.js runtime is the default runtime supported by the AI SDK. It supports all standard Google Cloud authentication options through the [`google-auth-library`](https://github.com/googleapis/google-auth-library-nodejs?tab=readme-ov-file#ways-to-authenticate). Typical use involves setting a path to a JSON credentials file in the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. The credentials file can be obtained from the [Google Cloud Console](https://console.cloud.google.com/apis/credentials).
69
70
 
70
- If you want to customize the Google authentication options you can pass them as options to the `createVertex` function, for example:
71
+ If you want to customize the Google authentication options you can pass them as options to the `createGoogleVertex` function, for example:
71
72
 
72
73
  ```ts
73
- import { createVertex } from '@ai-sdk/google-vertex';
74
+ import { createGoogleVertex } from '@ai-sdk/google-vertex';
74
75
 
75
- const vertex = createVertex({
76
+ const googleVertex = createGoogleVertex({
76
77
  googleAuthOptions: {
77
78
  credentials: {
78
79
  client_email: 'my-email',
@@ -99,7 +100,6 @@ You can use the following optional settings to customize the provider instance:
99
100
  - **googleAuthOptions** _object_
100
101
 
101
102
  Optional. The authentication options used by the [Google Auth Library](https://github.com/googleapis/google-auth-library-nodejs/). See also the [GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/08978822e1b7b5961f0e355df51d738e012be392/src/auth/googleauth.ts#L87C18-L87C35) interface.
102
-
103
103
  - **authClient** _object_
104
104
  An `AuthClient` to use.
105
105
 
@@ -127,7 +127,6 @@ You can use the following optional settings to customize the provider instance:
127
127
  - **headers** _Resolvable&lt;Record&lt;string, string | undefined&gt;&gt;_
128
128
 
129
129
  Headers to include in the requests. Can be provided in multiple formats:
130
-
131
130
  - A record of header key-value pairs: `Record<string, string | undefined>`
132
131
  - A function that returns headers: `() => Record<string, string | undefined>`
133
132
  - An async function that returns headers: `async () => Record<string, string | undefined>`
@@ -155,10 +154,10 @@ For example, direct file system access is not available, and many Node.js-specif
155
154
 
156
155
  The Edge runtime version of the Google Vertex provider supports Google's [Application Default Credentials](https://github.com/googleapis/google-auth-library-nodejs?tab=readme-ov-file#application-default-credentials) through environment variables. The values can be obtained from a JSON credentials file from the [Google Cloud Console](https://console.cloud.google.com/apis/credentials).
157
156
 
158
- You can import the default provider instance `vertex` from `@ai-sdk/google-vertex/edge`:
157
+ You can import the default provider instance `googleVertex` from `@ai-sdk/google-vertex/edge`:
159
158
 
160
159
  ```ts
161
- import { vertex } from '@ai-sdk/google-vertex/edge';
160
+ import { googleVertex } from '@ai-sdk/google-vertex/edge';
162
161
  ```
163
162
 
164
163
  <Note>
@@ -167,12 +166,12 @@ import { vertex } from '@ai-sdk/google-vertex/edge';
167
166
  `@ai-sdk/google-vertex/edge` to differentiate it from the Node.js provider.
168
167
  </Note>
169
168
 
170
- If you need a customized setup, you can import `createVertex` from `@ai-sdk/google-vertex/edge` and create a provider instance with your settings:
169
+ If you need a customized setup, you can import `createGoogleVertex` from `@ai-sdk/google-vertex/edge` and create a provider instance with your settings:
171
170
 
172
171
  ```ts
173
- import { createVertex } from '@ai-sdk/google-vertex/edge';
172
+ import { createGoogleVertex } from '@ai-sdk/google-vertex/edge';
174
173
 
175
- const vertex = createVertex({
174
+ const googleVertex = createGoogleVertex({
176
175
  project: 'my-project', // optional
177
176
  location: 'us-central1', // optional
178
177
  });
@@ -203,7 +202,6 @@ You can use the following optional settings to customize the provider instance:
203
202
  - **googleCredentials** _object_
204
203
 
205
204
  Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
206
-
207
205
  - **clientEmail** _string_
208
206
  The client email from the service account JSON file. Defaults to the contents of the `GOOGLE_CLIENT_EMAIL` environment variable.
209
207
 
@@ -216,7 +214,6 @@ You can use the following optional settings to customize the provider instance:
216
214
  - **headers** _Resolvable&lt;Record&lt;string, string | undefined&gt;&gt;_
217
215
 
218
216
  Headers to include in the requests. Can be provided in multiple formats:
219
-
220
217
  - A record of header key-value pairs: `Record<string, string | undefined>`
221
218
  - A function that returns headers: `() => Record<string, string | undefined>`
222
219
  - An async function that returns headers: `async () => Record<string, string | undefined>`
@@ -234,9 +231,9 @@ You can use the following optional settings to customize the provider instance:
234
231
  Express mode provides a simplified authentication method using an API key instead of OAuth or service account credentials. When using express mode, the `project` and `location` settings are not required.
235
232
 
236
233
  ```ts
237
- import { createVertex } from '@ai-sdk/google-vertex';
234
+ import { createGoogleVertex } from '@ai-sdk/google-vertex';
238
235
 
239
- const vertex = createVertex({
236
+ const googleVertex = createGoogleVertex({
240
237
  apiKey: process.env.GOOGLE_VERTEX_API_KEY,
241
238
  });
242
239
  ```
@@ -254,7 +251,7 @@ You can create models that call the Vertex API using the provider instance.
254
251
  The first argument is the model id, e.g. `gemini-2.5-pro`.
255
252
 
256
253
  ```ts
257
- const model = vertex('gemini-2.5-pro');
254
+ const model = googleVertex('gemini-2.5-pro');
258
255
  ```
259
256
 
260
257
  <Note>
@@ -268,10 +265,10 @@ of the [standard call settings](/docs/ai-sdk-core/settings). You can pass them a
268
265
  an options argument:
269
266
 
270
267
  ```ts
271
- import { vertex } from '@ai-sdk/google-vertex';
268
+ import { googleVertex } from '@ai-sdk/google-vertex';
272
269
  import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
273
270
 
274
- const model = vertex('gemini-2.5-pro');
271
+ const model = googleVertex('gemini-2.5-pro');
275
272
 
276
273
  await generateText({
277
274
  model,
@@ -309,11 +306,9 @@ The following optional provider options are available for Google Vertex models:
309
306
  - **safetySettings** _Array\<\{ category: string; threshold: string \}\>_
310
307
 
311
308
  Optional. Safety settings for the model.
312
-
313
309
  - **category** _string_
314
310
 
315
311
  The category of the safety setting. Can be one of the following:
316
-
317
312
  - `HARM_CATEGORY_UNSPECIFIED`
318
313
  - `HARM_CATEGORY_HATE_SPEECH`
319
314
  - `HARM_CATEGORY_DANGEROUS_CONTENT`
@@ -324,7 +319,6 @@ The following optional provider options are available for Google Vertex models:
324
319
  - **threshold** _string_
325
320
 
326
321
  The threshold of the safety setting. Can be one of the following:
327
-
328
322
  - `HARM_BLOCK_THRESHOLD_UNSPECIFIED`
329
323
  - `BLOCK_LOW_AND_ABOVE`
330
324
  - `BLOCK_MEDIUM_AND_ABOVE`
@@ -344,14 +338,52 @@ The following optional provider options are available for Google Vertex models:
344
338
 
345
339
  Consult [Google's Documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls) for usage details.
346
340
 
341
+ - **streamFunctionCallArguments** _boolean_
342
+
343
+ Optional. When set to true, function call arguments will be streamed
344
+ incrementally in streaming responses. This enables `tool-input-delta` events
345
+ to arrive as the model generates function call arguments, reducing perceived
346
+ latency for tool calls. Defaults to `false`. Only supported on the Vertex AI API (not the Gemini API) with Gemini 3+ models.
347
+
348
+ Consult [Google's Documentation](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/function-calling#streaming-fc) for details.
349
+
350
+ - **sharedRequestType** _'priority' | 'flex' | 'standard'_
351
+
352
+ Optional. Selects a pay-as-you-go (PayGo) tier by setting the
353
+ `X-Vertex-AI-LLM-Shared-Request-Type` request header. Use `'priority'` for
354
+ consistent low-latency performance at a premium, or `'flex'` for a 50%
355
+ discount with longer expected latency. Both are supported only on the
356
+ `global` endpoint and on a subset of Gemini models.
357
+
358
+ By default — with Provisioned Throughput allocated and `requestType` unset
359
+ — the request consumes Provisioned Throughput quota first and only falls
360
+ back to the chosen shared tier if Provisioned Throughput capacity is exhausted. To bypass
361
+ Provisioned Throughput entirely, also set `requestType: 'shared'`.
362
+
363
+ The served tier is reported back on
364
+ `result.providerMetadata.googleVertex.usageMetadata.trafficType` as
365
+ `ON_DEMAND_PRIORITY`, `ON_DEMAND_FLEX`, or (if downgraded under load) plain
366
+ `ON_DEMAND`.
367
+
368
+ See [Priority PayGo](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/priority-paygo)
369
+ and [Flex PayGo](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/flex-paygo)
370
+ for supported models, ramp limits, and downgrade behavior.
371
+
372
+ - **requestType** _'shared'_
373
+
374
+ Optional. Sets the `X-Vertex-AI-LLM-Request-Type` request header. Combine
375
+ with `sharedRequestType` to skip Provisioned Throughput entirely and route
376
+ the request through shared PayGo capacity. See
377
+ [Priority PayGo](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/priority-paygo).
378
+
347
379
  You can use Google Vertex language models to generate text with the `generateText` function:
348
380
 
349
381
  ```ts highlight="1,4"
350
- import { vertex } from '@ai-sdk/google-vertex';
382
+ import { googleVertex } from '@ai-sdk/google-vertex';
351
383
  import { generateText } from 'ai';
352
384
 
353
385
  const { text } = await generateText({
354
- model: vertex('gemini-2.5-pro'),
386
+ model: googleVertex('gemini-2.5-pro'),
355
387
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
356
388
  });
357
389
  ```
@@ -366,12 +398,12 @@ With [Code Execution](https://cloud.google.com/vertex-ai/generative-ai/docs/mult
366
398
  You can enable code execution by adding the `code_execution` tool to your request.
367
399
 
368
400
  ```ts
369
- import { vertex } from '@ai-sdk/google-vertex';
401
+ import { googleVertex } from '@ai-sdk/google-vertex';
370
402
  import { generateText } from 'ai';
371
403
 
372
404
  const result = await generateText({
373
- model: vertex('gemini-2.5-pro'),
374
- tools: { code_execution: vertex.tools.codeExecution({}) },
405
+ model: googleVertex('gemini-2.5-pro'),
406
+ tools: { code_execution: googleVertex.tools.codeExecution({}) },
375
407
  prompt:
376
408
  'Use python to calculate 20th fibonacci number. Then find the nearest palindrome to it.',
377
409
  });
@@ -384,12 +416,12 @@ The response will contain `tool-call` and `tool-result` parts for the executed c
384
416
  URL Context allows Gemini models to retrieve and analyze content from URLs. Supported models: Gemini 2.5 Flash-Lite, 2.5 Pro, 2.5 Flash, 2.0 Flash.
385
417
 
386
418
  ```ts
387
- import { vertex } from '@ai-sdk/google-vertex';
419
+ import { googleVertex } from '@ai-sdk/google-vertex';
388
420
  import { generateText } from 'ai';
389
421
 
390
422
  const result = await generateText({
391
- model: vertex('gemini-2.5-pro'),
392
- tools: { url_context: vertex.tools.urlContext({}) },
423
+ model: googleVertex('gemini-2.5-pro'),
424
+ tools: { url_context: googleVertex.tools.urlContext({}) },
393
425
  prompt: 'What are the key points from https://example.com/article?',
394
426
  });
395
427
  ```
@@ -399,12 +431,12 @@ const result = await generateText({
399
431
  Google Search enables Gemini models to access real-time web information. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro.
400
432
 
401
433
  ```ts
402
- import { vertex } from '@ai-sdk/google-vertex';
434
+ import { googleVertex } from '@ai-sdk/google-vertex';
403
435
  import { generateText } from 'ai';
404
436
 
405
437
  const result = await generateText({
406
- model: vertex('gemini-2.5-pro'),
407
- tools: { google_search: vertex.tools.googleSearch({}) },
438
+ model: googleVertex('gemini-2.5-pro'),
439
+ tools: { google_search: googleVertex.tools.googleSearch({}) },
408
440
  prompt: 'What are the latest developments in AI?',
409
441
  });
410
442
  ```
@@ -414,13 +446,13 @@ const result = await generateText({
414
446
  [Enterprise Web Search](https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/web-grounding-enterprise) provides grounding using a compliance-focused web index designed for highly-regulated industries such as finance, healthcare, and the public sector. Unlike standard Google Search grounding, Enterprise Web Search does not log customer data and supports VPC service controls. Supported models: Gemini 2.0 and newer.
415
447
 
416
448
  ```ts
417
- import { vertex } from '@ai-sdk/google-vertex';
449
+ import { googleVertex } from '@ai-sdk/google-vertex';
418
450
  import { generateText } from 'ai';
419
451
 
420
452
  const result = await generateText({
421
- model: vertex('gemini-2.5-flash'),
453
+ model: googleVertex('gemini-2.5-flash'),
422
454
  tools: {
423
- enterprise_web_search: vertex.tools.enterpriseWebSearch({}),
455
+ enterprise_web_search: googleVertex.tools.enterpriseWebSearch({}),
424
456
  },
425
457
  prompt: 'What are the latest FDA regulations for clinical trials?',
426
458
  });
@@ -431,14 +463,14 @@ const result = await generateText({
431
463
  Google Maps grounding enables Gemini models to access Google Maps data for location-aware responses. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro, 3.0 Pro.
432
464
 
433
465
  ```ts
434
- import { vertex } from '@ai-sdk/google-vertex';
466
+ import { googleVertex } from '@ai-sdk/google-vertex';
435
467
  import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
436
468
  import { generateText } from 'ai';
437
469
 
438
470
  const result = await generateText({
439
- model: vertex('gemini-2.5-flash'),
471
+ model: googleVertex('gemini-2.5-flash'),
440
472
  tools: {
441
- google_maps: vertex.tools.googleMaps({}),
473
+ google_maps: googleVertex.tools.googleMaps({}),
442
474
  },
443
475
  providerOptions: {
444
476
  vertex: {
@@ -453,6 +485,59 @@ const result = await generateText({
453
485
 
454
486
  The optional `retrievalConfig.latLng` provider option provides location context for queries about nearby places. This configuration applies to any grounding tools that support location context.
455
487
 
488
+ #### Streaming Function Call Arguments
489
+
490
+ For Gemini 3 Pro and later models on Vertex AI, you can stream function call
491
+ arguments as they are generated by setting `streamFunctionCallArguments` to
492
+ `true`. This reduces perceived latency when functions need to be called, as
493
+ `tool-input-delta` events arrive incrementally instead of waiting for the
494
+ complete arguments. This option defaults to `false`.
495
+
496
+ ```ts
497
+ import { googleVertex } from '@ai-sdk/google-vertex';
498
+ import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
499
+ import { streamText } from 'ai';
500
+ import { z } from 'zod';
501
+
502
+ const result = streamText({
503
+ model: googleVertex('gemini-3.1-pro-preview'),
504
+ prompt: 'What is the weather in Boston and San Francisco?',
505
+ tools: {
506
+ getWeather: {
507
+ description: 'Get the current weather in a given location',
508
+ inputSchema: z.object({
509
+ location: z.string().describe('City name'),
510
+ }),
511
+ },
512
+ },
513
+ providerOptions: {
514
+ vertex: {
515
+ streamFunctionCallArguments: true,
516
+ } satisfies GoogleLanguageModelOptions,
517
+ },
518
+ });
519
+
520
+ for await (const part of result.stream) {
521
+ switch (part.type) {
522
+ case 'tool-input-start':
523
+ console.log(`Tool call started: ${part.toolName}`);
524
+ break;
525
+ case 'tool-input-delta':
526
+ process.stdout.write(part.delta);
527
+ break;
528
+ case 'tool-call':
529
+ console.log(`Tool call complete: ${part.toolName}`, part.input);
530
+ break;
531
+ }
532
+ }
533
+ ```
534
+
535
+ <Note>
536
+ This feature is only available on the Vertex AI API. It is not supported on
537
+ the Gemini API. When used with the Google provider, a warning will be emitted
538
+ and the option will be ignored.
539
+ </Note>
540
+
456
541
  #### Reasoning (Thinking Tokens)
457
542
 
458
543
  Google Vertex AI, through its support for Gemini models, can also emit "thinking" tokens, representing the model's reasoning process. The AI SDK exposes these as reasoning information.
@@ -460,13 +545,13 @@ Google Vertex AI, through its support for Gemini models, can also emit "thinking
460
545
  To enable thinking tokens for compatible Gemini models via Vertex, set `includeThoughts: true` in the `thinkingConfig` provider option. These options are passed through `providerOptions.vertex`:
461
546
 
462
547
  ```ts
463
- import { vertex } from '@ai-sdk/google-vertex';
548
+ import { googleVertex } from '@ai-sdk/google-vertex';
464
549
  import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
465
550
  import { generateText, streamText } from 'ai';
466
551
 
467
552
  // For generateText:
468
553
  const { text, reasoningText, reasoning } = await generateText({
469
- model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
554
+ model: googleVertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
470
555
  providerOptions: {
471
556
  vertex: {
472
557
  thinkingConfig: {
@@ -484,7 +569,7 @@ console.log('Final Text:', text);
484
569
 
485
570
  // For streamText:
486
571
  const result = streamText({
487
- model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
572
+ model: googleVertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
488
573
  providerOptions: {
489
574
  vertex: {
490
575
  thinkingConfig: {
@@ -496,7 +581,7 @@ const result = streamText({
496
581
  prompt: 'Explain quantum computing in simple terms.',
497
582
  });
498
583
 
499
- for await (const part of result.fullStream) {
584
+ for await (const part of result.stream) {
500
585
  if (part.type === 'reasoning') {
501
586
  process.stdout.write(`THOUGHT: ${part.textDelta}\n`);
502
587
  } else if (part.type === 'text-delta') {
@@ -521,11 +606,11 @@ When `includeThoughts` is true, parts of the API response marked with `thought:
521
606
  The Google Vertex provider supports file inputs, e.g. PDF files.
522
607
 
523
608
  ```ts
524
- import { vertex } from '@ai-sdk/google-vertex';
609
+ import { googleVertex } from '@ai-sdk/google-vertex';
525
610
  import { generateText } from 'ai';
526
611
 
527
612
  const { text } = await generateText({
528
- model: vertex('gemini-2.5-pro'),
613
+ model: googleVertex('gemini-2.5-pro'),
529
614
  messages: [
530
615
  {
531
616
  role: 'user',
@@ -560,7 +645,7 @@ Google Vertex AI supports both explicit and implicit caching to help reduce cost
560
645
  #### Implicit Caching
561
646
 
562
647
  ```ts
563
- import { vertex } from '@ai-sdk/google-vertex';
648
+ import { googleVertex } from '@ai-sdk/google-vertex';
564
649
  import { generateText } from 'ai';
565
650
 
566
651
  // Structure prompts with consistent content at the beginning
@@ -568,13 +653,13 @@ const baseContext =
568
653
  'You are a cooking assistant with expertise in Italian cuisine. Here are 1000 lasagna recipes for reference...';
569
654
 
570
655
  const { text: veggieLasagna } = await generateText({
571
- model: vertex('gemini-2.5-pro'),
656
+ model: googleVertex('gemini-2.5-pro'),
572
657
  prompt: `${baseContext}\n\nWrite a vegetarian lasagna recipe for 4 people.`,
573
658
  });
574
659
 
575
660
  // Second request with same prefix - eligible for cache hit
576
661
  const { text: meatLasagna, providerMetadata } = await generateText({
577
- model: vertex('gemini-2.5-pro'),
662
+ model: googleVertex('gemini-2.5-pro'),
578
663
  prompt: `${baseContext}\n\nWrite a meat lasagna recipe for 12 people.`,
579
664
  });
580
665
 
@@ -632,12 +717,12 @@ console.log('Cache created:', cache.name);
632
717
  Then use the cache with the AI SDK:
633
718
 
634
719
  ```ts
635
- import { vertex } from '@ai-sdk/google-vertex';
720
+ import { googleVertex } from '@ai-sdk/google-vertex';
636
721
  import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
637
722
  import { generateText } from 'ai';
638
723
 
639
724
  const { text: veggieLasagnaRecipe } = await generateText({
640
- model: vertex('gemini-2.5-pro'),
725
+ model: googleVertex('gemini-2.5-pro'),
641
726
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
642
727
  providerOptions: {
643
728
  vertex: {
@@ -647,7 +732,7 @@ const { text: veggieLasagnaRecipe } = await generateText({
647
732
  });
648
733
 
649
734
  const { text: meatLasagnaRecipe } = await generateText({
650
- model: vertex('gemini-2.5-pro'),
735
+ model: googleVertex('gemini-2.5-pro'),
651
736
  prompt: 'Write a meat lasagna recipe for 12 people.',
652
737
  providerOptions: {
653
738
  vertex: {
@@ -716,12 +801,12 @@ By default, structured outputs are enabled (and for tool calling they are requir
716
801
  You can disable structured outputs for object generation as a workaround:
717
802
 
718
803
  ```ts highlight="7,12"
719
- import { vertex } from '@ai-sdk/google-vertex';
804
+ import { googleVertex } from '@ai-sdk/google-vertex';
720
805
  import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
721
806
  import { generateText, Output } from 'ai';
722
807
 
723
808
  const result = await generateText({
724
- model: vertex('gemini-2.5-pro'),
809
+ model: googleVertex('gemini-2.5-pro'),
725
810
  providerOptions: {
726
811
  vertex: {
727
812
  structuredOutputs: false,
@@ -756,6 +841,7 @@ The following Zod features are known to not work with Google Vertex:
756
841
 
757
842
  | Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
758
843
  | ---------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
844
+ | `gemini-3.5-flash` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
759
845
  | `gemini-3-pro-preview` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
760
846
  | `gemini-2.5-pro` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
761
847
  | `gemini-2.5-flash` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
@@ -773,19 +859,19 @@ The following Zod features are known to not work with Google Vertex:
773
859
  You can create models that call the Google Vertex AI embeddings API using the `.embeddingModel()` factory method:
774
860
 
775
861
  ```ts
776
- const model = vertex.embeddingModel('text-embedding-005');
862
+ const model = googleVertex.embeddingModel('text-embedding-005');
777
863
  ```
778
864
 
779
865
  Google Vertex AI embedding models support additional settings. You can pass them as an options argument:
780
866
 
781
867
  ```ts
782
868
  import {
783
- vertex,
869
+ googleVertex,
784
870
  type GoogleVertexEmbeddingModelOptions,
785
871
  } from '@ai-sdk/google-vertex';
786
872
  import { embed } from 'ai';
787
873
 
788
- const model = vertex.embeddingModel('text-embedding-005');
874
+ const model = googleVertex.embeddingModel('text-embedding-005');
789
875
 
790
876
  const { embedding } = await embed({
791
877
  model,
@@ -809,7 +895,6 @@ The following optional provider options are available for Google Vertex AI embed
809
895
  - **taskType**: _string_
810
896
 
811
897
  Optional. Specifies the task type for generating embeddings. Supported task types include:
812
-
813
898
  - `SEMANTIC_SIMILARITY`: Optimized for text similarity.
814
899
  - `CLASSIFICATION`: Optimized for text classification.
815
900
  - `CLUSTERING`: Optimized for clustering texts based on similarity.
@@ -832,6 +917,7 @@ The following optional provider options are available for Google Vertex AI embed
832
917
  | Model | Max Values Per Call | Parallel Calls | Multimodal |
833
918
  | ---------------------------- | ------------------- | ------------------- | ------------------- |
834
919
  | `text-embedding-005` | 2048 | <Check size={18} /> | <Cross size={18} /> |
920
+ | `gemini-embedding-2` | 2048 | <Check size={18} /> | <Check size={18} /> |
835
921
  | `gemini-embedding-2-preview` | 2048 | <Check size={18} /> | <Check size={18} /> |
836
922
 
837
923
  <Note>
@@ -848,11 +934,11 @@ You can create image models using the `.image()` factory method. The Google Vert
848
934
  [Imagen models](https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-images) generate images using the Imagen on Vertex AI API.
849
935
 
850
936
  ```ts
851
- import { vertex } from '@ai-sdk/google-vertex';
937
+ import { googleVertex } from '@ai-sdk/google-vertex';
852
938
  import { generateImage } from 'ai';
853
939
 
854
940
  const { image } = await generateImage({
855
- model: vertex.image('imagen-4.0-generate-001'),
941
+ model: googleVertex.image('imagen-4.0-generate-001'),
856
942
  prompt: 'A futuristic cityscape at sunset',
857
943
  aspectRatio: '16:9',
858
944
  });
@@ -861,12 +947,12 @@ const { image } = await generateImage({
861
947
  Further configuration can be done using Google Vertex provider options. You can validate the provider options using the `GoogleVertexImageModelOptions` type.
862
948
 
863
949
  ```ts
864
- import { vertex } from '@ai-sdk/google-vertex';
950
+ import { googleVertex } from '@ai-sdk/google-vertex';
865
951
  import { GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
866
952
  import { generateImage } from 'ai';
867
953
 
868
954
  const { image } = await generateImage({
869
- model: vertex.image('imagen-4.0-generate-001'),
955
+ model: googleVertex.image('imagen-4.0-generate-001'),
870
956
  providerOptions: {
871
957
  vertex: {
872
958
  negativePrompt: 'pixelated, blurry, low-quality',
@@ -901,12 +987,12 @@ The following provider options are available:
901
987
  Additional information about the images can be retrieved using Google Vertex metadata.
902
988
 
903
989
  ```ts
904
- import { vertex } from '@ai-sdk/google-vertex';
990
+ import { googleVertex } from '@ai-sdk/google-vertex';
905
991
  import { GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
906
992
  import { generateImage } from 'ai';
907
993
 
908
994
  const { image, providerMetadata } = await generateImage({
909
- model: vertex.image('imagen-4.0-generate-001'),
995
+ model: googleVertex.image('imagen-4.0-generate-001'),
910
996
  prompt: 'A futuristic cityscape at sunset',
911
997
  aspectRatio: '16:9',
912
998
  });
@@ -930,7 +1016,10 @@ Google Vertex Imagen models support image editing through inpainting, outpaintin
930
1016
  Insert or replace objects in specific areas using a mask:
931
1017
 
932
1018
  ```ts
933
- import { vertex, GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
1019
+ import {
1020
+ googleVertex,
1021
+ GoogleVertexImageModelOptions,
1022
+ } from '@ai-sdk/google-vertex';
934
1023
  import { generateImage } from 'ai';
935
1024
  import fs from 'fs';
936
1025
 
@@ -938,7 +1027,7 @@ const image = fs.readFileSync('./input-image.png');
938
1027
  const mask = fs.readFileSync('./mask.png'); // White = edit area
939
1028
 
940
1029
  const { images } = await generateImage({
941
- model: vertex.image('imagen-3.0-capability-001'),
1030
+ model: googleVertex.image('imagen-3.0-capability-001'),
942
1031
  prompt: {
943
1032
  text: 'A sunlit indoor lounge area with a pool containing a flamingo',
944
1033
  images: [image],
@@ -962,7 +1051,10 @@ const { images } = await generateImage({
962
1051
  Extend an image beyond its original boundaries:
963
1052
 
964
1053
  ```ts
965
- import { vertex, GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
1054
+ import {
1055
+ googleVertex,
1056
+ GoogleVertexImageModelOptions,
1057
+ } from '@ai-sdk/google-vertex';
966
1058
  import { generateImage } from 'ai';
967
1059
  import fs from 'fs';
968
1060
 
@@ -970,7 +1062,7 @@ const image = fs.readFileSync('./input-image.png');
970
1062
  const mask = fs.readFileSync('./outpaint-mask.png'); // White = extend area
971
1063
 
972
1064
  const { images } = await generateImage({
973
- model: vertex.image('imagen-3.0-capability-001'),
1065
+ model: googleVertex.image('imagen-3.0-capability-001'),
974
1066
  prompt: {
975
1067
  text: 'Extend the scene with more of the forest background',
976
1068
  images: [image],
@@ -993,7 +1085,6 @@ const { images } = await generateImage({
993
1085
  The following options are available under `providerOptions.vertex.edit`:
994
1086
 
995
1087
  - **mode** - The edit mode to use:
996
-
997
1088
  - `EDIT_MODE_INPAINT_INSERTION` - Insert objects into masked areas
998
1089
  - `EDIT_MODE_INPAINT_REMOVAL` - Remove objects from masked areas
999
1090
  - `EDIT_MODE_OUTPAINT` - Extend image beyond boundaries
@@ -1004,7 +1095,6 @@ The following options are available under `providerOptions.vertex.edit`:
1004
1095
  - **baseSteps** _number_ - Number of sampling steps (35-75). Higher values = better quality but slower.
1005
1096
 
1006
1097
  - **maskMode** - How to interpret the mask:
1007
-
1008
1098
  - `MASK_MODE_USER_PROVIDED` - Use the provided mask directly
1009
1099
  - `MASK_MODE_DEFAULT` - Default mask mode
1010
1100
  - `MASK_MODE_DETECTION_BOX` - Mask from detected bounding boxes
@@ -1035,11 +1125,11 @@ The following options are available under `providerOptions.vertex.edit`:
1035
1125
  [Gemini image models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-image) (e.g. `gemini-2.5-flash-image`) are multimodal output language models that can be used with `generateImage()` for a simpler image generation experience. Internally, the provider calls the language model API with `responseModalities: ['IMAGE']`.
1036
1126
 
1037
1127
  ```ts
1038
- import { vertex } from '@ai-sdk/google-vertex';
1128
+ import { googleVertex } from '@ai-sdk/google-vertex';
1039
1129
  import { generateImage } from 'ai';
1040
1130
 
1041
1131
  const { image } = await generateImage({
1042
- model: vertex.image('gemini-2.5-flash-image'),
1132
+ model: googleVertex.image('gemini-2.5-flash-image'),
1043
1133
  prompt: 'A photorealistic image of a cat wearing a wizard hat',
1044
1134
  aspectRatio: '1:1',
1045
1135
  });
@@ -1048,14 +1138,14 @@ const { image } = await generateImage({
1048
1138
  Gemini image models also support image editing by providing input images:
1049
1139
 
1050
1140
  ```ts
1051
- import { vertex } from '@ai-sdk/google-vertex';
1141
+ import { googleVertex } from '@ai-sdk/google-vertex';
1052
1142
  import { generateImage } from 'ai';
1053
1143
  import fs from 'node:fs';
1054
1144
 
1055
1145
  const sourceImage = fs.readFileSync('./cat.png');
1056
1146
 
1057
1147
  const { image } = await generateImage({
1058
- model: vertex.image('gemini-2.5-flash-image'),
1148
+ model: googleVertex.image('gemini-2.5-flash-image'),
1059
1149
  prompt: {
1060
1150
  text: 'Add a small wizard hat to this cat',
1061
1151
  images: [sourceImage],
@@ -1066,11 +1156,11 @@ const { image } = await generateImage({
1066
1156
  You can also use URLs (including `gs://` Cloud Storage URIs) for input images:
1067
1157
 
1068
1158
  ```ts
1069
- import { vertex } from '@ai-sdk/google-vertex';
1159
+ import { googleVertex } from '@ai-sdk/google-vertex';
1070
1160
  import { generateImage } from 'ai';
1071
1161
 
1072
1162
  const { image } = await generateImage({
1073
- model: vertex.image('gemini-2.5-flash-image'),
1163
+ model: googleVertex.image('gemini-2.5-flash-image'),
1074
1164
  prompt: {
1075
1165
  text: 'Add a small wizard hat to this cat',
1076
1166
  images: ['https://example.com/cat.png'],
@@ -1111,11 +1201,11 @@ You can create [Veo](https://cloud.google.com/vertex-ai/generative-ai/docs/video
1111
1201
  using the `.video()` factory method. For more on video generation with the AI SDK see [generateVideo()](/docs/reference/ai-sdk-core/generate-video).
1112
1202
 
1113
1203
  ```ts
1114
- import { vertex } from '@ai-sdk/google-vertex';
1204
+ import { googleVertex } from '@ai-sdk/google-vertex';
1115
1205
  import { experimental_generateVideo as generateVideo } from 'ai';
1116
1206
 
1117
1207
  const { video } = await generateVideo({
1118
- model: vertex.video('veo-3.1-generate-001'),
1208
+ model: googleVertex.video('veo-3.1-generate-001'),
1119
1209
  prompt:
1120
1210
  'A pangolin curled on a mossy stone in a glowing bioluminescent forest',
1121
1211
  aspectRatio: '16:9',
@@ -1125,11 +1215,11 @@ const { video } = await generateVideo({
1125
1215
  You can configure resolution and duration:
1126
1216
 
1127
1217
  ```ts
1128
- import { vertex } from '@ai-sdk/google-vertex';
1218
+ import { googleVertex } from '@ai-sdk/google-vertex';
1129
1219
  import { experimental_generateVideo as generateVideo } from 'ai';
1130
1220
 
1131
1221
  const { video } = await generateVideo({
1132
- model: vertex.video('veo-3.1-generate-001'),
1222
+ model: googleVertex.video('veo-3.1-generate-001'),
1133
1223
  prompt: 'A serene mountain landscape at sunset',
1134
1224
  aspectRatio: '16:9',
1135
1225
  resolution: '1920x1080',
@@ -1142,12 +1232,12 @@ const { video } = await generateVideo({
1142
1232
  Further configuration can be done using Google Vertex provider options. You can validate the provider options using the `GoogleVertexVideoModelOptions` type.
1143
1233
 
1144
1234
  ```ts
1145
- import { vertex } from '@ai-sdk/google-vertex';
1235
+ import { googleVertex } from '@ai-sdk/google-vertex';
1146
1236
  import { GoogleVertexVideoModelOptions } from '@ai-sdk/google-vertex';
1147
1237
  import { experimental_generateVideo as generateVideo } from 'ai';
1148
1238
 
1149
1239
  const { video } = await generateVideo({
1150
- model: vertex.video('veo-3.1-generate-001'),
1240
+ model: googleVertex.video('veo-3.1-generate-001'),
1151
1241
  prompt: 'A serene mountain landscape at sunset',
1152
1242
  aspectRatio: '16:9',
1153
1243
  providerOptions: {
@@ -1210,6 +1300,118 @@ The following provider options are available:
1210
1300
  model ID as a string if needed.
1211
1301
  </Note>
1212
1302
 
1303
+ ### Speech Models
1304
+
1305
+ You can create [Gemini text-to-speech](https://docs.cloud.google.com/text-to-speech/docs/gemini-tts)
1306
+ models that call the Vertex AI API using the `.speech()` factory method. For more on speech
1307
+ generation with the AI SDK see [generateSpeech()](/docs/reference/ai-sdk-core/generate-speech).
1308
+
1309
+ ```ts
1310
+ import { googleVertex } from '@ai-sdk/google-vertex';
1311
+ import { generateSpeech } from 'ai';
1312
+
1313
+ const result = await generateSpeech({
1314
+ model: googleVertex.speech('gemini-2.5-flash-tts'),
1315
+ text: 'Hello, world!',
1316
+ voice: 'Kore', // Gemini voice name
1317
+ });
1318
+ ```
1319
+
1320
+ The `voice` argument accepts one of Gemini's [30 prebuilt voices](https://ai.google.dev/gemini-api/docs/speech-generation#voices)
1321
+ (e.g. `Kore`, `Puck`, `Zephyr`); it defaults to `Kore`. Multi-speaker dialogue is available via
1322
+ `providerOptions.googleVertex.multiSpeakerVoiceConfig`.
1323
+
1324
+ By default the audio is returned as a playable WAV (Gemini returns raw PCM, which the provider
1325
+ wraps). Set `outputFormat: 'pcm'` for the raw signed 16-bit little-endian mono bytes; the sample
1326
+ rate is reported in `result.providerMetadata.google.sampleRate`.
1327
+
1328
+ #### Speech Model Capabilities
1329
+
1330
+ | Model | Multi-speaker | Style via instructions |
1331
+ | ----------------------------------- | ------------------- | ---------------------- |
1332
+ | `gemini-2.5-flash-tts` | <Check size={18} /> | <Check size={18} /> |
1333
+ | `gemini-2.5-pro-tts` | <Check size={18} /> | <Check size={18} /> |
1334
+ | `gemini-2.5-flash-lite-preview-tts` | <Check size={18} /> | <Check size={18} /> |
1335
+ | `gemini-3.1-flash-tts-preview` | <Check size={18} /> | <Check size={18} /> |
1336
+
1337
+ ### Transcription Models
1338
+
1339
+ You can transcribe audio with Google Cloud Speech-to-Text models using the
1340
+ `.transcription()` factory method together with
1341
+ [`transcribe()`](/docs/reference/ai-sdk-core/transcribe).
1342
+
1343
+ ```ts
1344
+ import { googleVertex } from '@ai-sdk/google-vertex';
1345
+ import { transcribe } from 'ai';
1346
+ import { readFile } from 'fs/promises';
1347
+
1348
+ const result = await transcribe({
1349
+ model: googleVertex.transcription('chirp_2'),
1350
+ audio: await readFile('audio.wav'),
1351
+ });
1352
+ ```
1353
+
1354
+ The provider supports [Chirp](https://docs.cloud.google.com/speech-to-text/docs/models/chirp-3)
1355
+ models `chirp_2` and `chirp_3`, plus `telephony` for phone-call audio.
1356
+ Speech-to-Text uses standard Google Cloud credentials (OAuth, Application Default
1357
+ Credentials, or a service account) and calls the Cloud Speech-to-Text API.
1358
+ Express Mode API keys are not supported for transcription models. Set
1359
+ `GOOGLE_VERTEX_LOCATION` (or `providerOptions.googleVertex.region`) to a
1360
+ Speech-to-Text region. For Chirp, `chirp_2` is available in `us-central1`,
1361
+ `europe-west4`, and `asia-southeast1`; `chirp_3` in the `us` and `eu`
1362
+ multi-regions. Chirp is not available in the `global` Speech-to-Text location,
1363
+ and these regions differ from Vertex AI regions. `telephony` availability
1364
+ depends on the selected Speech-to-Text region and language.
1365
+
1366
+ The synchronous API transcribes audio up to one minute or 10 MB, whichever is
1367
+ reached first. The spoken language is auto-detected by default; pass
1368
+ `languageCodes` to restrict it. For `telephony`, pass a supported language code
1369
+ such as `['en-US']`.
1370
+
1371
+ ```ts
1372
+ const result = await transcribe({
1373
+ model: googleVertex.transcription('chirp_3'),
1374
+ audio: await readFile('audio.wav'),
1375
+ providerOptions: {
1376
+ googleVertex: {
1377
+ region: 'us',
1378
+ languageCodes: ['en-US'],
1379
+ },
1380
+ },
1381
+ });
1382
+ ```
1383
+
1384
+ The following provider options are available:
1385
+
1386
+ - **languageCodes** _string[]_
1387
+
1388
+ BCP-47 language codes to recognize, or `['auto']` to detect the spoken
1389
+ language. Defaults to `['auto']`. Multiple explicit language codes require a
1390
+ multi-region Speech-to-Text endpoint such as `us` or `eu`.
1391
+
1392
+ - **enableAutomaticPunctuation** _boolean_
1393
+
1394
+ Whether to add punctuation to the transcript. Defaults to `true`.
1395
+
1396
+ - **enableWordTimeOffsets** _boolean_
1397
+
1398
+ Whether to include word-level timestamps in `result.segments`. Defaults to
1399
+ `true`. Google notes that enabling word-level timestamps can reduce
1400
+ transcription quality and speed.
1401
+
1402
+ - **region** _string_
1403
+
1404
+ The Speech-to-Text region for the request. Defaults to the provider
1405
+ `location`.
1406
+
1407
+ #### Transcription Model Capabilities
1408
+
1409
+ | Model | Word timestamps | Language detection |
1410
+ | ----------- | --------------------------------------------------------- | ------------------------------------------------------------------------------ |
1411
+ | `chirp_2` | Available with a potential quality and speed tradeoff | Auto detection with `['auto']` |
1412
+ | `chirp_3` | Available with a potential transcription quality tradeoff | Auto detection with `['auto']` |
1413
+ | `telephony` | Available | Explicit supported language codes, with alternative language detection support |
1414
+
1213
1415
  ## Google Vertex Anthropic Provider Usage
1214
1416
 
1215
1417
  The Google Vertex Anthropic provider for the [AI SDK](/docs) offers support for Anthropic's Claude models through the Google Vertex AI APIs. This section provides details on how to set up and use the Google Vertex Anthropic provider.
@@ -1267,7 +1469,6 @@ You can use the following optional settings to customize the Google Vertex Anthr
1267
1469
  - **googleAuthOptions** _object_
1268
1470
 
1269
1471
  Optional. The authentication options used by the [Google Auth Library](https://github.com/googleapis/google-auth-library-nodejs/). See also the [GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/08978822e1b7b5961f0e355df51d738e012be392/src/auth/googleauth.ts#L87C18-L87C35) interface.
1270
-
1271
1472
  - **authClient** _object_
1272
1473
  An `AuthClient` to use.
1273
1474
 
@@ -1295,7 +1496,6 @@ You can use the following optional settings to customize the Google Vertex Anthr
1295
1496
  - **headers** _Resolvable&lt;Record&lt;string, string | undefined&gt;&gt;_
1296
1497
 
1297
1498
  Headers to include in the requests. Can be provided in multiple formats:
1298
-
1299
1499
  - A record of header key-value pairs: `Record<string, string | undefined>`
1300
1500
  - A function that returns headers: `() => Record<string, string | undefined>`
1301
1501
  - An async function that returns headers: `async () => Record<string, string | undefined>`
@@ -1358,7 +1558,6 @@ You can use the following optional settings to customize the provider instance:
1358
1558
  - **googleCredentials** _object_
1359
1559
 
1360
1560
  Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
1361
-
1362
1561
  - **clientEmail** _string_
1363
1562
  The client email from the service account JSON file. Defaults to the contents of the `GOOGLE_CLIENT_EMAIL` environment variable.
1364
1563
 
@@ -1371,7 +1570,6 @@ You can use the following optional settings to customize the provider instance:
1371
1570
  - **headers** _Resolvable&lt;Record&lt;string, string | undefined&gt;&gt;_
1372
1571
 
1373
1572
  Headers to include in the requests. Can be provided in multiple formats:
1374
-
1375
1573
  - A record of header key-value pairs: `Record<string, string | undefined>`
1376
1574
  - A function that returns headers: `() => Record<string, string | undefined>`
1377
1575
  - An async function that returns headers: `async () => Record<string, string | undefined>`
@@ -1429,6 +1627,11 @@ The following optional provider options are available for Anthropic models:
1429
1627
 
1430
1628
  Optional. See [Reasoning section](#reasoning) for more details.
1431
1629
 
1630
+ - `metadata` _object_
1631
+
1632
+ Optional. Metadata to include with the request. See the [Anthropic API documentation](https://platform.claude.com/docs/en/api/messages/create) for details.
1633
+ - `userId` _string_ - An external identifier for the end-user.
1634
+
1432
1635
  ### Reasoning
1433
1636
 
1434
1637
  Anthropic has reasoning support for the `claude-3-7-sonnet@20250219` model.
@@ -1469,13 +1672,12 @@ on how to integrate reasoning into your chatbot.
1469
1672
  In the messages and message parts, you can use the `providerOptions` property to set cache control breakpoints.
1470
1673
  You need to set the `anthropic` property in the `providerOptions` object to `{ cacheControl: { type: 'ephemeral' } }` to set a cache control breakpoint.
1471
1674
 
1472
- The cache creation input tokens are then returned in the `providerMetadata` object
1473
- for `generateText`, again under the `anthropic` property.
1474
- When you use `streamText`, the response contains a promise
1475
- that resolves to the metadata. Alternatively you can receive it in the
1476
- `onFinish` callback.
1675
+ Cache read and cache write (creation) token counts are returned on the standard
1676
+ `usage` object for both `generateText` and `streamText`. You can access them at
1677
+ `result.usage.inputTokenDetails.cacheReadTokens` and
1678
+ `result.usage.inputTokenDetails.cacheWriteTokens`.
1477
1679
 
1478
- ```ts highlight="8,18-20,29-30"
1680
+ ```ts highlight="8,16-18,27-31"
1479
1681
  import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
1480
1682
  import { generateText } from 'ai';
1481
1683
 
@@ -1502,13 +1704,19 @@ const result = await generateText({
1502
1704
  });
1503
1705
 
1504
1706
  console.log(result.text);
1505
- console.log(result.providerMetadata?.anthropic);
1506
- // e.g. { cacheCreationInputTokens: 2118, cacheReadInputTokens: 0 }
1707
+ console.log(
1708
+ 'Cache read tokens:',
1709
+ result.usage.inputTokenDetails.cacheReadTokens,
1710
+ );
1711
+ console.log(
1712
+ 'Cache write tokens:',
1713
+ result.usage.inputTokenDetails.cacheWriteTokens,
1714
+ );
1507
1715
  ```
1508
1716
 
1509
1717
  You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
1510
1718
 
1511
- ```ts highlight="3,9-11"
1719
+ ```ts highlight="3,7-9"
1512
1720
  const result = await generateText({
1513
1721
  model: vertexAnthropic('claude-3-5-sonnet-20240620'),
1514
1722
  messages: [
@@ -1548,6 +1756,12 @@ Google Vertex Anthropic supports a subset of Anthropic's built-in tools. The fol
1548
1756
  `@ai-sdk/anthropic` provider if you need access to all Anthropic tools.
1549
1757
  </Note>
1550
1758
 
1759
+ <Note>
1760
+ Google Vertex Anthropic does not support strict mode on tool definitions.
1761
+ Setting `strict: true` on a tool will be ignored and a warning will be
1762
+ emitted.
1763
+ </Note>
1764
+
1551
1765
  For more background on Anthropic tools, see [Anthropic's documentation](https://platform.claude.com/docs/en/agents-and-tools/tool-use/overview).
1552
1766
 
1553
1767
  #### Bash Tool
@@ -1640,7 +1854,7 @@ const computerTool = vertexAnthropic.tools.computer_20241022({
1640
1854
  toModelOutput({ output }) {
1641
1855
  return typeof output === 'string'
1642
1856
  ? [{ type: 'text', text: output }]
1643
- : [{ type: 'image', data: output.data, mediaType: 'image/png' }];
1857
+ : [{ type: 'file-data', data: output.data, mediaType: 'image/png' }];
1644
1858
  },
1645
1859
  });
1646
1860
  ```
@@ -1704,3 +1918,327 @@ See also [Anthropic Model Comparison](https://docs.anthropic.com/en/docs/about-c
1704
1918
  The table above lists popular models. You can also pass any available provider
1705
1919
  model ID as a string if needed.
1706
1920
  </Note>
1921
+
1922
+ ## Google Vertex xAI Provider Usage
1923
+
1924
+ The Google Vertex xAI provider offers support for xAI's Grok partner models through the Google Vertex AI OpenAI-compatible Chat Completions API.
1925
+
1926
+ For more information, see the [Vertex AI Grok documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/grok).
1927
+
1928
+ ### Provider Instance
1929
+
1930
+ You can import the default provider instance `googleVertexXai` from `@ai-sdk/google-vertex/xai`:
1931
+
1932
+ ```typescript
1933
+ import { googleVertexXai } from '@ai-sdk/google-vertex/xai';
1934
+ ```
1935
+
1936
+ If you need a customized setup, you can import `createGoogleVertexXai` from `@ai-sdk/google-vertex/xai` and create a provider instance with your settings:
1937
+
1938
+ ```typescript
1939
+ import { createGoogleVertexXai } from '@ai-sdk/google-vertex/xai';
1940
+
1941
+ const googleVertexXai = createGoogleVertexXai({
1942
+ project: 'my-project', // optional
1943
+ location: 'global', // optional, defaults to 'global'
1944
+ });
1945
+ ```
1946
+
1947
+ #### Node.js Runtime
1948
+
1949
+ For Node.js environments, the Google Vertex xAI provider supports all standard Google Cloud authentication options through the `google-auth-library`:
1950
+
1951
+ ```typescript
1952
+ import { createGoogleVertexXai } from '@ai-sdk/google-vertex/xai';
1953
+
1954
+ const googleVertexXai = createGoogleVertexXai({
1955
+ googleAuthOptions: {
1956
+ credentials: {
1957
+ client_email: 'my-email',
1958
+ private_key: 'my-private-key',
1959
+ },
1960
+ },
1961
+ });
1962
+ ```
1963
+
1964
+ ##### Optional Provider Settings
1965
+
1966
+ - **project** _string_
1967
+
1968
+ The Google Cloud project ID. Defaults to the `GOOGLE_VERTEX_PROJECT` environment variable.
1969
+
1970
+ - **location** _string_
1971
+
1972
+ The Google Cloud location. Grok models are available on the global endpoint. Defaults to the `GOOGLE_VERTEX_LOCATION` environment variable. If not set, defaults to `global`.
1973
+
1974
+ - **googleAuthOptions** _object_
1975
+
1976
+ Optional. The authentication options used by the [Google Auth Library](https://github.com/googleapis/google-auth-library-nodejs/).
1977
+
1978
+ - **headers** _Resolvable&lt;Record&lt;string, string | undefined&gt;&gt;_
1979
+
1980
+ Headers to include in requests.
1981
+
1982
+ - **fetch** _(input: RequestInfo, init?: RequestInit) => Promise&lt;Response&gt;_
1983
+
1984
+ Custom [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) implementation.
1985
+
1986
+ <a id="google-vertex-xai-edge-runtime"></a>
1987
+
1988
+ #### Edge Runtime
1989
+
1990
+ For Edge runtimes, import from `@ai-sdk/google-vertex/xai/edge`:
1991
+
1992
+ ```typescript
1993
+ import { googleVertexXai } from '@ai-sdk/google-vertex/xai/edge';
1994
+ ```
1995
+
1996
+ ```typescript
1997
+ import { createGoogleVertexXai } from '@ai-sdk/google-vertex/xai/edge';
1998
+
1999
+ const googleVertexXai = createGoogleVertexXai({
2000
+ project: 'my-project',
2001
+ location: 'global',
2002
+ });
2003
+ ```
2004
+
2005
+ For Edge runtime authentication, set these environment variables:
2006
+
2007
+ - `GOOGLE_CLIENT_EMAIL`
2008
+ - `GOOGLE_PRIVATE_KEY`
2009
+ - `GOOGLE_PRIVATE_KEY_ID` (optional)
2010
+
2011
+ ### Language Models
2012
+
2013
+ You can create models using the provider instance. The first argument is the model ID:
2014
+
2015
+ ```ts
2016
+ import { googleVertexXai } from '@ai-sdk/google-vertex/xai';
2017
+ import { generateText } from 'ai';
2018
+
2019
+ const { text } = await generateText({
2020
+ model: googleVertexXai('xai/grok-4.1-fast-reasoning'),
2021
+ prompt: 'Invent a new holiday and describe its traditions.',
2022
+ });
2023
+ ```
2024
+
2025
+ Streaming is also supported:
2026
+
2027
+ ```ts
2028
+ import { googleVertexXai } from '@ai-sdk/google-vertex/xai';
2029
+ import { streamText } from 'ai';
2030
+
2031
+ const result = streamText({
2032
+ model: googleVertexXai('xai/grok-4.1-fast-reasoning'),
2033
+ prompt: 'Invent a new holiday and describe its traditions.',
2034
+ });
2035
+
2036
+ for await (const textPart of result.textStream) {
2037
+ process.stdout.write(textPart);
2038
+ }
2039
+ ```
2040
+
2041
+ ### Function Calling
2042
+
2043
+ Grok models on Vertex support OpenAI-compatible function calling. You can use AI SDK tools as usual:
2044
+
2045
+ ```ts
2046
+ import { googleVertexXai } from '@ai-sdk/google-vertex/xai';
2047
+ import { generateText, tool } from 'ai';
2048
+ import { z } from 'zod';
2049
+
2050
+ const result = await generateText({
2051
+ model: googleVertexXai('xai/grok-4.1-fast-reasoning'),
2052
+ tools: {
2053
+ weather: tool({
2054
+ description: 'Get the weather in a city',
2055
+ inputSchema: z.object({ city: z.string() }),
2056
+ execute: async ({ city }) => `The weather in ${city} is sunny.`,
2057
+ }),
2058
+ },
2059
+ prompt: 'What is the weather in San Francisco?',
2060
+ });
2061
+ ```
2062
+
2063
+ ### Structured Outputs
2064
+
2065
+ Grok models on Vertex support JSON mode and schema-backed structured outputs:
2066
+
2067
+ ```ts
2068
+ import { googleVertexXai } from '@ai-sdk/google-vertex/xai';
2069
+ import { generateText, Output } from 'ai';
2070
+ import { z } from 'zod';
2071
+
2072
+ const result = await generateText({
2073
+ model: googleVertexXai('xai/grok-4.1-fast-reasoning'),
2074
+ output: Output.object({
2075
+ schema: z.object({
2076
+ name: z.string(),
2077
+ date: z.string(),
2078
+ participants: z.array(z.string()),
2079
+ }),
2080
+ }),
2081
+ prompt: 'Alice and Bob are going to a science fair on Friday.',
2082
+ });
2083
+ ```
2084
+
2085
+ ### Available Models
2086
+
2087
+ The following models are available through the Google Vertex xAI provider. You can also pass any valid model ID as a string.
2088
+
2089
+ | Model ID | Reasoning |
2090
+ | --------------------------------- | --------- |
2091
+ | `xai/grok-4.20-reasoning` | Yes |
2092
+ | `xai/grok-4.20-non-reasoning` | No |
2093
+ | `xai/grok-4.1-fast-reasoning` | Yes |
2094
+ | `xai/grok-4.1-fast-non-reasoning` | No |
2095
+
2096
+ <Note>
2097
+ Grok reasoning models on Vertex report reasoning token counts in usage
2098
+ metadata. They do not support the `reasoning_effort` request parameter.
2099
+ </Note>
2100
+
2101
+ ## Google Vertex MaaS Provider Usage
2102
+
2103
+ The Google Vertex MaaS (Model as a Service) provider offers access to partner and open models hosted on Vertex AI through an OpenAI-compatible Chat Completions API. This includes models from DeepSeek, Qwen, Meta, MiniMax, Moonshot, and OpenAI.
2104
+
2105
+ For more information, see the [Vertex AI MaaS documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/maas/use-open-models).
2106
+
2107
+ ### Provider Instance
2108
+
2109
+ You can import the default provider instance `vertexMaas` from `@ai-sdk/google-vertex/maas`:
2110
+
2111
+ ```typescript
2112
+ import { vertexMaas } from '@ai-sdk/google-vertex/maas';
2113
+ ```
2114
+
2115
+ If you need a customized setup, you can import `createVertexMaas` from `@ai-sdk/google-vertex/maas` and create a provider instance with your settings:
2116
+
2117
+ ```typescript
2118
+ import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
2119
+
2120
+ const vertexMaas = createVertexMaas({
2121
+ project: 'my-project', // optional
2122
+ location: 'us-east5', // optional, defaults to 'global'
2123
+ });
2124
+ ```
2125
+
2126
+ #### Node.js Runtime
2127
+
2128
+ For Node.js environments, the Google Vertex MaaS provider supports all standard Google Cloud authentication options through the `google-auth-library`:
2129
+
2130
+ ```typescript
2131
+ import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
2132
+
2133
+ const vertexMaas = createVertexMaas({
2134
+ googleAuthOptions: {
2135
+ credentials: {
2136
+ client_email: 'my-email',
2137
+ private_key: 'my-private-key',
2138
+ },
2139
+ },
2140
+ });
2141
+ ```
2142
+
2143
+ ##### Optional Provider Settings
2144
+
2145
+ - **project** _string_
2146
+
2147
+ The Google Cloud project ID. Defaults to the `GOOGLE_VERTEX_PROJECT` environment variable.
2148
+
2149
+ - **location** _string_
2150
+
2151
+ The Google Cloud location, e.g. `us-east5` or `global`. Defaults to the `GOOGLE_VERTEX_LOCATION` environment variable. If not set, defaults to `global`.
2152
+
2153
+ - **googleAuthOptions** _object_
2154
+
2155
+ Optional. The authentication options used by the [Google Auth Library](https://github.com/googleapis/google-auth-library-nodejs/).
2156
+
2157
+ - **headers** _Resolvable&lt;Record&lt;string, string | undefined&gt;&gt;_
2158
+
2159
+ Headers to include in requests.
2160
+
2161
+ - **fetch** _(input: RequestInfo, init?: RequestInit) => Promise&lt;Response&gt;_
2162
+
2163
+ Custom [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) implementation.
2164
+
2165
+ <a id="google-vertex-maas-edge-runtime"></a>
2166
+
2167
+ #### Edge Runtime
2168
+
2169
+ For Edge runtimes, import from `@ai-sdk/google-vertex/maas/edge`:
2170
+
2171
+ ```typescript
2172
+ import { vertexMaas } from '@ai-sdk/google-vertex/maas/edge';
2173
+ ```
2174
+
2175
+ ```typescript
2176
+ import { createVertexMaas } from '@ai-sdk/google-vertex/maas/edge';
2177
+
2178
+ const vertexMaas = createVertexMaas({
2179
+ project: 'my-project',
2180
+ location: 'us-east5',
2181
+ });
2182
+ ```
2183
+
2184
+ For Edge runtime authentication, set these environment variables:
2185
+
2186
+ - `GOOGLE_CLIENT_EMAIL`
2187
+ - `GOOGLE_PRIVATE_KEY`
2188
+ - `GOOGLE_PRIVATE_KEY_ID` (optional)
2189
+
2190
+ ### Language Models
2191
+
2192
+ You can create models using the provider instance. The first argument is the model ID:
2193
+
2194
+ ```ts
2195
+ import { vertexMaas } from '@ai-sdk/google-vertex/maas';
2196
+ import { generateText } from 'ai';
2197
+
2198
+ const { text } = await generateText({
2199
+ model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
2200
+ prompt: 'Invent a new holiday and describe its traditions.',
2201
+ });
2202
+ ```
2203
+
2204
+ Streaming is also supported:
2205
+
2206
+ ```ts
2207
+ import { vertexMaas } from '@ai-sdk/google-vertex/maas';
2208
+ import { streamText } from 'ai';
2209
+
2210
+ const result = streamText({
2211
+ model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
2212
+ prompt: 'Invent a new holiday and describe its traditions.',
2213
+ });
2214
+
2215
+ for await (const textPart of result.textStream) {
2216
+ process.stdout.write(textPart);
2217
+ }
2218
+ ```
2219
+
2220
+ ### Available Models
2221
+
2222
+ The following models are available through the MaaS provider. You can also pass any valid model ID as a string.
2223
+
2224
+ | Model ID | Provider |
2225
+ | ---------------------------------------------- | -------- |
2226
+ | `deepseek-ai/deepseek-r1-0528-maas` | DeepSeek |
2227
+ | `deepseek-ai/deepseek-v3.1-maas` | DeepSeek |
2228
+ | `deepseek-ai/deepseek-v3.2-maas` | DeepSeek |
2229
+ | `openai/gpt-oss-120b-maas` | OpenAI |
2230
+ | `openai/gpt-oss-20b-maas` | OpenAI |
2231
+ | `meta/llama-4-maverick-17b-128e-instruct-maas` | Meta |
2232
+ | `meta/llama-4-scout-17b-16e-instruct-maas` | Meta |
2233
+ | `minimax/minimax-m2-maas` | MiniMax |
2234
+ | `qwen/qwen3-coder-480b-a35b-instruct-maas` | Qwen |
2235
+ | `qwen/qwen3-next-80b-a3b-instruct-maas` | Qwen |
2236
+ | `qwen/qwen3-next-80b-a3b-thinking-maas` | Qwen |
2237
+ | `moonshotai/kimi-k2-thinking-maas` | Moonshot |
2238
+
2239
+ <Note>
2240
+ Model availability depends on your Google Cloud project and region. Check the
2241
+ [Vertex AI Model
2242
+ Garden](https://console.cloud.google.com/vertex-ai/model-garden) for the
2243
+ latest available models.
2244
+ </Note>