@ai-sdk/google 4.0.0-beta.7 → 4.0.0-beta.82

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (71)
  1. package/CHANGELOG.md +614 -5
  2. package/README.md +6 -4
  3. package/dist/index.d.ts +301 -50
  4. package/dist/index.js +5410 -639
  5. package/dist/index.js.map +1 -1
  6. package/dist/internal/index.d.ts +100 -26
  7. package/dist/internal/index.js +1653 -451
  8. package/dist/internal/index.js.map +1 -1
  9. package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
  10. package/package.json +16 -17
  11. package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
  12. package/src/convert-json-schema-to-openapi-schema.ts +1 -1
  13. package/src/convert-to-google-messages.ts +647 -0
  14. package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
  15. package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
  16. package/src/google-error.ts +1 -1
  17. package/src/google-files.ts +225 -0
  18. package/src/google-image-model-options.ts +35 -0
  19. package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
  20. package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
  21. package/src/google-json-accumulator.ts +371 -0
  22. package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
  23. package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +701 -219
  24. package/src/google-prepare-tools.ts +72 -12
  25. package/src/google-prompt.ts +86 -0
  26. package/src/google-provider.ts +157 -53
  27. package/src/google-speech-api.ts +36 -0
  28. package/src/google-speech-model-options.ts +48 -0
  29. package/src/google-speech-model.ts +311 -0
  30. package/src/google-video-model-options.ts +43 -0
  31. package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
  32. package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
  33. package/src/index.ts +40 -9
  34. package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
  35. package/src/interactions/cancel-google-interaction.ts +60 -0
  36. package/src/interactions/convert-google-interactions-usage.ts +47 -0
  37. package/src/interactions/convert-to-google-interactions-input.ts +557 -0
  38. package/src/interactions/extract-google-interactions-sources.ts +252 -0
  39. package/src/interactions/google-interactions-agent.ts +15 -0
  40. package/src/interactions/google-interactions-api.ts +530 -0
  41. package/src/interactions/google-interactions-language-model-options.ts +262 -0
  42. package/src/interactions/google-interactions-language-model.ts +776 -0
  43. package/src/interactions/google-interactions-prompt.ts +582 -0
  44. package/src/interactions/google-interactions-provider-metadata.ts +23 -0
  45. package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
  46. package/src/interactions/parse-google-interactions-outputs.ts +252 -0
  47. package/src/interactions/poll-google-interactions.ts +129 -0
  48. package/src/interactions/prepare-google-interactions-tools.ts +245 -0
  49. package/src/interactions/stream-google-interactions.ts +242 -0
  50. package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
  51. package/src/internal/index.ts +3 -2
  52. package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
  53. package/src/realtime/google-realtime-event-mapper.ts +383 -0
  54. package/src/realtime/google-realtime-model-options.ts +3 -0
  55. package/src/realtime/google-realtime-model.ts +160 -0
  56. package/src/realtime/index.ts +2 -0
  57. package/src/tool/code-execution.ts +2 -2
  58. package/src/tool/enterprise-web-search.ts +9 -3
  59. package/src/tool/file-search.ts +5 -7
  60. package/src/tool/google-maps.ts +3 -2
  61. package/src/tool/google-search.ts +11 -12
  62. package/src/tool/url-context.ts +4 -2
  63. package/src/tool/vertex-rag-store.ts +9 -6
  64. package/dist/index.d.mts +0 -376
  65. package/dist/index.mjs +0 -2517
  66. package/dist/index.mjs.map +0 -1
  67. package/dist/internal/index.d.mts +0 -284
  68. package/dist/internal/index.mjs +0 -1706
  69. package/dist/internal/index.mjs.map +0 -1
  70. package/src/convert-to-google-generative-ai-messages.ts +0 -239
  71. package/src/google-generative-ai-prompt.ts +0 -38
@@ -1,19 +1,21 @@
1
1
  import {
2
- LanguageModelV3CallOptions,
3
- SharedV3Warning,
4
2
  UnsupportedFunctionalityError,
3
+ type LanguageModelV4CallOptions,
4
+ type SharedV4Warning,
5
5
  } from '@ai-sdk/provider';
6
6
  import { convertJSONSchemaToOpenAPISchema } from './convert-json-schema-to-openapi-schema';
7
- import { GoogleGenerativeAIModelId } from './google-generative-ai-options';
7
+ import type { GoogleModelId } from './google-language-model-options';
8
8
 
9
9
  export function prepareTools({
10
10
  tools,
11
11
  toolChoice,
12
12
  modelId,
13
+ isVertexProvider = false,
13
14
  }: {
14
- tools: LanguageModelV3CallOptions['tools'];
15
- toolChoice?: LanguageModelV3CallOptions['toolChoice'];
16
- modelId: GoogleGenerativeAIModelId;
15
+ tools: LanguageModelV4CallOptions['tools'];
16
+ toolChoice?: LanguageModelV4CallOptions['toolChoice'];
17
+ modelId: GoogleModelId;
18
+ isVertexProvider?: boolean;
17
19
  }): {
18
20
  tools:
19
21
  | Array<
@@ -30,30 +32,33 @@ export function prepareTools({
30
32
  toolConfig:
31
33
  | undefined
32
34
  | {
33
- functionCallingConfig: {
35
+ functionCallingConfig?: {
34
36
  mode: 'AUTO' | 'NONE' | 'ANY' | 'VALIDATED';
35
37
  allowedFunctionNames?: string[];
38
+ streamFunctionCallArguments?: boolean;
36
39
  };
40
+ includeServerSideToolInvocations?: boolean;
37
41
  };
38
- toolWarnings: SharedV3Warning[];
42
+ toolWarnings: SharedV4Warning[];
39
43
  } {
40
44
  // when the tools array is empty, change it to undefined to prevent errors:
41
45
  tools = tools?.length ? tools : undefined;
42
46
 
43
- const toolWarnings: SharedV3Warning[] = [];
47
+ const toolWarnings: SharedV4Warning[] = [];
44
48
 
45
49
  const isLatest = (
46
50
  [
47
51
  'gemini-flash-latest',
48
52
  'gemini-flash-lite-latest',
49
53
  'gemini-pro-latest',
50
- ] as const satisfies GoogleGenerativeAIModelId[]
54
+ ] as const satisfies GoogleModelId[]
51
55
  ).some(id => id === modelId);
52
56
  const isGemini2orNewer =
53
57
  modelId.includes('gemini-2') ||
54
58
  modelId.includes('gemini-3') ||
55
59
  modelId.includes('nano-banana') ||
56
60
  isLatest;
61
+ const isGemini3orNewer = modelId.includes('gemini-3');
57
62
  const supportsFileSearch =
58
63
  modelId.includes('gemini-2.5') || modelId.includes('gemini-3');
59
64
 
@@ -65,7 +70,7 @@ export function prepareTools({
65
70
  const hasFunctionTools = tools.some(tool => tool.type === 'function');
66
71
  const hasProviderTools = tools.some(tool => tool.type === 'provider');
67
72
 
68
- if (hasFunctionTools && hasProviderTools) {
73
+ if (hasFunctionTools && hasProviderTools && !isGemini3orNewer) {
69
74
  toolWarnings.push({
70
75
  type: 'unsupported',
71
76
  feature: `combination of function and provider-defined tools`,
@@ -120,7 +125,7 @@ export function prepareTools({
120
125
  type: 'unsupported',
121
126
  feature: `provider-defined tool ${tool.id}`,
122
127
  details:
123
- 'The code execution tools is not supported with other Gemini models than Gemini 2.',
128
+ 'The code execution tool is not supported with other Gemini models than Gemini 2.',
124
129
  });
125
130
  }
126
131
  break;
@@ -178,6 +183,61 @@ export function prepareTools({
178
183
  }
179
184
  });
180
185
 
186
+ if (hasFunctionTools && isGemini3orNewer && googleTools.length > 0) {
187
+ const functionDeclarations: Array<{
188
+ name: string;
189
+ description: string;
190
+ parameters: unknown;
191
+ }> = [];
192
+ for (const tool of tools) {
193
+ if (tool.type === 'function') {
194
+ functionDeclarations.push({
195
+ name: tool.name,
196
+ description: tool.description ?? '',
197
+ parameters: convertJSONSchemaToOpenAPISchema(tool.inputSchema),
198
+ });
199
+ }
200
+ }
201
+
202
+ const combinedToolConfig: {
203
+ functionCallingConfig: {
204
+ mode: 'VALIDATED' | 'ANY' | 'NONE';
205
+ allowedFunctionNames?: string[];
206
+ };
207
+ includeServerSideToolInvocations?: true;
208
+ } = {
209
+ functionCallingConfig: { mode: 'VALIDATED' },
210
+ ...(!isVertexProvider && {
211
+ includeServerSideToolInvocations: true,
212
+ }),
213
+ };
214
+
215
+ if (toolChoice != null) {
216
+ switch (toolChoice.type) {
217
+ case 'auto':
218
+ break;
219
+ case 'none':
220
+ combinedToolConfig.functionCallingConfig = { mode: 'NONE' };
221
+ break;
222
+ case 'required':
223
+ combinedToolConfig.functionCallingConfig = { mode: 'ANY' };
224
+ break;
225
+ case 'tool':
226
+ combinedToolConfig.functionCallingConfig = {
227
+ mode: 'ANY',
228
+ allowedFunctionNames: [toolChoice.toolName],
229
+ };
230
+ break;
231
+ }
232
+ }
233
+
234
+ return {
235
+ tools: [...googleTools, { functionDeclarations }],
236
+ toolConfig: combinedToolConfig,
237
+ toolWarnings,
238
+ };
239
+ }
240
+
181
241
  return {
182
242
  tools: googleTools.length > 0 ? googleTools : undefined,
183
243
  toolConfig: undefined,
@@ -0,0 +1,86 @@
1
+ import type {
2
+ GroundingMetadataSchema,
3
+ PromptFeedbackSchema,
4
+ SafetyRatingSchema,
5
+ UrlContextMetadataSchema,
6
+ UsageMetadataSchema,
7
+ } from './google-language-model';
8
+
9
+ export type GooglePrompt = {
10
+ systemInstruction?: GoogleSystemInstruction;
11
+ contents: Array<GoogleContent>;
12
+ };
13
+
14
+ export type GoogleSystemInstruction = {
15
+ parts: Array<{ text: string }>;
16
+ };
17
+
18
+ export type GoogleContent = {
19
+ role: 'user' | 'model';
20
+ parts: Array<GoogleContentPart>;
21
+ };
22
+
23
+ export type GoogleContentPart =
24
+ | { text: string; thought?: boolean; thoughtSignature?: string }
25
+ | {
26
+ inlineData: { mimeType: string; data: string };
27
+ thought?: boolean;
28
+ thoughtSignature?: string;
29
+ }
30
+ | {
31
+ functionCall: { id?: string; name: string; args: unknown };
32
+ thoughtSignature?: string;
33
+ }
34
+ | {
35
+ functionResponse: {
36
+ id?: string;
37
+ name: string;
38
+ response: unknown;
39
+ parts?: Array<GoogleFunctionResponsePart>;
40
+ };
41
+ }
42
+ | {
43
+ fileData: { mimeType: string; fileUri: string };
44
+ thought?: boolean;
45
+ thoughtSignature?: string;
46
+ }
47
+ | {
48
+ toolCall: {
49
+ toolType: string;
50
+ args?: unknown;
51
+ id: string;
52
+ };
53
+ thoughtSignature?: string;
54
+ }
55
+ | {
56
+ toolResponse: {
57
+ toolType: string;
58
+ response?: unknown;
59
+ id: string;
60
+ };
61
+ thoughtSignature?: string;
62
+ };
63
+
64
+ export type GoogleFunctionResponsePart = {
65
+ inlineData: { mimeType: string; data: string };
66
+ };
67
+
68
+ export type GoogleGroundingMetadata = GroundingMetadataSchema;
69
+
70
+ export type GoogleUrlContextMetadata = UrlContextMetadataSchema;
71
+
72
+ export type GoogleSafetyRating = SafetyRatingSchema;
73
+
74
+ export type GooglePromptFeedback = PromptFeedbackSchema;
75
+
76
+ export type GoogleUsageMetadata = UsageMetadataSchema;
77
+
78
+ export interface GoogleProviderMetadata {
79
+ promptFeedback: GooglePromptFeedback | null;
80
+ groundingMetadata: GoogleGroundingMetadata | null;
81
+ urlContextMetadata: GoogleUrlContextMetadata | null;
82
+ safetyRatings: GoogleSafetyRating[] | null;
83
+ usageMetadata: GoogleUsageMetadata | null;
84
+ finishMessage: string | null;
85
+ serviceTier: string | null;
86
+ }
@@ -1,90 +1,129 @@
1
- import {
2
- EmbeddingModelV3,
3
- Experimental_VideoModelV3,
4
- ImageModelV3,
5
- LanguageModelV3,
6
- ProviderV3,
1
+ import type {
2
+ EmbeddingModelV4,
3
+ Experimental_VideoModelV4,
4
+ FilesV4,
5
+ ImageModelV4,
6
+ LanguageModelV4,
7
+ ProviderV4,
8
+ Experimental_RealtimeFactoryV4 as RealtimeFactoryV4,
9
+ Experimental_RealtimeFactoryV4GetTokenOptions as RealtimeFactoryV4GetTokenOptions,
10
+ SpeechModelV4,
7
11
  } from '@ai-sdk/provider';
8
12
  import {
9
- FetchFunction,
10
13
  generateId,
11
14
  loadApiKey,
12
15
  withoutTrailingSlash,
13
16
  withUserAgentSuffix,
17
+ type FetchFunction,
14
18
  } from '@ai-sdk/provider-utils';
15
19
  import { VERSION } from './version';
16
- import { GoogleGenerativeAIEmbeddingModel } from './google-generative-ai-embedding-model';
17
- import { GoogleGenerativeAIEmbeddingModelId } from './google-generative-ai-embedding-options';
18
- import { GoogleGenerativeAILanguageModel } from './google-generative-ai-language-model';
19
- import { GoogleGenerativeAIModelId } from './google-generative-ai-options';
20
+ import { GoogleEmbeddingModel } from './google-embedding-model';
21
+ import type { GoogleEmbeddingModelId } from './google-embedding-model-options';
22
+ import { GoogleLanguageModel } from './google-language-model';
23
+ import type { GoogleModelId } from './google-language-model-options';
20
24
  import { googleTools } from './google-tools';
21
25
 
26
+ import type {
27
+ GoogleImageSettings,
28
+ GoogleImageModelId,
29
+ } from './google-image-settings';
30
+ import { GoogleImageModel } from './google-image-model';
31
+ import { GoogleFiles } from './google-files';
32
+ import { GoogleVideoModel } from './google-video-model';
33
+ import type { GoogleVideoModelId } from './google-video-settings';
34
+ import { GoogleSpeechModel } from './google-speech-model';
35
+ import type { GoogleSpeechModelId } from './google-speech-model-options';
22
36
  import {
23
- GoogleGenerativeAIImageSettings,
24
- GoogleGenerativeAIImageModelId,
25
- } from './google-generative-ai-image-settings';
26
- import { GoogleGenerativeAIImageModel } from './google-generative-ai-image-model';
27
- import { GoogleGenerativeAIVideoModel } from './google-generative-ai-video-model';
28
- import { GoogleGenerativeAIVideoModelId } from './google-generative-ai-video-settings';
37
+ GoogleInteractionsLanguageModel,
38
+ type GoogleInteractionsModelInput,
39
+ } from './interactions/google-interactions-language-model';
40
+ import type { GoogleInteractionsModelId } from './interactions/google-interactions-language-model-options';
41
+ import type { GoogleInteractionsAgentName } from './interactions/google-interactions-agent';
42
+ import { GoogleRealtimeModel } from './realtime/google-realtime-model';
29
43
 
30
- export interface GoogleGenerativeAIProvider extends ProviderV3 {
31
- (modelId: GoogleGenerativeAIModelId): LanguageModelV3;
44
+ export interface GoogleProvider extends ProviderV4 {
45
+ (modelId: GoogleModelId): LanguageModelV4;
32
46
 
33
- languageModel(modelId: GoogleGenerativeAIModelId): LanguageModelV3;
47
+ languageModel(modelId: GoogleModelId): LanguageModelV4;
34
48
 
35
- chat(modelId: GoogleGenerativeAIModelId): LanguageModelV3;
49
+ chat(modelId: GoogleModelId): LanguageModelV4;
36
50
 
37
51
  /**
38
52
  * Creates a model for image generation.
39
53
  */
40
54
  image(
41
- modelId: GoogleGenerativeAIImageModelId,
42
- settings?: GoogleGenerativeAIImageSettings,
43
- ): ImageModelV3;
55
+ modelId: GoogleImageModelId,
56
+ settings?: GoogleImageSettings,
57
+ ): ImageModelV4;
44
58
 
45
59
  /**
46
60
  * @deprecated Use `chat()` instead.
47
61
  */
48
- generativeAI(modelId: GoogleGenerativeAIModelId): LanguageModelV3;
62
+ generativeAI(modelId: GoogleModelId): LanguageModelV4;
49
63
 
50
64
  /**
51
65
  * Creates a model for text embeddings.
52
66
  */
53
- embedding(modelId: GoogleGenerativeAIEmbeddingModelId): EmbeddingModelV3;
67
+ embedding(modelId: GoogleEmbeddingModelId): EmbeddingModelV4;
54
68
 
55
69
  /**
56
70
  * Creates a model for text embeddings.
57
71
  */
58
- embeddingModel(modelId: GoogleGenerativeAIEmbeddingModelId): EmbeddingModelV3;
72
+ embeddingModel(modelId: GoogleEmbeddingModelId): EmbeddingModelV4;
59
73
 
60
74
  /**
61
75
  * @deprecated Use `embedding` instead.
62
76
  */
63
- textEmbedding(modelId: GoogleGenerativeAIEmbeddingModelId): EmbeddingModelV3;
77
+ textEmbedding(modelId: GoogleEmbeddingModelId): EmbeddingModelV4;
64
78
 
65
79
  /**
66
80
  * @deprecated Use `embeddingModel` instead.
67
81
  */
68
- textEmbeddingModel(
69
- modelId: GoogleGenerativeAIEmbeddingModelId,
70
- ): EmbeddingModelV3;
82
+ textEmbeddingModel(modelId: GoogleEmbeddingModelId): EmbeddingModelV4;
71
83
 
72
84
  /**
73
85
  * Creates a model for video generation.
74
86
  */
75
- video(modelId: GoogleGenerativeAIVideoModelId): Experimental_VideoModelV3;
87
+ video(modelId: GoogleVideoModelId): Experimental_VideoModelV4;
76
88
 
77
89
  /**
78
90
  * Creates a model for video generation.
79
91
  */
80
- videoModel(
81
- modelId: GoogleGenerativeAIVideoModelId,
82
- ): Experimental_VideoModelV3;
92
+ videoModel(modelId: GoogleVideoModelId): Experimental_VideoModelV4;
93
+
94
+ /**
95
+ * Creates a model for speech generation (text-to-speech).
96
+ */
97
+ speech(modelId: GoogleSpeechModelId): SpeechModelV4;
98
+
99
+ /**
100
+ * Creates a model for speech generation (text-to-speech).
101
+ */
102
+ speechModel(modelId: GoogleSpeechModelId): SpeechModelV4;
103
+
104
+ files(): FilesV4;
105
+
106
+ /**
107
+ * Creates a language model targeting the Gemini Interactions API
108
+ * (`POST /v1beta/interactions`). Pass:
109
+ * - a model ID (string),
110
+ * - `{ agent: <name> }` to use a known Gemini agent preset, or
111
+ * - `{ managedAgent: <name> }` to use a user-defined agent created via
112
+ * the `/v1beta/agents` endpoint.
113
+ */
114
+ interactions(
115
+ modelIdOrAgent:
116
+ | GoogleInteractionsModelId
117
+ | { agent: GoogleInteractionsAgentName }
118
+ | { managedAgent: string },
119
+ ): LanguageModelV4;
120
+
121
+ experimental_realtime: RealtimeFactoryV4;
83
122
 
84
123
  tools: typeof googleTools;
85
124
  }
86
125
 
87
- export interface GoogleGenerativeAIProviderSettings {
126
+ export interface GoogleProviderSettings {
88
127
  /**
89
128
  * Use a different URL prefix for API calls, e.g. to use proxy servers.
90
129
  * The default prefix is `https://generativelanguage.googleapis.com/v1beta`.
@@ -121,11 +160,11 @@ export interface GoogleGenerativeAIProviderSettings {
121
160
  }
122
161
 
123
162
  /**
124
- * Create a Google Generative AI provider instance.
163
+ * Create a Google provider instance.
125
164
  */
126
- export function createGoogleGenerativeAI(
127
- options: GoogleGenerativeAIProviderSettings = {},
128
- ): GoogleGenerativeAIProvider {
165
+ export function createGoogle(
166
+ options: GoogleProviderSettings = {},
167
+ ): GoogleProvider {
129
168
  const baseURL =
130
169
  withoutTrailingSlash(options.baseURL) ??
131
170
  'https://generativelanguage.googleapis.com/v1beta';
@@ -145,8 +184,8 @@ export function createGoogleGenerativeAI(
145
184
  `ai-sdk/google/${VERSION}`,
146
185
  );
147
186
 
148
- const createChatModel = (modelId: GoogleGenerativeAIModelId) =>
149
- new GoogleGenerativeAILanguageModel(modelId, {
187
+ const createChatModel = (modelId: GoogleModelId) =>
188
+ new GoogleLanguageModel(modelId, {
150
189
  provider: providerName,
151
190
  baseURL,
152
191
  headers: getHeaders,
@@ -166,8 +205,8 @@ export function createGoogleGenerativeAI(
166
205
  fetch: options.fetch,
167
206
  });
168
207
 
169
- const createEmbeddingModel = (modelId: GoogleGenerativeAIEmbeddingModelId) =>
170
- new GoogleGenerativeAIEmbeddingModel(modelId, {
208
+ const createEmbeddingModel = (modelId: GoogleEmbeddingModelId) =>
209
+ new GoogleEmbeddingModel(modelId, {
171
210
  provider: providerName,
172
211
  baseURL,
173
212
  headers: getHeaders,
@@ -175,18 +214,26 @@ export function createGoogleGenerativeAI(
175
214
  });
176
215
 
177
216
  const createImageModel = (
178
- modelId: GoogleGenerativeAIImageModelId,
179
- settings: GoogleGenerativeAIImageSettings = {},
217
+ modelId: GoogleImageModelId,
218
+ settings: GoogleImageSettings = {},
180
219
  ) =>
181
- new GoogleGenerativeAIImageModel(modelId, settings, {
220
+ new GoogleImageModel(modelId, settings, {
221
+ provider: providerName,
222
+ baseURL,
223
+ headers: getHeaders,
224
+ fetch: options.fetch,
225
+ });
226
+
227
+ const createFiles = () =>
228
+ new GoogleFiles({
182
229
  provider: providerName,
183
230
  baseURL,
184
231
  headers: getHeaders,
185
232
  fetch: options.fetch,
186
233
  });
187
234
 
188
- const createVideoModel = (modelId: GoogleGenerativeAIVideoModelId) =>
189
- new GoogleGenerativeAIVideoModel(modelId, {
235
+ const createVideoModel = (modelId: GoogleVideoModelId) =>
236
+ new GoogleVideoModel(modelId, {
190
237
  provider: providerName,
191
238
  baseURL,
192
239
  headers: getHeaders,
@@ -194,7 +241,59 @@ export function createGoogleGenerativeAI(
194
241
  generateId: options.generateId ?? generateId,
195
242
  });
196
243
 
197
- const provider = function (modelId: GoogleGenerativeAIModelId) {
244
+ const createRealtimeModel = (modelId: string) =>
245
+ new GoogleRealtimeModel(modelId, {
246
+ provider: `${providerName}.realtime`,
247
+ baseURL,
248
+ headers: getHeaders,
249
+ fetch: options.fetch,
250
+ });
251
+
252
+ const createSpeechModel = (modelId: GoogleSpeechModelId) =>
253
+ new GoogleSpeechModel(modelId, {
254
+ provider: `${providerName}.speech`,
255
+ baseURL,
256
+ headers: getHeaders,
257
+ fetch: options.fetch,
258
+ });
259
+
260
+ const experimentalRealtimeFactory = Object.assign(
261
+ (modelId: string) => createRealtimeModel(modelId),
262
+ {
263
+ getToken: async (tokenOptions: RealtimeFactoryV4GetTokenOptions) => {
264
+ const model = createRealtimeModel(tokenOptions.model);
265
+ const secret = await model.doCreateClientSecret({
266
+ sessionConfig: tokenOptions.sessionConfig,
267
+ expiresAfterSeconds: tokenOptions.expiresAfterSeconds,
268
+ });
269
+
270
+ return {
271
+ token: secret.token,
272
+ url: secret.url,
273
+ expiresAt: secret.expiresAt,
274
+ };
275
+ },
276
+ },
277
+ ) as RealtimeFactoryV4;
278
+
279
+ const createInteractionsModel = (
280
+ modelIdOrAgent:
281
+ | GoogleInteractionsModelId
282
+ | { agent: GoogleInteractionsAgentName }
283
+ | { managedAgent: string },
284
+ ) =>
285
+ new GoogleInteractionsLanguageModel(
286
+ modelIdOrAgent as GoogleInteractionsModelInput,
287
+ {
288
+ provider: `${providerName}.interactions`,
289
+ baseURL,
290
+ headers: getHeaders,
291
+ generateId: options.generateId ?? generateId,
292
+ fetch: options.fetch,
293
+ },
294
+ );
295
+
296
+ const provider = function (modelId: GoogleModelId) {
198
297
  if (new.target) {
199
298
  throw new Error(
200
299
  'The Google Generative AI model function cannot be called with the new keyword.',
@@ -204,7 +303,7 @@ export function createGoogleGenerativeAI(
204
303
  return createChatModel(modelId);
205
304
  };
206
305
 
207
- provider.specificationVersion = 'v3' as const;
306
+ provider.specificationVersion = 'v4' as const;
208
307
  provider.languageModel = createChatModel;
209
308
  provider.chat = createChatModel;
210
309
  provider.generativeAI = createChatModel;
@@ -216,12 +315,17 @@ export function createGoogleGenerativeAI(
216
315
  provider.imageModel = createImageModel;
217
316
  provider.video = createVideoModel;
218
317
  provider.videoModel = createVideoModel;
318
+ provider.experimental_realtime = experimentalRealtimeFactory;
319
+ provider.files = createFiles;
320
+ provider.speech = createSpeechModel;
321
+ provider.speechModel = createSpeechModel;
322
+ provider.interactions = createInteractionsModel;
219
323
  provider.tools = googleTools;
220
324
 
221
- return provider as GoogleGenerativeAIProvider;
325
+ return provider as GoogleProvider;
222
326
  }
223
327
 
224
328
  /**
225
329
  * Default Google Generative AI provider instance.
226
330
  */
227
- export const google = createGoogleGenerativeAI();
331
+ export const google = createGoogle();
@@ -0,0 +1,36 @@
1
+ import { lazySchema, zodSchema } from '@ai-sdk/provider-utils';
2
+ import { z } from 'zod/v4';
3
+
4
+ /**
5
+ * Response schema for the Gemini `:generateContent` endpoint when called with
6
+ * `responseModalities: ['AUDIO']`. The generated audio is returned as base64
7
+ * encoded raw PCM in the first inline-data part.
8
+ */
9
+ export const googleSpeechResponseSchema = lazySchema(() =>
10
+ zodSchema(
11
+ z.object({
12
+ candidates: z
13
+ .array(
14
+ z.object({
15
+ content: z
16
+ .object({
17
+ parts: z
18
+ .array(
19
+ z.object({
20
+ inlineData: z
21
+ .object({
22
+ mimeType: z.string().nullish(),
23
+ data: z.string().nullish(),
24
+ })
25
+ .nullish(),
26
+ }),
27
+ )
28
+ .nullish(),
29
+ })
30
+ .nullish(),
31
+ }),
32
+ )
33
+ .nullish(),
34
+ }),
35
+ ),
36
+ );
@@ -0,0 +1,48 @@
1
+ import {
2
+ lazySchema,
3
+ zodSchema,
4
+ type InferSchema,
5
+ } from '@ai-sdk/provider-utils';
6
+ import { z } from 'zod/v4';
7
+
8
+ export type GoogleSpeechModelId =
9
+ | 'gemini-2.5-flash-preview-tts'
10
+ | 'gemini-2.5-pro-preview-tts'
11
+ | 'gemini-3.1-flash-tts-preview'
12
+ | (string & {});
13
+
14
+ const prebuiltVoiceConfigSchema = z.object({
15
+ voiceName: z.string(),
16
+ });
17
+
18
+ const voiceConfigSchema = z.object({
19
+ prebuiltVoiceConfig: prebuiltVoiceConfigSchema,
20
+ });
21
+
22
+ export const googleSpeechProviderOptionsSchema = lazySchema(() =>
23
+ zodSchema(
24
+ z.object({
25
+ /**
26
+ * Multi-speaker configuration for dialogue audio. When provided, this
27
+ * overrides the top-level `voice`. The Gemini TTS API supports up to two
28
+ * speakers; each speaker name must match a name used in the input text.
29
+ *
30
+ * https://ai.google.dev/gemini-api/docs/speech-generation#multi-speaker
31
+ */
32
+ multiSpeakerVoiceConfig: z
33
+ .object({
34
+ speakerVoiceConfigs: z.array(
35
+ z.object({
36
+ speaker: z.string(),
37
+ voiceConfig: voiceConfigSchema,
38
+ }),
39
+ ),
40
+ })
41
+ .optional(),
42
+ }),
43
+ ),
44
+ );
45
+
46
+ export type GoogleSpeechModelOptions = InferSchema<
47
+ typeof googleSpeechProviderOptionsSchema
48
+ >;