@ai-sdk/google 4.0.0-beta.4 → 4.0.0-beta.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -245,6 +245,12 @@ The following optional provider options are available for Google Generative AI m
  Optional. Defines labels used in billing reports. Available on Vertex AI only.
  See [Google Cloud labels documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls).

+ - **serviceTier** _'standard' | 'flex' | 'priority'_
+
+ Optional. The service tier to use for the request.
+ Set to 'flex' for 50% cheaper processing at the cost of increased latency.
+ Set to 'priority' for ultra-low latency at a 75-100% price premium over 'standard'.
+
  - **threshold** _string_

  Optional. Standalone threshold setting that can be used independently of `safetySettings`.
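
For reference, a minimal sketch of how the new `serviceTier` option would be passed, following the `providerOptions.google` pattern used elsewhere in these docs; the model id `gemini-2.5-flash` and the prompt are illustrative assumptions, not part of this diff:

```ts
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';

// Sketch only: opt into the cheaper 'flex' tier via provider options.
// Model id and prompt are assumptions for illustration.
const { text } = await generateText({
  model: google('gemini-2.5-flash'),
  prompt: 'Summarize the changes in this release.',
  providerOptions: {
    google: {
      serviceTier: 'flex', // 'standard' | 'flex' | 'priority'
    },
  },
});
```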
@@ -1131,6 +1137,28 @@ const { embedding } = await embed({
  google: {
  outputDimensionality: 512, // optional, number of dimensions for the embedding
  taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
+ content: [[{ text: 'additional context' }]], // optional, per-value multimodal content (a single entry here, since `embed` takes a single value)
+ } satisfies GoogleEmbeddingModelOptions,
+ },
+ });
+ ```
+
+ When using `embedMany`, provide per-value multimodal content via the `content` option. Each entry corresponds to a value at the same index; use `null` for text-only entries:
+
+ ```ts
+ import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
+ import { embedMany } from 'ai';
+
+ const { embeddings } = await embedMany({
+ model: google.embedding('gemini-embedding-2-preview'),
+ values: ['sunny day at the beach', 'rainy afternoon in the city'],
+ providerOptions: {
+ google: {
+ // content array must have the same length as values
+ content: [
+ [{ inlineData: { mimeType: 'image/png', data: '<base64>' } }], // pairs with values[0]
+ null, // text-only, pairs with values[1]
+ ],
  } satisfies GoogleEmbeddingModelOptions,
  },
  });
@@ -1155,11 +1183,16 @@ The following optional provider options are available for Google Generative AI e
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.

+ - **content**: _array_
+
+ Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either `{ text: string }` or `{ inlineData: { mimeType: string, data: string } }`. Supported by `gemini-embedding-2-preview`.
+
  ### Model Capabilities

- | Model | Default Dimensions | Custom Dimensions |
- | ---------------------- | ------------------ | ------------------- |
- | `gemini-embedding-001` | 3072 | <Check size={18} /> |
+ | Model | Default Dimensions | Custom Dimensions | Multimodal |
+ | ---------------------------- | ------------------ | ------------------- | ------------------- |
+ | `gemini-embedding-001` | 3072 | <Check size={18} /> | <Cross size={18} /> |
+ | `gemini-embedding-2-preview` | 3072 | <Check size={18} /> | <Check size={18} /> |

  ## Image Models

package/package.json CHANGED
@@ -1,10 +1,10 @@
  {
  "name": "@ai-sdk/google",
- "version": "4.0.0-beta.4",
+ "version": "4.0.0-beta.41",
+ "type": "module",
  "license": "Apache-2.0",
  "sideEffects": false,
  "main": "./dist/index.js",
- "module": "./dist/index.mjs",
  "types": "./dist/index.d.ts",
  "files": [
  "dist/**/*",
@@ -25,26 +25,25 @@
  "./package.json": "./package.json",
  ".": {
  "types": "./dist/index.d.ts",
- "import": "./dist/index.mjs",
- "require": "./dist/index.js"
+ "import": "./dist/index.js",
+ "default": "./dist/index.js"
  },
  "./internal": {
  "types": "./dist/internal/index.d.ts",
- "import": "./dist/internal/index.mjs",
- "module": "./dist/internal/index.mjs",
- "require": "./dist/internal/index.js"
+ "import": "./dist/internal/index.js",
+ "default": "./dist/internal/index.js"
  }
  },
  "dependencies": {
- "@ai-sdk/provider": "4.0.0-beta.0",
- "@ai-sdk/provider-utils": "5.0.0-beta.1"
+ "@ai-sdk/provider": "4.0.0-beta.12",
+ "@ai-sdk/provider-utils": "5.0.0-beta.22"
  },
  "devDependencies": {
  "@types/node": "20.17.24",
  "tsup": "^8",
  "typescript": "5.8.3",
  "zod": "3.25.76",
- "@ai-sdk/test-server": "2.0.0-beta.0",
+ "@ai-sdk/test-server": "2.0.0-beta.1",
  "@vercel/ai-tsconfig": "0.0.0"
  },
  "peerDependencies": {
@@ -71,9 +70,7 @@
  "build": "pnpm clean && tsup --tsconfig tsconfig.build.json",
  "build:watch": "pnpm clean && tsup --watch",
  "clean": "del-cli dist docs *.tsbuildinfo",
- "lint": "eslint \"./**/*.ts*\"",
  "type-check": "tsc --build",
- "prettier-check": "prettier --check \"./**/*.ts*\"",
  "test": "pnpm test:node && pnpm test:edge",
  "test:update": "pnpm test:node -u",
  "test:watch": "vitest --config vitest.node.config.js",
@@ -1,4 +1,9 @@
- import { LanguageModelV3Usage } from '@ai-sdk/provider';
+ import { LanguageModelV4Usage } from '@ai-sdk/provider';
+
+ export type GoogleGenerativeAITokenDetail = {
+ modality: string;
+ tokenCount: number;
+ };

  export type GoogleGenerativeAIUsageMetadata = {
  promptTokenCount?: number | null;
@@ -7,11 +12,13 @@ export type GoogleGenerativeAIUsageMetadata = {
  cachedContentTokenCount?: number | null;
  thoughtsTokenCount?: number | null;
  trafficType?: string | null;
+ promptTokensDetails?: GoogleGenerativeAITokenDetail[] | null;
+ candidatesTokensDetails?: GoogleGenerativeAITokenDetail[] | null;
  };

  export function convertGoogleGenerativeAIUsage(
  usage: GoogleGenerativeAIUsageMetadata | undefined | null,
- ): LanguageModelV3Usage {
+ ): LanguageModelV4Usage {
  if (usage == null) {
  return {
  inputTokens: {
@@ -1,23 +1,186 @@
  import {
- LanguageModelV3Prompt,
+ LanguageModelV4Prompt,
  UnsupportedFunctionalityError,
  } from '@ai-sdk/provider';
+ import {
+ convertToBase64,
+ isProviderReference,
+ resolveProviderReference,
+ } from '@ai-sdk/provider-utils';
  import {
  GoogleGenerativeAIContent,
  GoogleGenerativeAIContentPart,
+ GoogleGenerativeAIFunctionResponsePart,
  GoogleGenerativeAIPrompt,
  } from './google-generative-ai-prompt';
- import { convertToBase64 } from '@ai-sdk/provider-utils';
+
+ const dataUrlRegex = /^data:([^;,]+);base64,(.+)$/s;
+
+ function parseBase64DataUrl(
+ value: string,
+ ): { mediaType: string; data: string } | undefined {
+ const match = dataUrlRegex.exec(value);
+ if (match == null) {
+ return undefined;
+ }
+
+ return {
+ mediaType: match[1],
+ data: match[2],
+ };
+ }
+
+ function convertUrlToolResultPart(
+ url: string,
+ ): GoogleGenerativeAIFunctionResponsePart | undefined {
+ // Per https://ai.google.dev/api/caching#FunctionResponsePart, only inline data is supported.
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/function-calling#functionresponsepart suggests that this
+ // may be different for Vertex, but this needs to be confirmed and further tested for both APIs.
+ const parsedDataUrl = parseBase64DataUrl(url);
+ if (parsedDataUrl == null) {
+ return undefined;
+ }
+
+ return {
+ inlineData: {
+ mimeType: parsedDataUrl.mediaType,
+ data: parsedDataUrl.data,
+ },
+ };
+ }
+
+ /*
+ * Appends tool result content parts to the message using the functionResponse
+ * format with support for multimodal parts (e.g. inline images/files alongside
+ * text). This format is supported by Gemini 3+ models.
+ */
+ function appendToolResultParts(
+ parts: GoogleGenerativeAIContentPart[],
+ toolName: string,
+ outputValue: Array<{
+ type: string;
+ [key: string]: unknown;
+ }>,
+ ): void {
+ const functionResponseParts: GoogleGenerativeAIFunctionResponsePart[] = [];
+ const responseTextParts: string[] = [];
+
+ for (const contentPart of outputValue) {
+ switch (contentPart.type) {
+ case 'text': {
+ responseTextParts.push(contentPart.text as string);
+ break;
+ }
+ case 'file-data': {
+ functionResponseParts.push({
+ inlineData: {
+ mimeType: contentPart.mediaType as string,
+ data: contentPart.data as string,
+ },
+ });
+ break;
+ }
+ case 'file-url': {
+ const functionResponsePart = convertUrlToolResultPart(
+ contentPart.url as string,
+ );
+
+ if (functionResponsePart != null) {
+ functionResponseParts.push(functionResponsePart);
+ } else {
+ responseTextParts.push(JSON.stringify(contentPart));
+ }
+ break;
+ }
+ default: {
+ responseTextParts.push(JSON.stringify(contentPart));
+ break;
+ }
+ }
+ }
+
+ parts.push({
+ functionResponse: {
+ name: toolName,
+ response: {
+ name: toolName,
+ content:
+ responseTextParts.length > 0
+ ? responseTextParts.join('\n')
+ : 'Tool executed successfully.',
+ },
+ ...(functionResponseParts.length > 0
+ ? { parts: functionResponseParts }
+ : {}),
+ },
+ });
+ }
+
+ /*
+ * Appends tool result content parts using a legacy format for pre-Gemini 3
+ * models that do not support multimodal parts within functionResponse. Instead,
+ * non-text content like images is sent as separate top-level inlineData parts.
+ */
+ function appendLegacyToolResultParts(
+ parts: GoogleGenerativeAIContentPart[],
+ toolName: string,
+ outputValue: Array<{
+ type: string;
+ [key: string]: unknown;
+ }>,
+ ): void {
+ for (const contentPart of outputValue) {
+ switch (contentPart.type) {
+ case 'text':
+ parts.push({
+ functionResponse: {
+ name: toolName,
+ response: {
+ name: toolName,
+ content: contentPart.text,
+ },
+ },
+ });
+ break;
+ case 'file-data':
+ if ((contentPart.mediaType as string).startsWith('image/')) {
+ parts.push(
+ {
+ inlineData: {
+ mimeType: contentPart.mediaType as string,
+ data: contentPart.data as string,
+ },
+ },
+ {
+ text: 'Tool executed successfully and returned this image as a response',
+ },
+ );
+ } else {
+ parts.push({ text: JSON.stringify(contentPart) });
+ }
+ break;
+ default:
+ parts.push({ text: JSON.stringify(contentPart) });
+ break;
+ }
+ }
+ }

  export function convertToGoogleGenerativeAIMessages(
- prompt: LanguageModelV3Prompt,
- options?: { isGemmaModel?: boolean; providerOptionsName?: string },
+ prompt: LanguageModelV4Prompt,
+ options?: {
+ isGemmaModel?: boolean;
+ providerOptionsName?: string;
+ supportsFunctionResponseParts?: boolean;
+ },
  ): GoogleGenerativeAIPrompt {
  const systemInstructionParts: Array<{ text: string }> = [];
  const contents: Array<GoogleGenerativeAIContent> = [];
  let systemMessagesAllowed = true;
  const isGemmaModel = options?.isGemmaModel ?? false;
  const providerOptionsName = options?.providerOptionsName ?? 'google';
+ const supportsFunctionResponseParts =
+ options?.supportsFunctionResponseParts ?? true;

  for (const { role, content } of prompt) {
  switch (role) {
@@ -46,25 +209,40 @@ export function convertToGoogleGenerativeAIMessages(
  }

  case 'file': {
- // default to image/jpeg for unknown image/* types
  const mediaType =
  part.mediaType === 'image/*' ? 'image/jpeg' : part.mediaType;

- parts.push(
- part.data instanceof URL
- ? {
- fileData: {
- mimeType: mediaType,
- fileUri: part.data.toString(),
- },
- }
- : {
- inlineData: {
- mimeType: mediaType,
- data: convertToBase64(part.data),
- },
- },
- );
+ if (part.data instanceof URL) {
+ parts.push({
+ fileData: {
+ mimeType: mediaType,
+ fileUri: part.data.toString(),
+ },
+ });
+ } else if (isProviderReference(part.data)) {
+ if (providerOptionsName === 'vertex') {
+ throw new UnsupportedFunctionalityError({
+ functionality: 'file parts with provider references',
+ });
+ }
+
+ parts.push({
+ fileData: {
+ mimeType: mediaType,
+ fileUri: resolveProviderReference({
+ reference: part.data,
+ provider: 'google',
+ }),
+ },
+ });
+ } else {
+ parts.push({
+ inlineData: {
+ mimeType: mediaType,
+ data: convertToBase64(part.data),
+ },
+ });
+ }

  break;
  }
@@ -112,6 +290,24 @@ export function convertToGoogleGenerativeAIMessages(
  };
  }

+ case 'reasoning-file': {
+ if (part.data instanceof URL) {
+ throw new UnsupportedFunctionalityError({
+ functionality:
+ 'File data URLs in assistant messages are not supported',
+ });
+ }
+
+ return {
+ inlineData: {
+ mimeType: part.mediaType,
+ data: convertToBase64(part.data),
+ },
+ thought: true,
+ thoughtSignature,
+ };
+ }
+
  case 'file': {
  if (part.data instanceof URL) {
  throw new UnsupportedFunctionalityError({
@@ -120,6 +316,28 @@ export function convertToGoogleGenerativeAIMessages(
  });
  }

+ if (isProviderReference(part.data)) {
+ if (providerOptionsName === 'vertex') {
+ throw new UnsupportedFunctionalityError({
+ functionality: 'file parts with provider references',
+ });
+ }
+
+ return {
+ fileData: {
+ mimeType: part.mediaType,
+ fileUri: resolveProviderReference({
+ reference: part.data,
+ provider: 'google',
+ }),
+ },
+ ...(providerOpts?.thought === true
+ ? { thought: true }
+ : {}),
+ thoughtSignature,
+ };
+ }
+
  return {
  inlineData: {
  mimeType: part.mediaType,
@@ -133,6 +351,29 @@ export function convertToGoogleGenerativeAIMessages(
  }

  case 'tool-call': {
+ const serverToolCallId =
+ providerOpts?.serverToolCallId != null
+ ? String(providerOpts.serverToolCallId)
+ : undefined;
+ const serverToolType =
+ providerOpts?.serverToolType != null
+ ? String(providerOpts.serverToolType)
+ : undefined;
+
+ if (serverToolCallId && serverToolType) {
+ return {
+ toolCall: {
+ toolType: serverToolType,
+ args:
+ typeof part.input === 'string'
+ ? JSON.parse(part.input)
+ : part.input,
+ id: serverToolCallId,
+ },
+ thoughtSignature,
+ };
+ }
+
  return {
  functionCall: {
  name: part.toolName,
@@ -141,10 +382,36 @@ export function convertToGoogleGenerativeAIMessages(
  thoughtSignature,
  };
  }
+
+ case 'tool-result': {
+ const serverToolCallId =
+ providerOpts?.serverToolCallId != null
+ ? String(providerOpts.serverToolCallId)
+ : undefined;
+ const serverToolType =
+ providerOpts?.serverToolType != null
+ ? String(providerOpts.serverToolType)
+ : undefined;
+
+ if (serverToolCallId && serverToolType) {
+ return {
+ toolResponse: {
+ toolType: serverToolType,
+ response:
+ part.output.type === 'json' ? part.output.value : {},
+ id: serverToolCallId,
+ },
+ thoughtSignature,
+ };
+ }
+
+ return undefined;
+ }
  }
  })
  .filter(part => part !== undefined),
  });
+
  break;
  }

@@ -157,39 +424,51 @@ export function convertToGoogleGenerativeAIMessages(
  if (part.type === 'tool-approval-response') {
  continue;
  }
+
+ const partProviderOpts =
+ part.providerOptions?.[providerOptionsName] ??
+ (providerOptionsName !== 'google'
+ ? part.providerOptions?.google
+ : part.providerOptions?.vertex);
+ const serverToolCallId =
+ partProviderOpts?.serverToolCallId != null
+ ? String(partProviderOpts.serverToolCallId)
+ : undefined;
+ const serverToolType =
+ partProviderOpts?.serverToolType != null
+ ? String(partProviderOpts.serverToolType)
+ : undefined;
+
+ if (serverToolCallId && serverToolType) {
+ const serverThoughtSignature =
+ partProviderOpts?.thoughtSignature != null
+ ? String(partProviderOpts.thoughtSignature)
+ : undefined;
+
+ if (contents.length > 0) {
+ const lastContent = contents[contents.length - 1];
+ if (lastContent.role === 'model') {
+ lastContent.parts.push({
+ toolResponse: {
+ toolType: serverToolType,
+ response:
+ part.output.type === 'json' ? part.output.value : {},
+ id: serverToolCallId,
+ },
+ thoughtSignature: serverThoughtSignature,
+ });
+ continue;
+ }
+ }
+ }
+
  const output = part.output;

  if (output.type === 'content') {
- for (const contentPart of output.value) {
- switch (contentPart.type) {
- case 'text':
- parts.push({
- functionResponse: {
- name: part.toolName,
- response: {
- name: part.toolName,
- content: contentPart.text,
- },
- },
- });
- break;
- case 'image-data':
- parts.push(
- {
- inlineData: {
- mimeType: contentPart.mediaType,
- data: contentPart.data,
- },
- },
- {
- text: 'Tool executed successfully and returned this image as a response',
- },
- );
- break;
- default:
- parts.push({ text: JSON.stringify(contentPart) });
- break;
- }
+ if (supportsFunctionResponseParts) {
+ appendToolResultParts(parts, part.toolName, output.value);
+ } else {
+ appendLegacyToolResultParts(parts, part.toolName, output.value);
  }
  } else {
  parts.push({