@ai-sdk/google 4.0.0-beta.2 → 4.0.0-beta.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +140 -4
- package/dist/index.d.mts +56 -22
- package/dist/index.d.ts +56 -22
- package/dist/index.js +500 -80
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +500 -77
- package/dist/index.mjs.map +1 -1
- package/dist/internal/index.d.mts +34 -15
- package/dist/internal/index.d.ts +34 -15
- package/dist/internal/index.js +452 -67
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/index.mjs +452 -64
- package/dist/internal/index.mjs.map +1 -1
- package/docs/15-google-generative-ai.mdx +36 -3
- package/package.json +3 -5
- package/src/convert-google-generative-ai-usage.ts +2 -2
- package/src/convert-to-google-generative-ai-messages.ts +273 -34
- package/src/google-generative-ai-embedding-model.ts +42 -13
- package/src/google-generative-ai-embedding-options.ts +24 -0
- package/src/google-generative-ai-image-model.ts +14 -14
- package/src/google-generative-ai-language-model.ts +305 -44
- package/src/google-generative-ai-options.ts +11 -1
- package/src/google-generative-ai-prompt.ts +39 -3
- package/src/google-generative-ai-video-model.ts +7 -7
- package/src/google-prepare-tools.ts +63 -8
- package/src/google-provider.ts +18 -18
- package/src/map-google-generative-ai-finish-reason.ts +2 -2
|
@@ -245,6 +245,12 @@ The following optional provider options are available for Google Generative AI m
|
|
|
245
245
|
Optional. Defines labels used in billing reports. Available on Vertex AI only.
|
|
246
246
|
See [Google Cloud labels documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls).
|
|
247
247
|
|
|
248
|
+
- **serviceTier** _'SERVICE_TIER_STANDARD' | 'SERVICE_TIER_FLEX' | 'SERVICE_TIER_PRIORITY'_
|
|
249
|
+
|
|
250
|
+
Optional. The service tier to use for the request.
|
|
251
|
+
Set to 'SERVICE_TIER_FLEX' for 50% cheaper processing at the cost of increased latency.
|
|
252
|
+
Set to 'SERVICE_TIER_PRIORITY' for ultra-low latency at a 75-100% price premium over 'SERVICE_TIER_STANDARD'.
|
|
253
|
+
|
|
248
254
|
- **threshold** _string_
|
|
249
255
|
|
|
250
256
|
Optional. Standalone threshold setting that can be used independently of `safetySettings`.
|
|
@@ -1131,6 +1137,28 @@ const { embedding } = await embed({
|
|
|
1131
1137
|
google: {
|
|
1132
1138
|
outputDimensionality: 512, // optional, number of dimensions for the embedding
|
|
1133
1139
|
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
|
|
1140
|
+
content: [[{ text: 'additional context' }]], // optional, per-value multimodal content (only 1 here, since `value` is only a single one)
|
|
1141
|
+
} satisfies GoogleEmbeddingModelOptions,
|
|
1142
|
+
},
|
|
1143
|
+
});
|
|
1144
|
+
```
|
|
1145
|
+
|
|
1146
|
+
When using `embedMany`, provide per-value multimodal content via the `content` option. Each entry corresponds to a value at the same index; use `null` for text-only entries:
|
|
1147
|
+
|
|
1148
|
+
```ts
|
|
1149
|
+
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
|
|
1150
|
+
import { embedMany } from 'ai';
|
|
1151
|
+
|
|
1152
|
+
const { embeddings } = await embedMany({
|
|
1153
|
+
model: google.embedding('gemini-embedding-2-preview'),
|
|
1154
|
+
values: ['sunny day at the beach', 'rainy afternoon in the city'],
|
|
1155
|
+
providerOptions: {
|
|
1156
|
+
google: {
|
|
1157
|
+
// content array must have the same length as values
|
|
1158
|
+
content: [
|
|
1159
|
+
[{ inlineData: { mimeType: 'image/png', data: '<base64>' } }], // pairs with values[0]
|
|
1160
|
+
null, // text-only, pairs with values[1]
|
|
1161
|
+
],
|
|
1134
1162
|
} satisfies GoogleEmbeddingModelOptions,
|
|
1135
1163
|
},
|
|
1136
1164
|
});
|
|
@@ -1155,11 +1183,16 @@ The following optional provider options are available for Google Generative AI e
|
|
|
1155
1183
|
- `FACT_VERIFICATION`: Optimized for verifying factual information.
|
|
1156
1184
|
- `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
|
|
1157
1185
|
|
|
1186
|
+
- **content**: _array_
|
|
1187
|
+
|
|
1188
|
+
Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either `{ text: string }` or `{ inlineData: { mimeType: string, data: string } }`. Supported by `gemini-embedding-2-preview`.
|
|
1189
|
+
|
|
1158
1190
|
### Model Capabilities
|
|
1159
1191
|
|
|
1160
|
-
| Model
|
|
1161
|
-
|
|
|
1162
|
-
| `gemini-embedding-001`
|
|
1192
|
+
| Model | Default Dimensions | Custom Dimensions | Multimodal |
|
|
1193
|
+
| ---------------------------- | ------------------ | ------------------- | ------------------- |
|
|
1194
|
+
| `gemini-embedding-001` | 3072 | <Check size={18} /> | <Cross size={18} /> |
|
|
1195
|
+
| `gemini-embedding-2-preview` | 3072 | <Check size={18} /> | <Check size={18} /> |
|
|
1163
1196
|
|
|
1164
1197
|
## Image Models
|
|
1165
1198
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/google",
|
|
3
|
-
"version": "4.0.0-beta.2",
|
|
3
|
+
"version": "4.0.0-beta.21",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"sideEffects": false,
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -36,8 +36,8 @@
|
|
|
36
36
|
}
|
|
37
37
|
},
|
|
38
38
|
"dependencies": {
|
|
39
|
-
"@ai-sdk/provider": "4.0.0-beta.
|
|
40
|
-
"@ai-sdk/provider-utils": "5.0.0-beta.
|
|
39
|
+
"@ai-sdk/provider": "4.0.0-beta.5",
|
|
40
|
+
"@ai-sdk/provider-utils": "5.0.0-beta.9"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "20.17.24",
|
|
@@ -71,9 +71,7 @@
|
|
|
71
71
|
"build": "pnpm clean && tsup --tsconfig tsconfig.build.json",
|
|
72
72
|
"build:watch": "pnpm clean && tsup --watch",
|
|
73
73
|
"clean": "del-cli dist docs *.tsbuildinfo",
|
|
74
|
-
"lint": "eslint \"./**/*.ts*\"",
|
|
75
74
|
"type-check": "tsc --build",
|
|
76
|
-
"prettier-check": "prettier --check \"./**/*.ts*\"",
|
|
77
75
|
"test": "pnpm test:node && pnpm test:edge",
|
|
78
76
|
"test:update": "pnpm test:node -u",
|
|
79
77
|
"test:watch": "vitest --config vitest.node.config.js",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { LanguageModelV4Usage } from '@ai-sdk/provider';
|
|
2
2
|
|
|
3
3
|
export type GoogleGenerativeAIUsageMetadata = {
|
|
4
4
|
promptTokenCount?: number | null;
|
|
@@ -11,7 +11,7 @@ export type GoogleGenerativeAIUsageMetadata = {
|
|
|
11
11
|
|
|
12
12
|
export function convertGoogleGenerativeAIUsage(
|
|
13
13
|
usage: GoogleGenerativeAIUsageMetadata | undefined | null,
|
|
14
|
-
):
|
|
14
|
+
): LanguageModelV4Usage {
|
|
15
15
|
if (usage == null) {
|
|
16
16
|
return {
|
|
17
17
|
inputTokens: {
|
|
@@ -1,23 +1,180 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
LanguageModelV4Prompt,
|
|
3
3
|
UnsupportedFunctionalityError,
|
|
4
4
|
} from '@ai-sdk/provider';
|
|
5
|
+
import { convertToBase64 } from '@ai-sdk/provider-utils';
|
|
5
6
|
import {
|
|
6
7
|
GoogleGenerativeAIContent,
|
|
7
8
|
GoogleGenerativeAIContentPart,
|
|
9
|
+
GoogleGenerativeAIFunctionResponsePart,
|
|
8
10
|
GoogleGenerativeAIPrompt,
|
|
9
11
|
} from './google-generative-ai-prompt';
|
|
10
|
-
|
|
12
|
+
|
|
13
|
+
const dataUrlRegex = /^data:([^;,]+);base64,(.+)$/s;
|
|
14
|
+
|
|
15
|
+
function parseBase64DataUrl(
|
|
16
|
+
value: string,
|
|
17
|
+
): { mediaType: string; data: string } | undefined {
|
|
18
|
+
const match = dataUrlRegex.exec(value);
|
|
19
|
+
if (match == null) {
|
|
20
|
+
return undefined;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
return {
|
|
24
|
+
mediaType: match[1],
|
|
25
|
+
data: match[2],
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function convertUrlToolResultPart(
|
|
30
|
+
url: string,
|
|
31
|
+
): GoogleGenerativeAIFunctionResponsePart | undefined {
|
|
32
|
+
// Per https://ai.google.dev/api/caching#FunctionResponsePart, only inline data is supported.
|
|
33
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/function-calling#functionresponsepart suggests that this
|
|
34
|
+
// may be different for Vertex, but this needs to be confirmed and further tested for both APIs.
|
|
35
|
+
const parsedDataUrl = parseBase64DataUrl(url);
|
|
36
|
+
if (parsedDataUrl == null) {
|
|
37
|
+
return undefined;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return {
|
|
41
|
+
inlineData: {
|
|
42
|
+
mimeType: parsedDataUrl.mediaType,
|
|
43
|
+
data: parsedDataUrl.data,
|
|
44
|
+
},
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/*
|
|
49
|
+
* Appends tool result content parts to the message using the functionResponse
|
|
50
|
+
* format with support for multimodal parts (e.g. inline images/files alongside
|
|
51
|
+
* text). This format is supported by Gemini 3+ models.
|
|
52
|
+
*/
|
|
53
|
+
function appendToolResultParts(
|
|
54
|
+
parts: GoogleGenerativeAIContentPart[],
|
|
55
|
+
toolName: string,
|
|
56
|
+
outputValue: Array<{
|
|
57
|
+
type: string;
|
|
58
|
+
[key: string]: unknown;
|
|
59
|
+
}>,
|
|
60
|
+
): void {
|
|
61
|
+
const functionResponseParts: GoogleGenerativeAIFunctionResponsePart[] = [];
|
|
62
|
+
const responseTextParts: string[] = [];
|
|
63
|
+
|
|
64
|
+
for (const contentPart of outputValue) {
|
|
65
|
+
switch (contentPart.type) {
|
|
66
|
+
case 'text': {
|
|
67
|
+
responseTextParts.push(contentPart.text as string);
|
|
68
|
+
break;
|
|
69
|
+
}
|
|
70
|
+
case 'image-data':
|
|
71
|
+
case 'file-data': {
|
|
72
|
+
functionResponseParts.push({
|
|
73
|
+
inlineData: {
|
|
74
|
+
mimeType: contentPart.mediaType as string,
|
|
75
|
+
data: contentPart.data as string,
|
|
76
|
+
},
|
|
77
|
+
});
|
|
78
|
+
break;
|
|
79
|
+
}
|
|
80
|
+
case 'image-url':
|
|
81
|
+
case 'file-url': {
|
|
82
|
+
const functionResponsePart = convertUrlToolResultPart(
|
|
83
|
+
contentPart.url as string,
|
|
84
|
+
);
|
|
85
|
+
|
|
86
|
+
if (functionResponsePart != null) {
|
|
87
|
+
functionResponseParts.push(functionResponsePart);
|
|
88
|
+
} else {
|
|
89
|
+
responseTextParts.push(JSON.stringify(contentPart));
|
|
90
|
+
}
|
|
91
|
+
break;
|
|
92
|
+
}
|
|
93
|
+
default: {
|
|
94
|
+
responseTextParts.push(JSON.stringify(contentPart));
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
parts.push({
|
|
101
|
+
functionResponse: {
|
|
102
|
+
name: toolName,
|
|
103
|
+
response: {
|
|
104
|
+
name: toolName,
|
|
105
|
+
content:
|
|
106
|
+
responseTextParts.length > 0
|
|
107
|
+
? responseTextParts.join('\n')
|
|
108
|
+
: 'Tool executed successfully.',
|
|
109
|
+
},
|
|
110
|
+
...(functionResponseParts.length > 0
|
|
111
|
+
? { parts: functionResponseParts }
|
|
112
|
+
: {}),
|
|
113
|
+
},
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/*
|
|
118
|
+
* Appends tool result content parts using a legacy format for pre-Gemini 3
|
|
119
|
+
* models that do not support multimodal parts within functionResponse. Instead,
|
|
120
|
+
* non-text content like images is sent as separate top-level inlineData parts.
|
|
121
|
+
*/
|
|
122
|
+
function appendLegacyToolResultParts(
|
|
123
|
+
parts: GoogleGenerativeAIContentPart[],
|
|
124
|
+
toolName: string,
|
|
125
|
+
outputValue: Array<{
|
|
126
|
+
type: string;
|
|
127
|
+
[key: string]: unknown;
|
|
128
|
+
}>,
|
|
129
|
+
): void {
|
|
130
|
+
for (const contentPart of outputValue) {
|
|
131
|
+
switch (contentPart.type) {
|
|
132
|
+
case 'text':
|
|
133
|
+
parts.push({
|
|
134
|
+
functionResponse: {
|
|
135
|
+
name: toolName,
|
|
136
|
+
response: {
|
|
137
|
+
name: toolName,
|
|
138
|
+
content: contentPart.text,
|
|
139
|
+
},
|
|
140
|
+
},
|
|
141
|
+
});
|
|
142
|
+
break;
|
|
143
|
+
case 'image-data':
|
|
144
|
+
parts.push(
|
|
145
|
+
{
|
|
146
|
+
inlineData: {
|
|
147
|
+
mimeType: String(contentPart.mediaType),
|
|
148
|
+
data: String(contentPart.data),
|
|
149
|
+
},
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
text: 'Tool executed successfully and returned this image as a response',
|
|
153
|
+
},
|
|
154
|
+
);
|
|
155
|
+
break;
|
|
156
|
+
default:
|
|
157
|
+
parts.push({ text: JSON.stringify(contentPart) });
|
|
158
|
+
break;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
11
162
|
|
|
12
163
|
export function convertToGoogleGenerativeAIMessages(
|
|
13
|
-
prompt:
|
|
14
|
-
options?: {
|
|
164
|
+
prompt: LanguageModelV4Prompt,
|
|
165
|
+
options?: {
|
|
166
|
+
isGemmaModel?: boolean;
|
|
167
|
+
providerOptionsName?: string;
|
|
168
|
+
supportsFunctionResponseParts?: boolean;
|
|
169
|
+
},
|
|
15
170
|
): GoogleGenerativeAIPrompt {
|
|
16
171
|
const systemInstructionParts: Array<{ text: string }> = [];
|
|
17
172
|
const contents: Array<GoogleGenerativeAIContent> = [];
|
|
18
173
|
let systemMessagesAllowed = true;
|
|
19
174
|
const isGemmaModel = options?.isGemmaModel ?? false;
|
|
20
175
|
const providerOptionsName = options?.providerOptionsName ?? 'google';
|
|
176
|
+
const supportsFunctionResponseParts =
|
|
177
|
+
options?.supportsFunctionResponseParts ?? true;
|
|
21
178
|
|
|
22
179
|
for (const { role, content } of prompt) {
|
|
23
180
|
switch (role) {
|
|
@@ -112,6 +269,24 @@ export function convertToGoogleGenerativeAIMessages(
|
|
|
112
269
|
};
|
|
113
270
|
}
|
|
114
271
|
|
|
272
|
+
case 'reasoning-file': {
|
|
273
|
+
if (part.data instanceof URL) {
|
|
274
|
+
throw new UnsupportedFunctionalityError({
|
|
275
|
+
functionality:
|
|
276
|
+
'File data URLs in assistant messages are not supported',
|
|
277
|
+
});
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
return {
|
|
281
|
+
inlineData: {
|
|
282
|
+
mimeType: part.mediaType,
|
|
283
|
+
data: convertToBase64(part.data),
|
|
284
|
+
},
|
|
285
|
+
thought: true,
|
|
286
|
+
thoughtSignature,
|
|
287
|
+
};
|
|
288
|
+
}
|
|
289
|
+
|
|
115
290
|
case 'file': {
|
|
116
291
|
if (part.data instanceof URL) {
|
|
117
292
|
throw new UnsupportedFunctionalityError({
|
|
@@ -125,11 +300,37 @@ export function convertToGoogleGenerativeAIMessages(
|
|
|
125
300
|
mimeType: part.mediaType,
|
|
126
301
|
data: convertToBase64(part.data),
|
|
127
302
|
},
|
|
303
|
+
...(providerOpts?.thought === true
|
|
304
|
+
? { thought: true }
|
|
305
|
+
: {}),
|
|
128
306
|
thoughtSignature,
|
|
129
307
|
};
|
|
130
308
|
}
|
|
131
309
|
|
|
132
310
|
case 'tool-call': {
|
|
311
|
+
const serverToolCallId =
|
|
312
|
+
providerOpts?.serverToolCallId != null
|
|
313
|
+
? String(providerOpts.serverToolCallId)
|
|
314
|
+
: undefined;
|
|
315
|
+
const serverToolType =
|
|
316
|
+
providerOpts?.serverToolType != null
|
|
317
|
+
? String(providerOpts.serverToolType)
|
|
318
|
+
: undefined;
|
|
319
|
+
|
|
320
|
+
if (serverToolCallId && serverToolType) {
|
|
321
|
+
return {
|
|
322
|
+
toolCall: {
|
|
323
|
+
toolType: serverToolType,
|
|
324
|
+
args:
|
|
325
|
+
typeof part.input === 'string'
|
|
326
|
+
? JSON.parse(part.input)
|
|
327
|
+
: part.input,
|
|
328
|
+
id: serverToolCallId,
|
|
329
|
+
},
|
|
330
|
+
thoughtSignature,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
|
|
133
334
|
return {
|
|
134
335
|
functionCall: {
|
|
135
336
|
name: part.toolName,
|
|
@@ -138,10 +339,36 @@ export function convertToGoogleGenerativeAIMessages(
|
|
|
138
339
|
thoughtSignature,
|
|
139
340
|
};
|
|
140
341
|
}
|
|
342
|
+
|
|
343
|
+
case 'tool-result': {
|
|
344
|
+
const serverToolCallId =
|
|
345
|
+
providerOpts?.serverToolCallId != null
|
|
346
|
+
? String(providerOpts.serverToolCallId)
|
|
347
|
+
: undefined;
|
|
348
|
+
const serverToolType =
|
|
349
|
+
providerOpts?.serverToolType != null
|
|
350
|
+
? String(providerOpts.serverToolType)
|
|
351
|
+
: undefined;
|
|
352
|
+
|
|
353
|
+
if (serverToolCallId && serverToolType) {
|
|
354
|
+
return {
|
|
355
|
+
toolResponse: {
|
|
356
|
+
toolType: serverToolType,
|
|
357
|
+
response:
|
|
358
|
+
part.output.type === 'json' ? part.output.value : {},
|
|
359
|
+
id: serverToolCallId,
|
|
360
|
+
},
|
|
361
|
+
thoughtSignature,
|
|
362
|
+
};
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
return undefined;
|
|
366
|
+
}
|
|
141
367
|
}
|
|
142
368
|
})
|
|
143
369
|
.filter(part => part !== undefined),
|
|
144
370
|
});
|
|
371
|
+
|
|
145
372
|
break;
|
|
146
373
|
}
|
|
147
374
|
|
|
@@ -154,39 +381,51 @@ export function convertToGoogleGenerativeAIMessages(
|
|
|
154
381
|
if (part.type === 'tool-approval-response') {
|
|
155
382
|
continue;
|
|
156
383
|
}
|
|
384
|
+
|
|
385
|
+
const partProviderOpts =
|
|
386
|
+
part.providerOptions?.[providerOptionsName] ??
|
|
387
|
+
(providerOptionsName !== 'google'
|
|
388
|
+
? part.providerOptions?.google
|
|
389
|
+
: part.providerOptions?.vertex);
|
|
390
|
+
const serverToolCallId =
|
|
391
|
+
partProviderOpts?.serverToolCallId != null
|
|
392
|
+
? String(partProviderOpts.serverToolCallId)
|
|
393
|
+
: undefined;
|
|
394
|
+
const serverToolType =
|
|
395
|
+
partProviderOpts?.serverToolType != null
|
|
396
|
+
? String(partProviderOpts.serverToolType)
|
|
397
|
+
: undefined;
|
|
398
|
+
|
|
399
|
+
if (serverToolCallId && serverToolType) {
|
|
400
|
+
const serverThoughtSignature =
|
|
401
|
+
partProviderOpts?.thoughtSignature != null
|
|
402
|
+
? String(partProviderOpts.thoughtSignature)
|
|
403
|
+
: undefined;
|
|
404
|
+
|
|
405
|
+
if (contents.length > 0) {
|
|
406
|
+
const lastContent = contents[contents.length - 1];
|
|
407
|
+
if (lastContent.role === 'model') {
|
|
408
|
+
lastContent.parts.push({
|
|
409
|
+
toolResponse: {
|
|
410
|
+
toolType: serverToolType,
|
|
411
|
+
response:
|
|
412
|
+
part.output.type === 'json' ? part.output.value : {},
|
|
413
|
+
id: serverToolCallId,
|
|
414
|
+
},
|
|
415
|
+
thoughtSignature: serverThoughtSignature,
|
|
416
|
+
});
|
|
417
|
+
continue;
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
|
|
157
422
|
const output = part.output;
|
|
158
423
|
|
|
159
424
|
if (output.type === 'content') {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
functionResponse: {
|
|
165
|
-
name: part.toolName,
|
|
166
|
-
response: {
|
|
167
|
-
name: part.toolName,
|
|
168
|
-
content: contentPart.text,
|
|
169
|
-
},
|
|
170
|
-
},
|
|
171
|
-
});
|
|
172
|
-
break;
|
|
173
|
-
case 'image-data':
|
|
174
|
-
parts.push(
|
|
175
|
-
{
|
|
176
|
-
inlineData: {
|
|
177
|
-
mimeType: contentPart.mediaType,
|
|
178
|
-
data: contentPart.data,
|
|
179
|
-
},
|
|
180
|
-
},
|
|
181
|
-
{
|
|
182
|
-
text: 'Tool executed successfully and returned this image as a response',
|
|
183
|
-
},
|
|
184
|
-
);
|
|
185
|
-
break;
|
|
186
|
-
default:
|
|
187
|
-
parts.push({ text: JSON.stringify(contentPart) });
|
|
188
|
-
break;
|
|
189
|
-
}
|
|
425
|
+
if (supportsFunctionResponseParts) {
|
|
426
|
+
appendToolResultParts(parts, part.toolName, output.value);
|
|
427
|
+
} else {
|
|
428
|
+
appendLegacyToolResultParts(parts, part.toolName, output.value);
|
|
190
429
|
}
|
|
191
430
|
} else {
|
|
192
431
|
parts.push({
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
EmbeddingModelV4,
|
|
3
3
|
TooManyEmbeddingValuesForCallError,
|
|
4
4
|
} from '@ai-sdk/provider';
|
|
5
5
|
import {
|
|
@@ -26,8 +26,8 @@ type GoogleGenerativeAIEmbeddingConfig = {
|
|
|
26
26
|
fetch?: FetchFunction;
|
|
27
27
|
};
|
|
28
28
|
|
|
29
|
-
export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
30
|
-
readonly specificationVersion = 'v3';
|
|
29
|
+
export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV4 {
|
|
30
|
+
readonly specificationVersion = 'v4';
|
|
31
31
|
readonly modelId: GoogleGenerativeAIEmbeddingModelId;
|
|
32
32
|
readonly maxEmbeddingsPerCall = 2048;
|
|
33
33
|
readonly supportsParallelCalls = true;
|
|
@@ -50,8 +50,8 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
50
50
|
headers,
|
|
51
51
|
abortSignal,
|
|
52
52
|
providerOptions,
|
|
53
|
-
}: Parameters<EmbeddingModelV3['doEmbed']>[0]): Promise<
|
|
54
|
-
Awaited<ReturnType<EmbeddingModelV3['doEmbed']>>
|
|
53
|
+
}: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
|
|
54
|
+
Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
|
|
55
55
|
> {
|
|
56
56
|
// Parse provider options
|
|
57
57
|
const googleOptions = await parseProviderOptions({
|
|
@@ -74,8 +74,26 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
74
74
|
headers,
|
|
75
75
|
);
|
|
76
76
|
|
|
77
|
-
|
|
77
|
+
const multimodalContent = googleOptions?.content;
|
|
78
|
+
|
|
79
|
+
if (
|
|
80
|
+
multimodalContent != null &&
|
|
81
|
+
multimodalContent.length !== values.length
|
|
82
|
+
) {
|
|
83
|
+
throw new Error(
|
|
84
|
+
`The number of multimodal content entries (${multimodalContent.length}) must match the number of values (${values.length}).`,
|
|
85
|
+
);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// For single embeddings, use the single endpoint
|
|
78
89
|
if (values.length === 1) {
|
|
90
|
+
const valueParts = multimodalContent?.[0];
|
|
91
|
+
const textPart = values[0] ? [{ text: values[0] }] : [];
|
|
92
|
+
const parts =
|
|
93
|
+
valueParts != null
|
|
94
|
+
? [...textPart, ...valueParts]
|
|
95
|
+
: [{ text: values[0] }];
|
|
96
|
+
|
|
79
97
|
const {
|
|
80
98
|
responseHeaders,
|
|
81
99
|
value: response,
|
|
@@ -86,7 +104,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
86
104
|
body: {
|
|
87
105
|
model: `models/${this.modelId}`,
|
|
88
106
|
content: {
|
|
89
|
-
parts
|
|
107
|
+
parts,
|
|
90
108
|
},
|
|
91
109
|
outputDimensionality: googleOptions?.outputDimensionality,
|
|
92
110
|
taskType: googleOptions?.taskType,
|
|
@@ -107,6 +125,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
107
125
|
};
|
|
108
126
|
}
|
|
109
127
|
|
|
128
|
+
// For multiple values, use the batch endpoint
|
|
110
129
|
const {
|
|
111
130
|
responseHeaders,
|
|
112
131
|
value: response,
|
|
@@ -115,12 +134,22 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
115
134
|
url: `${this.config.baseURL}/models/${this.modelId}:batchEmbedContents`,
|
|
116
135
|
headers: mergedHeaders,
|
|
117
136
|
body: {
|
|
118
|
-
requests: values.map(value =>
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
137
|
+
requests: values.map((value, index) => {
|
|
138
|
+
const valueParts = multimodalContent?.[index];
|
|
139
|
+
const textPart = value ? [{ text: value }] : [];
|
|
140
|
+
return {
|
|
141
|
+
model: `models/${this.modelId}`,
|
|
142
|
+
content: {
|
|
143
|
+
role: 'user',
|
|
144
|
+
parts:
|
|
145
|
+
valueParts != null
|
|
146
|
+
? [...textPart, ...valueParts]
|
|
147
|
+
: [{ text: value }],
|
|
148
|
+
},
|
|
149
|
+
outputDimensionality: googleOptions?.outputDimensionality,
|
|
150
|
+
taskType: googleOptions?.taskType,
|
|
151
|
+
};
|
|
152
|
+
}),
|
|
124
153
|
},
|
|
125
154
|
failedResponseHandler: googleFailedResponseHandler,
|
|
126
155
|
successfulResponseHandler: createJsonResponseHandler(
|
|
@@ -7,8 +7,19 @@ import { z } from 'zod/v4';
|
|
|
7
7
|
|
|
8
8
|
export type GoogleGenerativeAIEmbeddingModelId =
|
|
9
9
|
| 'gemini-embedding-001'
|
|
10
|
+
| 'gemini-embedding-2-preview'
|
|
10
11
|
| (string & {});
|
|
11
12
|
|
|
13
|
+
const googleEmbeddingContentPartSchema = z.union([
|
|
14
|
+
z.object({ text: z.string() }),
|
|
15
|
+
z.object({
|
|
16
|
+
inlineData: z.object({
|
|
17
|
+
mimeType: z.string(),
|
|
18
|
+
data: z.string(),
|
|
19
|
+
}),
|
|
20
|
+
}),
|
|
21
|
+
]);
|
|
22
|
+
|
|
12
23
|
export const googleEmbeddingModelOptions = lazySchema(() =>
|
|
13
24
|
zodSchema(
|
|
14
25
|
z.object({
|
|
@@ -42,6 +53,19 @@ export const googleEmbeddingModelOptions = lazySchema(() =>
|
|
|
42
53
|
'CODE_RETRIEVAL_QUERY',
|
|
43
54
|
])
|
|
44
55
|
.optional(),
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Optional. Per-value multimodal content parts for embedding non-text
|
|
59
|
+
* content (images, video, PDF, audio). Each entry corresponds to the
|
|
60
|
+
* embedding value at the same index and its parts are merged with the
|
|
61
|
+
* text value in the request. Use `null` for entries that are text-only.
|
|
62
|
+
*
|
|
63
|
+
* The array length must match the number of values being embedded. In
|
|
64
|
+
* the case of a single embedding, the array length must be 1.
|
|
65
|
+
*/
|
|
66
|
+
content: z
|
|
67
|
+
.array(z.array(googleEmbeddingContentPartSchema).min(1).nullable())
|
|
68
|
+
.optional(),
|
|
45
69
|
}),
|
|
46
70
|
),
|
|
47
71
|
);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
ImageModelV4,
|
|
3
|
+
LanguageModelV4Prompt,
|
|
4
|
+
SharedV4Warning,
|
|
5
5
|
} from '@ai-sdk/provider';
|
|
6
6
|
import {
|
|
7
7
|
combineHeaders,
|
|
@@ -37,8 +37,8 @@ interface GoogleGenerativeAIImageModelConfig {
|
|
|
37
37
|
};
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
41
|
-
readonly specificationVersion = 'v3';
|
|
40
|
+
export class GoogleGenerativeAIImageModel implements ImageModelV4 {
|
|
41
|
+
readonly specificationVersion = 'v4';
|
|
42
42
|
|
|
43
43
|
get maxImagesPerCall(): number {
|
|
44
44
|
if (this.settings.maxImagesPerCall != null) {
|
|
@@ -63,8 +63,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
63
63
|
) {}
|
|
64
64
|
|
|
65
65
|
async doGenerate(
|
|
66
|
-
options: Parameters<ImageModelV3['doGenerate']>[0],
|
|
67
|
-
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
|
|
66
|
+
options: Parameters<ImageModelV4['doGenerate']>[0],
|
|
67
|
+
): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
|
|
68
68
|
// Gemini image models use the language model API internally
|
|
69
69
|
if (isGeminiModel(this.modelId)) {
|
|
70
70
|
return this.doGenerateGemini(options);
|
|
@@ -73,8 +73,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
73
73
|
}
|
|
74
74
|
|
|
75
75
|
private async doGenerateImagen(
|
|
76
|
-
options: Parameters<ImageModelV3['doGenerate']>[0],
|
|
77
|
-
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
|
|
76
|
+
options: Parameters<ImageModelV4['doGenerate']>[0],
|
|
77
|
+
): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
|
|
78
78
|
const {
|
|
79
79
|
prompt,
|
|
80
80
|
n = 1,
|
|
@@ -87,7 +87,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
87
87
|
files,
|
|
88
88
|
mask,
|
|
89
89
|
} = options;
|
|
90
|
-
const warnings: Array<
|
|
90
|
+
const warnings: Array<SharedV4Warning> = [];
|
|
91
91
|
|
|
92
92
|
// Imagen API endpoints do not support image editing
|
|
93
93
|
if (files != null && files.length > 0) {
|
|
@@ -181,8 +181,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
181
181
|
}
|
|
182
182
|
|
|
183
183
|
private async doGenerateGemini(
|
|
184
|
-
options: Parameters<ImageModelV3['doGenerate']>[0],
|
|
185
|
-
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
|
|
184
|
+
options: Parameters<ImageModelV4['doGenerate']>[0],
|
|
185
|
+
): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
|
|
186
186
|
const {
|
|
187
187
|
prompt,
|
|
188
188
|
n,
|
|
@@ -195,7 +195,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
195
195
|
files,
|
|
196
196
|
mask,
|
|
197
197
|
} = options;
|
|
198
|
-
const warnings: Array<
|
|
198
|
+
const warnings: Array<SharedV4Warning> = [];
|
|
199
199
|
|
|
200
200
|
// Gemini does not support mask-based inpainting
|
|
201
201
|
if (mask != null) {
|
|
@@ -253,7 +253,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
253
253
|
}
|
|
254
254
|
}
|
|
255
255
|
|
|
256
|
-
const languageModelPrompt:
|
|
256
|
+
const languageModelPrompt: LanguageModelV4Prompt = [
|
|
257
257
|
{ role: 'user', content: userContent },
|
|
258
258
|
];
|
|
259
259
|
|