@ai-sdk/google 4.0.0-beta.3 → 4.0.0-beta.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -245,6 +245,12 @@ The following optional provider options are available for Google Generative AI m
245
245
  Optional. Defines labels used in billing reports. Available on Vertex AI only.
246
246
  See [Google Cloud labels documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls).
247
247
 
248
+ - **serviceTier** _'standard' | 'flex' | 'priority'_
249
+
250
+ Optional. The service tier to use for the request.
251
+ Set to 'flex' for 50% cheaper processing at the cost of increased latency.
252
+ Set to 'priority' for ultra-low latency at a 75-100% price premium over 'standard'.
253
+
248
254
  - **threshold** _string_
249
255
 
250
256
  Optional. Standalone threshold setting that can be used independently of `safetySettings`.
@@ -1131,6 +1137,28 @@ const { embedding } = await embed({
1131
1137
  google: {
1132
1138
  outputDimensionality: 512, // optional, number of dimensions for the embedding
1133
1139
  taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
1140
+ content: [[{ text: 'additional context' }]], // optional, per-value multimodal content (only one entry here, since a single value is embedded)
1141
+ } satisfies GoogleEmbeddingModelOptions,
1142
+ },
1143
+ });
1144
+ ```
1145
+
1146
+ When using `embedMany`, provide per-value multimodal content via the `content` option. Each entry corresponds to a value at the same index; use `null` for text-only entries:
1147
+
1148
+ ```ts
1149
+ import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
1150
+ import { embedMany } from 'ai';
1151
+
1152
+ const { embeddings } = await embedMany({
1153
+ model: google.embedding('gemini-embedding-2-preview'),
1154
+ values: ['sunny day at the beach', 'rainy afternoon in the city'],
1155
+ providerOptions: {
1156
+ google: {
1157
+ // content array must have the same length as values
1158
+ content: [
1159
+ [{ inlineData: { mimeType: 'image/png', data: '<base64>' } }], // pairs with values[0]
1160
+ null, // text-only, pairs with values[1]
1161
+ ],
1134
1162
  } satisfies GoogleEmbeddingModelOptions,
1135
1163
  },
1136
1164
  });
@@ -1155,11 +1183,16 @@ The following optional provider options are available for Google Generative AI e
1155
1183
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
1156
1184
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
1157
1185
 
1186
+ - **content**: _array_
1187
+
1188
+ Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either `{ text: string }` or `{ inlineData: { mimeType: string, data: string } }`. Supported by `gemini-embedding-2-preview`.
1189
+
1158
1190
  ### Model Capabilities
1159
1191
 
1160
- | Model | Default Dimensions | Custom Dimensions |
1161
- | ---------------------- | ------------------ | ------------------- |
1162
- | `gemini-embedding-001` | 3072 | <Check size={18} /> |
1192
+ | Model | Default Dimensions | Custom Dimensions | Multimodal |
1193
+ | ---------------------------- | ------------------ | ------------------- | ------------------- |
1194
+ | `gemini-embedding-001` | 3072 | <Check size={18} /> | <Cross size={18} /> |
1195
+ | `gemini-embedding-2-preview` | 3072 | <Check size={18} /> | <Check size={18} /> |
1163
1196
 
1164
1197
  ## Image Models
1165
1198
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/google",
3
- "version": "4.0.0-beta.3",
3
+ "version": "4.0.0-beta.30",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -36,8 +36,8 @@
36
36
  }
37
37
  },
38
38
  "dependencies": {
39
- "@ai-sdk/provider": "4.0.0-beta.0",
40
- "@ai-sdk/provider-utils": "5.0.0-beta.1"
39
+ "@ai-sdk/provider": "4.0.0-beta.9",
40
+ "@ai-sdk/provider-utils": "5.0.0-beta.15"
41
41
  },
42
42
  "devDependencies": {
43
43
  "@types/node": "20.17.24",
@@ -71,9 +71,7 @@
71
71
  "build": "pnpm clean && tsup --tsconfig tsconfig.build.json",
72
72
  "build:watch": "pnpm clean && tsup --watch",
73
73
  "clean": "del-cli dist docs *.tsbuildinfo",
74
- "lint": "eslint \"./**/*.ts*\"",
75
74
  "type-check": "tsc --build",
76
- "prettier-check": "prettier --check \"./**/*.ts*\"",
77
75
  "test": "pnpm test:node && pnpm test:edge",
78
76
  "test:update": "pnpm test:node -u",
79
77
  "test:watch": "vitest --config vitest.node.config.js",
@@ -1,4 +1,9 @@
1
- import { LanguageModelV3Usage } from '@ai-sdk/provider';
1
+ import { LanguageModelV4Usage } from '@ai-sdk/provider';
2
+
3
+ export type GoogleGenerativeAITokenDetail = {
4
+ modality: string;
5
+ tokenCount: number;
6
+ };
2
7
 
3
8
  export type GoogleGenerativeAIUsageMetadata = {
4
9
  promptTokenCount?: number | null;
@@ -7,11 +12,13 @@ export type GoogleGenerativeAIUsageMetadata = {
7
12
  cachedContentTokenCount?: number | null;
8
13
  thoughtsTokenCount?: number | null;
9
14
  trafficType?: string | null;
15
+ promptTokensDetails?: GoogleGenerativeAITokenDetail[] | null;
16
+ candidatesTokensDetails?: GoogleGenerativeAITokenDetail[] | null;
10
17
  };
11
18
 
12
19
  export function convertGoogleGenerativeAIUsage(
13
20
  usage: GoogleGenerativeAIUsageMetadata | undefined | null,
14
- ): LanguageModelV3Usage {
21
+ ): LanguageModelV4Usage {
15
22
  if (usage == null) {
16
23
  return {
17
24
  inputTokens: {
@@ -1,23 +1,184 @@
1
1
  import {
2
- LanguageModelV3Prompt,
2
+ LanguageModelV4Prompt,
3
3
  UnsupportedFunctionalityError,
4
4
  } from '@ai-sdk/provider';
5
+ import {
6
+ convertToBase64,
7
+ isProviderReference,
8
+ resolveProviderReference,
9
+ } from '@ai-sdk/provider-utils';
5
10
  import {
6
11
  GoogleGenerativeAIContent,
7
12
  GoogleGenerativeAIContentPart,
13
+ GoogleGenerativeAIFunctionResponsePart,
8
14
  GoogleGenerativeAIPrompt,
9
15
  } from './google-generative-ai-prompt';
10
- import { convertToBase64 } from '@ai-sdk/provider-utils';
16
+
17
+ const dataUrlRegex = /^data:([^;,]+);base64,(.+)$/s;
18
+
19
+ function parseBase64DataUrl(
20
+ value: string,
21
+ ): { mediaType: string; data: string } | undefined {
22
+ const match = dataUrlRegex.exec(value);
23
+ if (match == null) {
24
+ return undefined;
25
+ }
26
+
27
+ return {
28
+ mediaType: match[1],
29
+ data: match[2],
30
+ };
31
+ }
32
+
33
+ function convertUrlToolResultPart(
34
+ url: string,
35
+ ): GoogleGenerativeAIFunctionResponsePart | undefined {
36
+ // Per https://ai.google.dev/api/caching#FunctionResponsePart, only inline data is supported.
37
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/function-calling#functionresponsepart suggests that this
38
+ // may be different for Vertex, but this needs to be confirmed and further tested for both APIs.
39
+ const parsedDataUrl = parseBase64DataUrl(url);
40
+ if (parsedDataUrl == null) {
41
+ return undefined;
42
+ }
43
+
44
+ return {
45
+ inlineData: {
46
+ mimeType: parsedDataUrl.mediaType,
47
+ data: parsedDataUrl.data,
48
+ },
49
+ };
50
+ }
51
+
52
+ /*
53
+ * Appends tool result content parts to the message using the functionResponse
54
+ * format with support for multimodal parts (e.g. inline images/files alongside
55
+ * text). This format is supported by Gemini 3+ models.
56
+ */
57
+ function appendToolResultParts(
58
+ parts: GoogleGenerativeAIContentPart[],
59
+ toolName: string,
60
+ outputValue: Array<{
61
+ type: string;
62
+ [key: string]: unknown;
63
+ }>,
64
+ ): void {
65
+ const functionResponseParts: GoogleGenerativeAIFunctionResponsePart[] = [];
66
+ const responseTextParts: string[] = [];
67
+
68
+ for (const contentPart of outputValue) {
69
+ switch (contentPart.type) {
70
+ case 'text': {
71
+ responseTextParts.push(contentPart.text as string);
72
+ break;
73
+ }
74
+ case 'image-data':
75
+ case 'file-data': {
76
+ functionResponseParts.push({
77
+ inlineData: {
78
+ mimeType: contentPart.mediaType as string,
79
+ data: contentPart.data as string,
80
+ },
81
+ });
82
+ break;
83
+ }
84
+ case 'image-url':
85
+ case 'file-url': {
86
+ const functionResponsePart = convertUrlToolResultPart(
87
+ contentPart.url as string,
88
+ );
89
+
90
+ if (functionResponsePart != null) {
91
+ functionResponseParts.push(functionResponsePart);
92
+ } else {
93
+ responseTextParts.push(JSON.stringify(contentPart));
94
+ }
95
+ break;
96
+ }
97
+ default: {
98
+ responseTextParts.push(JSON.stringify(contentPart));
99
+ break;
100
+ }
101
+ }
102
+ }
103
+
104
+ parts.push({
105
+ functionResponse: {
106
+ name: toolName,
107
+ response: {
108
+ name: toolName,
109
+ content:
110
+ responseTextParts.length > 0
111
+ ? responseTextParts.join('\n')
112
+ : 'Tool executed successfully.',
113
+ },
114
+ ...(functionResponseParts.length > 0
115
+ ? { parts: functionResponseParts }
116
+ : {}),
117
+ },
118
+ });
119
+ }
120
+
121
+ /*
122
+ * Appends tool result content parts using a legacy format for pre-Gemini 3
123
+ * models that do not support multimodal parts within functionResponse. Instead,
124
+ * non-text content like images is sent as separate top-level inlineData parts.
125
+ */
126
+ function appendLegacyToolResultParts(
127
+ parts: GoogleGenerativeAIContentPart[],
128
+ toolName: string,
129
+ outputValue: Array<{
130
+ type: string;
131
+ [key: string]: unknown;
132
+ }>,
133
+ ): void {
134
+ for (const contentPart of outputValue) {
135
+ switch (contentPart.type) {
136
+ case 'text':
137
+ parts.push({
138
+ functionResponse: {
139
+ name: toolName,
140
+ response: {
141
+ name: toolName,
142
+ content: contentPart.text,
143
+ },
144
+ },
145
+ });
146
+ break;
147
+ case 'image-data':
148
+ parts.push(
149
+ {
150
+ inlineData: {
151
+ mimeType: String(contentPart.mediaType),
152
+ data: String(contentPart.data),
153
+ },
154
+ },
155
+ {
156
+ text: 'Tool executed successfully and returned this image as a response',
157
+ },
158
+ );
159
+ break;
160
+ default:
161
+ parts.push({ text: JSON.stringify(contentPart) });
162
+ break;
163
+ }
164
+ }
165
+ }
11
166
 
12
167
  export function convertToGoogleGenerativeAIMessages(
13
- prompt: LanguageModelV3Prompt,
14
- options?: { isGemmaModel?: boolean; providerOptionsName?: string },
168
+ prompt: LanguageModelV4Prompt,
169
+ options?: {
170
+ isGemmaModel?: boolean;
171
+ providerOptionsName?: string;
172
+ supportsFunctionResponseParts?: boolean;
173
+ },
15
174
  ): GoogleGenerativeAIPrompt {
16
175
  const systemInstructionParts: Array<{ text: string }> = [];
17
176
  const contents: Array<GoogleGenerativeAIContent> = [];
18
177
  let systemMessagesAllowed = true;
19
178
  const isGemmaModel = options?.isGemmaModel ?? false;
20
179
  const providerOptionsName = options?.providerOptionsName ?? 'google';
180
+ const supportsFunctionResponseParts =
181
+ options?.supportsFunctionResponseParts ?? true;
21
182
 
22
183
  for (const { role, content } of prompt) {
23
184
  switch (role) {
@@ -46,25 +207,40 @@ export function convertToGoogleGenerativeAIMessages(
46
207
  }
47
208
 
48
209
  case 'file': {
49
- // default to image/jpeg for unknown image/* types
50
210
  const mediaType =
51
211
  part.mediaType === 'image/*' ? 'image/jpeg' : part.mediaType;
52
212
 
53
- parts.push(
54
- part.data instanceof URL
55
- ? {
56
- fileData: {
57
- mimeType: mediaType,
58
- fileUri: part.data.toString(),
59
- },
60
- }
61
- : {
62
- inlineData: {
63
- mimeType: mediaType,
64
- data: convertToBase64(part.data),
65
- },
66
- },
67
- );
213
+ if (part.data instanceof URL) {
214
+ parts.push({
215
+ fileData: {
216
+ mimeType: mediaType,
217
+ fileUri: part.data.toString(),
218
+ },
219
+ });
220
+ } else if (isProviderReference(part.data)) {
221
+ if (providerOptionsName === 'vertex') {
222
+ throw new UnsupportedFunctionalityError({
223
+ functionality: 'file parts with provider references',
224
+ });
225
+ }
226
+
227
+ parts.push({
228
+ fileData: {
229
+ mimeType: mediaType,
230
+ fileUri: resolveProviderReference({
231
+ reference: part.data,
232
+ provider: 'google',
233
+ }),
234
+ },
235
+ });
236
+ } else {
237
+ parts.push({
238
+ inlineData: {
239
+ mimeType: mediaType,
240
+ data: convertToBase64(part.data),
241
+ },
242
+ });
243
+ }
68
244
 
69
245
  break;
70
246
  }
@@ -112,6 +288,24 @@ export function convertToGoogleGenerativeAIMessages(
112
288
  };
113
289
  }
114
290
 
291
+ case 'reasoning-file': {
292
+ if (part.data instanceof URL) {
293
+ throw new UnsupportedFunctionalityError({
294
+ functionality:
295
+ 'File data URLs in assistant messages are not supported',
296
+ });
297
+ }
298
+
299
+ return {
300
+ inlineData: {
301
+ mimeType: part.mediaType,
302
+ data: convertToBase64(part.data),
303
+ },
304
+ thought: true,
305
+ thoughtSignature,
306
+ };
307
+ }
308
+
115
309
  case 'file': {
116
310
  if (part.data instanceof URL) {
117
311
  throw new UnsupportedFunctionalityError({
@@ -120,16 +314,64 @@ export function convertToGoogleGenerativeAIMessages(
120
314
  });
121
315
  }
122
316
 
317
+ if (isProviderReference(part.data)) {
318
+ if (providerOptionsName === 'vertex') {
319
+ throw new UnsupportedFunctionalityError({
320
+ functionality: 'file parts with provider references',
321
+ });
322
+ }
323
+
324
+ return {
325
+ fileData: {
326
+ mimeType: part.mediaType,
327
+ fileUri: resolveProviderReference({
328
+ reference: part.data,
329
+ provider: 'google',
330
+ }),
331
+ },
332
+ ...(providerOpts?.thought === true
333
+ ? { thought: true }
334
+ : {}),
335
+ thoughtSignature,
336
+ };
337
+ }
338
+
123
339
  return {
124
340
  inlineData: {
125
341
  mimeType: part.mediaType,
126
342
  data: convertToBase64(part.data),
127
343
  },
344
+ ...(providerOpts?.thought === true
345
+ ? { thought: true }
346
+ : {}),
128
347
  thoughtSignature,
129
348
  };
130
349
  }
131
350
 
132
351
  case 'tool-call': {
352
+ const serverToolCallId =
353
+ providerOpts?.serverToolCallId != null
354
+ ? String(providerOpts.serverToolCallId)
355
+ : undefined;
356
+ const serverToolType =
357
+ providerOpts?.serverToolType != null
358
+ ? String(providerOpts.serverToolType)
359
+ : undefined;
360
+
361
+ if (serverToolCallId && serverToolType) {
362
+ return {
363
+ toolCall: {
364
+ toolType: serverToolType,
365
+ args:
366
+ typeof part.input === 'string'
367
+ ? JSON.parse(part.input)
368
+ : part.input,
369
+ id: serverToolCallId,
370
+ },
371
+ thoughtSignature,
372
+ };
373
+ }
374
+
133
375
  return {
134
376
  functionCall: {
135
377
  name: part.toolName,
@@ -138,10 +380,36 @@ export function convertToGoogleGenerativeAIMessages(
138
380
  thoughtSignature,
139
381
  };
140
382
  }
383
+
384
+ case 'tool-result': {
385
+ const serverToolCallId =
386
+ providerOpts?.serverToolCallId != null
387
+ ? String(providerOpts.serverToolCallId)
388
+ : undefined;
389
+ const serverToolType =
390
+ providerOpts?.serverToolType != null
391
+ ? String(providerOpts.serverToolType)
392
+ : undefined;
393
+
394
+ if (serverToolCallId && serverToolType) {
395
+ return {
396
+ toolResponse: {
397
+ toolType: serverToolType,
398
+ response:
399
+ part.output.type === 'json' ? part.output.value : {},
400
+ id: serverToolCallId,
401
+ },
402
+ thoughtSignature,
403
+ };
404
+ }
405
+
406
+ return undefined;
407
+ }
141
408
  }
142
409
  })
143
410
  .filter(part => part !== undefined),
144
411
  });
412
+
145
413
  break;
146
414
  }
147
415
 
@@ -154,39 +422,51 @@ export function convertToGoogleGenerativeAIMessages(
154
422
  if (part.type === 'tool-approval-response') {
155
423
  continue;
156
424
  }
425
+
426
+ const partProviderOpts =
427
+ part.providerOptions?.[providerOptionsName] ??
428
+ (providerOptionsName !== 'google'
429
+ ? part.providerOptions?.google
430
+ : part.providerOptions?.vertex);
431
+ const serverToolCallId =
432
+ partProviderOpts?.serverToolCallId != null
433
+ ? String(partProviderOpts.serverToolCallId)
434
+ : undefined;
435
+ const serverToolType =
436
+ partProviderOpts?.serverToolType != null
437
+ ? String(partProviderOpts.serverToolType)
438
+ : undefined;
439
+
440
+ if (serverToolCallId && serverToolType) {
441
+ const serverThoughtSignature =
442
+ partProviderOpts?.thoughtSignature != null
443
+ ? String(partProviderOpts.thoughtSignature)
444
+ : undefined;
445
+
446
+ if (contents.length > 0) {
447
+ const lastContent = contents[contents.length - 1];
448
+ if (lastContent.role === 'model') {
449
+ lastContent.parts.push({
450
+ toolResponse: {
451
+ toolType: serverToolType,
452
+ response:
453
+ part.output.type === 'json' ? part.output.value : {},
454
+ id: serverToolCallId,
455
+ },
456
+ thoughtSignature: serverThoughtSignature,
457
+ });
458
+ continue;
459
+ }
460
+ }
461
+ }
462
+
157
463
  const output = part.output;
158
464
 
159
465
  if (output.type === 'content') {
160
- for (const contentPart of output.value) {
161
- switch (contentPart.type) {
162
- case 'text':
163
- parts.push({
164
- functionResponse: {
165
- name: part.toolName,
166
- response: {
167
- name: part.toolName,
168
- content: contentPart.text,
169
- },
170
- },
171
- });
172
- break;
173
- case 'image-data':
174
- parts.push(
175
- {
176
- inlineData: {
177
- mimeType: contentPart.mediaType,
178
- data: contentPart.data,
179
- },
180
- },
181
- {
182
- text: 'Tool executed successfully and returned this image as a response',
183
- },
184
- );
185
- break;
186
- default:
187
- parts.push({ text: JSON.stringify(contentPart) });
188
- break;
189
- }
466
+ if (supportsFunctionResponseParts) {
467
+ appendToolResultParts(parts, part.toolName, output.value);
468
+ } else {
469
+ appendLegacyToolResultParts(parts, part.toolName, output.value);
190
470
  }
191
471
  } else {
192
472
  parts.push({
@@ -1,5 +1,5 @@
1
1
  import {
2
- EmbeddingModelV3,
2
+ EmbeddingModelV4,
3
3
  TooManyEmbeddingValuesForCallError,
4
4
  } from '@ai-sdk/provider';
5
5
  import {
@@ -26,8 +26,8 @@ type GoogleGenerativeAIEmbeddingConfig = {
26
26
  fetch?: FetchFunction;
27
27
  };
28
28
 
29
- export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
30
- readonly specificationVersion = 'v3';
29
+ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV4 {
30
+ readonly specificationVersion = 'v4';
31
31
  readonly modelId: GoogleGenerativeAIEmbeddingModelId;
32
32
  readonly maxEmbeddingsPerCall = 2048;
33
33
  readonly supportsParallelCalls = true;
@@ -50,8 +50,8 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
50
50
  headers,
51
51
  abortSignal,
52
52
  providerOptions,
53
- }: Parameters<EmbeddingModelV3['doEmbed']>[0]): Promise<
54
- Awaited<ReturnType<EmbeddingModelV3['doEmbed']>>
53
+ }: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
54
+ Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
55
55
  > {
56
56
  // Parse provider options
57
57
  const googleOptions = await parseProviderOptions({
@@ -74,8 +74,26 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
74
74
  headers,
75
75
  );
76
76
 
77
- // For single embeddings, use the single endpoint (ratelimits, etc.)
77
+ const multimodalContent = googleOptions?.content;
78
+
79
+ if (
80
+ multimodalContent != null &&
81
+ multimodalContent.length !== values.length
82
+ ) {
83
+ throw new Error(
84
+ `The number of multimodal content entries (${multimodalContent.length}) must match the number of values (${values.length}).`,
85
+ );
86
+ }
87
+
88
+ // For single embeddings, use the single endpoint
78
89
  if (values.length === 1) {
90
+ const valueParts = multimodalContent?.[0];
91
+ const textPart = values[0] ? [{ text: values[0] }] : [];
92
+ const parts =
93
+ valueParts != null
94
+ ? [...textPart, ...valueParts]
95
+ : [{ text: values[0] }];
96
+
79
97
  const {
80
98
  responseHeaders,
81
99
  value: response,
@@ -86,7 +104,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
86
104
  body: {
87
105
  model: `models/${this.modelId}`,
88
106
  content: {
89
- parts: [{ text: values[0] }],
107
+ parts,
90
108
  },
91
109
  outputDimensionality: googleOptions?.outputDimensionality,
92
110
  taskType: googleOptions?.taskType,
@@ -107,6 +125,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
107
125
  };
108
126
  }
109
127
 
128
+ // For multiple values, use the batch endpoint
110
129
  const {
111
130
  responseHeaders,
112
131
  value: response,
@@ -115,12 +134,22 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
115
134
  url: `${this.config.baseURL}/models/${this.modelId}:batchEmbedContents`,
116
135
  headers: mergedHeaders,
117
136
  body: {
118
- requests: values.map(value => ({
119
- model: `models/${this.modelId}`,
120
- content: { role: 'user', parts: [{ text: value }] },
121
- outputDimensionality: googleOptions?.outputDimensionality,
122
- taskType: googleOptions?.taskType,
123
- })),
137
+ requests: values.map((value, index) => {
138
+ const valueParts = multimodalContent?.[index];
139
+ const textPart = value ? [{ text: value }] : [];
140
+ return {
141
+ model: `models/${this.modelId}`,
142
+ content: {
143
+ role: 'user',
144
+ parts:
145
+ valueParts != null
146
+ ? [...textPart, ...valueParts]
147
+ : [{ text: value }],
148
+ },
149
+ outputDimensionality: googleOptions?.outputDimensionality,
150
+ taskType: googleOptions?.taskType,
151
+ };
152
+ }),
124
153
  },
125
154
  failedResponseHandler: googleFailedResponseHandler,
126
155
  successfulResponseHandler: createJsonResponseHandler(