@ai-sdk/google 4.0.0-beta.2 → 4.0.0-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -245,6 +245,12 @@ The following optional provider options are available for Google Generative AI m
245
245
  Optional. Defines labels used in billing reports. Available on Vertex AI only.
246
246
  See [Google Cloud labels documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls).
247
247
 
248
+ - **serviceTier** _'SERVICE_TIER_STANDARD' | 'SERVICE_TIER_FLEX' | 'SERVICE_TIER_PRIORITY'_
249
+
250
+ Optional. The service tier to use for the request.
251
+ Set to 'SERVICE_TIER_FLEX' for 50% cheaper processing at the cost of increased latency.
252
+ Set to 'SERVICE_TIER_PRIORITY' for ultra-low latency at a 75-100% price premium over 'SERVICE_TIER_STANDARD'.
253
+
248
254
  - **threshold** _string_
249
255
 
250
256
  Optional. Standalone threshold setting that can be used independently of `safetySettings`.
@@ -1131,6 +1137,28 @@ const { embedding } = await embed({
1131
1137
  google: {
1132
1138
  outputDimensionality: 512, // optional, number of dimensions for the embedding
1133
1139
  taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
1140
+ content: [[{ text: 'additional context' }]], // optional, per-value multimodal content (a single entry here, because `embed` takes a single `value`)
1141
+ } satisfies GoogleEmbeddingModelOptions,
1142
+ },
1143
+ });
1144
+ ```
1145
+
1146
+ When using `embedMany`, provide per-value multimodal content via the `content` option. Each entry corresponds to a value at the same index; use `null` for text-only entries:
1147
+
1148
+ ```ts
1149
+ import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
1150
+ import { embedMany } from 'ai';
1151
+
1152
+ const { embeddings } = await embedMany({
1153
+ model: google.embedding('gemini-embedding-2-preview'),
1154
+ values: ['sunny day at the beach', 'rainy afternoon in the city'],
1155
+ providerOptions: {
1156
+ google: {
1157
+ // content array must have the same length as values
1158
+ content: [
1159
+ [{ inlineData: { mimeType: 'image/png', data: '<base64>' } }], // pairs with values[0]
1160
+ null, // text-only, pairs with values[1]
1161
+ ],
1134
1162
  } satisfies GoogleEmbeddingModelOptions,
1135
1163
  },
1136
1164
  });
@@ -1155,11 +1183,16 @@ The following optional provider options are available for Google Generative AI e
1155
1183
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
1156
1184
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
1157
1185
 
1186
+ - **content**: _array_
1187
+
1188
+ Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index — its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either `{ text: string }` or `{ inlineData: { mimeType: string, data: string } }`. Supported by `gemini-embedding-2-preview`.
1189
+
1158
1190
  ### Model Capabilities
1159
1191
 
1160
- | Model | Default Dimensions | Custom Dimensions |
1161
- | ---------------------- | ------------------ | ------------------- |
1162
- | `gemini-embedding-001` | 3072 | <Check size={18} /> |
1192
+ | Model | Default Dimensions | Custom Dimensions | Multimodal |
1193
+ | ---------------------------- | ------------------ | ------------------- | ------------------- |
1194
+ | `gemini-embedding-001` | 3072 | <Check size={18} /> | <Cross size={18} /> |
1195
+ | `gemini-embedding-2-preview` | 3072 | <Check size={18} /> | <Check size={18} /> |
1163
1196
 
1164
1197
  ## Image Models
1165
1198
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/google",
3
- "version": "4.0.0-beta.2",
3
+ "version": "4.0.0-beta.20",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -36,8 +36,8 @@
36
36
  }
37
37
  },
38
38
  "dependencies": {
39
- "@ai-sdk/provider": "4.0.0-beta.0",
40
- "@ai-sdk/provider-utils": "5.0.0-beta.0"
39
+ "@ai-sdk/provider": "4.0.0-beta.5",
40
+ "@ai-sdk/provider-utils": "5.0.0-beta.8"
41
41
  },
42
42
  "devDependencies": {
43
43
  "@types/node": "20.17.24",
@@ -71,9 +71,7 @@
71
71
  "build": "pnpm clean && tsup --tsconfig tsconfig.build.json",
72
72
  "build:watch": "pnpm clean && tsup --watch",
73
73
  "clean": "del-cli dist docs *.tsbuildinfo",
74
- "lint": "eslint \"./**/*.ts*\"",
75
74
  "type-check": "tsc --build",
76
- "prettier-check": "prettier --check \"./**/*.ts*\"",
77
75
  "test": "pnpm test:node && pnpm test:edge",
78
76
  "test:update": "pnpm test:node -u",
79
77
  "test:watch": "vitest --config vitest.node.config.js",
@@ -1,4 +1,4 @@
1
- import { LanguageModelV3Usage } from '@ai-sdk/provider';
1
+ import { LanguageModelV4Usage } from '@ai-sdk/provider';
2
2
 
3
3
  export type GoogleGenerativeAIUsageMetadata = {
4
4
  promptTokenCount?: number | null;
@@ -11,7 +11,7 @@ export type GoogleGenerativeAIUsageMetadata = {
11
11
 
12
12
  export function convertGoogleGenerativeAIUsage(
13
13
  usage: GoogleGenerativeAIUsageMetadata | undefined | null,
14
- ): LanguageModelV3Usage {
14
+ ): LanguageModelV4Usage {
15
15
  if (usage == null) {
16
16
  return {
17
17
  inputTokens: {
@@ -1,23 +1,180 @@
1
1
  import {
2
- LanguageModelV3Prompt,
2
+ LanguageModelV4Prompt,
3
3
  UnsupportedFunctionalityError,
4
4
  } from '@ai-sdk/provider';
5
+ import { convertToBase64 } from '@ai-sdk/provider-utils';
5
6
  import {
6
7
  GoogleGenerativeAIContent,
7
8
  GoogleGenerativeAIContentPart,
9
+ GoogleGenerativeAIFunctionResponsePart,
8
10
  GoogleGenerativeAIPrompt,
9
11
  } from './google-generative-ai-prompt';
10
- import { convertToBase64 } from '@ai-sdk/provider-utils';
12
+
13
/**
 * Matches base64-encoded `data:` URLs.
 *
 * Capture group 1 is the bare media type (`type/subtype`); capture group 2 is
 * the base64 payload. Optional media type parameters between the media type
 * and the `;base64` marker (e.g. `;charset=utf-8`) are accepted but not
 * captured. They are matched lazily so that any input accepted without
 * parameters keeps producing exactly the same captures. The `s` flag lets the
 * payload contain line breaks.
 */
const dataUrlRegex = /^data:([^;,]+)(?:;[^;,]+)*?;base64,(.+)$/s;

/**
 * Splits a base64 `data:` URL into its media type and payload.
 *
 * @param value - The string to inspect.
 * @returns The media type (without parameters) and the base64 payload, or
 * `undefined` when `value` is not a base64 `data:` URL with an explicit media
 * type and a non-empty payload.
 */
function parseBase64DataUrl(
  value: string,
): { mediaType: string; data: string } | undefined {
  const match = dataUrlRegex.exec(value);
  if (match == null) {
    return undefined;
  }

  return {
    mediaType: match[1],
    data: match[2],
  };
}
28
+
29
/**
 * Converts a URL from a tool result into a function response part.
 *
 * @param url - The URL string taken from the tool result content part.
 * @returns An `inlineData` part when `parseBase64DataUrl` accepts the URL,
 * otherwise `undefined` so the caller can choose a fallback representation.
 */
function convertUrlToolResultPart(
  url: string,
): GoogleGenerativeAIFunctionResponsePart | undefined {
  // Per https://ai.google.dev/api/caching#FunctionResponsePart, only inline data is supported.
  // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/function-calling#functionresponsepart suggests that this
  // may be different for Vertex, but this needs to be confirmed and further tested for both APIs.
  const inline = parseBase64DataUrl(url);

  return inline == null
    ? undefined
    : {
        inlineData: {
          mimeType: inline.mediaType,
          data: inline.data,
        },
      };
}
47
+
48
/**
 * Appends a single `functionResponse` part for a tool result to `parts`,
 * using the format that supports multimodal parts (e.g. inline images/files
 * alongside text). This format is supported by Gemini 3+ models.
 *
 * Text entries are joined with newlines into `response.content`. Inline
 * `image-data`/`file-data` entries, and `image-url`/`file-url` entries that
 * `convertUrlToolResultPart` can convert, are attached as
 * `functionResponse.parts`. Every other entry is serialized with
 * `JSON.stringify` and treated as text.
 *
 * @param parts - Target content parts array; mutated in place.
 * @param toolName - Name of the tool that produced the result.
 * @param outputValue - Content parts of the tool result.
 */
function appendToolResultParts(
  parts: GoogleGenerativeAIContentPart[],
  toolName: string,
  outputValue: Array<{
    type: string;
    [key: string]: unknown;
  }>,
): void {
  const textSegments: string[] = [];
  const inlineParts: GoogleGenerativeAIFunctionResponsePart[] = [];

  for (const item of outputValue) {
    if (item.type === 'text') {
      textSegments.push(item.text as string);
      continue;
    }

    if (item.type === 'image-data' || item.type === 'file-data') {
      inlineParts.push({
        inlineData: {
          mimeType: item.mediaType as string,
          data: item.data as string,
        },
      });
      continue;
    }

    if (item.type === 'image-url' || item.type === 'file-url') {
      const converted = convertUrlToolResultPart(item.url as string);
      if (converted != null) {
        inlineParts.push(converted);
        continue;
      }
    }

    // Unknown part types and URLs that could not be converted are passed
    // along as serialized JSON text.
    textSegments.push(JSON.stringify(item));
  }

  // Placeholder text is used when the tool result has no textual content.
  const content =
    textSegments.length > 0
      ? textSegments.join('\n')
      : 'Tool executed successfully.';

  parts.push({
    functionResponse: {
      name: toolName,
      response: {
        name: toolName,
        content,
      },
      // `parts` is only present when there is at least one inline part.
      ...(inlineParts.length > 0 ? { parts: inlineParts } : {}),
    },
  });
}
116
+
117
/**
 * Appends tool result content to `parts` using the legacy layout for
 * pre-Gemini 3 models that do not support multimodal parts within
 * `functionResponse`.
 *
 * Each `text` entry becomes its own `functionResponse` part. Each
 * `image-data` entry becomes a top-level `inlineData` part followed by a
 * short explanatory text part. Every other entry is appended as a text part
 * containing its `JSON.stringify` serialization.
 *
 * @param parts - Target content parts array; mutated in place.
 * @param toolName - Name of the tool that produced the result.
 * @param outputValue - Content parts of the tool result.
 */
function appendLegacyToolResultParts(
  parts: GoogleGenerativeAIContentPart[],
  toolName: string,
  outputValue: Array<{
    type: string;
    [key: string]: unknown;
  }>,
): void {
  for (const item of outputValue) {
    if (item.type === 'text') {
      parts.push({
        functionResponse: {
          name: toolName,
          response: {
            name: toolName,
            content: item.text,
          },
        },
      });
    } else if (item.type === 'image-data') {
      // The image is sent as a sibling part, outside of any functionResponse.
      parts.push({
        inlineData: {
          mimeType: String(item.mediaType),
          data: String(item.data),
        },
      });
      parts.push({
        text: 'Tool executed successfully and returned this image as a response',
      });
    } else {
      parts.push({ text: JSON.stringify(item) });
    }
  }
}
11
162
 
12
163
  export function convertToGoogleGenerativeAIMessages(
13
- prompt: LanguageModelV3Prompt,
14
- options?: { isGemmaModel?: boolean; providerOptionsName?: string },
164
+ prompt: LanguageModelV4Prompt,
165
+ options?: {
166
+ isGemmaModel?: boolean;
167
+ providerOptionsName?: string;
168
+ supportsFunctionResponseParts?: boolean;
169
+ },
15
170
  ): GoogleGenerativeAIPrompt {
16
171
  const systemInstructionParts: Array<{ text: string }> = [];
17
172
  const contents: Array<GoogleGenerativeAIContent> = [];
18
173
  let systemMessagesAllowed = true;
19
174
  const isGemmaModel = options?.isGemmaModel ?? false;
20
175
  const providerOptionsName = options?.providerOptionsName ?? 'google';
176
+ const supportsFunctionResponseParts =
177
+ options?.supportsFunctionResponseParts ?? true;
21
178
 
22
179
  for (const { role, content } of prompt) {
23
180
  switch (role) {
@@ -112,6 +269,24 @@ export function convertToGoogleGenerativeAIMessages(
112
269
  };
113
270
  }
114
271
 
272
+ case 'reasoning-file': {
273
+ if (part.data instanceof URL) {
274
+ throw new UnsupportedFunctionalityError({
275
+ functionality:
276
+ 'File data URLs in assistant messages are not supported',
277
+ });
278
+ }
279
+
280
+ return {
281
+ inlineData: {
282
+ mimeType: part.mediaType,
283
+ data: convertToBase64(part.data),
284
+ },
285
+ thought: true,
286
+ thoughtSignature,
287
+ };
288
+ }
289
+
115
290
  case 'file': {
116
291
  if (part.data instanceof URL) {
117
292
  throw new UnsupportedFunctionalityError({
@@ -125,11 +300,37 @@ export function convertToGoogleGenerativeAIMessages(
125
300
  mimeType: part.mediaType,
126
301
  data: convertToBase64(part.data),
127
302
  },
303
+ ...(providerOpts?.thought === true
304
+ ? { thought: true }
305
+ : {}),
128
306
  thoughtSignature,
129
307
  };
130
308
  }
131
309
 
132
310
  case 'tool-call': {
311
+ const serverToolCallId =
312
+ providerOpts?.serverToolCallId != null
313
+ ? String(providerOpts.serverToolCallId)
314
+ : undefined;
315
+ const serverToolType =
316
+ providerOpts?.serverToolType != null
317
+ ? String(providerOpts.serverToolType)
318
+ : undefined;
319
+
320
+ if (serverToolCallId && serverToolType) {
321
+ return {
322
+ toolCall: {
323
+ toolType: serverToolType,
324
+ args:
325
+ typeof part.input === 'string'
326
+ ? JSON.parse(part.input)
327
+ : part.input,
328
+ id: serverToolCallId,
329
+ },
330
+ thoughtSignature,
331
+ };
332
+ }
333
+
133
334
  return {
134
335
  functionCall: {
135
336
  name: part.toolName,
@@ -138,10 +339,36 @@ export function convertToGoogleGenerativeAIMessages(
138
339
  thoughtSignature,
139
340
  };
140
341
  }
342
+
343
+ case 'tool-result': {
344
+ const serverToolCallId =
345
+ providerOpts?.serverToolCallId != null
346
+ ? String(providerOpts.serverToolCallId)
347
+ : undefined;
348
+ const serverToolType =
349
+ providerOpts?.serverToolType != null
350
+ ? String(providerOpts.serverToolType)
351
+ : undefined;
352
+
353
+ if (serverToolCallId && serverToolType) {
354
+ return {
355
+ toolResponse: {
356
+ toolType: serverToolType,
357
+ response:
358
+ part.output.type === 'json' ? part.output.value : {},
359
+ id: serverToolCallId,
360
+ },
361
+ thoughtSignature,
362
+ };
363
+ }
364
+
365
+ return undefined;
366
+ }
141
367
  }
142
368
  })
143
369
  .filter(part => part !== undefined),
144
370
  });
371
+
145
372
  break;
146
373
  }
147
374
 
@@ -154,39 +381,51 @@ export function convertToGoogleGenerativeAIMessages(
154
381
  if (part.type === 'tool-approval-response') {
155
382
  continue;
156
383
  }
384
+
385
+ const partProviderOpts =
386
+ part.providerOptions?.[providerOptionsName] ??
387
+ (providerOptionsName !== 'google'
388
+ ? part.providerOptions?.google
389
+ : part.providerOptions?.vertex);
390
+ const serverToolCallId =
391
+ partProviderOpts?.serverToolCallId != null
392
+ ? String(partProviderOpts.serverToolCallId)
393
+ : undefined;
394
+ const serverToolType =
395
+ partProviderOpts?.serverToolType != null
396
+ ? String(partProviderOpts.serverToolType)
397
+ : undefined;
398
+
399
+ if (serverToolCallId && serverToolType) {
400
+ const serverThoughtSignature =
401
+ partProviderOpts?.thoughtSignature != null
402
+ ? String(partProviderOpts.thoughtSignature)
403
+ : undefined;
404
+
405
+ if (contents.length > 0) {
406
+ const lastContent = contents[contents.length - 1];
407
+ if (lastContent.role === 'model') {
408
+ lastContent.parts.push({
409
+ toolResponse: {
410
+ toolType: serverToolType,
411
+ response:
412
+ part.output.type === 'json' ? part.output.value : {},
413
+ id: serverToolCallId,
414
+ },
415
+ thoughtSignature: serverThoughtSignature,
416
+ });
417
+ continue;
418
+ }
419
+ }
420
+ }
421
+
157
422
  const output = part.output;
158
423
 
159
424
  if (output.type === 'content') {
160
- for (const contentPart of output.value) {
161
- switch (contentPart.type) {
162
- case 'text':
163
- parts.push({
164
- functionResponse: {
165
- name: part.toolName,
166
- response: {
167
- name: part.toolName,
168
- content: contentPart.text,
169
- },
170
- },
171
- });
172
- break;
173
- case 'image-data':
174
- parts.push(
175
- {
176
- inlineData: {
177
- mimeType: contentPart.mediaType,
178
- data: contentPart.data,
179
- },
180
- },
181
- {
182
- text: 'Tool executed successfully and returned this image as a response',
183
- },
184
- );
185
- break;
186
- default:
187
- parts.push({ text: JSON.stringify(contentPart) });
188
- break;
189
- }
425
+ if (supportsFunctionResponseParts) {
426
+ appendToolResultParts(parts, part.toolName, output.value);
427
+ } else {
428
+ appendLegacyToolResultParts(parts, part.toolName, output.value);
190
429
  }
191
430
  } else {
192
431
  parts.push({
@@ -1,5 +1,5 @@
1
1
  import {
2
- EmbeddingModelV3,
2
+ EmbeddingModelV4,
3
3
  TooManyEmbeddingValuesForCallError,
4
4
  } from '@ai-sdk/provider';
5
5
  import {
@@ -26,8 +26,8 @@ type GoogleGenerativeAIEmbeddingConfig = {
26
26
  fetch?: FetchFunction;
27
27
  };
28
28
 
29
- export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
30
- readonly specificationVersion = 'v3';
29
+ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV4 {
30
+ readonly specificationVersion = 'v4';
31
31
  readonly modelId: GoogleGenerativeAIEmbeddingModelId;
32
32
  readonly maxEmbeddingsPerCall = 2048;
33
33
  readonly supportsParallelCalls = true;
@@ -50,8 +50,8 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
50
50
  headers,
51
51
  abortSignal,
52
52
  providerOptions,
53
- }: Parameters<EmbeddingModelV3['doEmbed']>[0]): Promise<
54
- Awaited<ReturnType<EmbeddingModelV3['doEmbed']>>
53
+ }: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
54
+ Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
55
55
  > {
56
56
  // Parse provider options
57
57
  const googleOptions = await parseProviderOptions({
@@ -74,8 +74,26 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
74
74
  headers,
75
75
  );
76
76
 
77
- // For single embeddings, use the single endpoint (ratelimits, etc.)
77
+ const multimodalContent = googleOptions?.content;
78
+
79
+ if (
80
+ multimodalContent != null &&
81
+ multimodalContent.length !== values.length
82
+ ) {
83
+ throw new Error(
84
+ `The number of multimodal content entries (${multimodalContent.length}) must match the number of values (${values.length}).`,
85
+ );
86
+ }
87
+
88
+ // For single embeddings, use the single endpoint
78
89
  if (values.length === 1) {
90
+ const valueParts = multimodalContent?.[0];
91
+ const textPart = values[0] ? [{ text: values[0] }] : [];
92
+ const parts =
93
+ valueParts != null
94
+ ? [...textPart, ...valueParts]
95
+ : [{ text: values[0] }];
96
+
79
97
  const {
80
98
  responseHeaders,
81
99
  value: response,
@@ -86,7 +104,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
86
104
  body: {
87
105
  model: `models/${this.modelId}`,
88
106
  content: {
89
- parts: [{ text: values[0] }],
107
+ parts,
90
108
  },
91
109
  outputDimensionality: googleOptions?.outputDimensionality,
92
110
  taskType: googleOptions?.taskType,
@@ -107,6 +125,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
107
125
  };
108
126
  }
109
127
 
128
+ // For multiple values, use the batch endpoint
110
129
  const {
111
130
  responseHeaders,
112
131
  value: response,
@@ -115,12 +134,22 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
115
134
  url: `${this.config.baseURL}/models/${this.modelId}:batchEmbedContents`,
116
135
  headers: mergedHeaders,
117
136
  body: {
118
- requests: values.map(value => ({
119
- model: `models/${this.modelId}`,
120
- content: { role: 'user', parts: [{ text: value }] },
121
- outputDimensionality: googleOptions?.outputDimensionality,
122
- taskType: googleOptions?.taskType,
123
- })),
137
+ requests: values.map((value, index) => {
138
+ const valueParts = multimodalContent?.[index];
139
+ const textPart = value ? [{ text: value }] : [];
140
+ return {
141
+ model: `models/${this.modelId}`,
142
+ content: {
143
+ role: 'user',
144
+ parts:
145
+ valueParts != null
146
+ ? [...textPart, ...valueParts]
147
+ : [{ text: value }],
148
+ },
149
+ outputDimensionality: googleOptions?.outputDimensionality,
150
+ taskType: googleOptions?.taskType,
151
+ };
152
+ }),
124
153
  },
125
154
  failedResponseHandler: googleFailedResponseHandler,
126
155
  successfulResponseHandler: createJsonResponseHandler(
@@ -7,8 +7,19 @@ import { z } from 'zod/v4';
7
7
 
8
8
  export type GoogleGenerativeAIEmbeddingModelId =
9
9
  | 'gemini-embedding-001'
10
+ | 'gemini-embedding-2-preview'
10
11
  | (string & {});
11
12
 
13
/**
 * Schema for one multimodal embedding content part: either a text part
 * (`{ text }`) or an inline data part (`{ inlineData: { mimeType, data } }`).
 */
const googleEmbeddingContentPartSchema = z.object({ text: z.string() }).or(
  z.object({
    inlineData: z.object({
      mimeType: z.string(),
      data: z.string(),
    }),
  }),
);
22
+
12
23
  export const googleEmbeddingModelOptions = lazySchema(() =>
13
24
  zodSchema(
14
25
  z.object({
@@ -42,6 +53,19 @@ export const googleEmbeddingModelOptions = lazySchema(() =>
42
53
  'CODE_RETRIEVAL_QUERY',
43
54
  ])
44
55
  .optional(),
56
+
57
+ /**
58
+ * Optional. Per-value multimodal content parts for embedding non-text
59
+ * content (images, video, PDF, audio). Each entry corresponds to the
60
+ * embedding value at the same index and its parts are merged with the
61
+ * text value in the request. Use `null` for entries that are text-only.
62
+ *
63
+ * The array length must match the number of values being embedded. In
64
+ * the case of a single embedding, the array length must be 1.
65
+ */
66
+ content: z
67
+ .array(z.array(googleEmbeddingContentPartSchema).min(1).nullable())
68
+ .optional(),
45
69
  }),
46
70
  ),
47
71
  );
@@ -1,7 +1,7 @@
1
1
  import {
2
- ImageModelV3,
3
- LanguageModelV3Prompt,
4
- SharedV3Warning,
2
+ ImageModelV4,
3
+ LanguageModelV4Prompt,
4
+ SharedV4Warning,
5
5
  } from '@ai-sdk/provider';
6
6
  import {
7
7
  combineHeaders,
@@ -37,8 +37,8 @@ interface GoogleGenerativeAIImageModelConfig {
37
37
  };
38
38
  }
39
39
 
40
- export class GoogleGenerativeAIImageModel implements ImageModelV3 {
41
- readonly specificationVersion = 'v3';
40
+ export class GoogleGenerativeAIImageModel implements ImageModelV4 {
41
+ readonly specificationVersion = 'v4';
42
42
 
43
43
  get maxImagesPerCall(): number {
44
44
  if (this.settings.maxImagesPerCall != null) {
@@ -63,8 +63,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
63
63
  ) {}
64
64
 
65
65
  async doGenerate(
66
- options: Parameters<ImageModelV3['doGenerate']>[0],
67
- ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
66
+ options: Parameters<ImageModelV4['doGenerate']>[0],
67
+ ): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
68
68
  // Gemini image models use the language model API internally
69
69
  if (isGeminiModel(this.modelId)) {
70
70
  return this.doGenerateGemini(options);
@@ -73,8 +73,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
73
73
  }
74
74
 
75
75
  private async doGenerateImagen(
76
- options: Parameters<ImageModelV3['doGenerate']>[0],
77
- ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
76
+ options: Parameters<ImageModelV4['doGenerate']>[0],
77
+ ): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
78
78
  const {
79
79
  prompt,
80
80
  n = 1,
@@ -87,7 +87,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
87
87
  files,
88
88
  mask,
89
89
  } = options;
90
- const warnings: Array<SharedV3Warning> = [];
90
+ const warnings: Array<SharedV4Warning> = [];
91
91
 
92
92
  // Imagen API endpoints do not support image editing
93
93
  if (files != null && files.length > 0) {
@@ -181,8 +181,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
181
181
  }
182
182
 
183
183
  private async doGenerateGemini(
184
- options: Parameters<ImageModelV3['doGenerate']>[0],
185
- ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
184
+ options: Parameters<ImageModelV4['doGenerate']>[0],
185
+ ): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
186
186
  const {
187
187
  prompt,
188
188
  n,
@@ -195,7 +195,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
195
195
  files,
196
196
  mask,
197
197
  } = options;
198
- const warnings: Array<SharedV3Warning> = [];
198
+ const warnings: Array<SharedV4Warning> = [];
199
199
 
200
200
  // Gemini does not support mask-based inpainting
201
201
  if (mask != null) {
@@ -253,7 +253,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
253
253
  }
254
254
  }
255
255
 
256
- const languageModelPrompt: LanguageModelV3Prompt = [
256
+ const languageModelPrompt: LanguageModelV4Prompt = [
257
257
  { role: 'user', content: userContent },
258
258
  ];
259
259