@ai-sdk/google 4.0.0-beta.4 → 4.0.0-beta.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import {
2
- EmbeddingModelV3,
2
+ EmbeddingModelV4,
3
3
  TooManyEmbeddingValuesForCallError,
4
4
  } from '@ai-sdk/provider';
5
5
  import {
@@ -10,6 +10,9 @@ import {
10
10
  parseProviderOptions,
11
11
  postJsonToApi,
12
12
  resolve,
13
+ serializeModelOptions,
14
+ WORKFLOW_SERIALIZE,
15
+ WORKFLOW_DESERIALIZE,
13
16
  zodSchema,
14
17
  } from '@ai-sdk/provider-utils';
15
18
  import { z } from 'zod/v4';
@@ -22,18 +25,35 @@ import {
22
25
  type GoogleGenerativeAIEmbeddingConfig = {
23
26
  provider: string;
24
27
  baseURL: string;
25
- headers: () => Record<string, string | undefined>;
28
+ headers?: () => Record<string, string | undefined>;
26
29
  fetch?: FetchFunction;
27
30
  };
28
31
 
29
- export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
30
- readonly specificationVersion = 'v3';
32
+ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV4 {
33
+ readonly specificationVersion = 'v4';
31
34
  readonly modelId: GoogleGenerativeAIEmbeddingModelId;
32
35
  readonly maxEmbeddingsPerCall = 2048;
33
36
  readonly supportsParallelCalls = true;
34
37
 
35
38
  private readonly config: GoogleGenerativeAIEmbeddingConfig;
36
39
 
40
/**
 * Workflow serialization hook: hands the model id and config of `model`
 * to `serializeModelOptions` and returns its result.
 */
static [WORKFLOW_SERIALIZE](model: GoogleGenerativeAIEmbeddingModel) {
  const { modelId, config } = model;
  return serializeModelOptions({ modelId, config });
}
46
+
47
/**
 * Workflow deserialization hook: constructs a new embedding model from the
 * `modelId` / `config` pair carried in `options`.
 */
static [WORKFLOW_DESERIALIZE](options: {
  modelId: string;
  config: GoogleGenerativeAIEmbeddingConfig;
}) {
  const { modelId, config } = options;
  return new GoogleGenerativeAIEmbeddingModel(modelId, config);
}
56
+
37
57
  get provider(): string {
38
58
  return this.config.provider;
39
59
  }
@@ -50,8 +70,8 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
50
70
  headers,
51
71
  abortSignal,
52
72
  providerOptions,
53
- }: Parameters<EmbeddingModelV3['doEmbed']>[0]): Promise<
54
- Awaited<ReturnType<EmbeddingModelV3['doEmbed']>>
73
+ }: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
74
+ Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
55
75
  > {
56
76
  // Parse provider options
57
77
  const googleOptions = await parseProviderOptions({
@@ -70,12 +90,30 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
70
90
  }
71
91
 
72
92
  const mergedHeaders = combineHeaders(
73
- await resolve(this.config.headers),
93
+ this.config.headers ? await resolve(this.config.headers) : undefined,
74
94
  headers,
75
95
  );
76
96
 
77
- // For single embeddings, use the single endpoint (ratelimits, etc.)
97
+ const multimodalContent = googleOptions?.content;
98
+
99
+ if (
100
+ multimodalContent != null &&
101
+ multimodalContent.length !== values.length
102
+ ) {
103
+ throw new Error(
104
+ `The number of multimodal content entries (${multimodalContent.length}) must match the number of values (${values.length}).`,
105
+ );
106
+ }
107
+
108
+ // For single embeddings, use the single endpoint
78
109
  if (values.length === 1) {
110
+ const valueParts = multimodalContent?.[0];
111
+ const textPart = values[0] ? [{ text: values[0] }] : [];
112
+ const parts =
113
+ valueParts != null
114
+ ? [...textPart, ...valueParts]
115
+ : [{ text: values[0] }];
116
+
79
117
  const {
80
118
  responseHeaders,
81
119
  value: response,
@@ -86,7 +124,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
86
124
  body: {
87
125
  model: `models/${this.modelId}`,
88
126
  content: {
89
- parts: [{ text: values[0] }],
127
+ parts,
90
128
  },
91
129
  outputDimensionality: googleOptions?.outputDimensionality,
92
130
  taskType: googleOptions?.taskType,
@@ -107,6 +145,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
107
145
  };
108
146
  }
109
147
 
148
+ // For multiple values, use the batch endpoint
110
149
  const {
111
150
  responseHeaders,
112
151
  value: response,
@@ -115,12 +154,22 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
115
154
  url: `${this.config.baseURL}/models/${this.modelId}:batchEmbedContents`,
116
155
  headers: mergedHeaders,
117
156
  body: {
118
- requests: values.map(value => ({
119
- model: `models/${this.modelId}`,
120
- content: { role: 'user', parts: [{ text: value }] },
121
- outputDimensionality: googleOptions?.outputDimensionality,
122
- taskType: googleOptions?.taskType,
123
- })),
157
+ requests: values.map((value, index) => {
158
+ const valueParts = multimodalContent?.[index];
159
+ const textPart = value ? [{ text: value }] : [];
160
+ return {
161
+ model: `models/${this.modelId}`,
162
+ content: {
163
+ role: 'user',
164
+ parts:
165
+ valueParts != null
166
+ ? [...textPart, ...valueParts]
167
+ : [{ text: value }],
168
+ },
169
+ outputDimensionality: googleOptions?.outputDimensionality,
170
+ taskType: googleOptions?.taskType,
171
+ };
172
+ }),
124
173
  },
125
174
  failedResponseHandler: googleFailedResponseHandler,
126
175
  successfulResponseHandler: createJsonResponseHandler(
@@ -7,8 +7,19 @@ import { z } from 'zod/v4';
7
7
 
8
8
  export type GoogleGenerativeAIEmbeddingModelId =
9
9
  | 'gemini-embedding-001'
10
+ | 'gemini-embedding-2-preview'
10
11
  | (string & {});
11
12
 
13
/**
 * Schema for one multimodal content part of an embedding request: either a
 * `{ text }` part or an `{ inlineData: { mimeType, data } }` part.
 */
const googleEmbeddingContentPartSchema = (() => {
  const textPartSchema = z.object({ text: z.string() });

  const inlineDataPartSchema = z.object({
    inlineData: z.object({
      mimeType: z.string(),
      data: z.string(),
    }),
  });

  return z.union([textPartSchema, inlineDataPartSchema]);
})();
22
+
12
23
  export const googleEmbeddingModelOptions = lazySchema(() =>
13
24
  zodSchema(
14
25
  z.object({
@@ -42,6 +53,19 @@ export const googleEmbeddingModelOptions = lazySchema(() =>
42
53
  'CODE_RETRIEVAL_QUERY',
43
54
  ])
44
55
  .optional(),
56
+
57
+ /**
58
+ * Optional. Per-value multimodal content parts for embedding non-text
59
+ * content (images, video, PDF, audio). Each entry corresponds to the
60
+ * embedding value at the same index and its parts are merged with the
61
+ * text value in the request. Use `null` for entries that are text-only.
62
+ *
63
+ * The array length must match the number of values being embedded. In
64
+ * the case of a single embedding, the array length must be 1.
65
+ */
66
+ content: z
67
+ .array(z.array(googleEmbeddingContentPartSchema).min(1).nullable())
68
+ .optional(),
45
69
  }),
46
70
  ),
47
71
  );
@@ -0,0 +1,228 @@
1
+ import {
2
+ AISDKError,
3
+ type FilesV4,
4
+ type FilesV4UploadFileCallOptions,
5
+ type FilesV4UploadFileResult,
6
+ type SharedV4Warning,
7
+ } from '@ai-sdk/provider';
8
+ import {
9
+ combineHeaders,
10
+ createJsonResponseHandler,
11
+ delay,
12
+ type FetchFunction,
13
+ lazySchema,
14
+ parseProviderOptions,
15
+ zodSchema,
16
+ getFromApi,
17
+ } from '@ai-sdk/provider-utils';
18
+ import { z } from 'zod/v4';
19
+ import { googleFailedResponseHandler } from './google-error';
20
+
21
/**
 * Google-specific provider options read by the file upload call.
 */
export type GoogleFilesUploadOptions = {
  /** Additional keys are accepted and typed as `unknown`. */
  [key: string]: unknown;

  /** Sent as `display_name` of the created file when it is not null/undefined. */
  displayName?: string | null;

  /** Delay between processing-state polls, in milliseconds. */
  pollIntervalMs?: number | null;

  /** Maximum time to wait for processing to finish, in milliseconds. */
  pollTimeoutMs?: number | null;
};
28
+
29
/**
 * Construction-time configuration for the files implementation.
 */
type GoogleGenerativeAIFilesConfig = {
  /** Value reported by the `provider` getter. */
  provider: string;
  /** Base URL used for status polling; the upload URL is derived from it. */
  baseURL: string;
  /** Produces the request headers; individual values may be undefined. */
  headers: () => Record<string, string | undefined>;
  /** Optional fetch override; `globalThis.fetch` is used for uploads when absent. */
  fetch?: FetchFunction;
};
35
+
36
/**
 * Files implementation (specification version `v4`) that uploads a file with
 * the resumable upload protocol and then polls the created file resource
 * for as long as it reports the `PROCESSING` state.
 */
export class GoogleGenerativeAIFiles implements FilesV4 {
  readonly specificationVersion = 'v4';

  get provider(): string {
    return this.config.provider;
  }

  constructor(private readonly config: GoogleGenerativeAIFilesConfig) {}

  /**
   * Uploads `options.data` and waits until the file has left the
   * `PROCESSING` state.
   *
   * @param options Upload call options. `data` may be a `Uint8Array` or a
   *   string; a string is decoded as base64. `filename` is not sent and
   *   produces an `unsupported` warning. Provider options under `google`
   *   supply `displayName`, `pollIntervalMs` (default 2000) and
   *   `pollTimeoutMs` (default 300000).
   * @returns The file URI as provider reference, the media type, the
   *   collected warnings and the file resource fields as provider metadata.
   * @throws AISDKError `GOOGLE_FILES_UPLOAD_ERROR` when the initiation or the
   *   data upload request fails, when no upload URL is returned, or when the
   *   upload response carries no file resource.
   * @throws AISDKError `GOOGLE_FILES_UPLOAD_TIMEOUT` when the file is still
   *   processing after `pollTimeoutMs`.
   * @throws AISDKError `GOOGLE_FILES_UPLOAD_FAILED` when the file ends in the
   *   `FAILED` state.
   */
  async uploadFile(
    options: FilesV4UploadFileCallOptions,
  ): Promise<FilesV4UploadFileResult> {
    const googleOptions = (await parseProviderOptions({
      provider: 'google',
      providerOptions: options.providerOptions,
      schema: googleFilesUploadOptionsSchema,
    })) as GoogleFilesUploadOptions | undefined;

    // The config allows undefined header values. They are removed up front
    // because the upload requests below go through raw fetch, which would
    // otherwise stringify them and send the literal text "undefined".
    const resolvedHeaders = GoogleGenerativeAIFiles.definedHeaders(
      this.config.headers(),
    );
    const fetchFn = this.config.fetch ?? globalThis.fetch;

    const warnings: Array<SharedV4Warning> = [];
    if (options.filename != null) {
      warnings.push({ type: 'unsupported', feature: 'filename' });
    }

    // String input is treated as base64 and decoded to raw bytes.
    const data = options.data;
    const fileBytes =
      data instanceof Uint8Array
        ? data
        : Uint8Array.from(atob(data), c => c.charCodeAt(0));

    const mediaType = options.mediaType;
    const displayName = googleOptions?.displayName;

    // The upload endpoint lives at `<origin>/upload/v1beta/files`, so a
    // trailing `/v1beta` on the configured base URL is stripped first.
    const baseOrigin = this.config.baseURL.replace(/\/v1beta$/, '');

    // Step 1: start the resumable upload session.
    const initResponse = await fetchFn(`${baseOrigin}/upload/v1beta/files`, {
      method: 'POST',
      headers: {
        ...resolvedHeaders,
        'X-Goog-Upload-Protocol': 'resumable',
        'X-Goog-Upload-Command': 'start',
        'X-Goog-Upload-Header-Content-Length': String(fileBytes.length),
        'X-Goog-Upload-Header-Content-Type': mediaType,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        file: {
          ...(displayName != null ? { display_name: displayName } : {}),
        },
      }),
    });

    if (!initResponse.ok) {
      const errorBody = await initResponse.text();
      throw new AISDKError({
        name: 'GOOGLE_FILES_UPLOAD_ERROR',
        message: `Failed to initiate resumable upload: ${initResponse.status} ${errorBody}`,
      });
    }

    const uploadUrl = initResponse.headers.get('x-goog-upload-url');
    if (!uploadUrl) {
      throw new AISDKError({
        name: 'GOOGLE_FILES_UPLOAD_ERROR',
        message: 'No upload URL returned from initiation request',
      });
    }

    // Step 2: send all bytes in a single request and finalize the session.
    const uploadResponse = await fetchFn(uploadUrl, {
      method: 'POST',
      headers: {
        'Content-Length': String(fileBytes.length),
        'X-Goog-Upload-Offset': '0',
        'X-Goog-Upload-Command': 'upload, finalize',
      },
      body: fileBytes,
    });

    if (!uploadResponse.ok) {
      const errorBody = await uploadResponse.text();
      throw new AISDKError({
        name: 'GOOGLE_FILES_UPLOAD_ERROR',
        message: `Failed to upload file data: ${uploadResponse.status} ${errorBody}`,
      });
    }

    // The upload response is not schema-validated, so make sure the file
    // resource is present before it is dereferenced below.
    const uploadResult = (await uploadResponse.json()) as {
      file?: GoogleFileResource | null;
    } | null;

    if (uploadResult?.file == null) {
      throw new AISDKError({
        name: 'GOOGLE_FILES_UPLOAD_ERROR',
        message: 'Upload response did not contain a file resource',
      });
    }

    let file: GoogleFileResource = uploadResult.file;

    const pollIntervalMs = googleOptions?.pollIntervalMs ?? 2000;
    const pollTimeoutMs = googleOptions?.pollTimeoutMs ?? 300000;
    const startTime = Date.now();

    // Step 3: poll the file resource until it leaves the PROCESSING state.
    while (file.state === 'PROCESSING') {
      if (Date.now() - startTime > pollTimeoutMs) {
        throw new AISDKError({
          name: 'GOOGLE_FILES_UPLOAD_TIMEOUT',
          message: `File processing timed out after ${pollTimeoutMs}ms`,
        });
      }

      await delay(pollIntervalMs);

      const { value: fileStatus } = await getFromApi({
        url: `${this.config.baseURL}/${file.name}`,
        headers: combineHeaders(resolvedHeaders),
        successfulResponseHandler: createJsonResponseHandler(
          googleFileResponseSchema,
        ),
        failedResponseHandler: googleFailedResponseHandler,
        fetch: this.config.fetch,
      });

      file = fileStatus;
    }

    if (file.state === 'FAILED') {
      throw new AISDKError({
        name: 'GOOGLE_FILES_UPLOAD_FAILED',
        message: `File processing failed for ${file.name}`,
      });
    }

    return {
      warnings,
      providerReference: { google: file.uri },
      mediaType: file.mimeType ?? options.mediaType,
      providerMetadata: {
        google: {
          name: file.name,
          displayName: file.displayName,
          mimeType: file.mimeType,
          sizeBytes: file.sizeBytes,
          state: file.state,
          uri: file.uri,
          // Timestamps and hash are only included when the resource has them.
          ...(file.createTime != null ? { createTime: file.createTime } : {}),
          ...(file.updateTime != null ? { updateTime: file.updateTime } : {}),
          ...(file.expirationTime != null
            ? { expirationTime: file.expirationTime }
            : {}),
          ...(file.sha256Hash != null ? { sha256Hash: file.sha256Hash } : {}),
        },
      },
    };
  }

  /** Returns a copy of `headers` without the entries whose value is undefined. */
  private static definedHeaders(
    headers: Record<string, string | undefined>,
  ): Record<string, string> {
    const defined: Record<string, string> = {};
    for (const [name, value] of Object.entries(headers)) {
      if (value !== undefined) {
        defined[name] = value;
      }
    }
    return defined;
  }
}
187
+
188
/**
 * File resource shape used for the upload result and for each polling
 * response. `name`, `mimeType`, `uri` and `state` are required; the other
 * fields may be absent or null.
 */
interface GoogleFileResource {
  // Required fields.
  name: string;
  mimeType: string;
  uri: string;
  state: string;

  // Optional fields.
  displayName?: string | null;
  sizeBytes?: string | null;
  createTime?: string | null;
  updateTime?: string | null;
  expirationTime?: string | null;
  sha256Hash?: string | null;
}
200
+
201
/**
 * Lazily built schema for a file resource response: four required string
 * fields plus optional/nullable string metadata fields.
 */
const googleFileResponseSchema = lazySchema(() => {
  const requiredString = z.string();
  const optionalString = z.string().nullish();

  const fileResource = z.object({
    name: requiredString,
    displayName: optionalString,
    mimeType: requiredString,
    sizeBytes: optionalString,
    createTime: optionalString,
    updateTime: optionalString,
    expirationTime: optionalString,
    sha256Hash: optionalString,
    uri: requiredString,
    state: requiredString,
  });

  return zodSchema(fileResource);
});
217
+
218
/**
 * Lazily built schema for the Google upload provider options. The poll
 * settings must be positive numbers when given, and keys that are not
 * listed are passed through.
 */
const googleFilesUploadOptionsSchema = lazySchema(() => {
  const positiveMs = z.number().positive().nullish();

  const uploadOptions = z
    .object({
      displayName: z.string().nullish(),
      pollIntervalMs: positiveMs,
      pollTimeoutMs: positiveMs,
    })
    .passthrough();

  return zodSchema(uploadOptions);
});
@@ -1,7 +1,7 @@
1
1
  import {
2
- ImageModelV3,
3
- LanguageModelV3Prompt,
4
- SharedV3Warning,
2
+ ImageModelV4,
3
+ LanguageModelV4Prompt,
4
+ SharedV4Warning,
5
5
  } from '@ai-sdk/provider';
6
6
  import {
7
7
  combineHeaders,
@@ -15,6 +15,9 @@ import {
15
15
  postJsonToApi,
16
16
  Resolvable,
17
17
  resolve,
18
+ serializeModelOptions,
19
+ WORKFLOW_SERIALIZE,
20
+ WORKFLOW_DESERIALIZE,
18
21
  zodSchema,
19
22
  } from '@ai-sdk/provider-utils';
20
23
  import { z } from 'zod/v4';
@@ -37,8 +40,26 @@ interface GoogleGenerativeAIImageModelConfig {
37
40
  };
38
41
  }
39
42
 
40
- export class GoogleGenerativeAIImageModel implements ImageModelV3 {
41
- readonly specificationVersion = 'v3';
43
+ export class GoogleGenerativeAIImageModel implements ImageModelV4 {
44
+ readonly specificationVersion = 'v4';
45
+
46
/**
 * Workflow serialization hook: hands the model id and config of `model`
 * to `serializeModelOptions` and returns its result.
 */
static [WORKFLOW_SERIALIZE](model: GoogleGenerativeAIImageModel) {
  const { modelId, config } = model;
  return serializeModelOptions({ modelId, config });
}
52
+
53
/**
 * Workflow deserialization hook: constructs a new image model from the
 * `modelId` / `config` pair carried in `options`.
 */
static [WORKFLOW_DESERIALIZE](options: {
  modelId: string;
  config: GoogleGenerativeAIImageModelConfig;
}) {
  const { modelId, config } = options;
  // NOTE(review): the serialized payload has no settings, so the model is
  // rebuilt with an empty settings object — confirm that losing values such
  // as `maxImagesPerCall` across a round trip is intended.
  return new GoogleGenerativeAIImageModel(modelId, {}, config);
}
42
63
 
43
64
  get maxImagesPerCall(): number {
44
65
  if (this.settings.maxImagesPerCall != null) {
@@ -63,8 +84,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
63
84
  ) {}
64
85
 
65
86
  async doGenerate(
66
- options: Parameters<ImageModelV3['doGenerate']>[0],
67
- ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
87
+ options: Parameters<ImageModelV4['doGenerate']>[0],
88
+ ): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
68
89
  // Gemini image models use the language model API internally
69
90
  if (isGeminiModel(this.modelId)) {
70
91
  return this.doGenerateGemini(options);
@@ -73,8 +94,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
73
94
  }
74
95
 
75
96
  private async doGenerateImagen(
76
- options: Parameters<ImageModelV3['doGenerate']>[0],
77
- ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
97
+ options: Parameters<ImageModelV4['doGenerate']>[0],
98
+ ): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
78
99
  const {
79
100
  prompt,
80
101
  n = 1,
@@ -87,7 +108,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
87
108
  files,
88
109
  mask,
89
110
  } = options;
90
- const warnings: Array<SharedV3Warning> = [];
111
+ const warnings: Array<SharedV4Warning> = [];
91
112
 
92
113
  // Imagen API endpoints do not support image editing
93
114
  if (files != null && files.length > 0) {
@@ -151,7 +172,10 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
151
172
  predictions: Array<{ bytesBase64Encoded: string }>;
152
173
  }>({
153
174
  url: `${this.config.baseURL}/models/${this.modelId}:predict`,
154
- headers: combineHeaders(await resolve(this.config.headers), headers),
175
+ headers: combineHeaders(
176
+ this.config.headers ? await resolve(this.config.headers) : undefined,
177
+ headers,
178
+ ),
155
179
  body,
156
180
  failedResponseHandler: googleFailedResponseHandler,
157
181
  successfulResponseHandler: createJsonResponseHandler(
@@ -181,8 +205,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
181
205
  }
182
206
 
183
207
  private async doGenerateGemini(
184
- options: Parameters<ImageModelV3['doGenerate']>[0],
185
- ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
208
+ options: Parameters<ImageModelV4['doGenerate']>[0],
209
+ ): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
186
210
  const {
187
211
  prompt,
188
212
  n,
@@ -195,7 +219,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
195
219
  files,
196
220
  mask,
197
221
  } = options;
198
- const warnings: Array<SharedV3Warning> = [];
222
+ const warnings: Array<SharedV4Warning> = [];
199
223
 
200
224
  // Gemini does not support mask-based inpainting
201
225
  if (mask != null) {
@@ -253,7 +277,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
253
277
  }
254
278
  }
255
279
 
256
- const languageModelPrompt: LanguageModelV3Prompt = [
280
+ const languageModelPrompt: LanguageModelV4Prompt = [
257
281
  { role: 'user', content: userContent },
258
282
  ];
259
283