@ai-sdk/google-vertex 4.0.55 → 4.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,18 @@
1
+ import type { GoogleLanguageModelOptions } from '@ai-sdk/google';
2
+ import { GoogleGenerativeAILanguageModel } from '@ai-sdk/google/internal';
1
3
  import {
2
4
  ImageModelV3,
3
5
  ImageModelV3File,
6
+ LanguageModelV3Prompt,
4
7
  SharedV3Warning,
5
8
  } from '@ai-sdk/provider';
6
9
  import {
7
10
  Resolvable,
8
11
  combineHeaders,
12
+ convertToBase64,
9
13
  convertUint8ArrayToBase64,
10
14
  createJsonResponseHandler,
15
+ generateId as defaultGenerateId,
11
16
  parseProviderOptions,
12
17
  postJsonToApi,
13
18
  resolve,
@@ -21,6 +26,7 @@ interface GoogleVertexImageModelConfig {
21
26
  baseURL: string;
22
27
  headers?: Resolvable<Record<string, string | undefined>>;
23
28
  fetch?: typeof fetch;
29
+ generateId?: () => string;
24
30
  _internal?: {
25
31
  currentDate?: () => Date;
26
32
  };
@@ -29,8 +35,13 @@ interface GoogleVertexImageModelConfig {
29
35
  // https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-images
30
36
  export class GoogleVertexImageModel implements ImageModelV3 {
31
37
  readonly specificationVersion = 'v3';
32
- // https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api#parameter_list
33
- readonly maxImagesPerCall = 4;
38
+
39
/**
 * Upper bound on the number of images that may be requested in one call.
 *
 * Model ids accepted by `isGeminiModel` report 10; every other model id
 * reports 4.
 *
 * NOTE(review): the Gemini generation path of this class throws when
 * `n > 1`, so confirm that 10 is the limit that should be advertised here.
 */
get maxImagesPerCall(): number {
  return isGeminiModel(this.modelId) ? 10 : 4;
}
34
45
 
35
46
  get provider(): string {
36
47
  return this.config.provider;
@@ -41,7 +52,16 @@ export class GoogleVertexImageModel implements ImageModelV3 {
41
52
  private config: GoogleVertexImageModelConfig,
42
53
  ) {}
43
54
 
44
- async doGenerate({
55
/**
 * Public image-generation entry point.
 *
 * Forwards the untouched call options to the Gemini implementation when
 * `isGeminiModel` accepts the model id, and to the Imagen implementation
 * otherwise.
 *
 * @param options - Call options as defined by `ImageModelV3['doGenerate']`.
 * @returns Whatever the selected implementation resolves to.
 */
async doGenerate(
  options: Parameters<ImageModelV3['doGenerate']>[0],
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
  return isGeminiModel(this.modelId)
    ? this.doGenerateGemini(options)
    : this.doGenerateImagen(options);
}
63
+
64
+ private async doGenerateImagen({
45
65
  prompt,
46
66
  n,
47
67
  size,
@@ -180,6 +200,149 @@ export class GoogleVertexImageModel implements ImageModelV3 {
180
200
  },
181
201
  };
182
202
  }
203
+
204
/**
 * Generates images with a Gemini model by delegating to
 * `GoogleGenerativeAILanguageModel` and collecting the image file parts of
 * its response.
 *
 * Option handling:
 * - `mask` is rejected with an `Error`.
 * - `n` greater than 1 is rejected with an `Error`.
 * - `size` is not forwarded; an `unsupported` warning is returned instead.
 * - `aspectRatio`, when truthy, is forwarded as `imageConfig.aspectRatio`.
 * - `providerOptions.vertex` is spread after the defaults set here, so its
 *   entries take precedence over them.
 *
 * @returns The converted images, collected warnings, one empty metadata
 *   entry per image, response details, and token usage when the language
 *   model reported any.
 * @throws Error when `mask` is provided or when `n > 1`.
 */
private async doGenerateGemini({
  prompt,
  n,
  size,
  aspectRatio,
  seed,
  providerOptions,
  headers,
  abortSignal,
  files,
  mask,
}: Parameters<ImageModelV3['doGenerate']>[0]): Promise<
  Awaited<ReturnType<ImageModelV3['doGenerate']>>
> {
  // Reject options this code path cannot honour before doing any work.
  if (mask != null) {
    throw new Error(
      'Gemini image models do not support mask-based image editing.',
    );
  }
  if (n != null && n > 1) {
    throw new Error(
      'Gemini image models do not support generating a set number of images per call. Use n=1 or omit the n parameter.',
    );
  }

  // `size` is reported back as unsupported rather than treated as an error.
  const warnings: Array<SharedV3Warning> =
    size == null
      ? []
      : [
          {
            type: 'unsupported',
            feature: 'size',
            details:
              'This model does not support the `size` option. Use `aspectRatio` instead.',
          },
        ];

  type UserPart =
    | { type: 'text'; text: string }
    | { type: 'file'; data: string | Uint8Array | URL; mediaType: string };

  // The text prompt (if any) comes first, followed by the input files in order.
  const textParts: UserPart[] =
    prompt != null ? [{ type: 'text', text: prompt }] : [];

  const fileParts = (files ?? []).map(
    (file): UserPart =>
      file.type === 'url'
        ? { type: 'file', data: new URL(file.url), mediaType: 'image/*' }
        : {
            type: 'file',
            data:
              typeof file.data === 'string'
                ? file.data
                : new Uint8Array(file.data),
            mediaType: file.mediaType,
          },
  );

  const messages: LanguageModelV3Prompt = [
    { role: 'user', content: [...textParts, ...fileParts] },
  ];

  const model = new GoogleGenerativeAILanguageModel(this.modelId, {
    provider: this.config.provider,
    baseURL: this.config.baseURL,
    headers: this.config.headers ?? {},
    fetch: this.config.fetch,
    generateId: this.config.generateId ?? defaultGenerateId,
    supportedUrls: () => ({
      '*': [/^https?:\/\/.*$/, /^gs:\/\/.*$/],
    }),
  });

  const imageConfig = aspectRatio
    ? {
        aspectRatio: aspectRatio as NonNullable<
          GoogleLanguageModelOptions['imageConfig']
        >['aspectRatio'],
      }
    : undefined;

  const callerVertexOptions =
    (providerOptions?.vertex as Omit<
      GoogleLanguageModelOptions,
      'responseModalities' | 'imageConfig'
    >) ?? {};

  const generation = await model.doGenerate({
    prompt: messages,
    seed,
    providerOptions: {
      vertex: {
        responseModalities: ['IMAGE'],
        imageConfig,
        // Spread last: caller-supplied entries override the two defaults above.
        ...callerVertexOptions,
      } satisfies GoogleLanguageModelOptions,
    },
    headers,
    abortSignal,
  });

  // The timestamp is taken after the model call has resolved.
  const timestamp = this.config._internal?.currentDate?.() ?? new Date();

  // Keep only file parts whose media type is an image; other parts are dropped.
  const images = generation.content.flatMap(part =>
    part.type === 'file' && part.mediaType.startsWith('image/')
      ? [convertToBase64(part.data)]
      : [],
  );

  const reportedUsage = generation.usage;
  const usage = reportedUsage
    ? {
        inputTokens: reportedUsage.inputTokens.total,
        outputTokens: reportedUsage.outputTokens.total,
        // Missing totals count as zero when summing.
        totalTokens:
          (reportedUsage.inputTokens.total ?? 0) +
          (reportedUsage.outputTokens.total ?? 0),
      }
    : undefined;

  return {
    images,
    warnings,
    providerMetadata: {
      vertex: {
        images: images.map(() => ({})),
      },
    },
    response: {
      timestamp,
      modelId: this.modelId,
      headers: generation.response?.headers,
    },
    usage,
  };
}
342
+ }
343
+
344
/**
 * Reports whether a model id begins with the literal prefix `gemini-`.
 * The comparison is case-sensitive.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when `modelId` starts with `gemini-`, otherwise `false`.
 */
function isGeminiModel(modelId: string): boolean {
  const geminiPrefix = 'gemini-';
  return modelId.slice(0, geminiPrefix.length) === geminiPrefix;
}
184
347
 
185
348
  // minimal version of the schema, focussed on what is needed for the implementation
@@ -5,4 +5,6 @@ export type GoogleVertexImageModelId =
5
5
  | 'imagen-4.0-generate-001'
6
6
  | 'imagen-4.0-ultra-generate-001'
7
7
  | 'imagen-4.0-fast-generate-001'
8
+ | 'gemini-2.5-flash-image'
9
+ | 'gemini-3-pro-image-preview'
8
10
  | (string & {});
@@ -210,7 +210,10 @@ export function createVertex(
210
210
  new GoogleVertexEmbeddingModel(modelId, createConfig('embedding'));
211
211
 
212
212
  const createImageModel = (modelId: GoogleVertexImageModelId) =>
213
- new GoogleVertexImageModel(modelId, createConfig('image'));
213
+ new GoogleVertexImageModel(modelId, {
214
+ ...createConfig('image'),
215
+ generateId: options.generateId ?? generateId,
216
+ });
214
217
 
215
218
  const createVideoModel = (modelId: GoogleVertexVideoModelId) =>
216
219
  new GoogleVertexVideoModel(modelId, {