@ai-sdk/google 4.0.0-beta.8 → 4.0.0-beta.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +608 -5
- package/README.md +6 -4
- package/dist/index.d.ts +297 -54
- package/dist/index.js +5409 -640
- package/dist/index.js.map +1 -1
- package/dist/internal/index.d.ts +97 -26
- package/dist/internal/index.js +1653 -453
- package/dist/internal/index.js.map +1 -1
- package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
- package/package.json +16 -17
- package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
- package/src/convert-json-schema-to-openapi-schema.ts +1 -1
- package/src/convert-to-google-messages.ts +647 -0
- package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
- package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
- package/src/google-error.ts +1 -1
- package/src/google-files.ts +225 -0
- package/src/google-image-model-options.ts +35 -0
- package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
- package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
- package/src/google-json-accumulator.ts +371 -0
- package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
- package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +691 -217
- package/src/google-prepare-tools.ts +72 -12
- package/src/google-prompt.ts +86 -0
- package/src/google-provider.ts +157 -53
- package/src/google-speech-api.ts +36 -0
- package/src/google-speech-model-options.ts +48 -0
- package/src/google-speech-model.ts +311 -0
- package/src/google-video-model-options.ts +43 -0
- package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
- package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
- package/src/index.ts +40 -9
- package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
- package/src/interactions/cancel-google-interaction.ts +60 -0
- package/src/interactions/convert-google-interactions-usage.ts +47 -0
- package/src/interactions/convert-to-google-interactions-input.ts +557 -0
- package/src/interactions/extract-google-interactions-sources.ts +252 -0
- package/src/interactions/google-interactions-agent.ts +15 -0
- package/src/interactions/google-interactions-api.ts +530 -0
- package/src/interactions/google-interactions-language-model-options.ts +262 -0
- package/src/interactions/google-interactions-language-model.ts +776 -0
- package/src/interactions/google-interactions-prompt.ts +582 -0
- package/src/interactions/google-interactions-provider-metadata.ts +23 -0
- package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
- package/src/interactions/parse-google-interactions-outputs.ts +252 -0
- package/src/interactions/poll-google-interactions.ts +129 -0
- package/src/interactions/prepare-google-interactions-tools.ts +245 -0
- package/src/interactions/stream-google-interactions.ts +242 -0
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
- package/src/internal/index.ts +3 -2
- package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
- package/src/realtime/google-realtime-event-mapper.ts +383 -0
- package/src/realtime/google-realtime-model-options.ts +3 -0
- package/src/realtime/google-realtime-model.ts +160 -0
- package/src/realtime/index.ts +2 -0
- package/src/tool/code-execution.ts +2 -2
- package/src/tool/enterprise-web-search.ts +9 -3
- package/src/tool/file-search.ts +5 -7
- package/src/tool/google-maps.ts +3 -2
- package/src/tool/google-search.ts +11 -12
- package/src/tool/url-context.ts +4 -2
- package/src/tool/vertex-rag-store.ts +9 -6
- package/dist/index.d.mts +0 -384
- package/dist/index.mjs +0 -2519
- package/dist/index.mjs.map +0 -1
- package/dist/internal/index.d.mts +0 -287
- package/dist/internal/index.mjs +0 -1708
- package/dist/internal/index.mjs.map +0 -1
- package/src/convert-to-google-generative-ai-messages.ts +0 -239
- package/src/google-generative-ai-prompt.ts +0 -47
|
@@ -1,46 +1,59 @@
|
|
|
1
1
|
import {
|
|
2
|
-
EmbeddingModelV3,
|
|
3
2
|
TooManyEmbeddingValuesForCallError,
|
|
3
|
+
type EmbeddingModelV4,
|
|
4
4
|
} from '@ai-sdk/provider';
|
|
5
5
|
import {
|
|
6
6
|
combineHeaders,
|
|
7
7
|
createJsonResponseHandler,
|
|
8
|
-
FetchFunction,
|
|
9
8
|
lazySchema,
|
|
10
9
|
parseProviderOptions,
|
|
11
10
|
postJsonToApi,
|
|
12
11
|
resolve,
|
|
12
|
+
serializeModelOptions,
|
|
13
|
+
WORKFLOW_SERIALIZE,
|
|
14
|
+
WORKFLOW_DESERIALIZE,
|
|
13
15
|
zodSchema,
|
|
16
|
+
type FetchFunction,
|
|
14
17
|
} from '@ai-sdk/provider-utils';
|
|
15
18
|
import { z } from 'zod/v4';
|
|
16
19
|
import { googleFailedResponseHandler } from './google-error';
|
|
17
20
|
import {
|
|
18
|
-
GoogleGenerativeAIEmbeddingModelId,
|
|
19
21
|
googleEmbeddingModelOptions,
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
type
|
|
22
|
+
type GoogleEmbeddingModelId,
|
|
23
|
+
} from './google-embedding-model-options';
|
|
24
|
+
type GoogleEmbeddingConfig = {
|
|
23
25
|
provider: string;
|
|
24
26
|
baseURL: string;
|
|
25
|
-
headers
|
|
27
|
+
headers?: () => Record<string, string | undefined>;
|
|
26
28
|
fetch?: FetchFunction;
|
|
27
29
|
};
|
|
28
30
|
|
|
29
|
-
export class
|
|
30
|
-
readonly specificationVersion = '
|
|
31
|
-
readonly modelId:
|
|
31
|
+
export class GoogleEmbeddingModel implements EmbeddingModelV4 {
|
|
32
|
+
readonly specificationVersion = 'v4';
|
|
33
|
+
readonly modelId: GoogleEmbeddingModelId;
|
|
32
34
|
readonly maxEmbeddingsPerCall = 2048;
|
|
33
35
|
readonly supportsParallelCalls = true;
|
|
34
36
|
|
|
35
|
-
private readonly config:
|
|
37
|
+
private readonly config: GoogleEmbeddingConfig;
|
|
38
|
+
|
|
39
|
+
static [WORKFLOW_SERIALIZE](model: GoogleEmbeddingModel) {
|
|
40
|
+
return serializeModelOptions({
|
|
41
|
+
modelId: model.modelId,
|
|
42
|
+
config: model.config,
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
static [WORKFLOW_DESERIALIZE](options: {
|
|
47
|
+
modelId: string;
|
|
48
|
+
config: GoogleEmbeddingConfig;
|
|
49
|
+
}) {
|
|
50
|
+
return new GoogleEmbeddingModel(options.modelId, options.config);
|
|
51
|
+
}
|
|
36
52
|
|
|
37
53
|
get provider(): string {
|
|
38
54
|
return this.config.provider;
|
|
39
55
|
}
|
|
40
|
-
constructor(
|
|
41
|
-
modelId: GoogleGenerativeAIEmbeddingModelId,
|
|
42
|
-
config: GoogleGenerativeAIEmbeddingConfig,
|
|
43
|
-
) {
|
|
56
|
+
constructor(modelId: GoogleEmbeddingModelId, config: GoogleEmbeddingConfig) {
|
|
44
57
|
this.modelId = modelId;
|
|
45
58
|
this.config = config;
|
|
46
59
|
}
|
|
@@ -50,8 +63,8 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
50
63
|
headers,
|
|
51
64
|
abortSignal,
|
|
52
65
|
providerOptions,
|
|
53
|
-
}: Parameters<
|
|
54
|
-
Awaited<ReturnType<
|
|
66
|
+
}: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
|
|
67
|
+
Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
|
|
55
68
|
> {
|
|
56
69
|
// Parse provider options
|
|
57
70
|
const googleOptions = await parseProviderOptions({
|
|
@@ -70,7 +83,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
|
|
|
70
83
|
}
|
|
71
84
|
|
|
72
85
|
const mergedHeaders = combineHeaders(
|
|
73
|
-
await resolve(this.config.headers),
|
|
86
|
+
this.config.headers ? await resolve(this.config.headers) : undefined,
|
|
74
87
|
headers,
|
|
75
88
|
);
|
|
76
89
|
|
package/src/google-error.ts
CHANGED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AISDKError,
|
|
3
|
+
type FilesV4,
|
|
4
|
+
type FilesV4UploadFileCallOptions,
|
|
5
|
+
type FilesV4UploadFileResult,
|
|
6
|
+
type SharedV4Warning,
|
|
7
|
+
} from '@ai-sdk/provider';
|
|
8
|
+
import {
|
|
9
|
+
combineHeaders,
|
|
10
|
+
convertInlineFileDataToUint8Array,
|
|
11
|
+
createJsonResponseHandler,
|
|
12
|
+
delay,
|
|
13
|
+
lazySchema,
|
|
14
|
+
parseProviderOptions,
|
|
15
|
+
zodSchema,
|
|
16
|
+
getFromApi,
|
|
17
|
+
type FetchFunction,
|
|
18
|
+
} from '@ai-sdk/provider-utils';
|
|
19
|
+
import { z } from 'zod/v4';
|
|
20
|
+
import { googleFailedResponseHandler } from './google-error';
|
|
21
|
+
|
|
22
|
+
export type GoogleFilesUploadOptions = {
|
|
23
|
+
displayName?: string | null;
|
|
24
|
+
pollIntervalMs?: number | null;
|
|
25
|
+
pollTimeoutMs?: number | null;
|
|
26
|
+
|
|
27
|
+
[key: string]: unknown;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
interface GoogleFilesConfig {
|
|
31
|
+
provider: string;
|
|
32
|
+
baseURL: string;
|
|
33
|
+
headers: () => Record<string, string | undefined>;
|
|
34
|
+
fetch?: FetchFunction;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export class GoogleFiles implements FilesV4 {
|
|
38
|
+
readonly specificationVersion = 'v4';
|
|
39
|
+
|
|
40
|
+
get provider(): string {
|
|
41
|
+
return this.config.provider;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
constructor(private readonly config: GoogleFilesConfig) {}
|
|
45
|
+
|
|
46
|
+
async uploadFile(
|
|
47
|
+
options: FilesV4UploadFileCallOptions,
|
|
48
|
+
): Promise<FilesV4UploadFileResult> {
|
|
49
|
+
const googleOptions = (await parseProviderOptions({
|
|
50
|
+
provider: 'google',
|
|
51
|
+
providerOptions: options.providerOptions,
|
|
52
|
+
schema: googleFilesUploadOptionsSchema,
|
|
53
|
+
})) as GoogleFilesUploadOptions | undefined;
|
|
54
|
+
|
|
55
|
+
const resolvedHeaders = this.config.headers();
|
|
56
|
+
const fetchFn = this.config.fetch ?? globalThis.fetch;
|
|
57
|
+
|
|
58
|
+
const warnings: Array<SharedV4Warning> = [];
|
|
59
|
+
if (options.filename != null) {
|
|
60
|
+
warnings.push({ type: 'unsupported', feature: 'filename' });
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
const fileBytes = convertInlineFileDataToUint8Array(options.data);
|
|
64
|
+
|
|
65
|
+
const mediaType = options.mediaType;
|
|
66
|
+
const displayName = googleOptions?.displayName;
|
|
67
|
+
|
|
68
|
+
const baseOrigin = this.config.baseURL.replace(/\/v1beta$/, '');
|
|
69
|
+
|
|
70
|
+
const initResponse = await fetchFn(`${baseOrigin}/upload/v1beta/files`, {
|
|
71
|
+
method: 'POST',
|
|
72
|
+
headers: {
|
|
73
|
+
...resolvedHeaders,
|
|
74
|
+
'X-Goog-Upload-Protocol': 'resumable',
|
|
75
|
+
'X-Goog-Upload-Command': 'start',
|
|
76
|
+
'X-Goog-Upload-Header-Content-Length': String(fileBytes.length),
|
|
77
|
+
'X-Goog-Upload-Header-Content-Type': mediaType,
|
|
78
|
+
'Content-Type': 'application/json',
|
|
79
|
+
},
|
|
80
|
+
body: JSON.stringify({
|
|
81
|
+
file: {
|
|
82
|
+
...(displayName != null ? { display_name: displayName } : {}),
|
|
83
|
+
},
|
|
84
|
+
}),
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
if (!initResponse.ok) {
|
|
88
|
+
const errorBody = await initResponse.text();
|
|
89
|
+
throw new AISDKError({
|
|
90
|
+
name: 'GOOGLE_FILES_UPLOAD_ERROR',
|
|
91
|
+
message: `Failed to initiate resumable upload: ${initResponse.status} ${errorBody}`,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const uploadUrl = initResponse.headers.get('x-goog-upload-url');
|
|
96
|
+
if (!uploadUrl) {
|
|
97
|
+
throw new AISDKError({
|
|
98
|
+
name: 'GOOGLE_FILES_UPLOAD_ERROR',
|
|
99
|
+
message: 'No upload URL returned from initiation request',
|
|
100
|
+
});
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const uploadResponse = await fetchFn(uploadUrl, {
|
|
104
|
+
method: 'POST',
|
|
105
|
+
headers: {
|
|
106
|
+
'Content-Length': String(fileBytes.length),
|
|
107
|
+
'X-Goog-Upload-Offset': '0',
|
|
108
|
+
'X-Goog-Upload-Command': 'upload, finalize',
|
|
109
|
+
},
|
|
110
|
+
body: fileBytes,
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
if (!uploadResponse.ok) {
|
|
114
|
+
const errorBody = await uploadResponse.text();
|
|
115
|
+
throw new AISDKError({
|
|
116
|
+
name: 'GOOGLE_FILES_UPLOAD_ERROR',
|
|
117
|
+
message: `Failed to upload file data: ${uploadResponse.status} ${errorBody}`,
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
const uploadResult = (await uploadResponse.json()) as {
|
|
122
|
+
file: GoogleFileResource;
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
let file = uploadResult.file;
|
|
126
|
+
|
|
127
|
+
const pollIntervalMs = googleOptions?.pollIntervalMs ?? 2000;
|
|
128
|
+
const pollTimeoutMs = googleOptions?.pollTimeoutMs ?? 300000;
|
|
129
|
+
const startTime = Date.now();
|
|
130
|
+
|
|
131
|
+
while (file.state === 'PROCESSING') {
|
|
132
|
+
if (Date.now() - startTime > pollTimeoutMs) {
|
|
133
|
+
throw new AISDKError({
|
|
134
|
+
name: 'GOOGLE_FILES_UPLOAD_TIMEOUT',
|
|
135
|
+
message: `File processing timed out after ${pollTimeoutMs}ms`,
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
await delay(pollIntervalMs);
|
|
140
|
+
|
|
141
|
+
const { value: fileStatus } = await getFromApi({
|
|
142
|
+
url: `${this.config.baseURL}/${file.name}`,
|
|
143
|
+
headers: combineHeaders(resolvedHeaders),
|
|
144
|
+
successfulResponseHandler: createJsonResponseHandler(
|
|
145
|
+
googleFileResponseSchema,
|
|
146
|
+
),
|
|
147
|
+
failedResponseHandler: googleFailedResponseHandler,
|
|
148
|
+
fetch: this.config.fetch,
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
file = fileStatus;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
if (file.state === 'FAILED') {
|
|
155
|
+
throw new AISDKError({
|
|
156
|
+
name: 'GOOGLE_FILES_UPLOAD_FAILED',
|
|
157
|
+
message: `File processing failed for ${file.name}`,
|
|
158
|
+
});
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
return {
|
|
162
|
+
warnings,
|
|
163
|
+
providerReference: { google: file.uri },
|
|
164
|
+
mediaType: file.mimeType ?? options.mediaType,
|
|
165
|
+
providerMetadata: {
|
|
166
|
+
google: {
|
|
167
|
+
name: file.name,
|
|
168
|
+
displayName: file.displayName,
|
|
169
|
+
mimeType: file.mimeType,
|
|
170
|
+
sizeBytes: file.sizeBytes,
|
|
171
|
+
state: file.state,
|
|
172
|
+
uri: file.uri,
|
|
173
|
+
...(file.createTime != null ? { createTime: file.createTime } : {}),
|
|
174
|
+
...(file.updateTime != null ? { updateTime: file.updateTime } : {}),
|
|
175
|
+
...(file.expirationTime != null
|
|
176
|
+
? { expirationTime: file.expirationTime }
|
|
177
|
+
: {}),
|
|
178
|
+
...(file.sha256Hash != null ? { sha256Hash: file.sha256Hash } : {}),
|
|
179
|
+
},
|
|
180
|
+
},
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
type GoogleFileResource = {
|
|
186
|
+
name: string;
|
|
187
|
+
displayName?: string | null;
|
|
188
|
+
mimeType: string;
|
|
189
|
+
sizeBytes?: string | null;
|
|
190
|
+
createTime?: string | null;
|
|
191
|
+
updateTime?: string | null;
|
|
192
|
+
expirationTime?: string | null;
|
|
193
|
+
sha256Hash?: string | null;
|
|
194
|
+
uri: string;
|
|
195
|
+
state: string;
|
|
196
|
+
};
|
|
197
|
+
|
|
198
|
+
const googleFileResponseSchema = lazySchema(() =>
|
|
199
|
+
zodSchema(
|
|
200
|
+
z.object({
|
|
201
|
+
name: z.string(),
|
|
202
|
+
displayName: z.string().nullish(),
|
|
203
|
+
mimeType: z.string(),
|
|
204
|
+
sizeBytes: z.string().nullish(),
|
|
205
|
+
createTime: z.string().nullish(),
|
|
206
|
+
updateTime: z.string().nullish(),
|
|
207
|
+
expirationTime: z.string().nullish(),
|
|
208
|
+
sha256Hash: z.string().nullish(),
|
|
209
|
+
uri: z.string(),
|
|
210
|
+
state: z.string(),
|
|
211
|
+
}),
|
|
212
|
+
),
|
|
213
|
+
);
|
|
214
|
+
|
|
215
|
+
const googleFilesUploadOptionsSchema = lazySchema(() =>
|
|
216
|
+
zodSchema(
|
|
217
|
+
z
|
|
218
|
+
.object({
|
|
219
|
+
displayName: z.string().nullish(),
|
|
220
|
+
pollIntervalMs: z.number().positive().nullish(),
|
|
221
|
+
pollTimeoutMs: z.number().positive().nullish(),
|
|
222
|
+
})
|
|
223
|
+
.passthrough(),
|
|
224
|
+
),
|
|
225
|
+
);
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import {
|
|
2
|
+
lazySchema,
|
|
3
|
+
zodSchema,
|
|
4
|
+
type InferSchema,
|
|
5
|
+
} from '@ai-sdk/provider-utils';
|
|
6
|
+
import { z } from 'zod/v4';
|
|
7
|
+
import { googleSearchToolArgsBaseSchema } from './tool/google-search';
|
|
8
|
+
|
|
9
|
+
// Note: For the initial GA launch of Imagen 3, safety filters are not configurable.
|
|
10
|
+
// https://ai.google.dev/gemini-api/docs/imagen#imagen-model
|
|
11
|
+
export const googleImageModelOptionsSchema = lazySchema(() =>
|
|
12
|
+
zodSchema(
|
|
13
|
+
z.object({
|
|
14
|
+
personGeneration: z
|
|
15
|
+
.enum(['dont_allow', 'allow_adult', 'allow_all'])
|
|
16
|
+
.nullish(),
|
|
17
|
+
aspectRatio: z.enum(['1:1', '3:4', '4:3', '9:16', '16:9']).nullish(),
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Enable Google Search grounding for Gemini image models. The value is
|
|
21
|
+
* forwarded as the args of the `google.tools.googleSearch` provider
|
|
22
|
+
* tool on the underlying language-model call. Pass `{}` for defaults.
|
|
23
|
+
*
|
|
24
|
+
* `generateImage` does not accept a `tools` parameter, so this is the
|
|
25
|
+
* dedicated escape hatch for grounding image generation the same way
|
|
26
|
+
* `generateText` does.
|
|
27
|
+
*/
|
|
28
|
+
googleSearch: googleSearchToolArgsBaseSchema.optional(),
|
|
29
|
+
}),
|
|
30
|
+
),
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
export type GoogleImageModelOptions = InferSchema<
|
|
34
|
+
typeof googleImageModelOptionsSchema
|
|
35
|
+
>;
|
|
@@ -1,32 +1,35 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import type {
|
|
2
|
+
ImageModelV4,
|
|
3
|
+
LanguageModelV4Prompt,
|
|
4
|
+
SharedV4Warning,
|
|
5
5
|
} from '@ai-sdk/provider';
|
|
6
6
|
import {
|
|
7
7
|
combineHeaders,
|
|
8
8
|
convertToBase64,
|
|
9
9
|
createJsonResponseHandler,
|
|
10
|
-
FetchFunction,
|
|
11
10
|
generateId as defaultGenerateId,
|
|
12
|
-
type InferSchema,
|
|
13
11
|
lazySchema,
|
|
14
12
|
parseProviderOptions,
|
|
15
13
|
postJsonToApi,
|
|
16
|
-
Resolvable,
|
|
17
14
|
resolve,
|
|
15
|
+
serializeModelOptions,
|
|
16
|
+
WORKFLOW_SERIALIZE,
|
|
17
|
+
WORKFLOW_DESERIALIZE,
|
|
18
18
|
zodSchema,
|
|
19
|
+
type FetchFunction,
|
|
20
|
+
type Resolvable,
|
|
19
21
|
} from '@ai-sdk/provider-utils';
|
|
20
22
|
import { z } from 'zod/v4';
|
|
21
23
|
import { googleFailedResponseHandler } from './google-error';
|
|
22
|
-
import {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
import
|
|
28
|
-
|
|
29
|
-
|
|
24
|
+
import { googleImageModelOptionsSchema } from './google-image-model-options';
|
|
25
|
+
import type {
|
|
26
|
+
GoogleImageModelId,
|
|
27
|
+
GoogleImageSettings,
|
|
28
|
+
} from './google-image-settings';
|
|
29
|
+
import { GoogleLanguageModel } from './google-language-model';
|
|
30
|
+
import type { GoogleLanguageModelOptions } from './google-language-model-options';
|
|
31
|
+
|
|
32
|
+
interface GoogleImageModelConfig {
|
|
30
33
|
provider: string;
|
|
31
34
|
baseURL: string;
|
|
32
35
|
headers?: Resolvable<Record<string, string | undefined>>;
|
|
@@ -37,8 +40,22 @@ interface GoogleGenerativeAIImageModelConfig {
|
|
|
37
40
|
};
|
|
38
41
|
}
|
|
39
42
|
|
|
40
|
-
export class
|
|
41
|
-
readonly specificationVersion = '
|
|
43
|
+
export class GoogleImageModel implements ImageModelV4 {
|
|
44
|
+
readonly specificationVersion = 'v4';
|
|
45
|
+
|
|
46
|
+
static [WORKFLOW_SERIALIZE](model: GoogleImageModel) {
|
|
47
|
+
return serializeModelOptions({
|
|
48
|
+
modelId: model.modelId,
|
|
49
|
+
config: model.config,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
static [WORKFLOW_DESERIALIZE](options: {
|
|
54
|
+
modelId: string;
|
|
55
|
+
config: GoogleImageModelConfig;
|
|
56
|
+
}) {
|
|
57
|
+
return new GoogleImageModel(options.modelId, {}, options.config);
|
|
58
|
+
}
|
|
42
59
|
|
|
43
60
|
get maxImagesPerCall(): number {
|
|
44
61
|
if (this.settings.maxImagesPerCall != null) {
|
|
@@ -57,14 +74,14 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
57
74
|
}
|
|
58
75
|
|
|
59
76
|
constructor(
|
|
60
|
-
readonly modelId:
|
|
61
|
-
private readonly settings:
|
|
62
|
-
private readonly config:
|
|
77
|
+
readonly modelId: GoogleImageModelId,
|
|
78
|
+
private readonly settings: GoogleImageSettings,
|
|
79
|
+
private readonly config: GoogleImageModelConfig,
|
|
63
80
|
) {}
|
|
64
81
|
|
|
65
82
|
async doGenerate(
|
|
66
|
-
options: Parameters<
|
|
67
|
-
): Promise<Awaited<ReturnType<
|
|
83
|
+
options: Parameters<ImageModelV4['doGenerate']>[0],
|
|
84
|
+
): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
|
|
68
85
|
// Gemini image models use the language model API internally
|
|
69
86
|
if (isGeminiModel(this.modelId)) {
|
|
70
87
|
return this.doGenerateGemini(options);
|
|
@@ -73,8 +90,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
73
90
|
}
|
|
74
91
|
|
|
75
92
|
private async doGenerateImagen(
|
|
76
|
-
options: Parameters<
|
|
77
|
-
): Promise<Awaited<ReturnType<
|
|
93
|
+
options: Parameters<ImageModelV4['doGenerate']>[0],
|
|
94
|
+
): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
|
|
78
95
|
const {
|
|
79
96
|
prompt,
|
|
80
97
|
n = 1,
|
|
@@ -87,19 +104,19 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
87
104
|
files,
|
|
88
105
|
mask,
|
|
89
106
|
} = options;
|
|
90
|
-
const warnings: Array<
|
|
107
|
+
const warnings: Array<SharedV4Warning> = [];
|
|
91
108
|
|
|
92
109
|
// Imagen API endpoints do not support image editing
|
|
93
110
|
if (files != null && files.length > 0) {
|
|
94
111
|
throw new Error(
|
|
95
|
-
'Google
|
|
112
|
+
'Google Gemini API does not support image editing with Imagen models. ' +
|
|
96
113
|
'Use Google Vertex AI (@ai-sdk/google-vertex) for image editing capabilities.',
|
|
97
114
|
);
|
|
98
115
|
}
|
|
99
116
|
|
|
100
117
|
if (mask != null) {
|
|
101
118
|
throw new Error(
|
|
102
|
-
'Google
|
|
119
|
+
'Google Gemini API does not support image editing with masks. ' +
|
|
103
120
|
'Use Google Vertex AI (@ai-sdk/google-vertex) for image editing capabilities.',
|
|
104
121
|
);
|
|
105
122
|
}
|
|
@@ -139,7 +156,17 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
139
156
|
}
|
|
140
157
|
|
|
141
158
|
if (googleOptions) {
|
|
142
|
-
|
|
159
|
+
const { googleSearch: imagenGoogleSearch, ...imagenOptions } =
|
|
160
|
+
googleOptions;
|
|
161
|
+
if (imagenGoogleSearch != null) {
|
|
162
|
+
warnings.push({
|
|
163
|
+
type: 'unsupported',
|
|
164
|
+
feature: 'googleSearch',
|
|
165
|
+
details:
|
|
166
|
+
'Google Search grounding is only supported on Gemini image models.',
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
Object.assign(parameters, imagenOptions);
|
|
143
170
|
}
|
|
144
171
|
|
|
145
172
|
const body = {
|
|
@@ -151,7 +178,10 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
151
178
|
predictions: Array<{ bytesBase64Encoded: string }>;
|
|
152
179
|
}>({
|
|
153
180
|
url: `${this.config.baseURL}/models/${this.modelId}:predict`,
|
|
154
|
-
headers: combineHeaders(
|
|
181
|
+
headers: combineHeaders(
|
|
182
|
+
this.config.headers ? await resolve(this.config.headers) : undefined,
|
|
183
|
+
headers,
|
|
184
|
+
),
|
|
155
185
|
body,
|
|
156
186
|
failedResponseHandler: googleFailedResponseHandler,
|
|
157
187
|
successfulResponseHandler: createJsonResponseHandler(
|
|
@@ -181,8 +211,8 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
181
211
|
}
|
|
182
212
|
|
|
183
213
|
private async doGenerateGemini(
|
|
184
|
-
options: Parameters<
|
|
185
|
-
): Promise<Awaited<ReturnType<
|
|
214
|
+
options: Parameters<ImageModelV4['doGenerate']>[0],
|
|
215
|
+
): Promise<Awaited<ReturnType<ImageModelV4['doGenerate']>>> {
|
|
186
216
|
const {
|
|
187
217
|
prompt,
|
|
188
218
|
n,
|
|
@@ -195,7 +225,7 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
195
225
|
files,
|
|
196
226
|
mask,
|
|
197
227
|
} = options;
|
|
198
|
-
const warnings: Array<
|
|
228
|
+
const warnings: Array<SharedV4Warning> = [];
|
|
199
229
|
|
|
200
230
|
// Gemini does not support mask-based inpainting
|
|
201
231
|
if (mask != null) {
|
|
@@ -220,45 +250,63 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
220
250
|
});
|
|
221
251
|
}
|
|
222
252
|
|
|
223
|
-
// Build user message content for language model
|
|
224
253
|
const userContent: Array<
|
|
225
254
|
| { type: 'text'; text: string }
|
|
226
|
-
| {
|
|
255
|
+
| {
|
|
256
|
+
type: 'file';
|
|
257
|
+
data:
|
|
258
|
+
| { type: 'data'; data: string | Uint8Array }
|
|
259
|
+
| { type: 'url'; url: URL };
|
|
260
|
+
mediaType: string;
|
|
261
|
+
}
|
|
227
262
|
> = [];
|
|
228
263
|
|
|
229
|
-
// Add text prompt
|
|
230
264
|
if (prompt != null) {
|
|
231
265
|
userContent.push({ type: 'text', text: prompt });
|
|
232
266
|
}
|
|
233
267
|
|
|
234
|
-
// Add input images for editing
|
|
235
268
|
if (files != null && files.length > 0) {
|
|
236
269
|
for (const file of files) {
|
|
237
270
|
if (file.type === 'url') {
|
|
238
271
|
userContent.push({
|
|
239
272
|
type: 'file',
|
|
240
|
-
data: new URL(file.url),
|
|
273
|
+
data: { type: 'url', url: new URL(file.url) },
|
|
241
274
|
mediaType: 'image/*',
|
|
242
275
|
});
|
|
243
276
|
} else {
|
|
244
277
|
userContent.push({
|
|
245
278
|
type: 'file',
|
|
246
|
-
data:
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
279
|
+
data: {
|
|
280
|
+
type: 'data',
|
|
281
|
+
data:
|
|
282
|
+
typeof file.data === 'string'
|
|
283
|
+
? file.data
|
|
284
|
+
: new Uint8Array(file.data),
|
|
285
|
+
},
|
|
250
286
|
mediaType: file.mediaType,
|
|
251
287
|
});
|
|
252
288
|
}
|
|
253
289
|
}
|
|
254
290
|
}
|
|
255
291
|
|
|
256
|
-
const languageModelPrompt:
|
|
292
|
+
const languageModelPrompt: LanguageModelV4Prompt = [
|
|
257
293
|
{ role: 'user', content: userContent },
|
|
258
294
|
];
|
|
259
295
|
|
|
296
|
+
// Parse image-model-specific provider options so we can map them onto
|
|
297
|
+
// the underlying language-model call. `googleSearch` is the dedicated
|
|
298
|
+
// escape hatch for grounding (generateImage has no `tools` parameter).
|
|
299
|
+
const googleImageOptions = await parseProviderOptions({
|
|
300
|
+
provider: 'google',
|
|
301
|
+
providerOptions,
|
|
302
|
+
schema: googleImageModelOptionsSchema,
|
|
303
|
+
});
|
|
304
|
+
|
|
305
|
+
const { googleSearch: _strippedGoogleSearch, ...passthroughGoogleOptions } =
|
|
306
|
+
providerOptions?.google ?? {};
|
|
307
|
+
|
|
260
308
|
// Instantiate language model
|
|
261
|
-
const languageModel = new
|
|
309
|
+
const languageModel = new GoogleLanguageModel(this.modelId, {
|
|
262
310
|
provider: this.config.provider,
|
|
263
311
|
baseURL: this.config.baseURL,
|
|
264
312
|
headers: this.config.headers ?? {},
|
|
@@ -280,31 +328,51 @@ export class GoogleGenerativeAIImageModel implements ImageModelV3 {
|
|
|
280
328
|
>['aspectRatio'],
|
|
281
329
|
}
|
|
282
330
|
: undefined,
|
|
283
|
-
...(
|
|
331
|
+
...(passthroughGoogleOptions as Omit<
|
|
284
332
|
GoogleLanguageModelOptions,
|
|
285
333
|
'responseModalities' | 'imageConfig'
|
|
286
|
-
>)
|
|
334
|
+
>),
|
|
287
335
|
} satisfies GoogleLanguageModelOptions,
|
|
288
336
|
},
|
|
337
|
+
tools:
|
|
338
|
+
googleImageOptions?.googleSearch != null
|
|
339
|
+
? [
|
|
340
|
+
{
|
|
341
|
+
type: 'provider',
|
|
342
|
+
id: 'google.google_search',
|
|
343
|
+
name: 'google_search',
|
|
344
|
+
args: googleImageOptions.googleSearch,
|
|
345
|
+
},
|
|
346
|
+
]
|
|
347
|
+
: undefined,
|
|
289
348
|
headers,
|
|
290
349
|
abortSignal,
|
|
291
350
|
});
|
|
292
351
|
|
|
293
352
|
const currentDate = this.config._internal?.currentDate?.() ?? new Date();
|
|
294
353
|
|
|
295
|
-
// Extract images from language model response
|
|
296
354
|
const images: string[] = [];
|
|
297
355
|
for (const part of result.content) {
|
|
298
|
-
if (
|
|
299
|
-
|
|
356
|
+
if (
|
|
357
|
+
part.type === 'file' &&
|
|
358
|
+
part.mediaType.startsWith('image/') &&
|
|
359
|
+
part.data.type === 'data'
|
|
360
|
+
) {
|
|
361
|
+
images.push(convertToBase64(part.data.data));
|
|
300
362
|
}
|
|
301
363
|
}
|
|
302
364
|
|
|
365
|
+
const languageModelGoogleMetadata =
|
|
366
|
+
(result.providerMetadata?.google as
|
|
367
|
+
| Record<string, unknown>
|
|
368
|
+
| undefined) ?? {};
|
|
369
|
+
|
|
303
370
|
return {
|
|
304
371
|
images,
|
|
305
372
|
warnings,
|
|
306
373
|
providerMetadata: {
|
|
307
374
|
google: {
|
|
375
|
+
...languageModelGoogleMetadata,
|
|
308
376
|
images: images.map(() => ({})),
|
|
309
377
|
},
|
|
310
378
|
},
|
|
@@ -340,20 +408,3 @@ const googleImageResponseSchema = lazySchema(() =>
|
|
|
340
408
|
}),
|
|
341
409
|
),
|
|
342
410
|
);
|
|
343
|
-
|
|
344
|
-
// Note: For the initial GA launch of Imagen 3, safety filters are not configurable.
|
|
345
|
-
// https://ai.google.dev/gemini-api/docs/imagen#imagen-model
|
|
346
|
-
const googleImageModelOptionsSchema = lazySchema(() =>
|
|
347
|
-
zodSchema(
|
|
348
|
-
z.object({
|
|
349
|
-
personGeneration: z
|
|
350
|
-
.enum(['dont_allow', 'allow_adult', 'allow_all'])
|
|
351
|
-
.nullish(),
|
|
352
|
-
aspectRatio: z.enum(['1:1', '3:4', '4:3', '9:16', '16:9']).nullish(),
|
|
353
|
-
}),
|
|
354
|
-
),
|
|
355
|
-
);
|
|
356
|
-
|
|
357
|
-
export type GoogleImageModelOptions = InferSchema<
|
|
358
|
-
typeof googleImageModelOptionsSchema
|
|
359
|
-
>;
|