@ai-sdk/google 4.0.0-beta.7 → 4.0.0-beta.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +614 -5
- package/README.md +6 -4
- package/dist/index.d.ts +301 -50
- package/dist/index.js +5410 -639
- package/dist/index.js.map +1 -1
- package/dist/internal/index.d.ts +100 -26
- package/dist/internal/index.js +1653 -451
- package/dist/internal/index.js.map +1 -1
- package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
- package/package.json +16 -17
- package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
- package/src/convert-json-schema-to-openapi-schema.ts +1 -1
- package/src/convert-to-google-messages.ts +647 -0
- package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
- package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
- package/src/google-error.ts +1 -1
- package/src/google-files.ts +225 -0
- package/src/google-image-model-options.ts +35 -0
- package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
- package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
- package/src/google-json-accumulator.ts +371 -0
- package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
- package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +701 -219
- package/src/google-prepare-tools.ts +72 -12
- package/src/google-prompt.ts +86 -0
- package/src/google-provider.ts +157 -53
- package/src/google-speech-api.ts +36 -0
- package/src/google-speech-model-options.ts +48 -0
- package/src/google-speech-model.ts +311 -0
- package/src/google-video-model-options.ts +43 -0
- package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
- package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
- package/src/index.ts +40 -9
- package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
- package/src/interactions/cancel-google-interaction.ts +60 -0
- package/src/interactions/convert-google-interactions-usage.ts +47 -0
- package/src/interactions/convert-to-google-interactions-input.ts +557 -0
- package/src/interactions/extract-google-interactions-sources.ts +252 -0
- package/src/interactions/google-interactions-agent.ts +15 -0
- package/src/interactions/google-interactions-api.ts +530 -0
- package/src/interactions/google-interactions-language-model-options.ts +262 -0
- package/src/interactions/google-interactions-language-model.ts +776 -0
- package/src/interactions/google-interactions-prompt.ts +582 -0
- package/src/interactions/google-interactions-provider-metadata.ts +23 -0
- package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
- package/src/interactions/parse-google-interactions-outputs.ts +252 -0
- package/src/interactions/poll-google-interactions.ts +129 -0
- package/src/interactions/prepare-google-interactions-tools.ts +245 -0
- package/src/interactions/stream-google-interactions.ts +242 -0
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
- package/src/internal/index.ts +3 -2
- package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
- package/src/realtime/google-realtime-event-mapper.ts +383 -0
- package/src/realtime/google-realtime-model-options.ts +3 -0
- package/src/realtime/google-realtime-model.ts +160 -0
- package/src/realtime/index.ts +2 -0
- package/src/tool/code-execution.ts +2 -2
- package/src/tool/enterprise-web-search.ts +9 -3
- package/src/tool/file-search.ts +5 -7
- package/src/tool/google-maps.ts +3 -2
- package/src/tool/google-search.ts +11 -12
- package/src/tool/url-context.ts +4 -2
- package/src/tool/vertex-rag-store.ts +9 -6
- package/dist/index.d.mts +0 -376
- package/dist/index.mjs +0 -2517
- package/dist/index.mjs.map +0 -1
- package/dist/internal/index.d.mts +0 -284
- package/dist/internal/index.mjs +0 -1706
- package/dist/internal/index.mjs.map +0 -1
- package/src/convert-to-google-generative-ai-messages.ts +0 -239
- package/src/google-generative-ai-prompt.ts +0 -38
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import type { SpeechModelV4, SharedV4Warning } from '@ai-sdk/provider';
|
|
2
|
+
import {
|
|
3
|
+
combineHeaders,
|
|
4
|
+
convertBase64ToUint8Array,
|
|
5
|
+
createJsonResponseHandler,
|
|
6
|
+
parseProviderOptions,
|
|
7
|
+
postJsonToApi,
|
|
8
|
+
resolve,
|
|
9
|
+
serializeModelOptions,
|
|
10
|
+
WORKFLOW_DESERIALIZE,
|
|
11
|
+
WORKFLOW_SERIALIZE,
|
|
12
|
+
type FetchFunction,
|
|
13
|
+
type Resolvable,
|
|
14
|
+
} from '@ai-sdk/provider-utils';
|
|
15
|
+
import { googleFailedResponseHandler } from './google-error';
|
|
16
|
+
import { googleSpeechResponseSchema } from './google-speech-api';
|
|
17
|
+
import {
|
|
18
|
+
googleSpeechProviderOptionsSchema,
|
|
19
|
+
type GoogleSpeechModelId,
|
|
20
|
+
type GoogleSpeechModelOptions,
|
|
21
|
+
} from './google-speech-model-options';
|
|
22
|
+
|
|
23
|
+
/**
 * Construction-time settings for `GoogleSpeechModel`.
 */
interface GoogleSpeechModelConfig {
  /**
   * Provider identifier returned by the model's `provider` getter. It is also
   * inspected for the substring `vertex` to decide which `providerOptions`
   * namespaces are consulted.
   */
  provider: string;

  /**
   * URL prefix for API calls; requests go to
   * `${baseURL}/models/${modelId}:generateContent`.
   */
  baseURL: string;

  /**
   * Optional headers (or a resolvable producing them) that are resolved per
   * request and combined with the per-call headers.
   */
  headers?: Resolvable<Record<string, string | undefined>>;

  /** Optional fetch implementation forwarded to the HTTP helper. */
  fetch?: FetchFunction;

  /** Internal hooks. */
  _internal?: {
    /** Overrides the clock used for the response timestamp. */
    currentDate?: () => Date;
  };
}
|
|
32
|
+
|
|
33
|
+
/** Prebuilt voice name used when the caller does not pass `voice`. */
const DEFAULT_VOICE = 'Kore';

/**
 * Fallback sample rate in Hz. Gemini TTS returns raw PCM at 24kHz when the
 * response mime type does not specify a rate.
 */
const DEFAULT_SAMPLE_RATE = 24000;
|
|
36
|
+
|
|
37
|
+
/**
 * Speech (text-to-speech) model backed by the `generateContent` endpoint.
 *
 * A request asks for the `AUDIO` response modality and the first inline-data
 * part of the response is returned as the generated audio. By default the raw
 * PCM is wrapped in a WAV header; `outputFormat: 'pcm'` returns it untouched.
 */
export class GoogleSpeechModel implements SpeechModelV4 {
  readonly specificationVersion = 'v4';

  /** Workflow serialization hook: captures the model id and its config. */
  static [WORKFLOW_SERIALIZE](model: GoogleSpeechModel) {
    return serializeModelOptions({
      modelId: model.modelId,
      config: model.config,
    });
  }

  /** Workflow deserialization hook: rebuilds a model from serialized options. */
  static [WORKFLOW_DESERIALIZE](options: {
    modelId: GoogleSpeechModelId;
    config: GoogleSpeechModelConfig;
  }) {
    return new GoogleSpeechModel(options.modelId, options.config);
  }

  /** Provider identifier taken from the config. */
  get provider(): string {
    return this.config.provider;
  }

  /**
   * @param modelId - Model identifier interpolated into the request URL.
   * @param config - Provider name, base URL, headers, fetch and internal hooks.
   */
  constructor(
    readonly modelId: GoogleSpeechModelId,
    private readonly config: GoogleSpeechModelConfig,
  ) {}

  /**
   * Translates the standardized call options into a `generateContent` request
   * body, collecting a warning for every option that is ignored.
   *
   * @returns The request body, the collected warnings, and the resolved
   *   output format (`'wav'` unless `'pcm'` was explicitly requested).
   */
  private async getArgs({
    text,
    voice = DEFAULT_VOICE,
    outputFormat,
    instructions,
    speed,
    language,
    providerOptions,
  }: Parameters<SpeechModelV4['doGenerate']>[0]) {
    const warnings: SharedV4Warning[] = [];

    // Namespaces to look up in providerOptions, in priority order. The Vertex
    // provider exposes these under `googleVertex`/`vertex` (matching the Google
    // Vertex language model) and falls back to `google`, since a Vertex
    // provider may receive options under that key (e.g. via the AI Gateway).
    // Every other Google provider uses `google` only.
    const providerOptionsNames: readonly string[] =
      this.config.provider.includes('vertex')
        ? ['googleVertex', 'vertex', 'google']
        : ['google'];

    // The first namespace that yields options wins.
    let googleOptions: GoogleSpeechModelOptions | undefined;
    for (const name of providerOptionsNames) {
      googleOptions = await parseProviderOptions({
        provider: name,
        providerOptions,
        schema: googleSpeechProviderOptionsSchema,
      });
      if (googleOptions != null) {
        break;
      }
    }

    // Multi-speaker (provider option) takes precedence over the single voice.
    const multiSpeakerVoiceConfig = googleOptions?.multiSpeakerVoiceConfig;
    const speechConfig = multiSpeakerVoiceConfig
      ? { multiSpeakerVoiceConfig }
      : { voiceConfig: { prebuiltVoiceConfig: { voiceName: voice } } };

    // Gemini honors natural-language style direction expressed in the prompt
    // text, so map `instructions` onto the spoken content. With multi-speaker
    // the transcript starts with speaker labels (e.g. `Joe: ...`), so prepending
    // instructions would corrupt that parsing — ignore them there (with a warning).
    // Blank instructions are treated as absent so the prompt never starts with
    // a dangling ": " prefix.
    let promptText = text;
    if (instructions != null && instructions.trim() !== '') {
      if (multiSpeakerVoiceConfig) {
        warnings.push({
          type: 'unsupported',
          feature: 'instructions',
          details:
            'Google Gemini TTS ignores `instructions` when `multiSpeakerVoiceConfig` is set, ' +
            'because prepending them would break multi-speaker transcript parsing.',
        });
      } else {
        promptText = `${instructions}: ${text}`;
      }
    }

    if (speed != null) {
      warnings.push({
        type: 'unsupported',
        feature: 'speed',
        details:
          'Google Gemini TTS models do not support the `speed` option. It was ignored.',
      });
    }

    if (language != null) {
      warnings.push({
        type: 'unsupported',
        feature: 'language',
        details:
          'Google Gemini TTS models do not support the `language` option. ' +
          'Language is detected automatically from the input text.',
      });
    }

    // Only `wav` (default, WAV-wrapped) and `pcm` (raw) are supported.
    let resolvedOutputFormat: 'wav' | 'pcm' = 'wav';
    if (outputFormat === 'pcm') {
      resolvedOutputFormat = 'pcm';
    } else if (outputFormat != null && outputFormat !== 'wav') {
      warnings.push({
        type: 'unsupported',
        feature: 'outputFormat',
        details: `Unsupported output format: ${outputFormat}. Using wav instead.`,
      });
    }

    const requestBody = {
      contents: [{ role: 'user', parts: [{ text: promptText }] }],
      generationConfig: {
        responseModalities: ['AUDIO'],
        speechConfig,
      },
    };

    return { requestBody, warnings, outputFormat: resolvedOutputFormat };
  }

  /**
   * Generates speech for the given options.
   *
   * @returns The audio bytes (WAV-wrapped PCM, raw PCM, or an empty array when
   *   the response carried no inline audio), the warnings, request/response
   *   details, and `providerMetadata.google` with the sample rate and the
   *   mime type reported by the API (`null` when absent).
   */
  async doGenerate(
    options: Parameters<SpeechModelV4['doGenerate']>[0],
  ): Promise<Awaited<ReturnType<SpeechModelV4['doGenerate']>>> {
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const { requestBody, warnings, outputFormat } = await this.getArgs(options);

    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse,
    } = await postJsonToApi({
      url: `${this.config.baseURL}/models/${this.modelId}:generateContent`,
      headers: combineHeaders(
        this.config.headers ? await resolve(this.config.headers) : undefined,
        options.headers,
      ),
      body: requestBody,
      failedResponseHandler: googleFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        googleSpeechResponseSchema,
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    // `generateSpeech` returns a single audio result, and Gemini returns one
    // inline audio part per request, so take the first inline-data part.
    let base64Audio: string | undefined;
    let mimeType: string | undefined;
    for (const candidate of response.candidates ?? []) {
      for (const part of candidate.content?.parts ?? []) {
        if (part.inlineData?.data) {
          base64Audio = part.inlineData.data;
          mimeType = part.inlineData.mimeType ?? undefined;
          break;
        }
      }
      if (base64Audio != null) {
        break;
      }
    }

    const sampleRate = parseSampleRate(mimeType) ?? DEFAULT_SAMPLE_RATE;
    const pcm =
      base64Audio != null
        ? convertBase64ToUint8Array(base64Audio)
        : new Uint8Array(0);

    // Gemini returns headerless raw PCM (e.g. `audio/L16;rate=24000`). Unlike
    // providers that return a container format (mp3/opus/wav) directly,
    // `generateSpeech`'s `detectMediaType` can't identify raw PCM and would
    // mislabel it `audio/mp3` (not playable), so wrap it in a minimal WAV header
    // by default; `outputFormat: 'pcm'` returns the raw bytes untouched.
    // Empty audio is returned as-is so the core layer throws NoSpeechGeneratedError.
    const audio =
      outputFormat === 'pcm' || pcm.length === 0
        ? pcm
        : addWavHeader(pcm, sampleRate);

    // Raw PCM was explicitly requested: tell the caller how to interpret it.
    if (outputFormat === 'pcm' && pcm.length > 0) {
      warnings.push({
        type: 'unsupported',
        feature: 'outputFormat',
        details:
          `Returning raw PCM audio (signed 16-bit little-endian, mono, ${sampleRate} Hz). ` +
          'These bytes have no container header and are not directly playable; ' +
          'see providerMetadata.google for the sample rate and mime type.',
      });
    }

    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody),
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse,
      },
      providerMetadata: {
        google: {
          sampleRate,
          mimeType: mimeType ?? null,
        },
      },
    };
  }
}
|
|
262
|
+
|
|
263
|
+
/**
 * Parses the sample rate from a PCM mime type such as `audio/L16;rate=24000`.
 *
 * Only a standalone `rate` parameter is accepted: it must appear at the start
 * of the string or directly after a `;` (optionally followed by whitespace),
 * so look-alike parameters such as `bitrate=` or `samplerate=` are not
 * mistaken for it. The parameter name is matched case-insensitively and the
 * value may be wrapped in double quotes.
 *
 * @param mimeType - Mime type reported with the audio, if any.
 * @returns The rate in Hz, or `undefined` when the mime type is missing, has
 *   no `rate` parameter, or the rate is not a positive safe integer (so the
 *   caller's default applies instead of a 0 Hz header).
 */
function parseSampleRate(mimeType: string | undefined): number | undefined {
  if (mimeType == null) {
    return undefined;
  }

  const digits = /(?:^|;)\s*rate="?(\d+)/i.exec(mimeType)?.[1];
  if (digits == null) {
    return undefined;
  }

  const rate = Number.parseInt(digits, 10);
  return Number.isSafeInteger(rate) && rate > 0 ? rate : undefined;
}
|
|
273
|
+
|
|
274
|
+
/**
 * Prepends a minimal 44-byte RIFF/WAVE header to raw PCM so the result is
 * playable and detectable as `audio/wav`. The header always declares
 * uncompressed PCM, one channel and 16 bits per sample.
 *
 * @param pcm - Raw sample bytes; copied verbatim after the header.
 * @param sampleRate - Sample rate in Hz written into the `fmt ` chunk.
 * @returns A new array of `44 + pcm.length` bytes.
 */
function addWavHeader(pcm: Uint8Array, sampleRate: number): Uint8Array {
  const headerBytes = 44;
  const channelCount = 1;
  const bitDepth = 16;
  const frameBytes = (channelCount * bitDepth) / 8;
  const payloadBytes = pcm.length;

  const wav = new Uint8Array(new ArrayBuffer(headerBytes + payloadBytes));
  const header = new DataView(wav.buffer);

  // RIFF descriptor: the size field counts everything after its own 8 bytes.
  writeAscii(header, 0, 'RIFF');
  header.setUint32(4, 36 + payloadBytes, true);
  writeAscii(header, 8, 'WAVE');

  // `fmt ` chunk describing the sample layout (all fields little-endian).
  writeAscii(header, 12, 'fmt ');
  header.setUint32(16, 16, true); // PCM fmt chunk size
  header.setUint16(20, 1, true); // audio format = PCM
  header.setUint16(22, channelCount, true);
  header.setUint32(24, sampleRate, true);
  header.setUint32(28, sampleRate * frameBytes, true); // bytes per second
  header.setUint16(32, frameBytes, true);
  header.setUint16(34, bitDepth, true);

  // `data` chunk header, followed directly by the samples.
  writeAscii(header, 36, 'data');
  header.setUint32(40, payloadBytes, true);
  wav.set(pcm, headerBytes);

  return wav;
}

/**
 * Writes each UTF-16 code unit of `text` as a single byte into `view`,
 * starting at `offset`.
 */
function writeAscii(view: DataView, offset: number, text: string): void {
  text.split('').forEach((unit, index) => {
    view.setUint8(offset + index, unit.charCodeAt(0));
  });
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { lazySchema, zodSchema } from '@ai-sdk/provider-utils';
|
|
2
|
+
import { z } from 'zod/v4';
|
|
3
|
+
|
|
4
|
+
/**
 * Provider options accepted by the Google video model.
 *
 * Every declared field may be omitted or `null`, mirroring the `.nullish()`
 * fields of `googleVideoModelOptionsSchema`. Undeclared keys are allowed and
 * passed through untouched.
 */
export type GoogleVideoModelOptions = {
  // Polling configuration (the schema requires positive numbers)
  pollIntervalMs?: number | null;
  pollTimeoutMs?: number | null;

  // Video generation options
  personGeneration?: 'dont_allow' | 'allow_adult' | 'allow_all' | null;
  negativePrompt?: string | null;

  // Reference images (for style/asset reference). Each field is nullish in
  // the schema, so `null` is accepted here as well.
  referenceImages?: Array<{
    bytesBase64Encoded?: string | null;
    gcsUri?: string | null;
  }> | null;

  [key: string]: unknown; // For passthrough
};
|
|
21
|
+
|
|
22
|
+
/**
 * Lazily constructed validation schema for the Google video provider options.
 * All declared fields are nullish; unknown keys are kept via `.passthrough()`.
 */
export const googleVideoModelOptionsSchema = lazySchema(() => {
  // One reference image, identified by inline base64 bytes and/or a GCS URI.
  const referenceImageSchema = z.object({
    bytesBase64Encoded: z.string().nullish(),
    gcsUri: z.string().nullish(),
  });

  const personGenerationSchema = z.enum([
    'dont_allow',
    'allow_adult',
    'allow_all',
  ]);

  const optionsSchema = z
    .object({
      pollIntervalMs: z.number().positive().nullish(),
      pollTimeoutMs: z.number().positive().nullish(),
      personGeneration: personGenerationSchema.nullish(),
      negativePrompt: z.string().nullish(),
      referenceImages: z.array(referenceImageSchema).nullish(),
    })
    .passthrough();

  return zodSchema(optionsSchema);
});
|
|
@@ -1,45 +1,30 @@
|
|
|
1
1
|
import {
|
|
2
2
|
AISDKError,
|
|
3
|
-
type
|
|
4
|
-
type
|
|
3
|
+
type Experimental_VideoModelV4,
|
|
4
|
+
type SharedV4Warning,
|
|
5
5
|
} from '@ai-sdk/provider';
|
|
6
6
|
import {
|
|
7
7
|
combineHeaders,
|
|
8
8
|
convertUint8ArrayToBase64,
|
|
9
9
|
createJsonResponseHandler,
|
|
10
10
|
delay,
|
|
11
|
-
type FetchFunction,
|
|
12
11
|
getFromApi,
|
|
13
|
-
|
|
12
|
+
isSameOrigin,
|
|
14
13
|
parseProviderOptions,
|
|
15
14
|
postJsonToApi,
|
|
16
|
-
type Resolvable,
|
|
17
15
|
resolve,
|
|
18
|
-
|
|
16
|
+
type FetchFunction,
|
|
17
|
+
type Resolvable,
|
|
19
18
|
} from '@ai-sdk/provider-utils';
|
|
20
19
|
import { z } from 'zod/v4';
|
|
21
20
|
import { googleFailedResponseHandler } from './google-error';
|
|
22
|
-
import
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
pollTimeoutMs?: number | null;
|
|
28
|
-
|
|
29
|
-
// Video generation options
|
|
30
|
-
personGeneration?: 'dont_allow' | 'allow_adult' | 'allow_all' | null;
|
|
31
|
-
negativePrompt?: string | null;
|
|
32
|
-
|
|
33
|
-
// Reference images (for style/asset reference)
|
|
34
|
-
referenceImages?: Array<{
|
|
35
|
-
bytesBase64Encoded?: string;
|
|
36
|
-
gcsUri?: string;
|
|
37
|
-
}> | null;
|
|
38
|
-
|
|
39
|
-
[key: string]: unknown; // For passthrough
|
|
40
|
-
};
|
|
21
|
+
import {
|
|
22
|
+
googleVideoModelOptionsSchema,
|
|
23
|
+
type GoogleVideoModelOptions,
|
|
24
|
+
} from './google-video-model-options';
|
|
25
|
+
import type { GoogleVideoModelId } from './google-video-settings';
|
|
41
26
|
|
|
42
|
-
interface
|
|
27
|
+
interface GoogleVideoModelConfig {
|
|
43
28
|
provider: string;
|
|
44
29
|
baseURL: string;
|
|
45
30
|
headers?: Resolvable<Record<string, string | undefined>>;
|
|
@@ -50,8 +35,8 @@ interface GoogleGenerativeAIVideoModelConfig {
|
|
|
50
35
|
};
|
|
51
36
|
}
|
|
52
37
|
|
|
53
|
-
export class
|
|
54
|
-
readonly specificationVersion = '
|
|
38
|
+
export class GoogleVideoModel implements Experimental_VideoModelV4 {
|
|
39
|
+
readonly specificationVersion = 'v4';
|
|
55
40
|
|
|
56
41
|
get provider(): string {
|
|
57
42
|
return this.config.provider;
|
|
@@ -63,15 +48,15 @@ export class GoogleGenerativeAIVideoModel implements Experimental_VideoModelV3 {
|
|
|
63
48
|
}
|
|
64
49
|
|
|
65
50
|
constructor(
|
|
66
|
-
readonly modelId:
|
|
67
|
-
private readonly config:
|
|
51
|
+
readonly modelId: GoogleVideoModelId,
|
|
52
|
+
private readonly config: GoogleVideoModelConfig,
|
|
68
53
|
) {}
|
|
69
54
|
|
|
70
55
|
async doGenerate(
|
|
71
|
-
options: Parameters<
|
|
72
|
-
): Promise<Awaited<ReturnType<
|
|
56
|
+
options: Parameters<Experimental_VideoModelV4['doGenerate']>[0],
|
|
57
|
+
): Promise<Awaited<ReturnType<Experimental_VideoModelV4['doGenerate']>>> {
|
|
73
58
|
const currentDate = this.config._internal?.currentDate?.() ?? new Date();
|
|
74
|
-
const warnings:
|
|
59
|
+
const warnings: SharedV4Warning[] = [];
|
|
75
60
|
|
|
76
61
|
const googleOptions = (await parseProviderOptions({
|
|
77
62
|
provider: 'google',
|
|
@@ -279,10 +264,13 @@ export class GoogleGenerativeAIVideoModel implements Experimental_VideoModelV3 {
|
|
|
279
264
|
for (const generatedSample of response.generateVideoResponse
|
|
280
265
|
.generatedSamples) {
|
|
281
266
|
if (generatedSample.video?.uri) {
|
|
282
|
-
// Append API key to URL for authentication
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
267
|
+
// Append the API key to the download URL for authentication, but only
|
|
268
|
+
// when the response-supplied URI stays on the provider's own origin —
|
|
269
|
+
// otherwise the key would leak to whatever host the response names.
|
|
270
|
+
const urlWithAuth =
|
|
271
|
+
apiKey && isSameOrigin(generatedSample.video.uri, this.config.baseURL)
|
|
272
|
+
? `${generatedSample.video.uri}${generatedSample.video.uri.includes('?') ? '&' : '?'}key=${apiKey}`
|
|
273
|
+
: generatedSample.video.uri;
|
|
286
274
|
|
|
287
275
|
videos.push({
|
|
288
276
|
type: 'url',
|
|
@@ -349,26 +337,3 @@ const googleOperationSchema = z.object({
|
|
|
349
337
|
})
|
|
350
338
|
.nullish(),
|
|
351
339
|
});
|
|
352
|
-
|
|
353
|
-
const googleVideoModelOptionsSchema = lazySchema(() =>
|
|
354
|
-
zodSchema(
|
|
355
|
-
z
|
|
356
|
-
.object({
|
|
357
|
-
pollIntervalMs: z.number().positive().nullish(),
|
|
358
|
-
pollTimeoutMs: z.number().positive().nullish(),
|
|
359
|
-
personGeneration: z
|
|
360
|
-
.enum(['dont_allow', 'allow_adult', 'allow_all'])
|
|
361
|
-
.nullish(),
|
|
362
|
-
negativePrompt: z.string().nullish(),
|
|
363
|
-
referenceImages: z
|
|
364
|
-
.array(
|
|
365
|
-
z.object({
|
|
366
|
-
bytesBase64Encoded: z.string().nullish(),
|
|
367
|
-
gcsUri: z.string().nullish(),
|
|
368
|
-
}),
|
|
369
|
-
)
|
|
370
|
-
.nullish(),
|
|
371
|
-
})
|
|
372
|
-
.passthrough(),
|
|
373
|
-
),
|
|
374
|
-
);
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
export type
|
|
1
|
+
export type GoogleVideoModelId =
|
|
2
2
|
| 'veo-3.1-fast-generate-preview'
|
|
3
3
|
| 'veo-3.1-generate-preview'
|
|
4
4
|
| 'veo-3.1-generate'
|
|
5
|
+
| 'veo-3.1-lite-generate-preview'
|
|
5
6
|
| 'veo-3.0-generate-001'
|
|
6
7
|
| 'veo-3.0-fast-generate-001'
|
|
7
8
|
| 'veo-2.0-generate-001'
|
package/src/index.ts
CHANGED
|
@@ -3,27 +3,58 @@ export type {
|
|
|
3
3
|
GoogleLanguageModelOptions,
|
|
4
4
|
/** @deprecated Use `GoogleLanguageModelOptions` instead. */
|
|
5
5
|
GoogleLanguageModelOptions as GoogleGenerativeAIProviderOptions,
|
|
6
|
-
} from './google-
|
|
7
|
-
export type {
|
|
6
|
+
} from './google-language-model-options';
|
|
7
|
+
export type {
|
|
8
|
+
GoogleProviderMetadata,
|
|
9
|
+
/** @deprecated Use `GoogleProviderMetadata` instead. */
|
|
10
|
+
GoogleProviderMetadata as GoogleGenerativeAIProviderMetadata,
|
|
11
|
+
} from './google-prompt';
|
|
8
12
|
export type {
|
|
9
13
|
GoogleImageModelOptions,
|
|
10
14
|
/** @deprecated Use `GoogleImageModelOptions` instead. */
|
|
11
15
|
GoogleImageModelOptions as GoogleGenerativeAIImageProviderOptions,
|
|
12
|
-
} from './google-
|
|
16
|
+
} from './google-image-model-options';
|
|
13
17
|
export type {
|
|
14
18
|
GoogleEmbeddingModelOptions,
|
|
15
19
|
/** @deprecated Use `GoogleEmbeddingModelOptions` instead. */
|
|
16
20
|
GoogleEmbeddingModelOptions as GoogleGenerativeAIEmbeddingProviderOptions,
|
|
17
|
-
} from './google-
|
|
21
|
+
} from './google-embedding-model-options';
|
|
18
22
|
export type {
|
|
19
23
|
GoogleVideoModelOptions,
|
|
20
24
|
/** @deprecated Use `GoogleVideoModelOptions` instead. */
|
|
21
25
|
GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions,
|
|
22
|
-
} from './google-
|
|
23
|
-
export type {
|
|
24
|
-
|
|
26
|
+
} from './google-video-model-options';
|
|
27
|
+
export type {
|
|
28
|
+
GoogleVideoModelId,
|
|
29
|
+
/** @deprecated Use `GoogleVideoModelId` instead. */
|
|
30
|
+
GoogleVideoModelId as GoogleGenerativeAIVideoModelId,
|
|
31
|
+
} from './google-video-settings';
|
|
32
|
+
export type {
|
|
33
|
+
GoogleSpeechModelOptions,
|
|
34
|
+
GoogleSpeechModelId,
|
|
35
|
+
} from './google-speech-model-options';
|
|
36
|
+
export type { GoogleFilesUploadOptions } from './google-files';
|
|
37
|
+
export type {
|
|
38
|
+
GoogleLanguageModelInteractionsOptions,
|
|
39
|
+
GoogleInteractionsModelId,
|
|
40
|
+
} from './interactions/google-interactions-language-model-options';
|
|
41
|
+
export type { GoogleInteractionsProviderMetadata } from './interactions/google-interactions-provider-metadata';
|
|
42
|
+
export type { GoogleInteractionsAgentName } from './interactions/google-interactions-agent';
|
|
43
|
+
export {
|
|
44
|
+
createGoogle,
|
|
45
|
+
google,
|
|
46
|
+
/** @deprecated Use `createGoogle` instead. */
|
|
47
|
+
createGoogle as createGoogleGenerativeAI,
|
|
48
|
+
} from './google-provider';
|
|
25
49
|
export type {
|
|
26
|
-
|
|
27
|
-
|
|
50
|
+
GoogleProvider,
|
|
51
|
+
GoogleProviderSettings,
|
|
52
|
+
/** @deprecated Use `GoogleProvider` instead. */
|
|
53
|
+
GoogleProvider as GoogleGenerativeAIProvider,
|
|
54
|
+
/** @deprecated Use `GoogleProviderSettings` instead. */
|
|
55
|
+
GoogleProviderSettings as GoogleGenerativeAIProviderSettings,
|
|
28
56
|
} from './google-provider';
|
|
57
|
+
export { GoogleRealtimeModel as Experimental_GoogleRealtimeModel } from './realtime/google-realtime-model';
|
|
58
|
+
export type { GoogleRealtimeModelConfig as Experimental_GoogleRealtimeModelConfig } from './realtime/google-realtime-model';
|
|
59
|
+
|
|
29
60
|
export { VERSION } from './version';
|