@ai-sdk/google 4.0.0-beta.8 → 4.0.0-beta.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +608 -5
- package/README.md +6 -4
- package/dist/index.d.ts +297 -54
- package/dist/index.js +5409 -640
- package/dist/index.js.map +1 -1
- package/dist/internal/index.d.ts +97 -26
- package/dist/internal/index.js +1653 -453
- package/dist/internal/index.js.map +1 -1
- package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
- package/package.json +16 -17
- package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
- package/src/convert-json-schema-to-openapi-schema.ts +1 -1
- package/src/convert-to-google-messages.ts +647 -0
- package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
- package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
- package/src/google-error.ts +1 -1
- package/src/google-files.ts +225 -0
- package/src/google-image-model-options.ts +35 -0
- package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
- package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
- package/src/google-json-accumulator.ts +371 -0
- package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
- package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +691 -217
- package/src/google-prepare-tools.ts +72 -12
- package/src/google-prompt.ts +86 -0
- package/src/google-provider.ts +157 -53
- package/src/google-speech-api.ts +36 -0
- package/src/google-speech-model-options.ts +48 -0
- package/src/google-speech-model.ts +311 -0
- package/src/google-video-model-options.ts +43 -0
- package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
- package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
- package/src/index.ts +40 -9
- package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
- package/src/interactions/cancel-google-interaction.ts +60 -0
- package/src/interactions/convert-google-interactions-usage.ts +47 -0
- package/src/interactions/convert-to-google-interactions-input.ts +557 -0
- package/src/interactions/extract-google-interactions-sources.ts +252 -0
- package/src/interactions/google-interactions-agent.ts +15 -0
- package/src/interactions/google-interactions-api.ts +530 -0
- package/src/interactions/google-interactions-language-model-options.ts +262 -0
- package/src/interactions/google-interactions-language-model.ts +776 -0
- package/src/interactions/google-interactions-prompt.ts +582 -0
- package/src/interactions/google-interactions-provider-metadata.ts +23 -0
- package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
- package/src/interactions/parse-google-interactions-outputs.ts +252 -0
- package/src/interactions/poll-google-interactions.ts +129 -0
- package/src/interactions/prepare-google-interactions-tools.ts +245 -0
- package/src/interactions/stream-google-interactions.ts +242 -0
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
- package/src/internal/index.ts +3 -2
- package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
- package/src/realtime/google-realtime-event-mapper.ts +383 -0
- package/src/realtime/google-realtime-model-options.ts +3 -0
- package/src/realtime/google-realtime-model.ts +160 -0
- package/src/realtime/index.ts +2 -0
- package/src/tool/code-execution.ts +2 -2
- package/src/tool/enterprise-web-search.ts +9 -3
- package/src/tool/file-search.ts +5 -7
- package/src/tool/google-maps.ts +3 -2
- package/src/tool/google-search.ts +11 -12
- package/src/tool/url-context.ts +4 -2
- package/src/tool/vertex-rag-store.ts +9 -6
- package/dist/index.d.mts +0 -384
- package/dist/index.mjs +0 -2519
- package/dist/index.mjs.map +0 -1
- package/dist/internal/index.d.mts +0 -287
- package/dist/internal/index.mjs +0 -1708
- package/dist/internal/index.mjs.map +0 -1
- package/src/convert-to-google-generative-ai-messages.ts +0 -239
- package/src/google-generative-ai-prompt.ts +0 -47
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Experimental_RealtimeModelV4 as RealtimeModelV4,
|
|
3
|
+
Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
|
|
4
|
+
Experimental_RealtimeModelV4FunctionCallOutput as RealtimeModelV4FunctionCallOutput,
|
|
5
|
+
Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
|
|
6
|
+
Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
|
|
7
|
+
} from '@ai-sdk/provider';
|
|
8
|
+
import { safeParseJSON } from '@ai-sdk/provider-utils';
|
|
9
|
+
import { convertJSONSchemaToOpenAPISchema } from '../convert-json-schema-to-openapi-schema';
|
|
10
|
+
import { getModelPath } from '../get-model-path';
|
|
11
|
+
|
|
12
|
+
/** Transcription payload carried by Live API messages: an optional `text` field. */
type GoogleRealtimeTranscription = { text?: string };

/**
 * One entry of `serverContent.modelTurn.parts`. A part may carry inline data
 * (forwarded by the mapper as an audio delta), text, or both.
 */
type GoogleRealtimeModelTurnPart = {
  inlineData?: { data?: string };
  text?: string;
};

/** A single function invocation listed in a `toolCall` message. */
type GoogleRealtimeFunctionCall = {
  id: string;
  name: string;
  args?: Record<string, unknown>;
};

/**
 * The `serverContent` member of a wire event. Every field is optional and
 * several of them may be present in the same message.
 */
type GoogleRealtimeServerContent = {
  interrupted?: boolean;
  modelTurn?: { parts?: Array<GoogleRealtimeModelTurnPart> };
  outputTranscription?: GoogleRealtimeTranscription;
  inputTranscription?: GoogleRealtimeTranscription;
  turnComplete?: boolean;
};

/**
 * Top-level shape of a message received from the Live API, limited to the
 * members this module inspects.
 */
type GoogleRealtimeWireEvent = {
  setupComplete?: unknown;
  toolCall?: { functionCalls?: GoogleRealtimeFunctionCall[] };
  toolCallCancellation?: unknown;
  serverContent?: GoogleRealtimeServerContent;
  inputTranscription?: GoogleRealtimeTranscription;
};
|
|
40
|
+
|
|
41
|
+
/**
 * Stateful event mapper for Google's Gemini Live API.
 *
 * Unlike OpenAI/xAI, Google's events don't have response/item IDs and
 * a single message can contain multiple pieces of data. This class
 * tracks turn state to generate consistent synthetic IDs
 * (`google-resp-<n>`, `google-item-<n>`, `google-input-<n>`, where `<n>` is
 * the current turn counter).
 */
export class GoogleRealtimeEventMapper {
  /** Index of the current model turn; embedded in every synthetic ID. */
  private turnCounter = 0;
  /** Whether the current turn has produced at least one audio delta. */
  private hasAudio = false;
  /** Whether the current turn has produced at least one text delta. */
  private hasText = false;
  /** Whether the current turn has produced at least one output transcript delta. */
  private hasTranscript = false;
  /** Set by `turnComplete`; the counter advances only when new content arrives. */
  private turnClosed = false;
  /** Sample rate advertised in the mimeType of outgoing input audio. */
  private inputAudioRate = 16000;

  private get responseId(): string {
    return `google-resp-${this.turnCounter}`;
  }

  private get itemId(): string {
    return `google-item-${this.turnCounter}`;
  }

  /**
   * Rolls over to the next turn lazily, only once new model content actually
   * arrives. `turnComplete` merely marks the current turn closed; the counter
   * is not advanced until the next response begins. This keeps a transcript
   * that arrives shortly after `turnComplete` attached to the turn it belongs
   * to, since Google delivers transcription independently with no guaranteed
   * ordering relative to `turnComplete`.
   */
  private beginTurnIfClosed(): void {
    if (!this.turnClosed) return;
    this.turnCounter++;
    this.hasAudio = false;
    this.hasText = false;
    this.hasTranscript = false;
    this.turnClosed = false;
  }

  /**
   * Maps one decoded Live API message to normalized server events.
   *
   * The message members are checked in this order, and the first one present
   * decides the result: `setupComplete`, `toolCall`, `toolCallCancellation`,
   * `serverContent`, top-level `inputTranscription`. Anything else becomes a
   * `custom` event named after the message's first key.
   *
   * @param raw The decoded message; it is attached unchanged to every
   *   returned event as `raw`.
   * @returns A single event, or an array when the message maps to several
   *   (the array is empty for a `toolCall` without function calls).
   */
  parseServerEvent(
    raw: unknown,
  ): RealtimeModelV4ServerEvent | RealtimeModelV4ServerEvent[] {
    // A frame that is not an object (null, string, number, ...) has no members
    // to inspect. Report it as an unknown custom event instead of throwing on
    // the property reads below.
    if (typeof raw !== 'object' || raw === null) {
      return { type: 'custom', rawType: 'unknown', raw };
    }

    const data = raw as GoogleRealtimeWireEvent;

    if (data.setupComplete != null) {
      return { type: 'session-created', raw };
    }

    if (data.toolCall != null) {
      // A tool call is model output, so it starts a new turn if the previous
      // one was already completed.
      this.beginTurnIfClosed();
      const functionCalls = data.toolCall.functionCalls ?? [];
      return functionCalls.flatMap(functionCall => {
        // Arguments arrive complete, so each call is emitted as one delta
        // carrying the full JSON followed immediately by the done event.
        const args = JSON.stringify(functionCall.args ?? {});
        return [
          {
            type: 'function-call-arguments-delta' as const,
            responseId: this.responseId,
            itemId: this.itemId,
            callId: functionCall.id,
            delta: args,
            raw,
          },
          {
            type: 'function-call-arguments-done' as const,
            responseId: this.responseId,
            itemId: this.itemId,
            callId: functionCall.id,
            name: functionCall.name,
            arguments: args,
            raw,
          },
        ];
      });
    }

    if (data.toolCallCancellation != null) {
      return {
        type: 'custom',
        rawType: 'toolCallCancellation',
        raw,
      };
    }

    if (data.serverContent != null) {
      return this.parseServerContent(data.serverContent, raw);
    }

    if (data.inputTranscription?.text != null) {
      return {
        type: 'input-transcription-completed',
        itemId: `google-input-${this.turnCounter}`,
        transcript: data.inputTranscription.text,
        raw,
      };
    }

    // An empty object has no first key; use 'unknown' rather than the
    // stringified `undefined`.
    return { type: 'custom', rawType: Object.keys(data)[0] ?? 'unknown', raw };
  }

  /**
   * Expands a `serverContent` payload into events, in this order:
   * interruption, model-turn parts (audio/text deltas), output transcript
   * delta, input transcription, and finally the turn-completion events.
   *
   * @param serverContent The `serverContent` member of the message.
   * @param raw The full original message, attached to every event.
   * @returns One event, an array of events, or a `custom` `serverContent`
   *   event when nothing in the payload was recognized.
   */
  private parseServerContent(
    serverContent: GoogleRealtimeServerContent,
    raw: unknown,
  ): RealtimeModelV4ServerEvent | RealtimeModelV4ServerEvent[] {
    const events: RealtimeModelV4ServerEvent[] = [];

    if (serverContent.interrupted) {
      events.push({
        type: 'speech-started',
        raw,
      });
    }

    if (serverContent.modelTurn?.parts) {
      // New model response content marks the start of the next turn.
      this.beginTurnIfClosed();
      for (const part of serverContent.modelTurn.parts) {
        if (part.inlineData?.data) {
          this.hasAudio = true;
          events.push({
            type: 'audio-delta',
            responseId: this.responseId,
            itemId: this.itemId,
            delta: part.inlineData.data,
            raw,
          });
        }
        if (part.text) {
          this.hasText = true;
          events.push({
            type: 'text-delta',
            responseId: this.responseId,
            itemId: this.itemId,
            delta: part.text,
            raw,
          });
        }
      }
    }

    // Transcripts deliberately do not open a new turn: one that trails
    // `turnComplete` still belongs to the turn that was just closed.
    if (serverContent.outputTranscription?.text) {
      this.hasTranscript = true;
      events.push({
        type: 'audio-transcript-delta',
        responseId: this.responseId,
        itemId: this.itemId,
        delta: serverContent.outputTranscription.text,
        raw,
      });
    }

    if (serverContent.inputTranscription?.text) {
      events.push({
        type: 'input-transcription-completed',
        itemId: `google-input-${this.turnCounter}`,
        transcript: serverContent.inputTranscription.text,
        raw,
      });
    }

    if (serverContent.turnComplete) {
      // Emit a `*-done` event only for the kinds of content this turn
      // actually produced, then close the response.
      if (this.hasAudio) {
        events.push({
          type: 'audio-done',
          responseId: this.responseId,
          itemId: this.itemId,
          raw,
        });
      }
      if (this.hasText) {
        events.push({
          type: 'text-done',
          responseId: this.responseId,
          itemId: this.itemId,
          raw,
        });
      }
      if (this.hasTranscript) {
        events.push({
          type: 'audio-transcript-done',
          responseId: this.responseId,
          itemId: this.itemId,
          raw,
        });
      }
      events.push({
        type: 'response-done',
        responseId: this.responseId,
        status: 'completed',
        raw,
      });
      // Mark the turn closed but defer advancing the counter until the next
      // response actually begins (see `beginTurnIfClosed`).
      this.turnClosed = true;
    }

    if (events.length === 0) {
      return { type: 'custom', rawType: 'serverContent', raw };
    }

    return events.length === 1 ? events[0] : events;
  }

  /**
   * Converts a normalized client event into the message to send to Google.
   *
   * @param event The normalized client event.
   * @param modelId Model id used when building the `setup` payload.
   * @returns The wire payload (a promise for function-call outputs), or
   *   `null` for events that are not translated to a message here.
   */
  serializeClientEvent(
    event: RealtimeModelV4ClientEvent,
    modelId: string,
  ): ReturnType<RealtimeModelV4['serializeClientEvent']> {
    switch (event.type) {
      case 'session-update':
        // Capture the configured capture rate so input audio blobs advertise
        // the real rate. Google accepts any rate as long as the blob's mimeType
        // matches; a mismatched label corrupts custom-rate audio.
        if (event.config.inputAudioFormat?.rate != null) {
          this.inputAudioRate = event.config.inputAudioFormat.rate;
        }
        return {
          setup: buildGoogleSessionConfig(event.config, modelId),
        };

      case 'input-audio-append':
        return {
          realtimeInput: {
            audio: {
              data: event.audio,
              mimeType: `audio/pcm;rate=${this.inputAudioRate}`,
            },
          },
        };

      // These events are not translated to a Google message.
      case 'input-audio-commit':
      case 'input-audio-clear':
      case 'response-create':
      case 'response-cancel':
      case 'conversation-item-truncate':
        return null;

      case 'conversation-item-create': {
        const item = event.item;
        switch (item.type) {
          case 'text-message':
            return {
              realtimeInput: {
                text: item.text,
              },
            };
          case 'function-call-output':
            return serializeFunctionCallOutput(item);
          case 'audio-message':
            return null;
        }
        break;
      }
    }

    return null;
  }
}
|
|
298
|
+
|
|
299
|
+
/**
 * Builds the `toolResponse` message that returns a function result to Google.
 *
 * `item.output` is a string. When it parses to a JSON object, that object is
 * sent as the function response. `functionResponses[].response` has to be a
 * JSON object, so any other output is wrapped under an `output` key instead
 * of being dropped or sent as a bare value: text that is not valid JSON is
 * wrapped as the original string, and JSON scalars, arrays and `null` are
 * wrapped as the parsed value.
 *
 * @param item The function-call output item; `callId`, `name` and `output`
 *   are read.
 * @returns The `{ toolResponse: { functionResponses: [...] } }` payload.
 */
async function serializeFunctionCallOutput(
  item: RealtimeModelV4FunctionCallOutput,
): Promise<unknown> {
  const parseResult = await safeParseJSON({ text: item.output });

  let response: unknown;
  if (!parseResult.success) {
    // Not JSON at all (e.g. a plain-text tool result): keep the text.
    response = { output: item.output };
  } else {
    const value: unknown = parseResult.value;
    const isJsonObject =
      typeof value === 'object' && value !== null && !Array.isArray(value);
    response = isJsonObject ? value : { output: value };
  }

  return {
    toolResponse: {
      functionResponses: [
        {
          id: item.callId,
          name: item.name,
          response,
        },
      ],
    },
  };
}
|
|
317
|
+
|
|
318
|
+
/**
 * Translates a normalized realtime session config into Google's setup object.
 * The result is used as the `setup` message body and as the
 * `bidiGenerateContentSetup` payload when creating an auth token.
 *
 * @param config Normalized session config; may be `undefined`, in which case
 *   only the model path and the default `['AUDIO']` response modality are set.
 * @param modelId Model id, converted with `getModelPath`.
 * @returns The setup object. Entries of `config.providerOptions` are copied
 *   onto it last and replace generated keys of the same name.
 */
export function buildGoogleSessionConfig(
  config: RealtimeModelV4SessionConfig | undefined,
  modelId: string,
): Record<string, unknown> {
  const modelPath = getModelPath(modelId);

  // Modalities are sent upper-cased; audio is the default when none are given.
  const requestedModalities = config?.outputModalities;
  const generationConfig: Record<string, unknown> = {
    responseModalities:
      requestedModalities != null
        ? requestedModalities.map(modality => modality.toUpperCase())
        : ['AUDIO'],
  };

  const voiceName = config?.voice;
  if (voiceName != null) {
    generationConfig.speechConfig = {
      voiceConfig: { prebuiltVoiceConfig: { voiceName } },
    };
  }

  const setup: Record<string, unknown> = { model: modelPath, generationConfig };

  const instructions = config?.instructions;
  if (instructions != null) {
    setup.systemInstruction = { parts: [{ text: instructions }] };
  }

  const tools = config?.tools;
  if (tools != null && tools.length > 0) {
    // All tools are declared together in a single `functionDeclarations` list.
    const functionDeclarations = tools.map(
      ({ name, description, parameters }) => ({
        name,
        description,
        parameters: convertJSONSchemaToOpenAPISchema(parameters),
      }),
    );
    setup.tools = [{ functionDeclarations }];
  }

  // Transcription is switched on by sending an empty object; the contents of
  // the normalized option are not forwarded.
  for (const key of [
    'inputAudioTranscription',
    'outputAudioTranscription',
  ] as const) {
    if (config?.[key] != null) {
      setup[key] = {};
    }
  }

  if (config?.providerOptions != null) {
    // Shallow copy: a provider option replaces a generated key wholesale.
    Object.assign(setup, config.providerOptions);
  }

  return setup;
}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Experimental_RealtimeModelV4 as RealtimeModelV4,
|
|
3
|
+
Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
|
|
4
|
+
Experimental_RealtimeModelV4ClientSecretOptions as RealtimeModelV4ClientSecretOptions,
|
|
5
|
+
Experimental_RealtimeModelV4ClientSecretResult as RealtimeModelV4ClientSecretResult,
|
|
6
|
+
Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
|
|
7
|
+
Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
|
|
8
|
+
} from '@ai-sdk/provider';
|
|
9
|
+
import type { FetchFunction } from '@ai-sdk/provider-utils';
|
|
10
|
+
import {
|
|
11
|
+
GoogleRealtimeEventMapper,
|
|
12
|
+
buildGoogleSessionConfig,
|
|
13
|
+
} from './google-realtime-event-mapper';
|
|
14
|
+
|
|
15
|
+
const realtimeWebSocketPath =
|
|
16
|
+
'google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained';
|
|
17
|
+
|
|
18
|
+
/**
 * Parses `baseURL` and removes a trailing API version segment (`v1beta` or
 * `v1alpha`) from its path, so callers can append their own versioned path.
 *
 * Trailing slashes are ignored when looking for the version segment, so
 * `https://host/v1beta/` is treated like `https://host/v1beta`. URLs whose
 * last segment is not one of those versions are returned with their path
 * untouched.
 *
 * @param baseURL Absolute base URL of the API.
 * @returns A new `URL`; the path is `/` when only the version was present.
 * @throws TypeError when `baseURL` is not a valid absolute URL (from `new URL`).
 */
function getRealtimeBaseURL(baseURL: string): URL {
  const url = new URL(baseURL);
  const pathSegments = url.pathname.split('/');

  // A trailing slash leaves empty segments at the end; skip them so the real
  // last segment is compared. The leading empty segment is always kept.
  while (
    pathSegments.length > 1 &&
    pathSegments[pathSegments.length - 1] === ''
  ) {
    pathSegments.pop();
  }

  const version = pathSegments[pathSegments.length - 1];

  if (version === 'v1beta' || version === 'v1alpha') {
    pathSegments.pop();
    url.pathname = pathSegments.join('/') || '/';
  }

  return url;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Returns the URL of the `v1alpha/auth_tokens` endpoint that corresponds to
 * `baseURL`.
 *
 * @param baseURL Absolute base URL of the API.
 * @returns The endpoint URL as a string.
 */
function getAuthTokensURL(baseURL: string): string {
  const endpoint = getRealtimeBaseURL(baseURL);
  // Drop a single trailing slash so the joined path has no `//`.
  const basePath = endpoint.pathname.replace(/\/$/, '');
  endpoint.pathname = basePath + '/v1alpha/auth_tokens';
  return endpoint.href;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Returns the WebSocket URL of the realtime service that corresponds to
 * `baseURL`: `https:` becomes `wss:`, any other protocol becomes `ws:`, and
 * `/ws/<realtimeWebSocketPath>` is appended to the path.
 *
 * @param baseURL Absolute base URL of the API.
 * @returns The WebSocket URL as a string.
 */
function getWebSocketURL(baseURL: string): string {
  const socketURL = getRealtimeBaseURL(baseURL);
  const isSecure = socketURL.protocol === 'https:';
  socketURL.protocol = isSecure ? 'wss:' : 'ws:';
  // Drop a single trailing slash so the joined path has no `//`.
  const prefix = socketURL.pathname.replace(/\/$/, '');
  socketURL.pathname = [prefix, 'ws', realtimeWebSocketPath].join('/');
  return socketURL.href;
}
|
|
43
|
+
|
|
44
|
+
/** Settings a `GoogleRealtimeModel` is constructed with. */
export type GoogleRealtimeModelConfig = {
  /** Provider name exposed as `GoogleRealtimeModel.provider`. */
  provider: string;
  /** Base URL from which the auth-token and WebSocket URLs are derived. */
  baseURL: string;
  /** Returns request headers; `x-goog-api-key` is read from the result. */
  headers: () => Record<string, string | undefined>;
  /** Optional fetch implementation; the global `fetch` is used when omitted. */
  fetch?: FetchFunction;
};

/**
 * Appends `name=value` to `url`, using `&` when the URL already contains a
 * query string and `?` otherwise. The value is percent-encoded.
 */
function appendQueryParameter(
  url: string,
  name: string,
  value: string,
): string {
  const separator = url.includes('?') ? '&' : '?';
  return `${url}${separator}${name}=${encodeURIComponent(value)}`;
}

/**
 * Realtime model for Google's Live API: creates client auth tokens, supplies
 * the WebSocket connection settings, and delegates event translation to a
 * `GoogleRealtimeEventMapper`.
 */
export class GoogleRealtimeModel implements RealtimeModelV4 {
  readonly specificationVersion = 'v4' as const;
  readonly provider: string;
  readonly modelId: string;

  private readonly config: GoogleRealtimeModelConfig;
  // NOTE(review): the mapper is stateful (turn counter, input audio rate) and
  // there is one per model instance — confirm callers do not share a model
  // across concurrent sessions.
  private readonly mapper = new GoogleRealtimeEventMapper();

  constructor(modelId: string, config: GoogleRealtimeModelConfig) {
    this.modelId = modelId;
    this.provider = config.provider;
    this.config = config;
  }

  /**
   * Requests an auth token from the `auth_tokens` endpoint.
   *
   * @param options `expiresAfterSeconds` (default 60) sets how long the token
   *   may be used to open a session; `sessionConfig` is converted into the
   *   `bidiGenerateContentSetup` sent with the request.
   * @returns The token name, the WebSocket URL, and the token expiry in
   *   seconds since the epoch when the response includes `expireTime`.
   * @throws Error when no `x-goog-api-key` header is configured, when the
   *   request returns a non-OK status, or when the response has no token name.
   */
  async doCreateClientSecret(
    options: RealtimeModelV4ClientSecretOptions,
  ): Promise<RealtimeModelV4ClientSecretResult> {
    const fetchFn = this.config.fetch ?? fetch;
    const headers = this.config.headers();
    const apiKey = headers['x-goog-api-key'];

    if (!apiKey) {
      throw new Error(
        'Google Generative AI API key is required for realtime token creation.',
      );
    }

    // `newSessionExpireTime` controls how long the token can be used to *open*
    // a session — the window callers actually care about — so map
    // `expiresAfterSeconds` to it (Google otherwise defaults it to ~60s).
    // `expireTime` is the overall token lifetime and must be >=
    // `newSessionExpireTime`, so extend it to leave room for the opened session
    // to run.
    const now = Date.now();
    const openWindowMs = (options.expiresAfterSeconds ?? 60) * 1000;
    const newSessionExpireTime = new Date(now + openWindowMs).toISOString();
    const expireTime = new Date(
      now + openWindowMs + 30 * 60 * 1000,
    ).toISOString();

    const setupPayload = buildGoogleSessionConfig(
      options.sessionConfig,
      this.modelId,
    );

    const response = await fetchFn(
      // The base URL may already carry a query string, so pick the separator
      // accordingly instead of always appending `?key=`.
      appendQueryParameter(getAuthTokensURL(this.config.baseURL), 'key', apiKey),
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          // `uses: 0` means no limit is applied to how many times the token can
          // start a session (per the AuthToken spec). An unset value would
          // default to 1, which breaks WebSocket reconnects within the session.
          uses: 0,
          expireTime,
          newSessionExpireTime,
          bidiGenerateContentSetup: setupPayload,
        }),
      },
    );

    if (!response.ok) {
      const text = await response.text();
      throw new Error(
        `Google realtime auth token request failed: ${response.status} ${text}`,
      );
    }

    const data = (await response.json()) as {
      name?: string;
      expireTime?: string;
    };

    // Fail here with a clear message rather than handing back an undefined
    // token that would only surface later as a rejected WebSocket connection.
    if (typeof data.name !== 'string' || data.name.length === 0) {
      throw new Error(
        'Google realtime auth token response did not include a token name.',
      );
    }

    return {
      token: data.name,
      url: getWebSocketURL(this.config.baseURL),
      expiresAt: data.expireTime
        ? Math.floor(new Date(data.expireTime).getTime() / 1000)
        : undefined,
    };
  }

  /**
   * Builds the WebSocket connection settings for a previously created token.
   *
   * @param options The token and WebSocket URL returned by
   *   `doCreateClientSecret`.
   * @returns The URL with the token added as the `access_token` query
   *   parameter; no sub-protocols are set.
   */
  getWebSocketConfig(options: { token: string; url: string }): {
    url: string;
    protocols?: string[];
  } {
    return {
      url: appendQueryParameter(options.url, 'access_token', options.token),
    };
  }

  /** Maps a raw server message to normalized events via the event mapper. */
  parseServerEvent(
    raw: unknown,
  ): RealtimeModelV4ServerEvent | RealtimeModelV4ServerEvent[] {
    return this.mapper.parseServerEvent(raw);
  }

  /** Converts a normalized client event to a wire message via the event mapper. */
  serializeClientEvent(
    event: RealtimeModelV4ClientEvent,
  ): ReturnType<RealtimeModelV4['serializeClientEvent']> {
    return this.mapper.serializeClientEvent(event, this.modelId);
  }

  /** Builds Google's setup object for `config` using this model's id. */
  buildSessionConfig(
    config: RealtimeModelV4SessionConfig,
  ): Record<string, unknown> {
    return buildGoogleSessionConfig(config, this.modelId);
  }
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { createProviderExecutedToolFactory } from '@ai-sdk/provider-utils';
|
|
2
2
|
import { z } from 'zod/v4';
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -10,7 +10,7 @@ import { z } from 'zod/v4';
|
|
|
10
10
|
* @see https://ai.google.dev/gemini-api/docs/code-execution (Google AI)
|
|
11
11
|
* @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/code-execution-api (Vertex AI)
|
|
12
12
|
*/
|
|
13
|
-
export const codeExecution =
|
|
13
|
+
export const codeExecution = createProviderExecutedToolFactory<
|
|
14
14
|
{
|
|
15
15
|
language: string;
|
|
16
16
|
code: string;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
createProviderExecutedToolFactory,
|
|
3
3
|
lazySchema,
|
|
4
4
|
zodSchema,
|
|
5
5
|
} from '@ai-sdk/provider-utils';
|
|
@@ -7,12 +7,18 @@ import { z } from 'zod/v4';
|
|
|
7
7
|
|
|
8
8
|
// https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/web-grounding-enterprise
|
|
9
9
|
|
|
10
|
-
export const enterpriseWebSearch =
|
|
10
|
+
export const enterpriseWebSearch = createProviderExecutedToolFactory<
|
|
11
11
|
{
|
|
12
12
|
// Enterprise Web Search does not have any input schema
|
|
13
13
|
},
|
|
14
|
-
{
|
|
14
|
+
{
|
|
15
|
+
// Enterprise Web Search does not have any output parameters
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
// Enterprise Web Search does not have any configuration options
|
|
19
|
+
}
|
|
15
20
|
>({
|
|
16
21
|
id: 'google.enterprise_web_search',
|
|
17
22
|
inputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
23
|
+
outputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
18
24
|
});
|
package/src/tool/file-search.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
createProviderExecutedToolFactory,
|
|
3
3
|
lazySchema,
|
|
4
4
|
zodSchema,
|
|
5
5
|
} from '@ai-sdk/provider-utils';
|
|
@@ -38,14 +38,12 @@ const fileSearchArgsBaseSchema = z
|
|
|
38
38
|
|
|
39
39
|
export type GoogleFileSearchToolArgs = z.infer<typeof fileSearchArgsBaseSchema>;
|
|
40
40
|
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
);
|
|
44
|
-
|
|
45
|
-
export const fileSearch = createProviderToolFactory<
|
|
41
|
+
export const fileSearch = createProviderExecutedToolFactory<
|
|
42
|
+
{},
|
|
46
43
|
{},
|
|
47
44
|
GoogleFileSearchToolArgs
|
|
48
45
|
>({
|
|
49
46
|
id: 'google.file_search',
|
|
50
|
-
inputSchema:
|
|
47
|
+
inputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
48
|
+
outputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
51
49
|
});
|
package/src/tool/google-maps.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
createProviderExecutedToolFactory,
|
|
3
3
|
lazySchema,
|
|
4
4
|
zodSchema,
|
|
5
5
|
} from '@ai-sdk/provider-utils';
|
|
@@ -8,7 +8,8 @@ import { z } from 'zod/v4';
|
|
|
8
8
|
// https://ai.google.dev/gemini-api/docs/maps-grounding
|
|
9
9
|
// https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/grounding-with-google-maps
|
|
10
10
|
|
|
11
|
-
export const googleMaps =
|
|
11
|
+
export const googleMaps = createProviderExecutedToolFactory<{}, {}, {}>({
|
|
12
12
|
id: 'google.google_maps',
|
|
13
13
|
inputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
14
|
+
outputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
14
15
|
});
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
createProviderExecutedToolFactory,
|
|
3
3
|
lazySchema,
|
|
4
4
|
zodSchema,
|
|
5
5
|
} from '@ai-sdk/provider-utils';
|
|
@@ -9,7 +9,7 @@ import { z } from 'zod/v4';
|
|
|
9
9
|
// https://ai.google.dev/api/generate-content#GroundingSupport
|
|
10
10
|
// https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/grounding-with-google-search
|
|
11
11
|
|
|
12
|
-
const googleSearchToolArgsBaseSchema = z
|
|
12
|
+
export const googleSearchToolArgsBaseSchema = z
|
|
13
13
|
.object({
|
|
14
14
|
searchTypes: z
|
|
15
15
|
.object({
|
|
@@ -31,13 +31,12 @@ export type GoogleSearchToolArgs = z.infer<
|
|
|
31
31
|
typeof googleSearchToolArgsBaseSchema
|
|
32
32
|
>;
|
|
33
33
|
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
);
|
|
34
|
+
export const googleSearch = createProviderExecutedToolFactory<
|
|
35
|
+
{},
|
|
36
|
+
{},
|
|
37
|
+
GoogleSearchToolArgs
|
|
38
|
+
>({
|
|
39
|
+
id: 'google.google_search',
|
|
40
|
+
inputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
41
|
+
outputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
42
|
+
});
|
package/src/tool/url-context.ts
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
createProviderExecutedToolFactory,
|
|
3
3
|
lazySchema,
|
|
4
4
|
zodSchema,
|
|
5
5
|
} from '@ai-sdk/provider-utils';
|
|
6
6
|
import { z } from 'zod/v4';
|
|
7
7
|
|
|
8
|
-
export const urlContext =
|
|
8
|
+
export const urlContext = createProviderExecutedToolFactory<
|
|
9
9
|
{
|
|
10
10
|
// Url context does not have any input schema, it will directly use the url from the prompt
|
|
11
11
|
},
|
|
12
|
+
{},
|
|
12
13
|
{}
|
|
13
14
|
>({
|
|
14
15
|
id: 'google.url_context',
|
|
15
16
|
inputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
17
|
+
outputSchema: lazySchema(() => zodSchema(z.object({}))),
|
|
16
18
|
});
|