@ai-sdk/google 3.0.74 → 3.0.77
This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/dist/index.d.mts +55 -12
- package/dist/index.d.ts +55 -12
- package/dist/index.js +687 -375
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +687 -375
- package/dist/index.mjs.map +1 -1
- package/dist/internal/index.d.mts +1 -2
- package/dist/internal/index.d.ts +1 -2
- package/dist/internal/index.js +97 -59
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/index.mjs +97 -59
- package/dist/internal/index.mjs.map +1 -1
- package/docs/15-google-generative-ai.mdx +73 -16
- package/package.json +1 -1
- package/src/google-generative-ai-language-model.ts +104 -56
- package/src/google-generative-ai-options.ts +24 -8
- package/src/google-provider.ts +9 -4
- package/src/interactions/build-google-interactions-stream-transform.ts +285 -154
- package/src/interactions/convert-to-google-interactions-input.ts +57 -133
- package/src/interactions/extract-google-interactions-sources.ts +3 -3
- package/src/interactions/google-interactions-agent.ts +6 -7
- package/src/interactions/google-interactions-api.ts +179 -115
- package/src/interactions/google-interactions-language-model-options.ts +126 -0
- package/src/interactions/google-interactions-language-model.ts +173 -60
- package/src/interactions/google-interactions-prompt.ts +239 -114
- package/src/interactions/map-google-interactions-finish-reason.ts +3 -5
- package/src/interactions/parse-google-interactions-outputs.ts +80 -74
- package/src/interactions/prepare-google-interactions-tools.ts +1 -1
- package/src/interactions/stream-google-interactions.ts +2 -2
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +1 -1
|
@@ -7,11 +7,12 @@ import type {
|
|
|
7
7
|
import { convertToBase64 } from '@ai-sdk/provider-utils';
|
|
8
8
|
import type {
|
|
9
9
|
GoogleInteractionsContent,
|
|
10
|
+
GoogleInteractionsContentBlock,
|
|
10
11
|
GoogleInteractionsFunctionResultContent,
|
|
11
12
|
GoogleInteractionsImageContent,
|
|
12
13
|
GoogleInteractionsInput,
|
|
14
|
+
GoogleInteractionsStep,
|
|
13
15
|
GoogleInteractionsTextContent,
|
|
14
|
-
GoogleInteractionsTurn,
|
|
15
16
|
} from './google-interactions-prompt';
|
|
16
17
|
|
|
17
18
|
function getTopLevelMediaType(mediaType: string): string {
|
|
@@ -42,22 +43,20 @@ export type ConvertToGoogleInteractionsInputResult = {
|
|
|
42
43
|
|
|
43
44
|
/**
|
|
44
45
|
* Converts an AI SDK `LanguageModelV3Prompt` into the Gemini Interactions
|
|
45
|
-
* request shape (`{ input
|
|
46
|
+
* request shape (`{ input: Array<Step>, system_instruction }`).
|
|
46
47
|
*
|
|
47
|
-
*
|
|
48
|
-
* `
|
|
49
|
-
* `
|
|
50
|
-
*
|
|
51
|
-
*
|
|
48
|
+
* Prior assistant content round-trips as discrete steps:
|
|
49
|
+
* - text / image content → `model_output` step with a single `content` array
|
|
50
|
+
* - reasoning → `thought` step (`signature` + `summary`)
|
|
51
|
+
* - tool-call → `function_call` step
|
|
52
|
+
* User turns (and tool-result turns from the previous round) are sent as
|
|
53
|
+
* `user_input` steps whose `content[]` holds the user's parts (text, files,
|
|
54
|
+
* and — for tool-result turns — `function_result` blocks).
|
|
52
55
|
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
55
|
-
* `
|
|
56
|
-
*
|
|
57
|
-
* `src/interactions/resources/interactions.ts` `ThoughtContent` /
|
|
58
|
-
* `FunctionCallContent`). When an input reasoning part has empty text + a
|
|
59
|
-
* signature, the converter emits a `thought` block with `signature` and an
|
|
60
|
-
* omitted `summary` — no synthetic empty-text carrier needed.
|
|
56
|
+
* Handles text parts, file parts (image / audio / document / video, all four
|
|
57
|
+
* `data.type` shapes), tool-call/tool-result round-tripping, per-step
|
|
58
|
+
* `signature` round-tripping, and statefulness compaction (drop assistant/tool
|
|
59
|
+
* turns whose `providerOptions.google.interactionId === previousInteractionId`).
|
|
61
60
|
*/
|
|
62
61
|
export function convertToGoogleInteractionsInput({
|
|
63
62
|
prompt,
|
|
@@ -68,20 +67,12 @@ export function convertToGoogleInteractionsInput({
|
|
|
68
67
|
prompt: LanguageModelV3Prompt;
|
|
69
68
|
previousInteractionId?: string;
|
|
70
69
|
store?: boolean;
|
|
71
|
-
/**
|
|
72
|
-
* Per-block media resolution applied to every image / video input block
|
|
73
|
-
* (the Interactions wire format places `resolution` on the block, not at
|
|
74
|
-
* the top level). See js-genai
|
|
75
|
-
* `src/interactions/resources/interactions.ts` `ImageContent.resolution`
|
|
76
|
-
* and `VideoContent.resolution`.
|
|
77
|
-
*/
|
|
78
70
|
mediaResolution?: GoogleInteractionsMediaResolution;
|
|
79
71
|
}): ConvertToGoogleInteractionsInputResult {
|
|
80
72
|
const warnings: Array<SharedV3Warning> = [];
|
|
81
73
|
|
|
82
74
|
/*
|
|
83
|
-
* Behavior matrix
|
|
84
|
-
* matrix":
|
|
75
|
+
* Behavior matrix for compaction:
|
|
85
76
|
*
|
|
86
77
|
* - `previousInteractionId` set + `store !== false` → compact history (drop
|
|
87
78
|
* assistant/tool turns whose `providerOptions.google.interactionId`
|
|
@@ -90,10 +81,6 @@ export function convertToGoogleInteractionsInput({
|
|
|
90
81
|
* (incoherent combo), still send full history (NO compaction).
|
|
91
82
|
* - `store === false`, no `previousInteractionId` → no compaction.
|
|
92
83
|
* - Default → no compaction.
|
|
93
|
-
*
|
|
94
|
-
* The actual `previous_interaction_id` / `store` body fields are emitted in
|
|
95
|
-
* the language model's `getArgs`; this converter only handles the history
|
|
96
|
-
* shape and the warning.
|
|
97
84
|
*/
|
|
98
85
|
const incoherentCombo = previousInteractionId != null && store === false;
|
|
99
86
|
const shouldCompact = previousInteractionId != null && store !== false;
|
|
@@ -113,7 +100,7 @@ export function convertToGoogleInteractionsInput({
|
|
|
113
100
|
: prompt;
|
|
114
101
|
|
|
115
102
|
const systemTexts: Array<string> = [];
|
|
116
|
-
const
|
|
103
|
+
const steps: Array<GoogleInteractionsStep> = [];
|
|
117
104
|
|
|
118
105
|
for (const message of compactedPrompt) {
|
|
119
106
|
switch (message.role) {
|
|
@@ -122,14 +109,10 @@ export function convertToGoogleInteractionsInput({
|
|
|
122
109
|
break;
|
|
123
110
|
}
|
|
124
111
|
case 'user': {
|
|
125
|
-
const content: Array<
|
|
112
|
+
const content: Array<GoogleInteractionsContentBlock> = [];
|
|
126
113
|
for (const part of message.content) {
|
|
127
114
|
if (part.type === 'text') {
|
|
128
|
-
|
|
129
|
-
type: 'text',
|
|
130
|
-
text: part.text,
|
|
131
|
-
};
|
|
132
|
-
content.push(block);
|
|
115
|
+
content.push({ type: 'text', text: part.text });
|
|
133
116
|
} else if (part.type === 'file') {
|
|
134
117
|
const fileBlock = convertFilePartToContent({
|
|
135
118
|
part,
|
|
@@ -143,20 +126,34 @@ export function convertToGoogleInteractionsInput({
|
|
|
143
126
|
}
|
|
144
127
|
const merged = mergeAdjacentTextContent(content);
|
|
145
128
|
if (merged.length > 0) {
|
|
146
|
-
|
|
129
|
+
steps.push({ type: 'user_input', content: merged });
|
|
147
130
|
}
|
|
148
131
|
break;
|
|
149
132
|
}
|
|
150
133
|
case 'assistant': {
|
|
151
|
-
|
|
134
|
+
/*
|
|
135
|
+
* Prior assistant content fans out into one step per logical block.
|
|
136
|
+
* Adjacent text/image content blocks are coalesced into a single
|
|
137
|
+
* `model_output` step (matching how the API emits them on output);
|
|
138
|
+
* reasoning and tool-calls each become their own step.
|
|
139
|
+
*/
|
|
140
|
+
let pendingModelOutput: Array<GoogleInteractionsContentBlock> = [];
|
|
141
|
+
const flushModelOutput = () => {
|
|
142
|
+
if (pendingModelOutput.length > 0) {
|
|
143
|
+
steps.push({ type: 'model_output', content: pendingModelOutput });
|
|
144
|
+
pendingModelOutput = [];
|
|
145
|
+
}
|
|
146
|
+
};
|
|
147
|
+
|
|
152
148
|
for (const part of message.content) {
|
|
153
149
|
if (part.type === 'text') {
|
|
154
|
-
|
|
150
|
+
pendingModelOutput.push({ type: 'text', text: part.text });
|
|
155
151
|
} else if (part.type === 'reasoning') {
|
|
152
|
+
flushModelOutput();
|
|
156
153
|
const signature = part.providerOptions?.google?.signature as
|
|
157
154
|
| string
|
|
158
155
|
| undefined;
|
|
159
|
-
|
|
156
|
+
steps.push({
|
|
160
157
|
type: 'thought',
|
|
161
158
|
...(signature != null ? { signature } : {}),
|
|
162
159
|
summary:
|
|
@@ -171,9 +168,10 @@ export function convertToGoogleInteractionsInput({
|
|
|
171
168
|
mediaResolution,
|
|
172
169
|
});
|
|
173
170
|
if (fileBlock != null) {
|
|
174
|
-
|
|
171
|
+
pendingModelOutput.push(fileBlock);
|
|
175
172
|
}
|
|
176
173
|
} else if (part.type === 'tool-call') {
|
|
174
|
+
flushModelOutput();
|
|
177
175
|
const signature = part.providerOptions?.google?.signature as
|
|
178
176
|
| string
|
|
179
177
|
| undefined;
|
|
@@ -181,7 +179,7 @@ export function convertToGoogleInteractionsInput({
|
|
|
181
179
|
typeof part.input === 'string'
|
|
182
180
|
? safeParseToolArgs(part.input)
|
|
183
181
|
: ((part.input ?? {}) as Record<string, unknown>);
|
|
184
|
-
|
|
182
|
+
steps.push({
|
|
185
183
|
type: 'function_call',
|
|
186
184
|
id: part.toolCallId,
|
|
187
185
|
name: part.toolName,
|
|
@@ -195,51 +193,17 @@ export function convertToGoogleInteractionsInput({
|
|
|
195
193
|
});
|
|
196
194
|
}
|
|
197
195
|
}
|
|
198
|
-
|
|
199
|
-
turns.push({ role: 'model', content });
|
|
200
|
-
}
|
|
196
|
+
flushModelOutput();
|
|
201
197
|
break;
|
|
202
198
|
}
|
|
203
199
|
case 'tool': {
|
|
204
200
|
/*
|
|
205
|
-
* Tool-result messages are emitted as a `
|
|
206
|
-
* holds one `function_result` block per tool-result part.
|
|
207
|
-
* (
|
|
208
|
-
*
|
|
209
|
-
* `src/interactions/resources/interactions.ts` `FunctionResultContent`
|
|
210
|
-
* around line 979 — RESOLVES PRD Open Q2):
|
|
211
|
-
*
|
|
212
|
-
* {
|
|
213
|
-
* role: 'user',
|
|
214
|
-
* content: [
|
|
215
|
-
* {
|
|
216
|
-
* type: 'function_result',
|
|
217
|
-
* call_id: <id from the matching function_call block>,
|
|
218
|
-
* name: <tool name>,
|
|
219
|
-
* result: <string | unknown | Array<TextContent|ImageContent>>,
|
|
220
|
-
* is_error?: boolean,
|
|
221
|
-
* signature?: string,
|
|
222
|
-
* },
|
|
223
|
-
* ],
|
|
224
|
-
* }
|
|
225
|
-
*
|
|
226
|
-
* The `result` field is a discriminated union: a plain string for
|
|
227
|
-
* text-only results, or an array of `text` / `image` content blocks
|
|
228
|
-
* for mixed text/image results. Our converter takes the AI SDK
|
|
229
|
-
* canonical `LanguageModelV3ToolResultOutput` and maps:
|
|
230
|
-
* - `{ type: 'text', value }` → `result: <string>`
|
|
231
|
-
* - `{ type: 'json', value }` → `result: <stringified JSON>`
|
|
232
|
-
* - `{ type: 'error-text', value }` → `result: <string>` + `is_error: true`
|
|
233
|
-
* - `{ type: 'error-json', value }` → `result: <stringified JSON>` + `is_error: true`
|
|
234
|
-
* - `{ type: 'execution-denied', reason }` → `result: <reason>` + `is_error: true`
|
|
235
|
-
* - `{ type: 'content', value: [...] }` → `result: Array<text|image>`
|
|
236
|
-
* where each AI SDK `file` part with `mediaType: image/*` becomes
|
|
237
|
-
* an Interactions `image` block (file-data path matches
|
|
238
|
-
* `convertFilePartToContent` for top-level user images), and `text`
|
|
239
|
-
* parts pass through. Non-image file parts fall back to a warning
|
|
240
|
-
* because `FunctionResultContent.result` only accepts text/image.
|
|
201
|
+
* Tool-result messages are emitted as a `user_input` step whose
|
|
202
|
+
* content holds one `function_result` block per tool-result part.
|
|
203
|
+
* `function_result` remains a content-block type (it sits inside
|
|
204
|
+
* a step), not a top-level step type.
|
|
241
205
|
*/
|
|
242
|
-
const content: Array<
|
|
206
|
+
const content: Array<GoogleInteractionsContentBlock> = [];
|
|
243
207
|
for (const part of message.content) {
|
|
244
208
|
if (part.type !== 'tool-result') {
|
|
245
209
|
warnings.push({
|
|
@@ -260,7 +224,7 @@ export function convertToGoogleInteractionsInput({
|
|
|
260
224
|
content.push(block);
|
|
261
225
|
}
|
|
262
226
|
if (content.length > 0) {
|
|
263
|
-
|
|
227
|
+
steps.push({ type: 'user_input', content });
|
|
264
228
|
}
|
|
265
229
|
break;
|
|
266
230
|
}
|
|
@@ -270,24 +234,7 @@ export function convertToGoogleInteractionsInput({
|
|
|
270
234
|
const systemInstruction =
|
|
271
235
|
systemTexts.length > 0 ? systemTexts.join('\n\n') : undefined;
|
|
272
236
|
|
|
273
|
-
|
|
274
|
-
if (turns.length === 0) {
|
|
275
|
-
input = '';
|
|
276
|
-
} else if (
|
|
277
|
-
turns.length === 1 &&
|
|
278
|
-
turns[0].role === 'user' &&
|
|
279
|
-
Array.isArray(turns[0].content)
|
|
280
|
-
) {
|
|
281
|
-
/*
|
|
282
|
-
* Single-turn user prompt: send the bare `Array<Content>` shape per the
|
|
283
|
-
* Interactions API's preferred single-turn format.
|
|
284
|
-
*/
|
|
285
|
-
input = turns[0].content;
|
|
286
|
-
} else {
|
|
287
|
-
input = turns;
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
return { input, systemInstruction, warnings };
|
|
237
|
+
return { input: steps, systemInstruction, warnings };
|
|
291
238
|
}
|
|
292
239
|
|
|
293
240
|
/**
|
|
@@ -337,12 +284,6 @@ function convertFilePartToContent({
|
|
|
337
284
|
return undefined;
|
|
338
285
|
}
|
|
339
286
|
|
|
340
|
-
/*
|
|
341
|
-
* `resolution` is per-block on the wire (`ImageContent.resolution`,
|
|
342
|
-
* `VideoContent.resolution`); only image and video carry it (see
|
|
343
|
-
* `googleapis/js-genai` `src/interactions/resources/interactions.ts`).
|
|
344
|
-
* Audio / document blocks ignore the option silently.
|
|
345
|
-
*/
|
|
346
287
|
const resolutionField =
|
|
347
288
|
mediaResolution != null && (kind === 'image' || kind === 'video')
|
|
348
289
|
? { resolution: mediaResolution }
|
|
@@ -374,23 +315,9 @@ function convertFilePartToContent({
|
|
|
374
315
|
}
|
|
375
316
|
|
|
376
317
|
/*
|
|
377
|
-
* Drops assistant
|
|
378
|
-
* (`previousInteractionId`)
|
|
379
|
-
*
|
|
380
|
-
* whose `tool-result.toolCallId` matches a `tool-call.toolCallId` from the
|
|
381
|
-
* dropped assistant turn — server-state already has the matching tool result
|
|
382
|
-
* baked in, and re-sending it without its paired call would be malformed.
|
|
383
|
-
*
|
|
384
|
-
* An assistant message is considered "part of the linked interaction" if any
|
|
385
|
-
* of its content parts carry `providerOptions.google.interactionId ===
|
|
386
|
-
* previousInteractionId`. This is stamped by `parseGoogleInteractionsOutputs`
|
|
387
|
-
* (and the stream transformer) on every output content part.
|
|
388
|
-
*
|
|
389
|
-
* User messages are always kept regardless of where they fell in the prior
|
|
390
|
-
* conversation — only assistant model output and its tool plumbing live on the
|
|
391
|
-
* server. (Note that the AI SDK does not stamp `interactionId` onto user
|
|
392
|
-
* messages, so even if it did, this function would not have a way to identify
|
|
393
|
-
* which user message belongs to which interaction.)
|
|
318
|
+
* Drops assistant messages that were part of the linked interaction
|
|
319
|
+
* (`previousInteractionId`). Tool-result turns whose tool-call counterpart
|
|
320
|
+
* was dropped are also pruned to keep the message stream well-formed.
|
|
394
321
|
*/
|
|
395
322
|
function compactPromptForPreviousInteraction({
|
|
396
323
|
prompt,
|
|
@@ -602,21 +529,18 @@ function filePartToImageBlock({
|
|
|
602
529
|
}
|
|
603
530
|
|
|
604
531
|
/*
|
|
605
|
-
* Collapses runs of adjacent text content blocks within a single user
|
|
606
|
-
* into one combined text block, separated by a blank line.
|
|
607
|
-
*
|
|
608
|
-
*
|
|
609
|
-
* keeps the wire shape compact and preserves intent when an inline text file
|
|
610
|
-
* sits next to a regular text part. Text blocks carrying `annotations` are
|
|
611
|
-
* left untouched (annotations are tied to specific text spans).
|
|
532
|
+
* Collapses runs of adjacent text content blocks within a single user step
|
|
533
|
+
* into one combined text block, separated by a blank line. Text blocks
|
|
534
|
+
* carrying `annotations` are left untouched (annotations are tied to specific
|
|
535
|
+
* text spans).
|
|
612
536
|
*/
|
|
613
537
|
function mergeAdjacentTextContent(
|
|
614
|
-
content: Array<
|
|
615
|
-
): Array<
|
|
538
|
+
content: Array<GoogleInteractionsContentBlock>,
|
|
539
|
+
): Array<GoogleInteractionsContentBlock> {
|
|
616
540
|
if (content.length < 2) {
|
|
617
541
|
return content;
|
|
618
542
|
}
|
|
619
|
-
const result: Array<
|
|
543
|
+
const result: Array<GoogleInteractionsContentBlock> = [];
|
|
620
544
|
for (const block of content) {
|
|
621
545
|
const last = result[result.length - 1];
|
|
622
546
|
if (
|
|
@@ -60,7 +60,7 @@ export function annotationToSource({
|
|
|
60
60
|
}
|
|
61
61
|
case 'file_citation': {
|
|
62
62
|
const a = annotation as GoogleInteractionsFileCitation;
|
|
63
|
-
const uri = a.
|
|
63
|
+
const uri = a.url ?? a.document_uri ?? a.file_name;
|
|
64
64
|
if (uri == null || uri.length === 0) return undefined;
|
|
65
65
|
if (uri.startsWith('http://') || uri.startsWith('https://')) {
|
|
66
66
|
return {
|
|
@@ -176,10 +176,10 @@ export function builtinToolResultToSources({
|
|
|
176
176
|
const entry = raw as {
|
|
177
177
|
file_name?: string;
|
|
178
178
|
document_uri?: string;
|
|
179
|
-
|
|
179
|
+
url?: string;
|
|
180
180
|
title?: string;
|
|
181
181
|
};
|
|
182
|
-
const uri = entry.
|
|
182
|
+
const uri = entry.url ?? entry.document_uri ?? entry.file_name;
|
|
183
183
|
if (uri == null || uri.length === 0) continue;
|
|
184
184
|
if (uri.startsWith('http://') || uri.startsWith('https://')) {
|
|
185
185
|
sources.push({
|
|
@@ -2,15 +2,14 @@
|
|
|
2
2
|
* Type-only module: declares the union of supported Gemini Interactions agent
|
|
3
3
|
* names. Used by the `google.interactions({ agent })` factory branch.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* passing an unknown agent name is a compile-time error. Add new agents here
|
|
10
|
-
* as Google publishes them.
|
|
5
|
+
* Strict string-literal union: unknown agent names are a compile-time error.
|
|
6
|
+
* User-defined agents (created via the `/agents` endpoint) are addressed by
|
|
7
|
+
* a separate `{ managedAgent: string }` factory shape — see
|
|
8
|
+
* `GoogleInteractionsModelInput`.
|
|
11
9
|
*/
|
|
12
10
|
|
|
13
11
|
export type GoogleInteractionsAgentName =
|
|
14
12
|
| 'deep-research-pro-preview-12-2025'
|
|
15
13
|
| 'deep-research-preview-04-2026'
|
|
16
|
-
| 'deep-research-max-preview-04-2026'
|
|
14
|
+
| 'deep-research-max-preview-04-2026'
|
|
15
|
+
| 'antigravity-preview-05-2026';
|