@ai-sdk/google 3.0.67 → 3.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1085,6 +1085,402 @@ The following Zod features are known to not work with Google Generative AI:
  available provider model ID as a string if needed.
  </Note>

+ ## Interactions API
+
+ The [Gemini Interactions API](https://ai.google.dev/gemini-api/docs/interactions)
+ (`POST /v1beta/interactions`) is a separate Google endpoint with server-side
+ state, unified content blocks, first-class built-in tools, agent presets, and
+ native multimodal image output. It is reached via the `google.interactions(...)`
+ factory:
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const { text } = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   prompt: 'Hello, how are you?',
+ });
+ ```
+
+ `google.interactions(...)` accepts either a model ID string (e.g.
+ `'gemini-2.5-flash'`, `'gemini-3-pro-preview'`) or `{ agent: <name> }` to use
+ a Gemini [agent preset](#agent-presets). The returned model can be passed to
+ `generateText` and `streamText` like any other AI SDK language model.
+
+ <Note>
+   Use `google(...)` for the standard `:generateContent` /
+   `:streamGenerateContent` endpoints, and `google.interactions(...)` for the
+   new Interactions endpoint. Pick one per model instance — they target
+   different request bodies and SSE event vocabularies.
+ </Note>
+
+ ### Provider Options
+
+ The Interactions model reads its options from the shared
+ `providerOptions.google.*` namespace. Validate them with the
+ `GoogleLanguageModelInteractionsOptions` type:
+
+ ```ts
+ import {
+   google,
+   type GoogleLanguageModelInteractionsOptions,
+ } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   prompt: 'What color is the sky in one word?',
+   providerOptions: {
+     google: {
+       serviceTier: 'priority',
+     } satisfies GoogleLanguageModelInteractionsOptions,
+   },
+ });
+ ```
+
+ The following optional provider options are available:
+
+ - **previousInteractionId** _string_
+
+   Server-side interaction id from a prior turn. When set, the server pulls
+   prior context from its own state and only the new user message is sent on
+   the wire. Pair with the default `store: true` to chain stateful
+   conversations. See [Stateful chaining](#stateful-chaining).
+
+ - **store** _boolean_
+
+   Whether the server should persist the interaction. Defaults to `true`.
+   Set to `false` for stateless multi-turn conversations where the full
+   message history is re-sent on every turn.
+
+ - **agent** _string_
+
+   Name of a Gemini agent preset (e.g. `'deep-research-pro-preview-12-2025'`).
+
+   <Note>
+     Prefer the factory form `google.interactions({ agent: '...' })` over
+     setting `agent` in provider options — the factory is type-checked
+     against the supported agent names.
+   </Note>
+
+ - **agentConfig** _object_
+
+   Per-agent configuration. Currently supports `{ type: 'dynamic' }` and
+   `{ type: 'deep-research', thinkingSummaries?, visualization?, collaborativePlanning? }`.
+
+ - **thinkingLevel** _'minimal' | 'low' | 'medium' | 'high'_
+
+   Controls reasoning depth for thinking-enabled models. Mapped onto the
+   Interactions request's `thinking_level`.
+
+ - **thinkingSummaries** _'auto' | 'none'_
+
+   Whether the model returns synthesized thought summaries on reasoning
+   parts. Defaults to the API default.
+
+ - **imageConfig** _\{ aspectRatio?: string; imageSize?: '1K' | '2K' | '4K' | '512' \}_
+
+   Image generation configuration when `responseModalities` includes
+   `'image'`. `aspectRatio` accepts `1:1`, `2:3`, `3:2`, `3:4`, `4:3`,
+   `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:8`, `8:1`, `1:4`, `4:1`.
+
+ - **mediaResolution** _'low' | 'medium' | 'high' | 'ultra_high'_
+
+   Media resolution applied to image inputs / outputs.
+
+ - **responseModalities** _Array\<'text' | 'image' | 'audio' | 'video' | 'document'\>_
+
+   The modalities the model may emit. Defaults to text-only. Pass
+   `['image']` (or `['text', 'image']`) to enable native image output. See
+   [Image output](#image-output-via-interactions).
+
+ - **serviceTier** _'flex' | 'standard' | 'priority'_
+
+   Service tier for the request. Mirrored back on
+   `result.providerMetadata.google.serviceTier` for observability.
+
+ - **systemInstruction** _string_
+
+   Alternative to the AI SDK `system` message. If both are set, the AI SDK
+   `system` message wins and a warning is emitted.
+
+ - **pollingTimeoutMs** _number_
+
+   Maximum time, in milliseconds, to poll a background interaction (agent
+   call) before giving up. Defaults to 30 minutes (1,800,000 ms). Long-running
+   agents such as deep research may need longer.
+
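+ As a sketch that combines several of these options (the prompt is
+ illustrative and exact option support varies by model; `thinkingLevel`
+ requires a thinking-enabled model and `imageConfig` only applies when
+ `'image'` is in `responseModalities`):
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const result = await generateText({
+   model: google.interactions('gemini-3-pro-image-preview'),
+   prompt: 'Generate a wide shot of a lighthouse at dusk.',
+   providerOptions: {
+     google: {
+       // All of these are optional; omit what you do not need.
+       thinkingLevel: 'low',
+       responseModalities: ['text', 'image'],
+       imageConfig: { aspectRatio: '16:9', imageSize: '2K' },
+       serviceTier: 'standard',
+     },
+   },
+ });
+
+ console.log(result.text);
+ console.log(result.files.length); // generated images arrive as files
+ ```
+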
+ ### Provider Metadata
+
+ `result.providerMetadata.google` (typed via `GoogleInteractionsProviderMetadata`)
+ exposes:
+
+ - **interactionId** _string_
+
+   Server-side interaction id. Pass this back as `previousInteractionId` on
+   the next turn to chain.
+
+ - **serviceTier** _string_
+
+   Service tier the request actually ran on.
+
+ - **signature** _string_
+
+   Per-block signature hash, set by the SDK on output reasoning and
+   tool-call parts. Round-tripped automatically on the next turn.
+
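+ For example (a minimal sketch; the field names follow the list above and
+ the values are only present on Interactions responses):
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const result = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   prompt: 'Hello!',
+   providerOptions: { google: { serviceTier: 'flex' } },
+ });
+
+ // providerMetadata values are loosely typed JSON, so narrow them as needed.
+ const metadata = result.providerMetadata?.google;
+ const interactionId = metadata?.interactionId as string | undefined;
+ const serviceTier = metadata?.serviceTier as string | undefined;
+
+ console.log({ interactionId, serviceTier });
+ ```
+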
+ ### Stateful chaining
+
+ With the default `store: true`, the server retains the prior turn so the
+ next request only needs to send the new user message and the
+ `previousInteractionId`:
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const turn1 = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   prompt: 'What are the three largest cities in Spain?',
+ });
+
+ const interactionId = turn1.providerMetadata?.google?.interactionId as
+   | string
+   | undefined;
+
+ const turn2 = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   prompt: 'What is the most famous landmark in the second one?',
+   providerOptions: {
+     google: { previousInteractionId: interactionId },
+   },
+ });
+ ```
+
+ For stateless multi-turn conversations, set `store: false` and re-send the
+ full message history on every turn (no `previousInteractionId`):
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText, type ModelMessage } from 'ai';
+
+ const messages: Array<ModelMessage> = [
+   { role: 'user', content: 'What are the three largest cities in Spain?' },
+ ];
+
+ const turn1 = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   messages,
+   providerOptions: { google: { store: false } },
+ });
+
+ messages.push(...turn1.response.messages);
+ messages.push({
+   role: 'user',
+   content: 'What is the most famous landmark in the second one?',
+ });
+
+ const turn2 = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   messages,
+   providerOptions: { google: { store: false } },
+ });
+ ```
+
+ ### Built-in Tools
+
+ The Interactions API ships a built-in tool catalog. The provider-defined
+ tools under `google.tools.*` map onto Interactions tool descriptors:
+
+ | AI SDK tool                           | Interactions tool type | Notes                                      |
+ | ------------------------------------- | ---------------------- | ------------------------------------------ |
+ | `google.tools.googleSearch`           | `google_search`        | Web / image search grounding.              |
+ | `google.tools.codeExecution`          | `code_execution`       | Server-side Python execution.              |
+ | `google.tools.urlContext`             | `url_context`          | Fetch URLs referenced in the prompt.       |
+ | `google.tools.fileSearch`             | `file_search`          | Retrieval from File Search stores.         |
+ | `google.tools.googleMaps`             | `google_maps`          | Maps grounding for nearby-places queries.  |
+ | _provider tool_ `google.computer_use` | `computer_use`         | Computer use (browser environment).        |
+ | _provider tool_ `google.mcp_server`   | `mcp_server`           | Remote MCP server passthrough.             |
+ | _provider tool_ `google.retrieval`    | `retrieval`            | Vertex AI Search retrieval.                |
+
+ Function tools (`type: 'function'`) defined with the AI SDK `tool(...)`
+ helper are translated to Interactions `function` tool descriptors. Other
+ tool kinds emit a warning and are dropped.
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const { text, sources } = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   tools: {
+     google_search: google.tools.googleSearch({}),
+   },
+   prompt:
+     "What's a notable AI development from this past week? " +
+     'Include the date for each item you mention.',
+ });
+ ```
+
+ Function tools work the same way as on the standard provider:
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText, stepCountIs, tool } from 'ai';
+ import { z } from 'zod';
+
+ const weatherTool = tool({
+   description: 'Get the weather for a city.',
+   inputSchema: z.object({ city: z.string() }),
+   execute: async ({ city }) => `It is sunny in ${city}.`,
+ });
+
+ const { text, toolCalls } = await generateText({
+   model: google.interactions('gemini-2.5-flash'),
+   tools: { getWeather: weatherTool },
+   stopWhen: stepCountIs(5),
+   prompt: 'What is the weather in San Francisco right now?',
+ });
+ ```
+
+ ### Image output via Interactions
+
+ Set `responseModalities: ['image']` on a Gemini image-capable model to get
+ images as `LanguageModelV3FilePart` files in the response. No tool wrapping
+ is required.
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const result = await generateText({
+   model: google.interactions('gemini-3-pro-image-preview'),
+   prompt: 'Generate an image of a comic cat in a spaceship.',
+   providerOptions: {
+     google: {
+       responseModalities: ['image'],
+     },
+   },
+ });
+
+ for (const file of result.files) {
+   if (file.mediaType.startsWith('image/')) {
+     // file.uint8Array | file.base64 | file.mediaType
+   }
+ }
+ ```
+
+ Iterative image editing pairs naturally with stateful chaining — keep
+ `previousInteractionId` set across turns and the model edits its prior
+ output:
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const model = google.interactions('gemini-3-pro-image-preview');
+
+ const turn1 = await generateText({
+   model,
+   prompt: 'Generate an image of a comic cat in a spaceship.',
+   providerOptions: { google: { responseModalities: ['image'] } },
+ });
+
+ const interactionId = turn1.providerMetadata?.google?.interactionId as
+   | string
+   | undefined;
+
+ const turn2 = await generateText({
+   model,
+   prompt: 'now make the cat red',
+   providerOptions: {
+     google: {
+       responseModalities: ['image'],
+       previousInteractionId: interactionId,
+     },
+   },
+ });
+ ```
+
+ ### Agent presets
+
+ Pass `{ agent: <name> }` to target a Gemini agent preset. The factory
+ type-checks the agent name against the supported set:
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const result = await generateText({
+   model: google.interactions({
+     agent: 'deep-research-pro-preview-12-2025',
+   }),
+   prompt:
+     'Briefly summarize the most-cited papers on retrieval-augmented generation since 2024 (2-3 sentences).',
+ });
+ ```
+
+ Agent calls run with `background: true` on the wire and the SDK polls the
+ `GET /interactions/{id}` endpoint internally until the interaction
+ completes. The default polling timeout is 30 minutes; raise it via
+ `pollingTimeoutMs` for slower agents:
+
+ ```ts
+ await generateText({
+   model: google.interactions({ agent: 'deep-research-max-preview-04-2026' }),
+   prompt: 'Produce a long-form research brief on ...',
+   providerOptions: {
+     google: {
+       pollingTimeoutMs: 60 * 60 * 1000, // 1 hour
+     },
+   },
+ });
+ ```
+
+ Agents also chain through `previousInteractionId` like model-id calls.
+
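+ For example, a follow-up turn can build on a finished research interaction
+ (a sketch reusing the chaining pattern above; the agent name is one of the
+ previews listed earlier and the prompts are illustrative):
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { generateText } from 'ai';
+
+ const agentModel = google.interactions({
+   agent: 'deep-research-pro-preview-12-2025',
+ });
+
+ const research = await generateText({
+   model: agentModel,
+   prompt: 'Summarize recent work on retrieval-augmented generation.',
+ });
+
+ const interactionId = research.providerMetadata?.google?.interactionId as
+   | string
+   | undefined;
+
+ // The server already holds the research context for this interaction.
+ const followUp = await generateText({
+   model: agentModel,
+   prompt: 'Condense that into three bullet points.',
+   providerOptions: { google: { previousInteractionId: interactionId } },
+ });
+
+ console.log(followUp.text);
+ ```
+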
+ ### Streaming
+
+ `streamText` is supported and consumes the seven Interactions SSE event
+ types (`interaction.start`, `content.start`, `content.delta`,
+ `content.stop`, `interaction.status_update`, `interaction.complete`,
+ `error`). The stream's `finish` part exposes `interactionId` on
+ `providerMetadata.google` so callers can chain.
+
+ ```ts
+ import { google } from '@ai-sdk/google';
+ import { streamText } from 'ai';
+
+ const result = streamText({
+   model: google.interactions('gemini-2.5-flash'),
+   prompt: 'Hello, how are you?',
+ });
+
+ for await (const textPart of result.textStream) {
+   process.stdout.write(textPart);
+ }
+
+ const googleMetadata = (await result.providerMetadata)?.google;
+ console.log('Interaction id:', googleMetadata?.interactionId);
+ ```
+
+ ### Runnable Examples
+
+ Paired `generateText` + `streamText` examples live under:
+
+ - `examples/ai-functions/src/generate-text/google/interactions-*.ts`
+ - `examples/ai-functions/src/stream-text/google/interactions-*.ts`
+
+ Notable examples: `interactions-basic`, `interactions-multi-turn-stateful`,
+ `interactions-multi-turn-stateless`, `interactions-tool-call`,
+ `interactions-google-search`, `interactions-image-output`,
+ `interactions-image-output-modify`, `interactions-image-base64`,
+ `interactions-image-reference`, `interactions-image-url`,
+ `interactions-pdf`, `interactions-structured-output`,
+ `interactions-service-tier`, `interactions-agent-single-turn`, and
+ `interactions-agent-multi-turn`.
+

  ## Gemma Models

  You can use [Gemma models](https://deepmind.google/models/gemma/) with the Google Generative AI API.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@ai-sdk/google",
- "version": "3.0.67",
+ "version": "3.0.68",
  "license": "Apache-2.0",
  "sideEffects": false,
  "main": "./dist/index.js",
@@ -36,8 +36,8 @@
  }
  },
  "dependencies": {
- "@ai-sdk/provider-utils": "4.0.26",
- "@ai-sdk/provider": "3.0.10"
+ "@ai-sdk/provider": "3.0.10",
+ "@ai-sdk/provider-utils": "4.0.26"
  },
  "devDependencies": {
  "@types/node": "20.17.24",
@@ -26,6 +26,12 @@ import type {
  import { GoogleGenerativeAIImageModel } from './google-generative-ai-image-model';
  import { GoogleGenerativeAIVideoModel } from './google-generative-ai-video-model';
  import type { GoogleGenerativeAIVideoModelId } from './google-generative-ai-video-settings';
+ import {
+ GoogleInteractionsLanguageModel,
+ type GoogleInteractionsModelInput,
+ } from './interactions/google-interactions-language-model';
+ import type { GoogleInteractionsModelId } from './interactions/google-interactions-language-model-options';
+ import type { GoogleInteractionsAgentName } from './interactions/google-interactions-agent';

  export interface GoogleGenerativeAIProvider extends ProviderV3 {
  (modelId: GoogleGenerativeAIModelId): LanguageModelV3;
@@ -81,6 +87,17 @@ export interface GoogleGenerativeAIProvider extends ProviderV3 {
  modelId: GoogleGenerativeAIVideoModelId,
  ): Experimental_VideoModelV3;

+ /**
+ * Creates a language model targeting the Gemini Interactions API
+ * (`POST /v1beta/interactions`). Pass either a model ID (string) or
+ * `{ agent: <name> }` to use a Gemini agent preset.
+ */
+ interactions(
+ modelIdOrAgent:
+ | GoogleInteractionsModelId
+ | { agent: GoogleInteractionsAgentName },
+ ): LanguageModelV3;
+
  tools: typeof googleTools;
  }

@@ -194,6 +211,22 @@ export function createGoogleGenerativeAI(
  generateId: options.generateId ?? generateId,
  });

+ const createInteractionsModel = (
+ modelIdOrAgent:
+ | GoogleInteractionsModelId
+ | { agent: GoogleInteractionsAgentName },
+ ) =>
+ new GoogleInteractionsLanguageModel(
+ modelIdOrAgent as GoogleInteractionsModelInput,
+ {
+ provider: `${providerName}.interactions`,
+ baseURL,
+ headers: getHeaders,
+ generateId: options.generateId ?? generateId,
+ fetch: options.fetch,
+ },
+ );
+
  const provider = function (modelId: GoogleGenerativeAIModelId) {
  if (new.target) {
  throw new Error(
@@ -216,6 +249,7 @@
  provider.imageModel = createImageModel;
  provider.video = createVideoModel;
  provider.videoModel = createVideoModel;
+ provider.interactions = createInteractionsModel;
  provider.tools = googleTools;

  return provider as GoogleGenerativeAIProvider;
package/src/index.ts CHANGED
@@ -21,6 +21,12 @@ export type {
  GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions,
  } from './google-generative-ai-video-model';
  export type { GoogleGenerativeAIVideoModelId } from './google-generative-ai-video-settings';
+ export type {
+ GoogleLanguageModelInteractionsOptions,
+ GoogleInteractionsModelId,
+ } from './interactions/google-interactions-language-model-options';
+ export type { GoogleInteractionsProviderMetadata } from './interactions/google-interactions-provider-metadata';
+ export type { GoogleInteractionsAgentName } from './interactions/google-interactions-agent';
  export { createGoogleGenerativeAI, google } from './google-provider';
  export type {
  GoogleGenerativeAIProvider,