@ai-sdk/google 3.0.74 → 3.0.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/CHANGELOG.md +19 -0
  2. package/dist/index.d.mts +55 -12
  3. package/dist/index.d.ts +55 -12
  4. package/dist/index.js +687 -375
  5. package/dist/index.js.map +1 -1
  6. package/dist/index.mjs +687 -375
  7. package/dist/index.mjs.map +1 -1
  8. package/dist/internal/index.d.mts +1 -2
  9. package/dist/internal/index.d.ts +1 -2
  10. package/dist/internal/index.js +97 -59
  11. package/dist/internal/index.js.map +1 -1
  12. package/dist/internal/index.mjs +97 -59
  13. package/dist/internal/index.mjs.map +1 -1
  14. package/docs/15-google-generative-ai.mdx +73 -16
  15. package/package.json +1 -1
  16. package/src/google-generative-ai-language-model.ts +104 -56
  17. package/src/google-generative-ai-options.ts +24 -8
  18. package/src/google-provider.ts +9 -4
  19. package/src/interactions/build-google-interactions-stream-transform.ts +285 -154
  20. package/src/interactions/convert-to-google-interactions-input.ts +57 -133
  21. package/src/interactions/extract-google-interactions-sources.ts +3 -3
  22. package/src/interactions/google-interactions-agent.ts +6 -7
  23. package/src/interactions/google-interactions-api.ts +179 -115
  24. package/src/interactions/google-interactions-language-model-options.ts +126 -0
  25. package/src/interactions/google-interactions-language-model.ts +173 -60
  26. package/src/interactions/google-interactions-prompt.ts +239 -114
  27. package/src/interactions/map-google-interactions-finish-reason.ts +3 -5
  28. package/src/interactions/parse-google-interactions-outputs.ts +80 -74
  29. package/src/interactions/prepare-google-interactions-tools.ts +1 -1
  30. package/src/interactions/stream-google-interactions.ts +2 -2
  31. package/src/interactions/synthesize-google-interactions-agent-stream.ts +1 -1
@@ -32,8 +32,12 @@ import {
32
32
  } from './google-interactions-language-model-options';
33
33
  import type {
34
34
  GoogleInteractionsAgentConfig,
35
+ GoogleInteractionsEnvironmentSource,
35
36
  GoogleInteractionsGenerationConfig,
37
+ GoogleInteractionsNetworkAllowlistEntry,
38
+ GoogleInteractionsNetworkConfig,
36
39
  GoogleInteractionsRequestBody,
40
+ GoogleInteractionsResponseFormatEntry,
37
41
  GoogleInteractionsTool,
38
42
  GoogleInteractionsToolChoice,
39
43
  } from './google-interactions-prompt';
@@ -58,7 +62,8 @@ export type GoogleInteractionsConfig = {
58
62
 
59
63
  export type GoogleInteractionsModelInput =
60
64
  | GoogleInteractionsModelId
61
- | { agent: string };
65
+ | { agent: string }
66
+ | { managedAgent: string };
62
67
 
63
68
  export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
64
69
  readonly specificationVersion = 'v3';
@@ -80,6 +85,9 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
80
85
  if (typeof modelOrAgent === 'string') {
81
86
  this.modelId = modelOrAgent;
82
87
  this.agent = undefined;
88
+ } else if ('managedAgent' in modelOrAgent) {
89
+ this.modelId = modelOrAgent.managedAgent;
90
+ this.agent = modelOrAgent.managedAgent;
83
91
  } else {
84
92
  this.modelId = modelOrAgent.agent;
85
93
  this.agent = modelOrAgent.agent;
@@ -140,28 +148,22 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
140
148
  }
141
149
 
142
150
  /*
143
- * Structured output mapping (resolves PRD Open Q1).
151
+ * `response_format` is a polymorphic array of entries. Three sources
152
+ * contribute, in order:
144
153
  *
145
- * The Interactions API exposes structured output via two top-level body
146
- * fields: `response_mime_type` (always `'application/json'` here) and
147
- * `response_format` (typed as `unknown` in the js-genai SDK). Per the
148
- * canonical sample at
149
- * `googleapis/js-genai/sdk-samples/interactions_structured_output_json.ts`,
150
- * `response_format` accepts a **plain JSON Schema** value directly - no
151
- * wrapping object, no OpenAPI conversion. The js-genai resource type
152
- * (`src/interactions/resources/interactions.ts:1399`) confirms the field is
153
- * passed through verbatim. We therefore send the AI SDK
154
- * `responseFormat.schema` (a `JSONSchema7`) as-is.
155
- *
156
- * If a future API revision rejects plain JSON Schema, fall back to
157
- * `convertJSONSchemaToOpenAPISchema(...)` (already imported by
158
- * `google-language-model.ts`); empirically that has not been needed.
154
+ * 1. AI SDK call-level `responseFormat: { type: 'json', schema }` →
155
+ * `{ type: 'text', mime_type: 'application/json', schema }`.
156
+ * 2. `providerOptions.google.responseFormat` (primary path) — entries
157
+ * are appended verbatim with camelCase → snake_case translation.
158
+ * 3. `providerOptions.google.imageConfig` (deprecated fallback) — only
159
+ * contributes if no `{type:'image'}` entry was already provided via
160
+ * sources 1 or 2; emits a deprecation warning when used.
159
161
  *
160
162
  * Agent calls cannot send `generation_config` and (per the API) cannot
161
- * combine with structured output - emit a warning and drop the field.
163
+ * combine with structured output — emit a warning and drop the field.
162
164
  */
163
- let responseMimeType: string | undefined;
164
- let responseFormat: unknown | undefined;
165
+ const responseFormatEntries: Array<GoogleInteractionsResponseFormatEntry> =
166
+ [];
165
167
  if (options.responseFormat?.type === 'json') {
166
168
  if (isAgent) {
167
169
  warnings.push({
@@ -170,9 +172,43 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
170
172
  'google.interactions: structured output (responseFormat) is not supported when an agent is set; responseFormat will be ignored.',
171
173
  });
172
174
  } else {
173
- responseMimeType = 'application/json';
174
- if (options.responseFormat.schema != null) {
175
- responseFormat = options.responseFormat.schema;
175
+ const entry: GoogleInteractionsResponseFormatEntry = {
176
+ type: 'text',
177
+ mime_type: 'application/json',
178
+ ...(options.responseFormat.schema != null
179
+ ? { schema: options.responseFormat.schema }
180
+ : {}),
181
+ };
182
+ responseFormatEntries.push(entry);
183
+ }
184
+ }
185
+
186
+ if (opts?.responseFormat != null) {
187
+ for (const entry of opts.responseFormat) {
188
+ if (entry.type === 'text') {
189
+ responseFormatEntries.push(
190
+ pruneUndefined({
191
+ type: 'text' as const,
192
+ mime_type: entry.mimeType ?? undefined,
193
+ schema: entry.schema ?? undefined,
194
+ }),
195
+ );
196
+ } else if (entry.type === 'image') {
197
+ responseFormatEntries.push(
198
+ pruneUndefined({
199
+ type: 'image' as const,
200
+ mime_type: entry.mimeType ?? undefined,
201
+ aspect_ratio: entry.aspectRatio ?? undefined,
202
+ image_size: entry.imageSize ?? undefined,
203
+ }),
204
+ );
205
+ } else if (entry.type === 'audio') {
206
+ responseFormatEntries.push(
207
+ pruneUndefined({
208
+ type: 'audio' as const,
209
+ mime_type: entry.mimeType ?? undefined,
210
+ }),
211
+ );
176
212
  }
177
213
  }
178
214
  }
@@ -205,14 +241,13 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
205
241
  /*
206
242
  * The Interactions API splits per-call config into `generation_config`
207
243
  * (model branch) and `agent_config` (agent branch); the two are mutually
208
- * exclusive. We stay minimal here for TASK-1 - only the AI SDK call-level
209
- * generation params and the thinking/imageConfig provider options flow
210
- * into `generation_config`. Tool-related fields land here in later tasks.
244
+ * exclusive. The AI SDK call-level generation params and the thinking
245
+ * provider options flow into `generation_config`; the deprecated `imageConfig` option contributes a `response_format` image entry instead.
211
246
  *
212
- * When an agent is set, none of these fields are accepted by the API. Per
213
- * PRD US 31 we emit a single `LanguageModelV3CallWarning` listing the
214
- * dropped field names and continue (do not throw); the agent-only
215
- * `agent_config` field supersedes them.
247
+ * When an agent is set, none of these fields are accepted by the API.
248
+ * Emit a single `LanguageModelV3CallWarning` listing the dropped field
249
+ * names and continue (do not throw); the agent-only `agent_config`
250
+ * field supersedes them.
216
251
  */
217
252
  let generationConfig: GoogleInteractionsGenerationConfig | undefined;
218
253
  if (isAgent) {
@@ -249,15 +284,38 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
249
284
  max_output_tokens: options.maxOutputTokens ?? undefined,
250
285
  thinking_level: opts?.thinkingLevel ?? undefined,
251
286
  thinking_summaries: opts?.thinkingSummaries ?? undefined,
252
- image_config:
253
- opts?.imageConfig != null
254
- ? pruneUndefined({
255
- aspect_ratio: opts.imageConfig.aspectRatio ?? undefined,
256
- image_size: opts.imageConfig.imageSize ?? undefined,
257
- })
258
- : undefined,
259
287
  tool_choice: toolChoiceForBody,
260
288
  });
289
+
290
+ /*
291
+ * Deprecated fallback path: `imageConfig` contributes an image entry
292
+ * only when none was supplied via `responseFormat`. A warning is
293
+ * always emitted when `imageConfig` is set so callers migrate to the
294
+ * `responseFormat` shape.
295
+ */
296
+ if (opts?.imageConfig != null) {
297
+ const alreadyHasImageEntry = responseFormatEntries.some(
298
+ entry => entry.type === 'image',
299
+ );
300
+ warnings.push({
301
+ type: 'other',
302
+ message: alreadyHasImageEntry
303
+ ? 'google.interactions: providerOptions.google.imageConfig is deprecated and was ignored because providerOptions.google.responseFormat already supplies an image entry. Use responseFormat exclusively.'
304
+ : 'google.interactions: providerOptions.google.imageConfig is deprecated. Use providerOptions.google.responseFormat with a { type: "image", ... } entry instead.',
305
+ });
306
+ if (!alreadyHasImageEntry) {
307
+ responseFormatEntries.push({
308
+ type: 'image',
309
+ mime_type: 'image/png',
310
+ ...(opts.imageConfig.aspectRatio != null
311
+ ? { aspect_ratio: opts.imageConfig.aspectRatio }
312
+ : {}),
313
+ ...(opts.imageConfig.imageSize != null
314
+ ? { image_size: opts.imageConfig.imageSize }
315
+ : {}),
316
+ });
317
+ }
318
+ }
261
319
  }
262
320
 
263
321
  let agentConfig: GoogleInteractionsAgentConfig | undefined;
@@ -275,26 +333,68 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
275
333
  }
276
334
  }
277
335
 
336
+ let environment: GoogleInteractionsRequestBody['environment'];
337
+ if (opts?.environment != null) {
338
+ if (!isAgent) {
339
+ warnings.push({
340
+ type: 'other',
341
+ message:
342
+ 'google.interactions: environment is only supported when an agent is set; environment will be omitted from the request body.',
343
+ });
344
+ } else if (typeof opts.environment === 'string') {
345
+ environment = opts.environment;
346
+ } else {
347
+ const env = opts.environment;
348
+ const sources: Array<GoogleInteractionsEnvironmentSource> | undefined =
349
+ env.sources?.map(s => {
350
+ if (s.type === 'inline') {
351
+ return {
352
+ type: 'inline' as const,
353
+ content: s.content,
354
+ target: s.target,
355
+ };
356
+ }
357
+ return pruneUndefined({
358
+ type: s.type,
359
+ source: s.source,
360
+ target: s.target ?? undefined,
361
+ }) as GoogleInteractionsEnvironmentSource;
362
+ });
363
+ let network: GoogleInteractionsNetworkConfig | undefined;
364
+ if (env.network === 'disabled') {
365
+ network = 'disabled';
366
+ } else if (env.network != null) {
367
+ network = {
368
+ allowlist: env.network.allowlist.map(entry =>
369
+ pruneUndefined({
370
+ domain: entry.domain,
371
+ transform: entry.transform ?? undefined,
372
+ }),
373
+ ) as Array<GoogleInteractionsNetworkAllowlistEntry>,
374
+ };
375
+ }
376
+ environment = pruneUndefined({
377
+ type: 'remote' as const,
378
+ sources: sources != null && sources.length > 0 ? sources : undefined,
379
+ network,
380
+ });
381
+ }
382
+ }
383
+
278
384
  /*
279
- * Agent calls require `background: true` on the wire otherwise the API
280
- * rejects them with `background=true is required for agent interactions.`
281
- * The server returns a non-terminal status (`in_progress`/`requires_action`)
282
- * and the final outputs are streamed via `GET /interactions/{id}?stream=true`
283
- * (or polled via `GET /interactions/{id}`). This is handled internally in
284
- * `doGenerate` / `doStream` so the user-facing surface stays identical to
285
- * model-id calls.
286
- *
287
- * Model-id calls retain their original synchronous behavior — no
288
- * `background` field is sent. (No documented model accepts `background:
289
- * true` today; revisit when one does.)
385
+ * `background` is opt-in via `providerOptions.google.background`. Some
386
+ * agents require it because their server-side workflow cannot complete
387
+ * within a single request; others reject it. When `background: true`, the
388
+ * POST returns a non-terminal status and the SDK polls
389
+ * `GET /interactions/{id}` until the work completes.
290
390
  */
291
391
  const args: GoogleInteractionsRequestBody = pruneUndefined({
292
392
  ...(isAgent ? { agent: this.agent } : { model: this.modelId }),
293
393
  input,
294
394
  system_instruction: systemInstruction,
295
395
  tools: toolsForBody,
296
- response_format: responseFormat,
297
- response_mime_type: responseMimeType,
396
+ response_format:
397
+ responseFormatEntries.length > 0 ? responseFormatEntries : undefined,
298
398
  response_modalities:
299
399
  opts?.responseModalities != null
300
400
  ? (opts.responseModalities as Array<
@@ -309,13 +409,15 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
309
409
  ? generationConfig
310
410
  : undefined,
311
411
  agent_config: agentConfig,
312
- ...(isAgent ? { background: true } : {}),
412
+ environment,
413
+ background: opts?.background ?? undefined,
313
414
  });
314
415
 
315
416
  return {
316
417
  args,
317
418
  warnings,
318
419
  isAgent,
420
+ isBackground: opts?.background === true,
319
421
  pollingTimeoutMs: opts?.pollingTimeoutMs ?? undefined,
320
422
  };
321
423
  }
@@ -329,6 +431,7 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
329
431
  const url = `${this.config.baseURL}/interactions`;
330
432
 
331
433
  const mergedHeaders = combineHeaders(
434
+ INTERACTIONS_API_REVISION_HEADER,
332
435
  this.config.headers ? await resolve(this.config.headers) : undefined,
333
436
  options.headers,
334
437
  );
@@ -352,8 +455,8 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
352
455
  } = postResult;
353
456
 
354
457
  /*
355
- * Agent calls run with `background: true`; the POST returns immediately
356
- * with a non-terminal status (`in_progress` / `requires_action`). Poll
458
+ * Agent calls may return a non-terminal status (`in_progress` /
459
+ * `requires_action`) when invoked with `background: true`. Poll
357
460
  * `GET /interactions/{id}` until terminal so the user-facing surface
358
461
  * matches a synchronous call.
359
462
  */
@@ -383,7 +486,7 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
383
486
  : undefined;
384
487
 
385
488
  const { content, hasFunctionCall } = parseGoogleInteractionsOutputs({
386
- outputs: response.outputs ?? null,
489
+ steps: response.steps ?? null,
387
490
  generateId: this.config.generateId ?? defaultGenerateId,
388
491
  interactionId,
389
492
  });
@@ -451,24 +554,25 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
451
554
  async doStream(
452
555
  options: LanguageModelV3CallOptions,
453
556
  ): Promise<LanguageModelV3StreamResult> {
454
- const { args, warnings, isAgent, pollingTimeoutMs } =
557
+ const { args, warnings, isBackground, pollingTimeoutMs } =
455
558
  await this.getArgs(options);
456
559
 
457
560
  const url = `${this.config.baseURL}/interactions`;
458
561
 
459
562
  const mergedHeaders = combineHeaders(
563
+ INTERACTIONS_API_REVISION_HEADER,
460
564
  this.config.headers ? await resolve(this.config.headers) : undefined,
461
565
  options.headers,
462
566
  );
463
567
 
464
568
  /*
465
- * Agent calls require `background: true`, which is incompatible with
466
- * `stream: true` on POST. Drive these via POST background -> GET stream
467
- * (with terminal-status short-circuit). The user-facing stream surface
468
- * stays identical -- text-start / text-delta / text-end / finish parts
469
- * are emitted in the same order as a true SSE response.
569
+ * `background: true` is incompatible with `stream: true` on POST. Drive
570
+ * background calls via POST background -> GET stream (with terminal-status
571
+ * short-circuit). The user-facing stream surface stays identical --
572
+ * text-start / text-delta / text-end / finish parts are emitted in the
573
+ * same order as a true SSE response.
470
574
  */
471
- if (isAgent) {
575
+ if (isBackground) {
472
576
  return this.doStreamBackground({
473
577
  args,
474
578
  warnings,
@@ -625,6 +729,15 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
625
729
  }
626
730
  }
627
731
 
732
+ /*
733
+ * Pins the Interactions API revision the SDK targets. Sent on every request
734
+ * the model issues so model-id calls, agent calls, polling, SSE reconnects,
735
+ * and cancellation all hit the same schema.
736
+ */
737
+ const INTERACTIONS_API_REVISION_HEADER: Record<string, string> = {
738
+ 'Api-Revision': '2026-05-20',
739
+ };
740
+
628
741
  function pruneUndefined<T extends Record<string, unknown>>(obj: T): T {
629
742
  const result: Record<string, unknown> = {};
630
743
  for (const [key, value] of Object.entries(obj)) {