npm - @ai-sdk/google - Versions diffs - 3.0.74 → 3.0.77 - Mend

@ai-sdk/google 3.0.74 → 3.0.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/CHANGELOG.md +19 -0
package/dist/index.d.mts +55 -12
package/dist/index.d.ts +55 -12
package/dist/index.js +687 -375
package/dist/index.js.map +1 -1
package/dist/index.mjs +687 -375
package/dist/index.mjs.map +1 -1
package/dist/internal/index.d.mts +1 -2
package/dist/internal/index.d.ts +1 -2
package/dist/internal/index.js +97 -59
package/dist/internal/index.js.map +1 -1
package/dist/internal/index.mjs +97 -59
package/dist/internal/index.mjs.map +1 -1
package/docs/15-google-generative-ai.mdx +73 -16
package/package.json +1 -1
package/src/google-generative-ai-language-model.ts +104 -56
package/src/google-generative-ai-options.ts +24 -8
package/src/google-provider.ts +9 -4
package/src/interactions/build-google-interactions-stream-transform.ts +285 -154
package/src/interactions/convert-to-google-interactions-input.ts +57 -133
package/src/interactions/extract-google-interactions-sources.ts +3 -3
package/src/interactions/google-interactions-agent.ts +6 -7
package/src/interactions/google-interactions-api.ts +179 -115
package/src/interactions/google-interactions-language-model-options.ts +126 -0
package/src/interactions/google-interactions-language-model.ts +173 -60
package/src/interactions/google-interactions-prompt.ts +239 -114
package/src/interactions/map-google-interactions-finish-reason.ts +3 -5
package/src/interactions/parse-google-interactions-outputs.ts +80 -74
package/src/interactions/prepare-google-interactions-tools.ts +1 -1
package/src/interactions/stream-google-interactions.ts +2 -2
package/src/interactions/synthesize-google-interactions-agent-stream.ts +1 -1

package/src/interactions/google-interactions-language-model.ts CHANGED

@@ -32,8 +32,12 @@ import {
 } from './google-interactions-language-model-options';
 import type {
   GoogleInteractionsAgentConfig,
+  GoogleInteractionsEnvironmentSource,
   GoogleInteractionsGenerationConfig,
+  GoogleInteractionsNetworkAllowlistEntry,
+  GoogleInteractionsNetworkConfig,
   GoogleInteractionsRequestBody,
+  GoogleInteractionsResponseFormatEntry,
   GoogleInteractionsTool,
   GoogleInteractionsToolChoice,
 } from './google-interactions-prompt';
@@ -58,7 +62,8 @@ export type GoogleInteractionsConfig = {
 export type GoogleInteractionsModelInput =
   | GoogleInteractionsModelId
-  | { agent: string };
+  | { agent: string }
+  | { managedAgent: string };
 export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
   readonly specificationVersion = 'v3';
@@ -80,6 +85,9 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
     if (typeof modelOrAgent === 'string') {
       this.modelId = modelOrAgent;
       this.agent = undefined;
+    } else if ('managedAgent' in modelOrAgent) {
+      this.modelId = modelOrAgent.managedAgent;
+      this.agent = modelOrAgent.managedAgent;
     } else {
       this.modelId = modelOrAgent.agent;
       this.agent = modelOrAgent.agent;
@@ -140,28 +148,22 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
     }
     /*
-     * Structured output mapping (resolves PRD Open Q1).
+     * `response_format` is a polymorphic array of entries. Three sources
+     * contribute, in order:
      *
-     * The Interactions API exposes structured output via two top-level body
-     * fields: `response_mime_type` (always `'application/json'` here) and
-     * `response_format` (typed as `unknown` in the js-genai SDK). Per the
-     * canonical sample at
-     * `googleapis/js-genai/sdk-samples/interactions_structured_output_json.ts`,
-     * `response_format` accepts a **plain JSON Schema** value directly - no
-     * wrapping object, no OpenAPI conversion. The js-genai resource type
-     * (`src/interactions/resources/interactions.ts:1399`) confirms the field is
-     * passed through verbatim. We therefore send the AI SDK
-     * `responseFormat.schema` (a `JSONSchema7`) as-is.
-     *
-     * If a future API revision rejects plain JSON Schema, fall back to
-     * `convertJSONSchemaToOpenAPISchema(...)` (already imported by
-     * `google-language-model.ts`); empirically that has not been needed.
+     *   1. AI SDK call-level `responseFormat: { type: 'json', schema }` →
+     *      `{ type: 'text', mime_type: 'application/json', schema }`.
+     *   2. `providerOptions.google.responseFormat` (primary path) — entries
+     *      are appended verbatim with camelCase → snake_case translation.
+     *   3. `providerOptions.google.imageConfig` (deprecated fallback) — only
+     *      contributes if no `{type:'image'}` entry was already provided via
+     *      sources 1 or 2; emits a deprecation warning when used.
      *
      * Agent calls cannot send `generation_config` and (per the API) cannot
-     * combine with structured output - emit a warning and drop the field.
+     * combine with structured output — emit a warning and drop the field.
      */
-    let responseMimeType: string | undefined;
-    let responseFormat: unknown | undefined;
+    const responseFormatEntries: Array<GoogleInteractionsResponseFormatEntry> =
+      [];
     if (options.responseFormat?.type === 'json') {
       if (isAgent) {
         warnings.push({
@@ -170,9 +172,43 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
             'google.interactions: structured output (responseFormat) is not supported when an agent is set; responseFormat will be ignored.',
         });
       } else {
-        responseMimeType = 'application/json';
-        if (options.responseFormat.schema != null) {
-          responseFormat = options.responseFormat.schema;
+        const entry: GoogleInteractionsResponseFormatEntry = {
+          type: 'text',
+          mime_type: 'application/json',
+          ...(options.responseFormat.schema != null
+            ? { schema: options.responseFormat.schema }
+            : {}),
+        };
+        responseFormatEntries.push(entry);
+      }
+    }
+    if (opts?.responseFormat != null) {
+      for (const entry of opts.responseFormat) {
+        if (entry.type === 'text') {
+          responseFormatEntries.push(
+            pruneUndefined({
+              type: 'text' as const,
+              mime_type: entry.mimeType ?? undefined,
+              schema: entry.schema ?? undefined,
+            }),
+          );
+        } else if (entry.type === 'image') {
+          responseFormatEntries.push(
+            pruneUndefined({
+              type: 'image' as const,
+              mime_type: entry.mimeType ?? undefined,
+              aspect_ratio: entry.aspectRatio ?? undefined,
+              image_size: entry.imageSize ?? undefined,
+            }),
+          );
+        } else if (entry.type === 'audio') {
+          responseFormatEntries.push(
+            pruneUndefined({
+              type: 'audio' as const,
+              mime_type: entry.mimeType ?? undefined,
+            }),
+          );
         }
       }
     }
@@ -205,14 +241,13 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
     /*
      * The Interactions API splits per-call config into `generation_config`
      * (model branch) and `agent_config` (agent branch); the two are mutually
-     * exclusive. We stay minimal here for TASK-1 - only the AI SDK call-level
-     * generation params and the thinking/imageConfig provider options flow
-     * into `generation_config`. Tool-related fields land here in later tasks.
+     * exclusive. The AI SDK call-level generation params and the thinking /
+     * imageConfig provider options flow into `generation_config`.
      *
-     * When an agent is set, none of these fields are accepted by the API. Per
-     * PRD US 31 we emit a single `LanguageModelV3CallWarning` listing the
-     * dropped field names and continue (do not throw); the agent-only
-     * `agent_config` field supersedes them.
+     * When an agent is set, none of these fields are accepted by the API.
+     * Emit a single `LanguageModelV3CallWarning` listing the dropped field
+     * names and continue (do not throw); the agent-only `agent_config`
+     * field supersedes them.
      */
     let generationConfig: GoogleInteractionsGenerationConfig | undefined;
     if (isAgent) {
@@ -249,15 +284,38 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
         max_output_tokens: options.maxOutputTokens ?? undefined,
         thinking_level: opts?.thinkingLevel ?? undefined,
         thinking_summaries: opts?.thinkingSummaries ?? undefined,
-        image_config:
-          opts?.imageConfig != null
-            ? pruneUndefined({
-                aspect_ratio: opts.imageConfig.aspectRatio ?? undefined,
-                image_size: opts.imageConfig.imageSize ?? undefined,
-              })
-            : undefined,
         tool_choice: toolChoiceForBody,
       });
+      /*
+       * Deprecated fallback path: `imageConfig` contributes an image entry
+       * only when none was supplied via `responseFormat`. A warning is
+       * always emitted when `imageConfig` is set so callers migrate to the
+       * `responseFormat` shape.
+       */
+      if (opts?.imageConfig != null) {
+        const alreadyHasImageEntry = responseFormatEntries.some(
+          entry => entry.type === 'image',
+        );
+        warnings.push({
+          type: 'other',
+          message: alreadyHasImageEntry
+            ? 'google.interactions: providerOptions.google.imageConfig is deprecated and was ignored because providerOptions.google.responseFormat already supplies an image entry. Use responseFormat exclusively.'
+            : 'google.interactions: providerOptions.google.imageConfig is deprecated. Use providerOptions.google.responseFormat with a { type: "image", ... } entry instead.',
+        });
+        if (!alreadyHasImageEntry) {
+          responseFormatEntries.push({
+            type: 'image',
+            mime_type: 'image/png',
+            ...(opts.imageConfig.aspectRatio != null
+              ? { aspect_ratio: opts.imageConfig.aspectRatio }
+              : {}),
+            ...(opts.imageConfig.imageSize != null
+              ? { image_size: opts.imageConfig.imageSize }
+              : {}),
+          });
+        }
+      }
     }
     let agentConfig: GoogleInteractionsAgentConfig | undefined;
@@ -275,26 +333,68 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
       }
     }
+    let environment: GoogleInteractionsRequestBody['environment'];
+    if (opts?.environment != null) {
+      if (!isAgent) {
+        warnings.push({
+          type: 'other',
+          message:
+            'google.interactions: environment is only supported when an agent is set; environment will be omitted from the request body.',
+        });
+      } else if (typeof opts.environment === 'string') {
+        environment = opts.environment;
+      } else {
+        const env = opts.environment;
+        const sources: Array<GoogleInteractionsEnvironmentSource> | undefined =
+          env.sources?.map(s => {
+            if (s.type === 'inline') {
+              return {
+                type: 'inline' as const,
+                content: s.content,
+                target: s.target,
+              };
+            }
+            return pruneUndefined({
+              type: s.type,
+              source: s.source,
+              target: s.target ?? undefined,
+            }) as GoogleInteractionsEnvironmentSource;
+          });
+        let network: GoogleInteractionsNetworkConfig | undefined;
+        if (env.network === 'disabled') {
+          network = 'disabled';
+        } else if (env.network != null) {
+          network = {
+            allowlist: env.network.allowlist.map(entry =>
+              pruneUndefined({
+                domain: entry.domain,
+                transform: entry.transform ?? undefined,
+              }),
+            ) as Array<GoogleInteractionsNetworkAllowlistEntry>,
+          };
+        }
+        environment = pruneUndefined({
+          type: 'remote' as const,
+          sources: sources != null && sources.length > 0 ? sources : undefined,
+          network,
+        });
+      }
+    }
     /*
-     * Agent calls require `background: true` on the wire — otherwise the API
-     * rejects them with `background=true is required for agent interactions.`
-     * The server returns a non-terminal status (`in_progress`/`requires_action`)
-     * and the final outputs are streamed via `GET /interactions/{id}?stream=true`
-     * (or polled via `GET /interactions/{id}`). This is handled internally in
-     * `doGenerate` / `doStream` so the user-facing surface stays identical to
-     * model-id calls.
-     *
-     * Model-id calls retain their original synchronous behavior — no
-     * `background` field is sent. (No documented model accepts `background:
-     * true` today; revisit when one does.)
+     * `background` is opt-in via `providerOptions.google.background`. Some
+     * agents require it because their server-side workflow cannot complete
+     * within a single request; others reject it. When `background: true`, the
+     * POST returns a non-terminal status and the SDK polls
+     * `GET /interactions/{id}` until the work completes.
      */
     const args: GoogleInteractionsRequestBody = pruneUndefined({
       ...(isAgent ? { agent: this.agent } : { model: this.modelId }),
       input,
       system_instruction: systemInstruction,
       tools: toolsForBody,
-      response_format: responseFormat,
-      response_mime_type: responseMimeType,
+      response_format:
+        responseFormatEntries.length > 0 ? responseFormatEntries : undefined,
       response_modalities:
         opts?.responseModalities != null
           ? (opts.responseModalities as Array<
@@ -309,13 +409,15 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
           ? generationConfig
           : undefined,
       agent_config: agentConfig,
-      ...(isAgent ? { background: true } : {}),
+      environment,
+      background: opts?.background ?? undefined,
     });
     return {
       args,
       warnings,
       isAgent,
+      isBackground: opts?.background === true,
       pollingTimeoutMs: opts?.pollingTimeoutMs ?? undefined,
     };
   }
@@ -329,6 +431,7 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
     const url = `${this.config.baseURL}/interactions`;
     const mergedHeaders = combineHeaders(
+      INTERACTIONS_API_REVISION_HEADER,
       this.config.headers ? await resolve(this.config.headers) : undefined,
       options.headers,
     );
@@ -352,8 +455,8 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
     } = postResult;
     /*
-     * Agent calls run with `background: true`; the POST returns immediately
-     * with a non-terminal status (`in_progress` / `requires_action`). Poll
+     * Agent calls may return a non-terminal status (`in_progress` /
+     * `requires_action`) when invoked with `background: true`. Poll
      * `GET /interactions/{id}` until terminal so the user-facing surface
      * matches a synchronous call.
      */
@@ -383,7 +486,7 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
         : undefined;
     const { content, hasFunctionCall } = parseGoogleInteractionsOutputs({
-      outputs: response.outputs ?? null,
+      steps: response.steps ?? null,
       generateId: this.config.generateId ?? defaultGenerateId,
       interactionId,
     });
@@ -451,24 +554,25 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
   async doStream(
     options: LanguageModelV3CallOptions,
   ): Promise<LanguageModelV3StreamResult> {
-    const { args, warnings, isAgent, pollingTimeoutMs } =
+    const { args, warnings, isBackground, pollingTimeoutMs } =
       await this.getArgs(options);
     const url = `${this.config.baseURL}/interactions`;
     const mergedHeaders = combineHeaders(
+      INTERACTIONS_API_REVISION_HEADER,
       this.config.headers ? await resolve(this.config.headers) : undefined,
       options.headers,
     );
     /*
-     * Agent calls require `background: true`, which is incompatible with
-     * `stream: true` on POST. Drive these via POST background -> GET stream
-     * (with terminal-status short-circuit). The user-facing stream surface
-     * stays identical -- text-start / text-delta / text-end / finish parts
-     * are emitted in the same order as a true SSE response.
+     * `background: true` is incompatible with `stream: true` on POST. Drive
+     * background calls via POST background -> GET stream (with terminal-status
+     * short-circuit). The user-facing stream surface stays identical --
+     * text-start / text-delta / text-end / finish parts are emitted in the
+     * same order as a true SSE response.
      */
-    if (isAgent) {
+    if (isBackground) {
       return this.doStreamBackground({
         args,
         warnings,
@@ -625,6 +729,15 @@ export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
   }
 }
+/*
+ * Pins the Interactions API revision the SDK targets. Sent on every request
+ * the model issues so model-id calls, agent calls, polling, SSE reconnects,
+ * and cancellation all hit the same schema.
+ */
+const INTERACTIONS_API_REVISION_HEADER: Record<string, string> = {
+  'Api-Revision': '2026-05-20',
+};
 function pruneUndefined<T extends Record<string, unknown>>(obj: T): T {
   const result: Record<string, unknown> = {};
   for (const [key, value] of Object.entries(obj)) {