@ai-sdk/google 3.0.67 → 3.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/index.d.mts +90 -1
- package/dist/index.d.ts +90 -1
- package/dist/index.js +2383 -49
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2353 -1
- package/dist/index.mjs.map +1 -1
- package/docs/15-google-generative-ai.mdx +396 -0
- package/package.json +3 -3
- package/src/google-provider.ts +34 -0
- package/src/index.ts +6 -0
- package/src/interactions/build-google-interactions-stream-transform.ts +711 -0
- package/src/interactions/convert-google-interactions-usage.ts +47 -0
- package/src/interactions/convert-to-google-interactions-input.ts +630 -0
- package/src/interactions/extract-google-interactions-sources.ts +245 -0
- package/src/interactions/google-interactions-agent.ts +16 -0
- package/src/interactions/google-interactions-api.ts +466 -0
- package/src/interactions/google-interactions-language-model-options.ts +136 -0
- package/src/interactions/google-interactions-language-model.ts +609 -0
- package/src/interactions/google-interactions-prompt.ts +457 -0
- package/src/interactions/google-interactions-provider-metadata.ts +23 -0
- package/src/interactions/map-google-interactions-finish-reason.ts +33 -0
- package/src/interactions/parse-google-interactions-outputs.ts +257 -0
- package/src/interactions/poll-google-interactions.ts +110 -0
- package/src/interactions/prepare-google-interactions-tools.ts +245 -0
- package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { JSONObject, LanguageModelV3Usage } from '@ai-sdk/provider';
|
|
2
|
+
import type { GoogleInteractionsUsage } from './google-interactions-api';
|
|
3
|
+
|
|
4
|
+
export function convertGoogleInteractionsUsage(
|
|
5
|
+
usage: GoogleInteractionsUsage | undefined | null,
|
|
6
|
+
): LanguageModelV3Usage {
|
|
7
|
+
if (usage == null) {
|
|
8
|
+
return {
|
|
9
|
+
inputTokens: {
|
|
10
|
+
total: undefined,
|
|
11
|
+
noCache: undefined,
|
|
12
|
+
cacheRead: undefined,
|
|
13
|
+
cacheWrite: undefined,
|
|
14
|
+
},
|
|
15
|
+
outputTokens: {
|
|
16
|
+
total: undefined,
|
|
17
|
+
text: undefined,
|
|
18
|
+
reasoning: undefined,
|
|
19
|
+
},
|
|
20
|
+
raw: undefined,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
const totalInput = usage.total_input_tokens ?? 0;
|
|
25
|
+
const totalOutput = usage.total_output_tokens ?? 0;
|
|
26
|
+
const totalThought = usage.total_thought_tokens ?? 0;
|
|
27
|
+
const totalCached = usage.total_cached_tokens ?? 0;
|
|
28
|
+
|
|
29
|
+
return {
|
|
30
|
+
inputTokens: {
|
|
31
|
+
total: usage.total_input_tokens ?? undefined,
|
|
32
|
+
noCache:
|
|
33
|
+
usage.total_input_tokens == null ? undefined : totalInput - totalCached,
|
|
34
|
+
cacheRead: usage.total_cached_tokens ?? undefined,
|
|
35
|
+
cacheWrite: undefined,
|
|
36
|
+
},
|
|
37
|
+
outputTokens: {
|
|
38
|
+
total:
|
|
39
|
+
usage.total_output_tokens == null && usage.total_thought_tokens == null
|
|
40
|
+
? undefined
|
|
41
|
+
: totalOutput + totalThought,
|
|
42
|
+
text: usage.total_output_tokens ?? undefined,
|
|
43
|
+
reasoning: usage.total_thought_tokens ?? undefined,
|
|
44
|
+
},
|
|
45
|
+
raw: usage as unknown as JSONObject,
|
|
46
|
+
};
|
|
47
|
+
}
|
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
LanguageModelV3FilePart,
|
|
3
|
+
LanguageModelV3Prompt,
|
|
4
|
+
LanguageModelV3ToolResultOutput,
|
|
5
|
+
SharedV3Warning,
|
|
6
|
+
} from '@ai-sdk/provider';
|
|
7
|
+
import { convertToBase64 } from '@ai-sdk/provider-utils';
|
|
8
|
+
import type {
|
|
9
|
+
GoogleInteractionsContent,
|
|
10
|
+
GoogleInteractionsFunctionResultContent,
|
|
11
|
+
GoogleInteractionsImageContent,
|
|
12
|
+
GoogleInteractionsInput,
|
|
13
|
+
GoogleInteractionsTextContent,
|
|
14
|
+
GoogleInteractionsTurn,
|
|
15
|
+
} from './google-interactions-prompt';
|
|
16
|
+
|
|
17
|
+
function getTopLevelMediaType(mediaType: string): string {
|
|
18
|
+
const slashIndex = mediaType.indexOf('/');
|
|
19
|
+
return slashIndex === -1 ? mediaType : mediaType.substring(0, slashIndex);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function isFullMediaType(mediaType: string): boolean {
|
|
23
|
+
const slashIndex = mediaType.indexOf('/');
|
|
24
|
+
if (slashIndex === -1) {
|
|
25
|
+
return false;
|
|
26
|
+
}
|
|
27
|
+
const subtype = mediaType.substring(slashIndex + 1);
|
|
28
|
+
return subtype.length > 0 && subtype !== '*';
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
 * Wire values for the per-block `resolution` field on image / video input
 * blocks (see `ImageContent.resolution` / `VideoContent.resolution` in
 * js-genai's interactions resources).
 */
export type GoogleInteractionsMediaResolution =
  | 'low'
  | 'medium'
  | 'high'
  | 'ultra_high';

/**
 * Result of converting an AI SDK prompt into the Interactions request shape:
 * the `input` payload, the system instruction joined from any system
 * messages (or `undefined` when there were none), and the warnings collected
 * during conversion.
 */
export type ConvertToGoogleInteractionsInputResult = {
  // Empty string for an empty prompt, a bare content array for a single user
  // turn, or a full turn array otherwise.
  input: GoogleInteractionsInput;
  systemInstruction: string | undefined;
  warnings: Array<SharedV3Warning>;
};
|
|
42
|
+
|
|
43
|
+
/**
 * Converts an AI SDK `LanguageModelV3Prompt` into the Gemini Interactions
 * request shape (`{ input, system_instruction }`).
 *
 * Handles text parts, file parts (image / audio / document / video, all four
 * `data.type` shapes), tool-call/tool-result round-tripping, per-block
 * `signature` round-tripping (`thought.signature`, `function_call.signature`),
 * and statefulness compaction (drop assistant/tool turns whose
 * `providerOptions.google.interactionId === previousInteractionId`).
 *
 * NOTE on PRD Open Q3 (empty-text-with-signature carrier hack from the
 * `:generateContent` provider): unnecessary on Interactions because
 * `thought.signature` and `function_call.signature` are explicit fields on
 * the wire (verified against `googleapis/js-genai`
 * `src/interactions/resources/interactions.ts` `ThoughtContent` /
 * `FunctionCallContent`). When an input reasoning part has empty text + a
 * signature, the converter emits a `thought` block with `signature` and an
 * omitted `summary` — no synthetic empty-text carrier needed.
 */
export function convertToGoogleInteractionsInput({
  prompt,
  previousInteractionId,
  store,
  mediaResolution,
}: {
  prompt: LanguageModelV3Prompt;
  previousInteractionId?: string;
  store?: boolean;
  /**
   * Per-block media resolution applied to every image / video input block
   * (the Interactions wire format places `resolution` on the block, not at
   * the top level). See js-genai
   * `src/interactions/resources/interactions.ts` `ImageContent.resolution`
   * and `VideoContent.resolution`.
   */
  mediaResolution?: GoogleInteractionsMediaResolution;
}): ConvertToGoogleInteractionsInputResult {
  const warnings: Array<SharedV3Warning> = [];

  /*
   * Behavior matrix per PRD § "Public-API contracts" → "Configurable behavior
   * matrix":
   *
   * - `previousInteractionId` set + `store !== false` → compact history (drop
   *   assistant/tool turns whose `providerOptions.google.interactionId`
   *   matches), emit `previous_interaction_id`.
   * - `previousInteractionId` set + `store === false` → emit warning
   *   (incoherent combo), still send full history (NO compaction).
   * - `store === false`, no `previousInteractionId` → no compaction.
   * - Default → no compaction.
   *
   * The actual `previous_interaction_id` / `store` body fields are emitted in
   * the language model's `getArgs`; this converter only handles the history
   * shape and the warning.
   */
  const incoherentCombo = previousInteractionId != null && store === false;
  const shouldCompact = previousInteractionId != null && store !== false;
  if (incoherentCombo) {
    warnings.push({
      type: 'other',
      message:
        'google.interactions: providerOptions.google.previousInteractionId was set together with store: false. These are incoherent (the prior interaction cannot be referenced when nothing was stored on the server); the full history will be sent and previous_interaction_id will still be emitted.',
    });
  }

  const compactedPrompt = shouldCompact
    ? compactPromptForPreviousInteraction({
        prompt,
        previousInteractionId,
      })
    : prompt;

  // System messages are hoisted out of the turn list and joined into a single
  // system_instruction string below.
  const systemTexts: Array<string> = [];
  const turns: Array<GoogleInteractionsTurn> = [];

  for (const message of compactedPrompt) {
    switch (message.role) {
      case 'system': {
        systemTexts.push(message.content);
        break;
      }
      case 'user': {
        // User turns carry text and file blocks; parts of any other type are
        // skipped without a warning here.
        const content: Array<GoogleInteractionsContent> = [];
        for (const part of message.content) {
          if (part.type === 'text') {
            const block: GoogleInteractionsTextContent = {
              type: 'text',
              text: part.text,
            };
            content.push(block);
          } else if (part.type === 'file') {
            const fileBlock = convertFilePartToContent({
              part,
              warnings,
              mediaResolution,
            });
            if (fileBlock != null) {
              content.push(fileBlock);
            }
          }
        }
        const merged = mergeAdjacentTextContent(content);
        if (merged.length > 0) {
          turns.push({ role: 'user', content: merged });
        }
        break;
      }
      case 'assistant': {
        const content: Array<GoogleInteractionsContent> = [];
        for (const part of message.content) {
          if (part.type === 'text') {
            content.push({ type: 'text', text: part.text });
          } else if (part.type === 'reasoning') {
            // Signature round-trips via providerOptions; empty reasoning text
            // omits `summary` (see the Open Q3 note above).
            const signature = part.providerOptions?.google?.signature as
              | string
              | undefined;
            content.push({
              type: 'thought',
              ...(signature != null ? { signature } : {}),
              summary:
                part.text.length > 0
                  ? [{ type: 'text', text: part.text }]
                  : undefined,
            });
          } else if (part.type === 'tool-call') {
            const signature = part.providerOptions?.google?.signature as
              | string
              | undefined;
            // Tool input may arrive as a JSON string or a structured object;
            // both are normalized to an arguments object.
            const args =
              typeof part.input === 'string'
                ? safeParseToolArgs(part.input)
                : ((part.input ?? {}) as Record<string, unknown>);
            content.push({
              type: 'function_call',
              id: part.toolCallId,
              name: part.toolName,
              arguments: args,
              ...(signature != null ? { signature } : {}),
            });
          } else {
            warnings.push({
              type: 'other',
              message: `google.interactions: unsupported assistant content part type "${part.type}"; part dropped.`,
            });
          }
        }
        if (content.length > 0) {
          turns.push({ role: 'model', content });
        }
        break;
      }
      case 'tool': {
        /*
         * Tool-result messages are emitted as a `user` turn whose content
         * holds one `function_result` block per tool-result part. Wire shape
         * (verified against `googleapis/js-genai`
         * `samples/interactions_function_calling_client_state.ts` and
         * `src/interactions/resources/interactions.ts` `FunctionResultContent`
         * around line 979 — RESOLVES PRD Open Q2):
         *
         * {
         *   role: 'user',
         *   content: [
         *     {
         *       type: 'function_result',
         *       call_id: <id from the matching function_call block>,
         *       name: <tool name>,
         *       result: <string | unknown | Array<TextContent|ImageContent>>,
         *       is_error?: boolean,
         *       signature?: string,
         *     },
         *   ],
         * }
         *
         * The `result` field is a discriminated union: a plain string for
         * text-only results, or an array of `text` / `image` content blocks
         * for mixed text/image results. Our converter takes the AI SDK
         * canonical `LanguageModelV3ToolResultOutput` and maps:
         * - `{ type: 'text', value }` → `result: <string>`
         * - `{ type: 'json', value }` → `result: <stringified JSON>`
         * - `{ type: 'error-text', value }` → `result: <string>` + `is_error: true`
         * - `{ type: 'error-json', value }` → `result: <stringified JSON>` + `is_error: true`
         * - `{ type: 'execution-denied', reason }` → `result: <reason>` + `is_error: true`
         * - `{ type: 'content', value: [...] }` → `result: Array<text|image>`
         *   where each AI SDK `file` part with `mediaType: image/*` becomes
         *   an Interactions `image` block (file-data path matches
         *   `convertFilePartToContent` for top-level user images), and `text`
         *   parts pass through. Non-image file parts fall back to a warning
         *   because `FunctionResultContent.result` only accepts text/image.
         */
        const content: Array<GoogleInteractionsContent> = [];
        for (const part of message.content) {
          if (part.type !== 'tool-result') {
            warnings.push({
              type: 'other',
              message: `google.interactions: unsupported tool message part type "${part.type}"; part dropped.`,
            });
            continue;
          }
          const block = convertToolResultPart({
            toolCallId: part.toolCallId,
            toolName: part.toolName,
            output: part.output,
            signature: part.providerOptions?.google?.signature as
              | string
              | undefined,
            warnings,
          });
          content.push(block);
        }
        if (content.length > 0) {
          turns.push({ role: 'user', content });
        }
        break;
      }
    }
  }

  const systemInstruction =
    systemTexts.length > 0 ? systemTexts.join('\n\n') : undefined;

  let input: GoogleInteractionsInput;
  if (turns.length === 0) {
    input = '';
  } else if (
    turns.length === 1 &&
    turns[0].role === 'user' &&
    Array.isArray(turns[0].content)
  ) {
    /*
     * Single-turn user prompt: send the bare `Array<Content>` shape per the
     * Interactions API's preferred single-turn format.
     */
    input = turns[0].content;
  } else {
    input = turns;
  }

  return { input, systemInstruction, warnings };
}
|
|
283
|
+
|
|
284
|
+
/**
|
|
285
|
+
* Maps a single AI SDK `LanguageModelV3FilePart` to a Gemini Interactions
|
|
286
|
+
* content block (`image` / `audio` / `document` / `video`).
|
|
287
|
+
*
|
|
288
|
+
* Rules for the V3 `data` shapes:
|
|
289
|
+
* - `Uint8Array` / `string` (base64) → block with inline `data` (base64) +
|
|
290
|
+
* `mime_type`.
|
|
291
|
+
* - `URL` → block with `uri` set to the URL string verbatim. Files API URIs
|
|
292
|
+
* (e.g. `https://generativelanguage.googleapis.com/v1beta/files/<id>`) and
|
|
293
|
+
* YouTube URLs are passed through the same way.
|
|
294
|
+
*/
|
|
295
|
+
function convertFilePartToContent({
|
|
296
|
+
part,
|
|
297
|
+
warnings,
|
|
298
|
+
mediaResolution,
|
|
299
|
+
}: {
|
|
300
|
+
part: LanguageModelV3FilePart;
|
|
301
|
+
warnings: Array<SharedV3Warning>;
|
|
302
|
+
mediaResolution?: GoogleInteractionsMediaResolution;
|
|
303
|
+
}): GoogleInteractionsContent | undefined {
|
|
304
|
+
const topLevel = getTopLevelMediaType(part.mediaType);
|
|
305
|
+
let kind: 'image' | 'audio' | 'video' | 'document' | undefined;
|
|
306
|
+
switch (topLevel) {
|
|
307
|
+
case 'image':
|
|
308
|
+
kind = 'image';
|
|
309
|
+
break;
|
|
310
|
+
case 'audio':
|
|
311
|
+
kind = 'audio';
|
|
312
|
+
break;
|
|
313
|
+
case 'video':
|
|
314
|
+
kind = 'video';
|
|
315
|
+
break;
|
|
316
|
+
case 'application':
|
|
317
|
+
kind = 'document';
|
|
318
|
+
break;
|
|
319
|
+
default:
|
|
320
|
+
kind = undefined;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
if (kind == null) {
|
|
324
|
+
warnings.push({
|
|
325
|
+
type: 'other',
|
|
326
|
+
message: `google.interactions: unsupported file media type "${part.mediaType}"; part dropped.`,
|
|
327
|
+
});
|
|
328
|
+
return undefined;
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
/*
|
|
332
|
+
* `resolution` is per-block on the wire (`ImageContent.resolution`,
|
|
333
|
+
* `VideoContent.resolution`); only image and video carry it (see
|
|
334
|
+
* `googleapis/js-genai` `src/interactions/resources/interactions.ts`).
|
|
335
|
+
* Audio / document blocks ignore the option silently.
|
|
336
|
+
*/
|
|
337
|
+
const resolutionField =
|
|
338
|
+
mediaResolution != null && (kind === 'image' || kind === 'video')
|
|
339
|
+
? { resolution: mediaResolution }
|
|
340
|
+
: {};
|
|
341
|
+
|
|
342
|
+
if (part.data instanceof URL) {
|
|
343
|
+
return {
|
|
344
|
+
type: kind,
|
|
345
|
+
uri: part.data.toString(),
|
|
346
|
+
...(isFullMediaType(part.mediaType) ? { mime_type: part.mediaType } : {}),
|
|
347
|
+
...resolutionField,
|
|
348
|
+
};
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
if (!isFullMediaType(part.mediaType)) {
|
|
352
|
+
warnings.push({
|
|
353
|
+
type: 'other',
|
|
354
|
+
message: `google.interactions: inline file data requires a full IANA media type (e.g. "image/png"), got "${part.mediaType}"; part dropped.`,
|
|
355
|
+
});
|
|
356
|
+
return undefined;
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
return {
|
|
360
|
+
type: kind,
|
|
361
|
+
data: convertToBase64(part.data),
|
|
362
|
+
mime_type: part.mediaType,
|
|
363
|
+
...resolutionField,
|
|
364
|
+
};
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
/*
|
|
368
|
+
* Drops assistant turns that were part of the linked interaction
|
|
369
|
+
* (`previousInteractionId`) so the API doesn't see them re-sent on top of its
|
|
370
|
+
* server-side state. Also drops any subsequent `tool` (tool-result) message
|
|
371
|
+
* whose `tool-result.toolCallId` matches a `tool-call.toolCallId` from the
|
|
372
|
+
* dropped assistant turn — server-state already has the matching tool result
|
|
373
|
+
* baked in, and re-sending it without its paired call would be malformed.
|
|
374
|
+
*
|
|
375
|
+
* An assistant message is considered "part of the linked interaction" if any
|
|
376
|
+
* of its content parts carry `providerOptions.google.interactionId ===
|
|
377
|
+
* previousInteractionId`. This is stamped by `parseGoogleInteractionsOutputs`
|
|
378
|
+
* (and the stream transformer) on every output content part.
|
|
379
|
+
*
|
|
380
|
+
* User messages are always kept regardless of where they fell in the prior
|
|
381
|
+
* conversation — only assistant model output and its tool plumbing live on the
|
|
382
|
+
* server. (Note that the AI SDK does not stamp `interactionId` onto user
|
|
383
|
+
* messages, so even if it did, this function would not have a way to identify
|
|
384
|
+
* which user message belongs to which interaction.)
|
|
385
|
+
*/
|
|
386
|
+
function compactPromptForPreviousInteraction({
|
|
387
|
+
prompt,
|
|
388
|
+
previousInteractionId,
|
|
389
|
+
}: {
|
|
390
|
+
prompt: LanguageModelV3Prompt;
|
|
391
|
+
previousInteractionId: string;
|
|
392
|
+
}): LanguageModelV3Prompt {
|
|
393
|
+
const out: LanguageModelV3Prompt = [];
|
|
394
|
+
const droppedToolCallIds = new Set<string>();
|
|
395
|
+
|
|
396
|
+
for (const message of prompt) {
|
|
397
|
+
if (message.role === 'assistant') {
|
|
398
|
+
const matchesLinkedInteraction = message.content.some(part => {
|
|
399
|
+
const partInteractionId = (
|
|
400
|
+
part as { providerOptions?: { google?: { interactionId?: string } } }
|
|
401
|
+
).providerOptions?.google?.interactionId;
|
|
402
|
+
return partInteractionId === previousInteractionId;
|
|
403
|
+
});
|
|
404
|
+
if (matchesLinkedInteraction) {
|
|
405
|
+
for (const part of message.content) {
|
|
406
|
+
if (part.type === 'tool-call') {
|
|
407
|
+
droppedToolCallIds.add(part.toolCallId);
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
continue;
|
|
411
|
+
}
|
|
412
|
+
out.push(message);
|
|
413
|
+
continue;
|
|
414
|
+
}
|
|
415
|
+
if (message.role === 'tool') {
|
|
416
|
+
const remaining = message.content.filter(part => {
|
|
417
|
+
if (part.type !== 'tool-result') {
|
|
418
|
+
return true;
|
|
419
|
+
}
|
|
420
|
+
return !droppedToolCallIds.has(part.toolCallId);
|
|
421
|
+
});
|
|
422
|
+
if (remaining.length === 0) {
|
|
423
|
+
continue;
|
|
424
|
+
}
|
|
425
|
+
out.push({
|
|
426
|
+
...message,
|
|
427
|
+
content: remaining as typeof message.content,
|
|
428
|
+
});
|
|
429
|
+
continue;
|
|
430
|
+
}
|
|
431
|
+
out.push(message);
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
return out;
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
function safeParseToolArgs(input: string): Record<string, unknown> {
|
|
438
|
+
try {
|
|
439
|
+
const parsed = JSON.parse(input);
|
|
440
|
+
if (
|
|
441
|
+
parsed != null &&
|
|
442
|
+
typeof parsed === 'object' &&
|
|
443
|
+
!Array.isArray(parsed)
|
|
444
|
+
) {
|
|
445
|
+
return parsed as Record<string, unknown>;
|
|
446
|
+
}
|
|
447
|
+
return { value: parsed };
|
|
448
|
+
} catch {
|
|
449
|
+
return { value: input };
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
function convertToolResultPart({
|
|
454
|
+
toolCallId,
|
|
455
|
+
toolName,
|
|
456
|
+
output,
|
|
457
|
+
signature,
|
|
458
|
+
warnings,
|
|
459
|
+
}: {
|
|
460
|
+
toolCallId: string;
|
|
461
|
+
toolName: string;
|
|
462
|
+
output: LanguageModelV3ToolResultOutput;
|
|
463
|
+
signature: string | undefined;
|
|
464
|
+
warnings: Array<SharedV3Warning>;
|
|
465
|
+
}): GoogleInteractionsFunctionResultContent {
|
|
466
|
+
const base = {
|
|
467
|
+
type: 'function_result' as const,
|
|
468
|
+
call_id: toolCallId,
|
|
469
|
+
name: toolName,
|
|
470
|
+
...(signature != null ? { signature } : {}),
|
|
471
|
+
};
|
|
472
|
+
|
|
473
|
+
switch (output.type) {
|
|
474
|
+
case 'text':
|
|
475
|
+
return { ...base, result: output.value };
|
|
476
|
+
case 'json':
|
|
477
|
+
return { ...base, result: JSON.stringify(output.value) };
|
|
478
|
+
case 'error-text':
|
|
479
|
+
return { ...base, is_error: true, result: output.value };
|
|
480
|
+
case 'error-json':
|
|
481
|
+
return { ...base, is_error: true, result: JSON.stringify(output.value) };
|
|
482
|
+
case 'execution-denied':
|
|
483
|
+
return {
|
|
484
|
+
...base,
|
|
485
|
+
is_error: true,
|
|
486
|
+
result: output.reason ?? 'Tool execution denied by user.',
|
|
487
|
+
};
|
|
488
|
+
case 'content': {
|
|
489
|
+
const blocks: Array<
|
|
490
|
+
GoogleInteractionsTextContent | GoogleInteractionsImageContent
|
|
491
|
+
> = [];
|
|
492
|
+
for (const item of output.value) {
|
|
493
|
+
if (item.type === 'text') {
|
|
494
|
+
blocks.push({ type: 'text', text: item.text });
|
|
495
|
+
} else if (item.type === 'image-data') {
|
|
496
|
+
const imageBlock = filePartToImageBlock({
|
|
497
|
+
part: {
|
|
498
|
+
type: 'file',
|
|
499
|
+
mediaType: item.mediaType,
|
|
500
|
+
data: item.data,
|
|
501
|
+
},
|
|
502
|
+
warnings,
|
|
503
|
+
});
|
|
504
|
+
if (imageBlock != null) {
|
|
505
|
+
blocks.push(imageBlock);
|
|
506
|
+
}
|
|
507
|
+
} else if (item.type === 'image-url') {
|
|
508
|
+
const imageBlock = filePartToImageBlock({
|
|
509
|
+
part: {
|
|
510
|
+
type: 'file',
|
|
511
|
+
mediaType: 'image/*',
|
|
512
|
+
data: new URL(item.url),
|
|
513
|
+
},
|
|
514
|
+
warnings,
|
|
515
|
+
});
|
|
516
|
+
if (imageBlock != null) {
|
|
517
|
+
blocks.push(imageBlock);
|
|
518
|
+
}
|
|
519
|
+
} else if (item.type === 'file-data' || item.type === 'file-url') {
|
|
520
|
+
const mediaType =
|
|
521
|
+
item.type === 'file-data' ? item.mediaType : 'application/*';
|
|
522
|
+
const topLevel = getTopLevelMediaType(mediaType);
|
|
523
|
+
if (topLevel !== 'image') {
|
|
524
|
+
warnings.push({
|
|
525
|
+
type: 'other',
|
|
526
|
+
message: `google.interactions: tool-result file with mediaType "${mediaType}" is not supported (Interactions \`function_result.result\` accepts only text and image content); part dropped.`,
|
|
527
|
+
});
|
|
528
|
+
continue;
|
|
529
|
+
}
|
|
530
|
+
const imageBlock = filePartToImageBlock({
|
|
531
|
+
part:
|
|
532
|
+
item.type === 'file-data'
|
|
533
|
+
? {
|
|
534
|
+
type: 'file',
|
|
535
|
+
mediaType: item.mediaType,
|
|
536
|
+
data: item.data,
|
|
537
|
+
}
|
|
538
|
+
: {
|
|
539
|
+
type: 'file',
|
|
540
|
+
mediaType,
|
|
541
|
+
data: new URL(item.url),
|
|
542
|
+
},
|
|
543
|
+
warnings,
|
|
544
|
+
});
|
|
545
|
+
if (imageBlock != null) {
|
|
546
|
+
blocks.push(imageBlock);
|
|
547
|
+
}
|
|
548
|
+
} else {
|
|
549
|
+
warnings.push({
|
|
550
|
+
type: 'other',
|
|
551
|
+
message: `google.interactions: tool-result content part type "${(item as { type: string }).type}" is not supported; part dropped.`,
|
|
552
|
+
});
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
return { ...base, result: blocks };
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
function filePartToImageBlock({
|
|
561
|
+
part,
|
|
562
|
+
warnings,
|
|
563
|
+
}: {
|
|
564
|
+
part: {
|
|
565
|
+
type: 'file';
|
|
566
|
+
mediaType: string;
|
|
567
|
+
data: Uint8Array | string | URL;
|
|
568
|
+
filename?: string;
|
|
569
|
+
};
|
|
570
|
+
warnings: Array<SharedV3Warning>;
|
|
571
|
+
}): GoogleInteractionsImageContent | undefined {
|
|
572
|
+
if (part.data instanceof URL) {
|
|
573
|
+
return {
|
|
574
|
+
type: 'image',
|
|
575
|
+
uri: part.data.toString(),
|
|
576
|
+
...(isFullMediaType(part.mediaType) ? { mime_type: part.mediaType } : {}),
|
|
577
|
+
};
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
if (!isFullMediaType(part.mediaType)) {
|
|
581
|
+
warnings.push({
|
|
582
|
+
type: 'other',
|
|
583
|
+
message: `google.interactions: tool-result image part requires a full IANA media type (e.g. "image/png"), got "${part.mediaType}"; part dropped.`,
|
|
584
|
+
});
|
|
585
|
+
return undefined;
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
return {
|
|
589
|
+
type: 'image',
|
|
590
|
+
data: convertToBase64(part.data),
|
|
591
|
+
mime_type: part.mediaType,
|
|
592
|
+
};
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
/*
|
|
596
|
+
* Collapses runs of adjacent text content blocks within a single user message
|
|
597
|
+
* into one combined text block, separated by a blank line. The Interactions
|
|
598
|
+
* API has no `text+data` shape, so a `data.type === 'text'` file part is
|
|
599
|
+
* already lowered to a `text` block by `convertFilePartToContent`; merging
|
|
600
|
+
* keeps the wire shape compact and preserves intent when an inline text file
|
|
601
|
+
* sits next to a regular text part. Text blocks carrying `annotations` are
|
|
602
|
+
* left untouched (annotations are tied to specific text spans).
|
|
603
|
+
*/
|
|
604
|
+
function mergeAdjacentTextContent(
|
|
605
|
+
content: Array<GoogleInteractionsContent>,
|
|
606
|
+
): Array<GoogleInteractionsContent> {
|
|
607
|
+
if (content.length < 2) {
|
|
608
|
+
return content;
|
|
609
|
+
}
|
|
610
|
+
const result: Array<GoogleInteractionsContent> = [];
|
|
611
|
+
for (const block of content) {
|
|
612
|
+
const last = result[result.length - 1];
|
|
613
|
+
if (
|
|
614
|
+
block.type === 'text' &&
|
|
615
|
+
last != null &&
|
|
616
|
+
last.type === 'text' &&
|
|
617
|
+
(last as GoogleInteractionsTextContent).annotations == null &&
|
|
618
|
+
(block as GoogleInteractionsTextContent).annotations == null
|
|
619
|
+
) {
|
|
620
|
+
const merged: GoogleInteractionsTextContent = {
|
|
621
|
+
type: 'text',
|
|
622
|
+
text: `${(last as GoogleInteractionsTextContent).text}\n\n${(block as GoogleInteractionsTextContent).text}`,
|
|
623
|
+
};
|
|
624
|
+
result[result.length - 1] = merged;
|
|
625
|
+
continue;
|
|
626
|
+
}
|
|
627
|
+
result.push(block);
|
|
628
|
+
}
|
|
629
|
+
return result;
|
|
630
|
+
}
|