npm - @ai-sdk/google - Versions diffs - 3.0.66 → 3.0.68 - Mend

@ai-sdk/google 3.0.66 → 3.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/CHANGELOG.md +13 -0
package/dist/index.d.mts +90 -1
package/dist/index.d.ts +90 -1
package/dist/index.js +2383 -49
package/dist/index.js.map +1 -1
package/dist/index.mjs +2353 -1
package/dist/index.mjs.map +1 -1
package/docs/15-google-generative-ai.mdx +396 -0
package/package.json +2 -2
package/src/google-provider.ts +34 -0
package/src/index.ts +6 -0
package/src/interactions/build-google-interactions-stream-transform.ts +711 -0
package/src/interactions/convert-google-interactions-usage.ts +47 -0
package/src/interactions/convert-to-google-interactions-input.ts +630 -0
package/src/interactions/extract-google-interactions-sources.ts +245 -0
package/src/interactions/google-interactions-agent.ts +16 -0
package/src/interactions/google-interactions-api.ts +466 -0
package/src/interactions/google-interactions-language-model-options.ts +136 -0
package/src/interactions/google-interactions-language-model.ts +609 -0
package/src/interactions/google-interactions-prompt.ts +457 -0
package/src/interactions/google-interactions-provider-metadata.ts +23 -0
package/src/interactions/map-google-interactions-finish-reason.ts +33 -0
package/src/interactions/parse-google-interactions-outputs.ts +257 -0
package/src/interactions/poll-google-interactions.ts +110 -0
package/src/interactions/prepare-google-interactions-tools.ts +245 -0
package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0

package/src/interactions/google-interactions-language-model.ts ADDED Viewed

@@ -0,0 +1,609 @@
+import type {
+  LanguageModelV3,
+  LanguageModelV3CallOptions,
+  LanguageModelV3FinishReason,
+  LanguageModelV3GenerateResult,
+  LanguageModelV3StreamResult,
+  SharedV3ProviderMetadata,
+  SharedV3Warning,
+} from '@ai-sdk/provider';
+import {
+  combineHeaders,
+  createEventSourceResponseHandler,
+  createJsonResponseHandler,
+  generateId as defaultGenerateId,
+  parseProviderOptions,
+  postJsonToApi,
+  resolve,
+  type FetchFunction,
+  type Resolvable,
+} from '@ai-sdk/provider-utils';
+import { googleFailedResponseHandler } from '../google-error';
+import { buildGoogleInteractionsStreamTransform } from './build-google-interactions-stream-transform';
+import { convertGoogleInteractionsUsage } from './convert-google-interactions-usage';
+import { convertToGoogleInteractionsInput } from './convert-to-google-interactions-input';
+import {
+  googleInteractionsEventSchema,
+  googleInteractionsResponseSchema,
+} from './google-interactions-api';
+import {
+  googleInteractionsLanguageModelOptions,
+  type GoogleInteractionsModelId,
+} from './google-interactions-language-model-options';
+import type {
+  GoogleInteractionsAgentConfig,
+  GoogleInteractionsGenerationConfig,
+  GoogleInteractionsRequestBody,
+  GoogleInteractionsTool,
+  GoogleInteractionsToolChoice,
+} from './google-interactions-prompt';
+import { mapGoogleInteractionsFinishReason } from './map-google-interactions-finish-reason';
+import { parseGoogleInteractionsOutputs } from './parse-google-interactions-outputs';
+import {
+  isTerminalStatus,
+  pollGoogleInteractionUntilTerminal,
+} from './poll-google-interactions';
+import { prepareGoogleInteractionsTools } from './prepare-google-interactions-tools';
+import { synthesizeGoogleInteractionsAgentStream } from './synthesize-google-interactions-agent-stream';
+export type GoogleInteractionsConfig = { // Settings handed to `GoogleInteractionsLanguageModel` at construction time.
+  provider: string; // Returned verbatim by the model's `provider` getter.
+  baseURL: string; // Prefix of the `${baseURL}/interactions` endpoint; also forwarded to the polling helper.
+  headers?: Resolvable<Record<string, string | undefined>>; // Resolved on every call, then combined with the per-call headers.
+  fetch?: FetchFunction; // Optional fetch implementation passed to `postJsonToApi` and to the polling helper.
+  generateId: () => string; // ID generator passed to output parsing and stream building (the class falls back to the provider-utils `generateId` if nullish).
+  supportedUrls?: () => LanguageModelV3['supportedUrls']; // When set, its result replaces the built-in `supportedUrls` patterns.
+};
+export type GoogleInteractionsModelInput = // What the model constructor accepts: a model id or an agent reference.
+  | GoogleInteractionsModelId // String form: stored as `modelId`; the request body sends `model`.
+  | { agent: string }; // Object form: the name is stored as both `modelId` and `agent`; the request body sends `agent`.
+/**
+ * `LanguageModelV3` implementation that talks to the Google Interactions
+ * endpoint at `${baseURL}/interactions`.
+ *
+ * An instance targets either a model id or a named agent:
+ *  - model calls send `model:` and, when streaming, add `stream: true`;
+ *  - agent calls send `agent:` plus `background: true`, are polled until the
+ *    interaction reaches a terminal status, and have their stream synthesized
+ *    from the final polled response.
+ */
+export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
+  readonly specificationVersion = 'v3';
+  /** The model id, or the agent name when constructed with `{ agent }`. */
+  readonly modelId: string;
+  /**
+   * Optional agent name. When provided, the request body sends `agent:` instead
+   * of `model:`, and `tools` / `generation_config` are left out of the request
+   * body (warnings are emitted instead of throwing).
+   */
+  readonly agent: string | undefined;
+  private readonly config: GoogleInteractionsConfig;
+  /**
+   * @param modelOrAgent A model id string, or `{ agent }` to target an agent.
+   * @param config Connection settings (base URL, headers, fetch, id generator).
+   */
+  constructor(
+    modelOrAgent: GoogleInteractionsModelInput,
+    config: GoogleInteractionsConfig,
+  ) {
+    if (typeof modelOrAgent === 'string') {
+      this.modelId = modelOrAgent;
+      this.agent = undefined;
+    } else {
+      // Agents have no separate model id; the agent name doubles as `modelId`.
+      this.modelId = modelOrAgent.agent;
+      this.agent = modelOrAgent.agent;
+    }
+    this.config = config;
+  }
+  /** Provider identifier taken from the config. */
+  get provider(): string {
+    return this.config.provider;
+  }
+  /**
+   * URL patterns, keyed by media type, that this model declares as supported.
+   * A `supportedUrls` callback in the config takes precedence; otherwise the
+   * defaults are http(s) URLs for images, PDFs and audio, and YouTube
+   * (`youtube.com/watch?v=`, `youtu.be/`) or `gs://` URLs for video.
+   */
+  get supportedUrls() {
+    if (this.config.supportedUrls) {
+      return this.config.supportedUrls();
+    }
+    return {
+      'image/*': [/^https?:\/\/.+/],
+      'application/pdf': [/^https?:\/\/.+/],
+      'audio/*': [/^https?:\/\/.+/],
+      'video/*': [
+        /^https?:\/\/(www\.)?youtube\.com\/watch\?v=.+/,
+        /^https?:\/\/youtu\.be\/.+/,
+        /^gs:\/\/.+/,
+      ],
+    };
+  }
+  /**
+   * Translates AI SDK call options (plus `providerOptions.google`) into an
+   * Interactions request body.
+   *
+   * @param options The call options passed to `doGenerate` / `doStream`.
+   * @returns `args` (the request body with `undefined` fields removed),
+   * `warnings` collected while mapping, `isAgent` (whether this instance
+   * targets an agent) and `pollingTimeoutMs` (from the provider options, or
+   * `undefined`).
+   */
+  private async getArgs(options: LanguageModelV3CallOptions) {
+    const warnings: Array<SharedV3Warning> = [];
+    const opts = await parseProviderOptions({
+      provider: 'google',
+      providerOptions: options.providerOptions,
+      schema: googleInteractionsLanguageModelOptions,
+    });
+    const isAgent = this.agent != null;
+    const hasTools = options.tools != null && options.tools.length > 0;
+    let toolsForBody: Array<GoogleInteractionsTool> | undefined;
+    let toolChoiceForBody: GoogleInteractionsToolChoice | undefined;
+    if (hasTools && isAgent) {
+      // Agents cannot take tools: warn and leave both tools and tool choice unset.
+      warnings.push({
+        type: 'other',
+        message:
+          'google.interactions: tools are not supported when an agent is set; tools will be omitted from the request body.',
+      });
+    } else if (hasTools) {
+      const prepared = prepareGoogleInteractionsTools({
+        tools: options.tools,
+        toolChoice: options.toolChoice,
+      });
+      toolsForBody = prepared.tools;
+      toolChoiceForBody = prepared.toolChoice;
+      warnings.push(...prepared.toolWarnings);
+    }
+    /*
+     * Structured output mapping (resolves PRD Open Q1).
+     *
+     * The Interactions API exposes structured output via two top-level body
+     * fields: `response_mime_type` (always `'application/json'` here) and
+     * `response_format` (typed as `unknown` in the js-genai SDK). Per the
+     * canonical sample at
+     * `googleapis/js-genai/sdk-samples/interactions_structured_output_json.ts`,
+     * `response_format` accepts a **plain JSON Schema** value directly - no
+     * wrapping object, no OpenAPI conversion. The js-genai resource type
+     * (`src/interactions/resources/interactions.ts:1399`) confirms the field is
+     * passed through verbatim. We therefore send the AI SDK
+     * `responseFormat.schema` (a `JSONSchema7`) as-is.
+     *
+     * If a future API revision rejects plain JSON Schema, fall back to
+     * `convertJSONSchemaToOpenAPISchema(...)` (already imported by
+     * `google-language-model.ts`); empirically that has not been needed.
+     *
+     * Agent calls cannot send `generation_config` and (per the API) cannot
+     * combine with structured output - emit a warning and drop the field.
+     */
+    let responseMimeType: string | undefined;
+    let responseFormat: unknown;
+    if (options.responseFormat?.type === 'json') {
+      if (isAgent) {
+        warnings.push({
+          type: 'other',
+          message:
+            'google.interactions: structured output (responseFormat) is not supported when an agent is set; responseFormat will be ignored.',
+        });
+      } else {
+        responseMimeType = 'application/json';
+        if (options.responseFormat.schema != null) {
+          responseFormat = options.responseFormat.schema;
+        }
+      }
+    }
+    const {
+      input,
+      systemInstruction: convertedSystemInstruction,
+      warnings: convWarnings,
+    } = convertToGoogleInteractionsInput({
+      prompt: options.prompt,
+      previousInteractionId: opts?.previousInteractionId ?? undefined,
+      store: opts?.store ?? undefined,
+      mediaResolution: opts?.mediaResolution ?? undefined,
+    });
+    warnings.push(...convWarnings);
+    /*
+     * The system instruction derived from the prompt wins over
+     * `providerOptions.google.systemInstruction`; the provider option is only
+     * used when the prompt did not yield one.
+     */
+    let systemInstruction = convertedSystemInstruction;
+    const optionSystemInstruction = opts?.systemInstruction ?? undefined;
+    if (systemInstruction != null && optionSystemInstruction != null) {
+      warnings.push({
+        type: 'other',
+        message:
+          'google.interactions: both AI SDK system message and providerOptions.google.systemInstruction were set; using the AI SDK system message.',
+      });
+    } else if (systemInstruction == null && optionSystemInstruction != null) {
+      systemInstruction = optionSystemInstruction;
+    }
+    /*
+     * The Interactions API splits per-call config into `generation_config`
+     * (model branch) and `agent_config` (agent branch); the two are mutually
+     * exclusive. On the model branch the AI SDK call-level generation params,
+     * the thinking/imageConfig provider options and the prepared `tool_choice`
+     * flow into `generation_config`.
+     *
+     * When an agent is set, none of these fields are accepted by the API. Per
+     * PRD US 31 we emit a single warning listing the dropped field names and
+     * continue (do not throw); the agent-only `agent_config` field supersedes
+     * them.
+     */
+    let generationConfig: GoogleInteractionsGenerationConfig | undefined;
+    if (isAgent) {
+      const droppedFields: Array<string> = [];
+      if (options.temperature != null) droppedFields.push('temperature');
+      if (options.topP != null) droppedFields.push('topP');
+      if (options.seed != null) droppedFields.push('seed');
+      if (options.stopSequences != null && options.stopSequences.length > 0) {
+        droppedFields.push('stopSequences');
+      }
+      if (options.maxOutputTokens != null)
+        droppedFields.push('maxOutputTokens');
+      if (opts?.thinkingLevel != null) droppedFields.push('thinkingLevel');
+      if (opts?.thinkingSummaries != null) {
+        droppedFields.push('thinkingSummaries');
+      }
+      if (opts?.imageConfig != null) droppedFields.push('imageConfig');
+      if (droppedFields.length > 0) {
+        warnings.push({
+          type: 'other',
+          message: `google.interactions: ${droppedFields.join(', ')} ${droppedFields.length === 1 ? 'is' : 'are'} not supported when an agent is set; use providerOptions.google.agentConfig instead. Dropped from the request body.`,
+        });
+      }
+      generationConfig = undefined;
+    } else {
+      generationConfig = pruneUndefined({
+        temperature: options.temperature ?? undefined,
+        top_p: options.topP ?? undefined,
+        seed: options.seed ?? undefined,
+        stop_sequences:
+          options.stopSequences != null && options.stopSequences.length > 0
+            ? options.stopSequences
+            : undefined,
+        max_output_tokens: options.maxOutputTokens ?? undefined,
+        thinking_level: opts?.thinkingLevel ?? undefined,
+        thinking_summaries: opts?.thinkingSummaries ?? undefined,
+        image_config:
+          opts?.imageConfig != null
+            ? pruneUndefined({
+                aspect_ratio: opts.imageConfig.aspectRatio ?? undefined,
+                image_size: opts.imageConfig.imageSize ?? undefined,
+              })
+            : undefined,
+        tool_choice: toolChoiceForBody,
+      });
+    }
+    // `agent_config` is only built for agent calls; any other `type` leaves it unset.
+    let agentConfig: GoogleInteractionsAgentConfig | undefined;
+    if (isAgent && opts?.agentConfig != null) {
+      const ac = opts.agentConfig;
+      if (ac.type === 'deep-research') {
+        agentConfig = pruneUndefined({
+          type: 'deep-research',
+          thinking_summaries: ac.thinkingSummaries ?? undefined,
+          visualization: ac.visualization ?? undefined,
+          collaborative_planning: ac.collaborativePlanning ?? undefined,
+        }) as GoogleInteractionsAgentConfig;
+      } else if (ac.type === 'dynamic') {
+        agentConfig = { type: 'dynamic' };
+      }
+    }
+    /*
+     * Agent calls require `background: true` on the wire - otherwise the API
+     * rejects them with `background=true is required for agent interactions.`
+     * The server returns a non-terminal status (`in_progress`/`requires_action`)
+     * and the final outputs must be polled via `GET /interactions/{id}`. This
+     * is handled internally in `doGenerate` / `doStream` so the user-facing
+     * surface stays identical to model-id calls.
+     *
+     * Model-id calls retain their original synchronous behavior - no
+     * `background` field is sent.
+     */
+    const args: GoogleInteractionsRequestBody = pruneUndefined({
+      ...(isAgent ? { agent: this.agent } : { model: this.modelId }),
+      input,
+      system_instruction: systemInstruction,
+      tools: toolsForBody,
+      response_format: responseFormat,
+      response_mime_type: responseMimeType,
+      response_modalities:
+        opts?.responseModalities != null
+          ? (opts.responseModalities as Array<
+              'text' | 'image' | 'audio' | 'video' | 'document'
+            >)
+          : undefined,
+      previous_interaction_id: opts?.previousInteractionId ?? undefined,
+      service_tier: opts?.serviceTier ?? undefined,
+      store: opts?.store ?? undefined,
+      // An empty `generation_config` object is omitted rather than sent as `{}`.
+      generation_config:
+        generationConfig != null && Object.keys(generationConfig).length > 0
+          ? generationConfig
+          : undefined,
+      agent_config: agentConfig,
+      ...(isAgent ? { background: true } : {}),
+    });
+    return {
+      args,
+      warnings,
+      isAgent,
+      pollingTimeoutMs: opts?.pollingTimeoutMs ?? undefined,
+    };
+  }
+  /**
+   * Performs a non-streaming call: POSTs the request body, polls to completion
+   * for agent calls, and maps the final interaction to a generate result.
+   *
+   * @param options AI SDK call options.
+   * @returns Content, finish reason, usage, warnings, provider metadata
+   * (`interactionId`, `serviceTier` when available) and request/response
+   * details.
+   * @throws Error when an agent call still needs polling but the POST response
+   * carried no interaction id.
+   */
+  async doGenerate(
+    options: LanguageModelV3CallOptions,
+  ): Promise<LanguageModelV3GenerateResult> {
+    const { args, warnings, isAgent, pollingTimeoutMs } =
+      await this.getArgs(options);
+    const url = `${this.config.baseURL}/interactions`;
+    const mergedHeaders = combineHeaders(
+      this.config.headers ? await resolve(this.config.headers) : undefined,
+      options.headers,
+    );
+    const postResult = await postJsonToApi({
+      url,
+      headers: mergedHeaders,
+      body: args,
+      failedResponseHandler: googleFailedResponseHandler,
+      successfulResponseHandler: createJsonResponseHandler(
+        googleInteractionsResponseSchema,
+      ),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch,
+    });
+    let {
+      responseHeaders,
+      value: response,
+      rawValue: rawResponse,
+    } = postResult;
+    /*
+     * Agent calls run with `background: true`; the POST returns immediately
+     * with a non-terminal status (`in_progress` / `requires_action`). Poll
+     * `GET /interactions/{id}` until terminal so the user-facing surface
+     * matches a synchronous call.
+     */
+    if (isAgent && !isTerminalStatus(response.status)) {
+      /*
+       * Polling needs a concrete interaction id. Fail with the same explicit
+       * error `doStreamAgent` raises instead of handing a missing or empty id
+       * to the poller.
+       */
+      const pendingInteractionId = response.id;
+      if (pendingInteractionId == null || pendingInteractionId.length === 0) {
+        throw new Error(
+          'google.interactions: agent POST response did not include an interaction id; cannot poll for the agent result.',
+        );
+      }
+      const polled = await pollGoogleInteractionUntilTerminal({
+        baseURL: this.config.baseURL,
+        interactionId: pendingInteractionId,
+        headers: mergedHeaders,
+        fetch: this.config.fetch,
+        abortSignal: options.abortSignal,
+        timeoutMs: pollingTimeoutMs,
+      });
+      response = polled.response;
+      rawResponse = polled.rawResponse;
+      responseHeaders = polled.responseHeaders ?? responseHeaders;
+    }
+    /*
+     * `response.id` is omitted when `store: false` (fully stateless mode), and
+     * the stream surface returns `id: ""` (empty string) for the same case.
+     * Normalize both to `undefined` so downstream stamping does not pollute
+     * provider metadata with an empty/missing identifier.
+     */
+    const interactionId =
+      typeof response.id === 'string' && response.id.length > 0
+        ? response.id
+        : undefined;
+    const { content, hasFunctionCall } = parseGoogleInteractionsOutputs({
+      outputs: response.outputs ?? null,
+      generateId: this.config.generateId ?? defaultGenerateId,
+      interactionId,
+    });
+    const finishReason: LanguageModelV3FinishReason = {
+      unified: mapGoogleInteractionsFinishReason({
+        status: response.status,
+        hasFunctionCall,
+      }),
+      raw: response.status,
+    };
+    /*
+     * Service tier divergence vs. `:generateContent`:
+     *
+     * `google-language-model.ts` reads the applied service tier from the
+     * `x-gemini-service-tier` HTTP response header (see commit 1adfb76d2d).
+     * The Interactions API does NOT surface that header; it returns the
+     * applied tier in the response body as `service_tier` on the top-level
+     * Interaction object (and on `interaction.complete.interaction` for
+     * streaming). The `responseHeaders` parameter is also checked as a
+     * defensive fallback in case the API later adds the header.
+     */
+    const serviceTier =
+      response.service_tier ??
+      responseHeaders?.['x-gemini-service-tier'] ??
+      undefined;
+    /*
+     * `response.id` is omitted when `store: false` (fully stateless mode), so
+     * `interactionId` is only surfaced when the API actually returned one.
+     */
+    const providerMetadata: SharedV3ProviderMetadata = {
+      google: {
+        ...(interactionId != null ? { interactionId } : {}),
+        ...(serviceTier != null ? { serviceTier } : {}),
+      },
+    };
+    // Only surface `created` as a timestamp when it parses to a valid date.
+    let timestamp: Date | undefined;
+    if (typeof response.created === 'string') {
+      const parsed = new Date(response.created);
+      if (!Number.isNaN(parsed.getTime())) {
+        timestamp = parsed;
+      }
+    }
+    return {
+      content,
+      finishReason,
+      usage: convertGoogleInteractionsUsage(response.usage),
+      warnings,
+      providerMetadata,
+      request: { body: args },
+      response: {
+        headers: responseHeaders,
+        body: rawResponse,
+        ...(interactionId != null ? { id: interactionId } : {}),
+        ...(timestamp ? { timestamp } : {}),
+        modelId: response.model ?? undefined,
+      },
+    };
+  }
+  /**
+   * Performs a streaming call. Model calls POST with `stream: true` and pipe
+   * the SSE events through the Interactions stream transform; agent calls are
+   * delegated to `doStreamAgent`.
+   *
+   * @param options AI SDK call options.
+   * @returns The part stream plus request body and response headers.
+   */
+  async doStream(
+    options: LanguageModelV3CallOptions,
+  ): Promise<LanguageModelV3StreamResult> {
+    const { args, warnings, isAgent, pollingTimeoutMs } =
+      await this.getArgs(options);
+    const url = `${this.config.baseURL}/interactions`;
+    const mergedHeaders = combineHeaders(
+      this.config.headers ? await resolve(this.config.headers) : undefined,
+      options.headers,
+    );
+    /*
+     * Agent calls require `background: true`, which is incompatible with
+     * `stream: true` on POST. We drive the agent flow exactly like
+     * `doGenerate` (POST background -> poll GET) and synthesize a stream
+     * from the final polled outputs. The user-facing stream surface stays
+     * identical -- text-start / text-delta / text-end / finish parts are
+     * emitted in the same order as a true SSE response.
+     */
+    if (isAgent) {
+      return this.doStreamAgent({
+        args,
+        warnings,
+        url,
+        mergedHeaders,
+        options,
+        pollingTimeoutMs,
+      });
+    }
+    const body = { ...args, stream: true };
+    const { responseHeaders, value: response } = await postJsonToApi({
+      url,
+      headers: mergedHeaders,
+      body,
+      failedResponseHandler: googleFailedResponseHandler,
+      successfulResponseHandler: createEventSourceResponseHandler(
+        googleInteractionsEventSchema,
+      ),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch,
+    });
+    /*
+     * Forward the `x-gemini-service-tier` response header (when present) to
+     * the stream transformer as `serviceTier`.
+     *
+     * NOTE(review): `doGenerate` documents that the Interactions API reports
+     * the applied tier in the response body (`service_tier`, and on
+     * `interaction.complete.interaction` for streaming) and treats this header
+     * only as a defensive fallback. How the transformer combines this value
+     * with the body-level tier is not visible from this file - confirm it is
+     * treated as a fallback there as well.
+     */
+    const headerServiceTier = responseHeaders?.['x-gemini-service-tier'];
+    const transform = buildGoogleInteractionsStreamTransform({
+      warnings,
+      generateId: this.config.generateId ?? defaultGenerateId,
+      includeRawChunks: options.includeRawChunks,
+      serviceTier: headerServiceTier,
+    });
+    return {
+      stream: response.pipeThrough(transform),
+      request: { body },
+      response: { headers: responseHeaders },
+    };
+  }
+  /**
+   * Drive the streaming surface for agent calls. Agent calls require
+   * `background: true`, which is incompatible with `stream: true` on POST.
+   *
+   * In principle the API also exposes `GET /interactions/{id}?stream=true`
+   * to replay events as the agent runs. In practice the connection is
+   * idle for long stretches while the agent thinks (deep-research can run
+   * for a minute or more between SSE events), and undici's default body
+   * timeout terminates the request mid-flight with `UND_ERR_BODY_TIMEOUT`.
+   * Tuning the timeout per-call would require the caller to thread an
+   * `undici.Agent` through `fetch`, which contradicts the AI SDK's
+   * pluggable-fetch contract.
+   *
+   * We therefore drive `doStream` exactly like `doGenerate` for agents:
+   * POST with `background: true`, poll `GET /interactions/{id}` until
+   * terminal, then synthesize the stream from the final outputs. The
+   * user-facing surface stays identical -- text-start / text-delta /
+   * text-end / finish parts arrive in the same order as a true SSE
+   * response, just buffered until the agent completes.
+   *
+   * @throws Error when the POST response carries no (or an empty) interaction
+   * id, regardless of the returned status.
+   */
+  private async doStreamAgent({
+    args,
+    warnings,
+    url,
+    mergedHeaders,
+    options,
+    pollingTimeoutMs,
+  }: {
+    args: GoogleInteractionsRequestBody;
+    warnings: Array<SharedV3Warning>;
+    url: string;
+    mergedHeaders: Record<string, string | undefined>;
+    options: LanguageModelV3CallOptions;
+    pollingTimeoutMs: number | undefined;
+  }): Promise<LanguageModelV3StreamResult> {
+    const postResult = await postJsonToApi({
+      url,
+      headers: mergedHeaders,
+      body: args,
+      failedResponseHandler: googleFailedResponseHandler,
+      successfulResponseHandler: createJsonResponseHandler(
+        googleInteractionsResponseSchema,
+      ),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch,
+    });
+    let { responseHeaders: postHeaders, value: postResponse } = postResult;
+    const interactionId = postResponse.id;
+    if (interactionId == null || interactionId.length === 0) {
+      throw new Error(
+        'google.interactions: agent POST response did not include an interaction id; cannot poll for the agent result.',
+      );
+    }
+    if (!isTerminalStatus(postResponse.status)) {
+      const polled = await pollGoogleInteractionUntilTerminal({
+        baseURL: this.config.baseURL,
+        interactionId,
+        headers: mergedHeaders,
+        fetch: this.config.fetch,
+        abortSignal: options.abortSignal,
+        timeoutMs: pollingTimeoutMs,
+      });
+      postResponse = polled.response;
+      // Prefer the headers of the final poll; keep the POST headers otherwise.
+      postHeaders = polled.responseHeaders ?? postHeaders;
+    }
+    const stream = synthesizeGoogleInteractionsAgentStream({
+      response: postResponse,
+      warnings,
+      generateId: this.config.generateId ?? defaultGenerateId,
+      includeRawChunks: options.includeRawChunks,
+      headerServiceTier: postHeaders?.['x-gemini-service-tier'],
+    });
+    return {
+      stream,
+      request: { body: args },
+      response: { headers: postHeaders },
+    };
+  }
+}
+/**
+ * Builds a shallow copy of `obj` that keeps only the own enumerable entries
+ * whose value is not strictly `undefined`. Other falsy values (`null`, `0`,
+ * `''`, `false`) are kept, and nested objects are copied by reference.
+ */
+function pruneUndefined<T extends Record<string, unknown>>(obj: T): T {
+  const kept: Record<string, unknown> = {};
+  Object.entries(obj).forEach(([name, entry]) => {
+    if (entry !== undefined) {
+      kept[name] = entry;
+    }
+  });
+  return kept as T;
+}