npm - universal-llm-client - Versions diffs - 4.3.0 → 4.5.1 - Mend

universal-llm-client 4.3.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

package/CHANGELOG.md +34 -19
package/README.md +62 -11
package/dist/ai-model.d.ts +12 -2
package/dist/ai-model.js +36 -2
package/dist/auditor.d.ts +0 -1
package/dist/auditor.js +0 -1
package/dist/client.d.ts +0 -1
package/dist/client.js +0 -1
package/dist/gemma-channel.d.ts +13 -0
package/dist/gemma-channel.js +37 -0
package/dist/gemma-diffusion.d.ts +48 -0
package/dist/gemma-diffusion.js +146 -0
package/dist/http.d.ts +4 -1
package/dist/http.js +14 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +4 -1
package/dist/interfaces.d.ts +163 -8
package/dist/interfaces.js +0 -1
package/dist/mcp.d.ts +0 -1
package/dist/mcp.js +0 -1
package/dist/providers/anthropic.d.ts +0 -1
package/dist/providers/anthropic.js +28 -4
package/dist/providers/google.d.ts +22 -2
package/dist/providers/google.js +223 -14
package/dist/providers/index.d.ts +0 -1
package/dist/providers/index.js +0 -1
package/dist/providers/ollama.d.ts +2 -1
package/dist/providers/ollama.js +59 -31
package/dist/providers/openai.d.ts +16 -1
package/dist/providers/openai.js +488 -81
package/dist/router.d.ts +2 -1
package/dist/router.js +4 -1
package/dist/stream-decoder.d.ts +12 -1
package/dist/stream-decoder.js +182 -6
package/dist/structured-output.d.ts +0 -1
package/dist/structured-output.js +0 -1
package/dist/thinking.d.ts +35 -0
package/dist/thinking.js +51 -0
package/dist/tools.d.ts +0 -1
package/dist/tools.js +0 -1
package/dist/zod-adapter.d.ts +0 -1
package/dist/zod-adapter.js +0 -1
package/package.json +3 -1
package/dist/ai-model.d.ts.map +0 -1
package/dist/ai-model.js.map +0 -1
package/dist/auditor.d.ts.map +0 -1
package/dist/auditor.js.map +0 -1
package/dist/client.d.ts.map +0 -1
package/dist/client.js.map +0 -1
package/dist/http.d.ts.map +0 -1
package/dist/http.js.map +0 -1
package/dist/index.d.ts.map +0 -1
package/dist/index.js.map +0 -1
package/dist/interfaces.d.ts.map +0 -1
package/dist/interfaces.js.map +0 -1
package/dist/mcp.d.ts.map +0 -1
package/dist/mcp.js.map +0 -1
package/dist/providers/anthropic.d.ts.map +0 -1
package/dist/providers/anthropic.js.map +0 -1
package/dist/providers/google.d.ts.map +0 -1
package/dist/providers/google.js.map +0 -1
package/dist/providers/index.d.ts.map +0 -1
package/dist/providers/index.js.map +0 -1
package/dist/providers/ollama.d.ts.map +0 -1
package/dist/providers/ollama.js.map +0 -1
package/dist/providers/openai.d.ts.map +0 -1
package/dist/providers/openai.js.map +0 -1
package/dist/router.d.ts.map +0 -1
package/dist/router.js.map +0 -1
package/dist/stream-decoder.d.ts.map +0 -1
package/dist/stream-decoder.js.map +0 -1
package/dist/structured-output.d.ts.map +0 -1
package/dist/structured-output.js.map +0 -1
package/dist/tools.d.ts.map +0 -1
package/dist/tools.js.map +0 -1
package/dist/zod-adapter.d.ts.map +0 -1
package/dist/zod-adapter.js.map +0 -1

package/dist/index.d.ts CHANGED Viewed

@@ -7,11 +7,11 @@
  * @module universal-llm-client
  */
 export { AIModel } from './ai-model.js';
-export { AIModelApiType, AIModelType, type AIModelConfig, type ProviderConfig, type LLMClientOptions, type LLMChatMessage, type LLMMessageContent, type LLMContentPart, type LLMTextContent, type LLMImageContent, type LLMAudioContent, type LLMChatResponse, type TokenUsageInfo, type LLMToolCall, type LLMToolDefinition, type LLMFunction, type ToolHandler, type ToolExecutionResult, type ToolRegistry, type ToolRegistryEntry, type ChatOptions, type ResponseFormat, type OutputOptions, type ModelMetadata, textContent, imageContent, multimodalMessage, extractTextContent, hasImages, audioContent, hasAudio, } from './interfaces.js';
+export { AIModelApiType, AIModelType, type AIModelConfig, type ProviderConfig, type LLMClientOptions, type LLMChatMessage, type LLMMessageContent, type LLMContentPart, type LLMTextContent, type LLMImageContent, type LLMAudioContent, type LLMChatResponse, type TokenUsageInfo, type LLMToolCall, type LLMToolDefinition, type LLMFunction, type ToolHandler, type ToolExecutionResult, type ToolRegistry, type ToolRegistryEntry, type ChatOptions, type ResponseFormat, type OutputOptions, type ThinkingLevel, type DeepResearchOptions, type DeepResearchResult, type DeepResearchStep, type DeepResearchEvent, type ModelMetadata, textContent, imageContent, multimodalMessage, extractTextContent, hasImages, audioContent, hasAudio, } from './interfaces.js';
 export { type Auditor, type AuditEvent, type AuditEventType, NoopAuditor, ConsoleAuditor, BufferedAuditor, } from './auditor.js';
 export { type StreamDecoder, type DecodedEvent, type DecoderCallback, type DecoderType, type DecoderOptions, type DecoderFactory, createDecoder, registerDecoder, getRegisteredDecoders, PassthroughDecoder, StandardChatDecoder, InterleavedReasoningDecoder, } from './stream-decoder.js';
 export { ToolBuilder, ToolExecutor, createTimeTool, createRandomNumberTool, } from './tools.js';
 export { httpRequest, httpStream, parseNDJSON, parseSSE, buildHeaders, type HttpRequestOptions, type HttpResponse, } from './http.js';
+export { isGemmaDiffusionModel, parseGemmaDiffusionOutput, gemmaArgsToJson, type GemmaDiffusionParsed, type GemmaParsedToolCall, } from './gemma-diffusion.js';
 export { MCPToolBridge, type MCPBridgeConfig, type MCPServerConfig, type MCPTool, } from './mcp.js';
 export { StructuredOutputError, type StructuredOutputErrorOptions, type StructuredOutputOptions, type StructuredOutputResult, type StructuredOutputSuccess, type StructuredOutputFailure, type JSONSchema, type SchemaProvider, type ProviderSchema, type SchemaConfig, isStructuredOutputSuccess, isStructuredOutputFailure, normalizeJsonSchema, convertToProviderSchema, stripUnsupportedFeatures, getJsonSchema, getJsonSchemaFromConfig, parseStructured, tryParseStructured, validateStructuredOutput, stripJsonFences, StreamingJsonParser, type StreamingStructuredResult, } from './structured-output.js';
-//# sourceMappingURL=index.d.ts.map

package/dist/index.js CHANGED Viewed

@@ -35,6 +35,10 @@ export { ToolBuilder, ToolExecutor, createTimeTool, createRandomNumberTool, } fr
 // ============================================================================
 export { httpRequest, httpStream, parseNDJSON, parseSSE, buildHeaders, } from './http.js';
 // ============================================================================
+// DiffusionGemma Native Protocol (vLLM without server-side parsers)
+// ============================================================================
+export { isGemmaDiffusionModel, parseGemmaDiffusionOutput, gemmaArgsToJson, } from './gemma-diffusion.js';
+// ============================================================================
 // MCP Integration
 // ============================================================================
 export { MCPToolBridge, } from './mcp.js';
@@ -48,4 +52,3 @@ normalizeJsonSchema, convertToProviderSchema, stripUnsupportedFeatures, getJsonS
 parseStructured, tryParseStructured, validateStructuredOutput, stripJsonFences,
 // Streaming parser
 StreamingJsonParser, } from './structured-output.js';
-//# sourceMappingURL=index.js.map

package/dist/interfaces.d.ts CHANGED Viewed

@@ -43,7 +43,45 @@ export interface ProviderConfig {
     region?: string;
     /** Google API version (default: "v1beta") */
     apiVersion?: 'v1' | 'v1beta';
+    /**
+     * Extra headers merged into requests, applied by providers that use
+     * `buildHeaders` — **OpenAI-compatible and Ollama**. Google/Vertex and
+     * Anthropic build their own auth headers and ignore this. Useful for Azure
+     * (api-key), custom gateways, or non-standard auth. Merged after the default
+     * auth header (later entries win).
+     */
+    headers?: Record<string, string>;
+    /**
+     * Extra query parameters appended to request URLs — **OpenAI-compatible
+     * provider only**. Useful for Azure OpenAI (e.g. { 'api-version': '2024-10-21' }).
+     */
+    queryParams?: Record<string, string>;
+    /**
+     * Override the name of the header that carries the API key (default:
+     * "Authorization") — **OpenAI-compatible and Ollama only** (via `buildHeaders`).
+     * Common alternative for Azure and some gateways: "api-key".
+     */
+    authHeader?: string;
+    /**
+     * Prefix placed before the apiKey value in the auth header (OpenAI-compatible
+     * and Ollama only). Default: "Bearer " when authHeader is Authorization (or
+     * unset), otherwise "". Set to "" explicitly for "api-key: <yourkey>" style auth.
+     */
+    authPrefix?: string;
+    /**
+     * For OpenAI-compatible providers only: the URL path segment to append after the base URL.
+     * Default: "/v1".
+     * Set to "" (or "/") to disable the automatic append. This is required when supplying
+     * a full Azure deployment URL such as ".../deployments/my-deploy".
+     */
+    apiBasePath?: string;
 }
+/**
+ * Unified reasoning-effort level. Mapped to each provider's native control:
+ * Gemini 3.x `thinkingConfig.thinkingLevel`, OpenAI `reasoning_effort`,
+ * Gemini 2.5 `thinkingBudget`, Anthropic `budget_tokens`, vLLM/Ollama on/off.
+ */
+export type ThinkingLevel = 'minimal' | 'low' | 'medium' | 'high';
 export interface AIModelConfig {
     /** Model name (used across all providers unless overridden) */
     model: string;
@@ -51,8 +89,8 @@ export interface AIModelConfig {
     providers: ProviderConfig[];
     /** Default parameters for all requests (temperature, top_p, etc.) */
     defaultParameters?: Record<string, unknown>;
-    /** Enable thinking/reasoning mode */
-    thinking?: boolean;
+    /** Enable thinking/reasoning — `true`/`false` or a level ('minimal' | 'low' | 'medium' | 'high'). */
+    thinking?: boolean | ThinkingLevel;
     /** Request timeout in ms (default: 30000) */
     timeout?: number;
     /** Retries per provider before failover (default: 2) */
@@ -73,8 +111,8 @@ export interface LLMClientOptions {
     modelType?: AIModelType;
     /** Default parameters for requests */
     defaultParameters?: Record<string, unknown>;
-    /** Enable thinking/reasoning mode */
-    thinking?: boolean;
+    /** Enable thinking/reasoning — `true`/`false` or a level ('minimal' | 'low' | 'medium' | 'high'). */
+    thinking?: boolean | ThinkingLevel;
     /** Request timeout in ms */
     timeout?: number;
     /** Number of retries for failed requests */
@@ -87,6 +125,31 @@ export interface LLMClientOptions {
     region?: string;
     /** Google API version */
     apiVersion?: 'v1' | 'v1beta';
+    /**
+     * Force the DiffusionGemma native channel protocol on/off for
+     * OpenAI-compatible backends (skip_special_tokens:false + client-side
+     * reasoning/tool-call parsing). Auto-detected from the model name when
+     * omitted. See gemma-diffusion.ts.
+     */
+    gemmaNativeProtocol?: boolean;
+    /**
+     * Extra headers merged for every request from this provider instance.
+     * Populated from ProviderConfig.headers for advanced auth / gateway scenarios
+     * (Azure api-key style, custom x- headers, etc.).
+     */
+    extraHeaders?: Record<string, string>;
+    /** Extra query parameters appended to request URLs (from ProviderConfig.queryParams). */
+    queryParams?: Record<string, string>;
+    /** Auth header name override (from ProviderConfig.authHeader). */
+    authHeader?: string;
+    /** Auth value prefix (from ProviderConfig.authPrefix). */
+    authPrefix?: string;
+    /**
+     * For openai-compatible clients: the sub-path to append (from ProviderConfig.apiBasePath).
+     * Defaults to "/v1"; `undefined` keeps that default. Set to "" or "/" to disable
+     * the append (when the base URL already contains the full path).
+     */
+    apiBasePath?: string;
 }
 export interface LLMTextContent {
     type: 'text';
@@ -235,6 +298,14 @@ export interface ChatOptions {
     temperature?: number;
     /** Max tokens to generate */
     maxTokens?: number;
+    /**
+     * Enable/disable/level model thinking for this request, overriding the
+     * model-level `thinking` config. `true`/`false` or a level
+     * ('minimal' | 'low' | 'medium' | 'high'). Mapped per provider: Gemini
+     * `thinkingLevel`/`thinkingBudget`, OpenAI `reasoning_effort`, vLLM
+     * `enable_thinking`, Anthropic `budget_tokens`, Ollama `think`.
+     */
+    thinking?: boolean | ThinkingLevel;
     /** Tool definitions (auto-populated from registry if not set) */
     tools?: LLMToolDefinition[];
     /** Tool choice mode */
@@ -245,7 +316,11 @@ export interface ChatOptions {
     signal?: AbortSignal;
     /** Enable/disable tool execution for chatWithTools */
     executeTools?: boolean;
-    /** Enable prompt caching (Provider specific feature, opt-in for Anthropic) */
+    /**
+     * Enable provider-side prompt caching when supported.
+     * - Anthropic: Adds cache_control: { type: 'ephemeral' } to the system prompt block (most common high-impact pattern).
+     * - Other providers: May be passed through via parameters/headers or ignored; consult provider docs.
+     */
     enablePromptCaching?: boolean;
     /** Maximum tool execution rounds (default: 10) */
     maxIterations?: number;
@@ -332,9 +407,23 @@ export interface TokenUsageInfo {
      * via `DecodedEvent { type: 'thinking' }`); consult the provider.
      */
     reasoningTokens?: number;
+    /**
+     * Total request duration in milliseconds. Server-measured where the
+     * provider reports it (Ollama `total_duration`); otherwise client-measured
+     * wall-clock (OpenAI-compatible / vLLM return no timing in `usage`).
+     */
+    durationMs?: number;
+    /**
+     * Decode throughput in output tokens/second. Server-precise for Ollama
+     * (`eval_count / eval_duration`); derived from `outputTokens / durationMs`
+     * for providers without server-side timing (OpenAI-compatible / vLLM).
+     */
+    tokensPerSecond?: number;
 }
 export interface LLMChatResponse<T = unknown> {
     message: LLMChatMessage;
+    /** Provider finish reason when available (e.g. Ollama done_reason, Google finishReason) */
+    finishReason?: string;
     /** Reasoning/thinking content from the model (if supported) */
     reasoning?: string;
     /** Token usage info */
@@ -375,9 +464,16 @@ export interface OllamaResponse {
         tool_calls?: LLMToolCall[];
     };
     done: boolean;
+    done_reason?: string;
+    /** Total request time in nanoseconds. */
+    total_duration?: number;
+    /** Model load time in nanoseconds. */
+    load_duration?: number;
     prompt_eval_count?: number;
     eval_count?: number;
+    /** Prompt evaluation time in nanoseconds. */
     prompt_eval_duration?: number;
+    /** Generation time in nanoseconds. */
     eval_duration?: number;
 }
 export interface OpenAIResponse {
@@ -390,6 +486,13 @@ export interface OpenAIResponse {
         message: {
             role: string;
             content: string | null;
+            /**
+             * Chain-of-thought from reasoning models exposed via a dedicated
+             * field (vLLM `--reasoning-parser`, DeepSeek-R1, etc.). vLLM uses
+             * `reasoning_content`; some gateways use `reasoning`.
+             */
+            reasoning?: string;
+            reasoning_content?: string;
             tool_calls?: LLMToolCall[];
         };
         finish_reason: string;
@@ -426,8 +529,8 @@ export interface OpenAIModelInfo {
 export interface GooglePart {
     text?: string;
     functionCall?: {
-        name: string;
-        args: Record<string, unknown>;
+        name?: string;
+        args?: Record<string, unknown>;
     };
     functionResponse?: {
         name: string;
@@ -437,6 +540,8 @@ export interface GooglePart {
         mimeType: string;
         data: string;
     };
+    /** True when this part is a reasoning summary (requires `includeThoughts`). */
+    thought?: boolean;
     /** Gemini 3.x thought signature — must be echoed back on functionCall parts */
     thoughtSignature?: string;
 }
@@ -507,6 +612,57 @@ export interface GoogleResponse {
         thoughtsTokenCount?: number;
     };
 }
+/** Options for an agentic Deep Research interaction (Gemini-only). */
+export interface DeepResearchOptions {
+    /** Research agent id (default 'deep-research-preview-04-2026'). */
+    agent?: string;
+    /** Tools the agent may use, e.g. 'google_search', 'url_context', 'code_execution'. */
+    tools?: string[];
+    /** Emit intermediate reasoning ('auto') or not ('none'). Default 'auto'. */
+    thinkingSummaries?: 'auto' | 'none';
+    /** Continue a prior interaction (follow-up question). */
+    previousInteractionId?: string;
+    /** Poll interval in ms while awaiting completion (default 5000). */
+    pollIntervalMs?: number;
+    /** Overall timeout in ms before giving up the poll loop (default 600000). */
+    timeoutMs?: number;
+    /** Abort signal forwarded to every request. */
+    signal?: AbortSignal;
+}
+/** One intermediate step in a Deep Research interaction. */
+export interface DeepResearchStep {
+    type?: string;
+    content?: Array<{
+        text?: string;
+        [k: string]: unknown;
+    }>;
+    [k: string]: unknown;
+}
+/** Terminal (or last-polled) state of a Deep Research interaction. */
+export interface DeepResearchResult {
+    id: string;
+    status: 'in_progress' | 'completed' | 'failed' | string;
+    /** Final research report (`output_text`) when completed. */
+    report?: string;
+    steps?: DeepResearchStep[];
+    error?: unknown;
+    /** The raw last interaction object from the API. */
+    raw?: unknown;
+}
+/** Streaming Deep Research event (from `step.delta` updates). */
+export type DeepResearchEvent = {
+    type: 'thought';
+    content: string;
+} | {
+    type: 'text';
+    content: string;
+} | {
+    type: 'image';
+    content: unknown;
+} | {
+    type: 'status';
+    status: string;
+};
 /** Create a text content part */
 export declare function textContent(text: string): LLMTextContent;
 /** Create an image content part from base64 data or URL */
@@ -521,4 +677,3 @@ export declare function hasImages(content: LLMMessageContent): boolean;
 export declare function audioContent(base64Data: string, mimeType: string): LLMAudioContent;
 /** Check if message content contains audio */
 export declare function hasAudio(content: LLMMessageContent): boolean;
-//# sourceMappingURL=interfaces.d.ts.map

package/dist/interfaces.js CHANGED Viewed

@@ -74,4 +74,3 @@ export function hasAudio(content) {
         return false;
     return content.some(part => part.type === 'audio');
 }
-//# sourceMappingURL=interfaces.js.map

package/dist/mcp.d.ts CHANGED Viewed

@@ -82,4 +82,3 @@ export declare class MCPToolBridge {
     private createToolHandler;
     private convertInputSchema;
 }
-//# sourceMappingURL=mcp.d.ts.map

package/dist/mcp.js CHANGED Viewed

@@ -252,4 +252,3 @@ export class MCPToolBridge {
         };
     }
 }
-//# sourceMappingURL=mcp.js.map

package/dist/providers/anthropic.d.ts CHANGED Viewed

@@ -53,4 +53,3 @@ export declare class AnthropicClient extends BaseLLMClient {
     /** Extract text from multimodal content */
     private extractText;
 }
-//# sourceMappingURL=anthropic.d.ts.map

package/dist/providers/anthropic.js CHANGED Viewed

@@ -14,6 +14,7 @@
  *   - Streaming: content_block_start/delta/stop events with typed deltas
  */
 import { BaseLLMClient } from '../client.js';
+import { resolveThinking, anthropicThinkingBudget } from '../thinking.js';
 import { httpRequest, httpStream, parseSSE } from '../http.js';
 import { StandardChatDecoder } from '../stream-decoder.js';
 // ============================================================================
@@ -271,6 +272,17 @@ export class AnthropicClient extends BaseLLMClient {
                 .map(m => typeof m.content === 'string' ? m.content : this.extractText(m.content))
                 .join('\n\n')
             : undefined;
+        // Prompt caching support (Anthropic-specific, high impact for long system prompts / RAG)
+        let system = systemPrompt;
+        if (options?.enablePromptCaching && systemPrompt) {
+            system = [
+                {
+                    type: 'text',
+                    text: systemPrompt,
+                    cache_control: { type: 'ephemeral' },
+                },
+            ];
+        }
         // Convert tools from OpenAI format to Anthropic format
         const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
         const anthropicTools = tools?.map(t => this.convertToolDef(t));
@@ -285,15 +297,28 @@ export class AnthropicClient extends BaseLLMClient {
         else if (options?.toolChoice === 'auto') {
             toolChoice = { type: 'auto' };
         }
+        // Unified thinking flag → Anthropic extended thinking. Per-call overrides
+        // model config; the level sets `budget_tokens` (kept < max_tokens). The
+        // API forbids a custom temperature while thinking is enabled, so it is
+        // omitted in that case (the required default of 1 applies).
+        const thinking = resolveThinking(options?.thinking, this.options.thinking);
+        const thinkingOn = thinking?.enabled === true;
+        const requestedMax = options?.maxTokens ?? 4096;
+        // Extended thinking requires budget_tokens >= 1024 AND < max_tokens, so when
+        // thinking is on we bump max_tokens to guarantee headroom for the answer.
+        const budget = thinkingOn ? anthropicThinkingBudget(thinking?.level, requestedMax) : 0;
+        const maxTokens = thinkingOn ? Math.max(requestedMax, budget + 1024) : requestedMax;
         const body = {
             model: this.options.model,
             messages: this.convertMessages(nonSystemMessages),
-            max_tokens: options?.maxTokens ?? 4096,
-            ...(systemPrompt && { system: systemPrompt }),
+            max_tokens: maxTokens,
+            ...(system && { system }),
             ...(anthropicTools?.length && { tools: anthropicTools }),
             ...(toolChoice && { tool_choice: toolChoice }),
             ...(stream && { stream: true }),
-            ...(options?.temperature !== undefined && { temperature: options.temperature }),
+            ...(thinkingOn
+                ? { thinking: { type: 'enabled', budget_tokens: budget } }
+                : (options?.temperature !== undefined && { temperature: options.temperature })),
         };
         return body;
     }
@@ -521,4 +546,3 @@ export class AnthropicClient extends BaseLLMClient {
             .join('');
     }
 }
-//# sourceMappingURL=anthropic.js.map

package/dist/providers/google.d.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  * streaming, embeddings, and system prompt handling.
  */
 import { BaseLLMClient } from '../client.js';
-import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions } from '../interfaces.js';
+import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions, DeepResearchOptions, DeepResearchResult, DeepResearchEvent } from '../interfaces.js';
 import type { DecodedEvent } from '../stream-decoder.js';
 import type { Auditor } from '../auditor.js';
 export declare class GoogleClient extends BaseLLMClient {
@@ -20,6 +20,26 @@ export declare class GoogleClient extends BaseLLMClient {
     private getHeaders;
     chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse>;
     chatStream(messages: LLMChatMessage[], options?: ChatOptions): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>;
+    /** Deep Research is available via Google AI Studio only (not Vertex AI). */
+    supportsDeepResearch(): boolean;
+    private interactionsBase;
+    private deepResearchHeaders;
+    private buildInteractionBody;
+    private toDeepResearchResult;
+    /** httpRequest with small backoff retries — the preview interactions API is flaky (503s). */
+    private drRequest;
+    /**
+     * Run an agentic Deep Research interaction: create it, then poll until it
+     * completes/fails or the timeout elapses. Returns the final report + steps.
+     */
+    deepResearch(input: string, opts?: DeepResearchOptions): Promise<DeepResearchResult>;
+    /**
+     * Stream a Deep Research interaction's intermediate updates (`step.delta`
+     * thought/text/image events) and return the final result. Best-effort:
+     * falls back to the created interaction object if the stream ends early.
+     */
+    deepResearchStream(input: string, opts?: DeepResearchOptions): AsyncGenerator<DeepResearchEvent, DeepResearchResult, unknown>;
+    private delay;
     embed(text: string): Promise<number[]>;
     getModels(): Promise<string[]>;
     private buildRequestBody;
@@ -28,6 +48,7 @@ export declare class GoogleClient extends BaseLLMClient {
     private convertContentToGoogleParts;
     private convertToGoogleTool;
     private convertFunctionCallToToolCall;
+    private parseToolArguments;
     private parseGoogleResponse;
     /**
      * Retry HTTP requests for Flex tier when receiving 503/429 errors.
@@ -35,4 +56,3 @@ export declare class GoogleClient extends BaseLLMClient {
      */
     private fetchWithFlexRetry;
 }
-//# sourceMappingURL=google.d.ts.map