npm - universal-llm-client - Versions diffs - 4.3.0 → 4.5.1 - Mend

universal-llm-client 4.3.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

package/CHANGELOG.md +34 -19
package/README.md +62 -11
package/dist/ai-model.d.ts +12 -2
package/dist/ai-model.js +36 -2
package/dist/auditor.d.ts +0 -1
package/dist/auditor.js +0 -1
package/dist/client.d.ts +0 -1
package/dist/client.js +0 -1
package/dist/gemma-channel.d.ts +13 -0
package/dist/gemma-channel.js +37 -0
package/dist/gemma-diffusion.d.ts +48 -0
package/dist/gemma-diffusion.js +146 -0
package/dist/http.d.ts +4 -1
package/dist/http.js +14 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +4 -1
package/dist/interfaces.d.ts +163 -8
package/dist/interfaces.js +0 -1
package/dist/mcp.d.ts +0 -1
package/dist/mcp.js +0 -1
package/dist/providers/anthropic.d.ts +0 -1
package/dist/providers/anthropic.js +28 -4
package/dist/providers/google.d.ts +22 -2
package/dist/providers/google.js +223 -14
package/dist/providers/index.d.ts +0 -1
package/dist/providers/index.js +0 -1
package/dist/providers/ollama.d.ts +2 -1
package/dist/providers/ollama.js +59 -31
package/dist/providers/openai.d.ts +16 -1
package/dist/providers/openai.js +488 -81
package/dist/router.d.ts +2 -1
package/dist/router.js +4 -1
package/dist/stream-decoder.d.ts +12 -1
package/dist/stream-decoder.js +182 -6
package/dist/structured-output.d.ts +0 -1
package/dist/structured-output.js +0 -1
package/dist/thinking.d.ts +35 -0
package/dist/thinking.js +51 -0
package/dist/tools.d.ts +0 -1
package/dist/tools.js +0 -1
package/dist/zod-adapter.d.ts +0 -1
package/dist/zod-adapter.js +0 -1
package/package.json +3 -1
package/dist/ai-model.d.ts.map +0 -1
package/dist/ai-model.js.map +0 -1
package/dist/auditor.d.ts.map +0 -1
package/dist/auditor.js.map +0 -1
package/dist/client.d.ts.map +0 -1
package/dist/client.js.map +0 -1
package/dist/http.d.ts.map +0 -1
package/dist/http.js.map +0 -1
package/dist/index.d.ts.map +0 -1
package/dist/index.js.map +0 -1
package/dist/interfaces.d.ts.map +0 -1
package/dist/interfaces.js.map +0 -1
package/dist/mcp.d.ts.map +0 -1
package/dist/mcp.js.map +0 -1
package/dist/providers/anthropic.d.ts.map +0 -1
package/dist/providers/anthropic.js.map +0 -1
package/dist/providers/google.d.ts.map +0 -1
package/dist/providers/google.js.map +0 -1
package/dist/providers/index.d.ts.map +0 -1
package/dist/providers/index.js.map +0 -1
package/dist/providers/ollama.d.ts.map +0 -1
package/dist/providers/ollama.js.map +0 -1
package/dist/providers/openai.d.ts.map +0 -1
package/dist/providers/openai.js.map +0 -1
package/dist/router.d.ts.map +0 -1
package/dist/router.js.map +0 -1
package/dist/stream-decoder.d.ts.map +0 -1
package/dist/stream-decoder.js.map +0 -1
package/dist/structured-output.d.ts.map +0 -1
package/dist/structured-output.js.map +0 -1
package/dist/tools.d.ts.map +0 -1
package/dist/tools.js.map +0 -1
package/dist/zod-adapter.d.ts.map +0 -1
package/dist/zod-adapter.js.map +0 -1

package/dist/providers/google.js CHANGED

@@ -6,7 +6,8 @@
  * streaming, embeddings, and system prompt handling.
  */
 import { BaseLLMClient } from '../client.js';
-import { httpRequest, httpStream } from '../http.js';
+import { resolveThinking, geminiThinkingBudget } from '../thinking.js';
+import { httpRequest, httpStream, parseSSE } from '../http.js';
 import { StandardChatDecoder } from '../stream-decoder.js';
 import { normalizeJsonSchema, stripUnsupportedFeatures, getJsonSchemaFromConfig, } from '../structured-output.js';
 export class GoogleClient extends BaseLLMClient {
@@ -132,6 +133,7 @@ export class GoogleClient extends BaseLLMClient {
         });
         // Google streams SSE with JSON payloads
         let buffer = '';
+        let reasoningBuffer = '';
         for await (const chunk of stream) {
             buffer += chunk;
             // Google SSE uses "data: " prefix
@@ -159,8 +161,14 @@ export class GoogleClient extends BaseLLMClient {
                         continue;
                     for (const part of candidate.content.parts) {
                         if (part.text) {
-                            decoder.push(part.text);
-                            yield { type: 'text', content: part.text };
+                            if (part.thought) {
+                                reasoningBuffer += part.text;
+                                yield { type: 'thinking', content: part.text };
+                            }
+                            else {
+                                decoder.push(part.text);
+                                yield { type: 'text', content: part.text };
+                            }
                         }
                         if (part.functionCall) {
                             const toolCall = this.convertFunctionCallToToolCall(part.functionCall, part.thoughtSignature);
@@ -189,12 +197,170 @@ export class GoogleClient extends BaseLLMClient {
                 content: decoder.getCleanContent(),
                 tool_calls: allToolCalls.length > 0 ? allToolCalls : undefined,
             },
-            reasoning: decoder.getReasoning(),
+            reasoning: reasoningBuffer || decoder.getReasoning(),
             usage,
             provider: this.isVertex ? 'vertex' : 'google',
         };
     }
     // ========================================================================
+    // Deep Research (Gemini interactions API)
+    // ========================================================================
+    /** Deep Research is available via Google AI Studio only (not Vertex AI). */
+    supportsDeepResearch() {
+        return !this.isVertex;
+    }
+    interactionsBase() {
+        if (this.isVertex) {
+            throw new Error('Deep Research is only available via Google AI Studio, not Vertex AI.');
+        }
+        return `https://generativelanguage.googleapis.com/${this.apiVersion}/interactions`;
+    }
+    deepResearchHeaders() {
+        return {
+            'Content-Type': 'application/json',
+            'x-goog-api-key': this.options.apiKey ?? '',
+            'Api-Revision': '2026-05-20',
+        };
+    }
+    buildInteractionBody(input, opts, background) {
+        return {
+            input,
+            agent: opts.agent ?? 'deep-research-preview-04-2026',
+            background,
+            agent_config: {
+                type: 'deep-research',
+                thinking_summaries: opts.thinkingSummaries ?? 'auto',
+            },
+            ...(opts.tools?.length ? { tools: opts.tools.map(t => ({ type: t })) } : {}),
+            ...(opts.previousInteractionId ? { previous_interaction_id: opts.previousInteractionId } : {}),
+        };
+    }
+    toDeepResearchResult(i) {
+        const obj = i ?? {};
+        const steps = obj['steps'];
+        let report = (obj['output_text'] ?? obj['outputText'] ?? obj['output']);
+        // Some responses carry the final report only inside the steps' content
+        // blocks (the last step is typically the answer) — concatenate text there.
+        if (!report && Array.isArray(steps)) {
+            const text = steps
+                .flatMap(s => (Array.isArray(s.content) ? s.content : []))
+                .map(c => (c && typeof c === 'object' && typeof c.text === 'string'
+                ? c.text
+                : ''))
+                .filter(Boolean)
+                .join('\n\n');
+            report = text || undefined;
+        }
+        return {
+            id: obj['id'] ?? '',
+            status: obj['status'] ?? 'in_progress',
+            report,
+            steps,
+            error: obj['error'],
+            raw: obj,
+        };
+    }
+    /** httpRequest with small backoff retries — the preview interactions API is flaky (503s). */
+    async drRequest(url, init, retries = 3) {
+        let lastErr;
+        for (let attempt = 0; attempt <= retries; attempt++) {
+            try {
+                const res = await httpRequest(url, init);
+                return res.data;
+            }
+            catch (e) {
+                lastErr = e;
+                if (attempt < retries)
+                    await this.delay(1500 * (attempt + 1), init.signal);
+            }
+        }
+        throw lastErr;
+    }
+    /**
+     * Run an agentic Deep Research interaction: create it, then poll until it
+     * completes/fails or the timeout elapses. Returns the final report + steps.
+     */
+    async deepResearch(input, opts = {}) {
+        const base = this.interactionsBase();
+        const headers = this.deepResearchHeaders();
+        const pollInterval = opts.pollIntervalMs ?? 5000;
+        const deadline = Date.now() + (opts.timeoutMs ?? 600_000);
+        let interaction = await this.drRequest(base, {
+            method: 'POST',
+            headers,
+            body: this.buildInteractionBody(input, opts, true),
+            timeout: this.options.timeout ?? 60_000,
+            signal: opts.signal,
+        });
+        const id = interaction?.['id'];
+        if (!id)
+            return this.toDeepResearchResult(interaction);
+        while ((interaction?.['status'] ?? 'in_progress') === 'in_progress') {
+            if (Date.now() > deadline)
+                break;
+            await this.delay(pollInterval, opts.signal);
+            try {
+                interaction = await this.drRequest(`${base}/${id}`, { method: 'GET', headers, timeout: this.options.timeout ?? 60_000, signal: opts.signal }, 2);
+            }
+            catch {
+                // Tolerate transient errors during a long poll; keep trying until the deadline.
+            }
+        }
+        return this.toDeepResearchResult(interaction);
+    }
+    /**
+     * Stream a Deep Research interaction's intermediate updates (`step.delta`
+     * thought/text/image events) and return the final result. Best-effort:
+     * falls back to the created interaction object if the stream ends early.
+     */
+    async *deepResearchStream(input, opts = {}) {
+        const base = this.interactionsBase();
+        const headers = this.deepResearchHeaders();
+        // Streaming long-running research requires background:true AND stream:true
+        // in the create body (per the Deep Research Interactions API docs).
+        const stream = httpStream(base, {
+            method: 'POST',
+            headers,
+            body: { ...this.buildInteractionBody(input, opts, true), stream: true },
+            timeout: opts.timeoutMs ?? 600_000,
+            signal: opts.signal,
+        });
+        let last;
+        for await (const { data } of parseSSE(stream)) {
+            if (!data || data === '[DONE]')
+                continue;
+            let parsed;
+            try {
+                parsed = JSON.parse(data);
+            }
+            catch {
+                continue;
+            }
+            last = parsed;
+            const delta = (parsed['delta'] ?? parsed['step']?.['delta']);
+            if (delta) {
+                const dtype = delta['type'];
+                if (dtype === 'thought')
+                    yield { type: 'thought', content: String(delta['text'] ?? delta['content'] ?? '') };
+                else if (dtype === 'text')
+                    yield { type: 'text', content: String(delta['text'] ?? delta['content'] ?? '') };
+                else if (dtype === 'image')
+                    yield { type: 'image', content: delta['image'] ?? delta['content'] };
+            }
+            if (typeof parsed['status'] === 'string')
+                yield { type: 'status', status: parsed['status'] };
+        }
+        return this.toDeepResearchResult(last);
+    }
+    delay(ms, signal) {
+        return new Promise((resolve, reject) => {
+            if (signal?.aborted)
+                return reject(new Error('aborted'));
+            const t = setTimeout(resolve, ms);
+            signal?.addEventListener('abort', () => { clearTimeout(t); reject(new Error('aborted')); }, { once: true });
+        });
+    }
+    // ========================================================================
     // Embeddings
     // ========================================================================
     async embed(text) {
@@ -268,8 +434,29 @@ export class GoogleClient extends BaseLLMClient {
             config['temperature'] = options.temperature;
         if (options?.maxTokens !== undefined)
             config['maxOutputTokens'] = options.maxTokens;
-        if (this.options.thinking) {
-            config['thinkingConfig'] = { thinkingBudget: 8192 };
+        // Unified thinking flag → Gemini thinkingConfig. Per-call overrides model
+        // config. Gemini 3.x uses `thinkingLevel`; 2.5/2.0 use `thinkingBudget`
+        // (0 = off, -1 = dynamic). `includeThoughts` surfaces the reasoning text.
+        // A user-supplied thinkingConfig (via parameters) is left untouched.
+        const thinking = resolveThinking(options?.thinking, this.options.thinking);
+        if (thinking && config['thinkingConfig'] === undefined) {
+            if (/gemini-3/i.test(this.options.model)) {
+                const tc = {};
+                if (!thinking.enabled) {
+                    tc['thinkingLevel'] = 'MINIMAL';
+                }
+                else {
+                    if (thinking.level)
+                        tc['thinkingLevel'] = thinking.level.toUpperCase();
+                    tc['includeThoughts'] = true;
+                }
+                config['thinkingConfig'] = tc;
+            }
+            else {
+                config['thinkingConfig'] = thinking.enabled
+                    ? { thinkingBudget: geminiThinkingBudget(thinking.level), includeThoughts: true }
+                    : { thinkingBudget: 0 };
+            }
         }
         // Structured output: add responseMimeType and responseSchema
         const schemaOptions = this.extractSchemaOptions(options);
@@ -346,9 +533,7 @@ export class GoogleClient extends BaseLLMClient {
                         const part = {
                             functionCall: {
                                 name: tc.function.name,
-                                args: typeof tc.function.arguments === 'string'
-                                    ? JSON.parse(tc.function.arguments)
-                                    : tc.function.arguments,
+                                args: this.parseToolArguments(tc.function.arguments),
                             },
                         };
                         // Echo thought signature back (required by Gemini 3.x)
@@ -430,8 +615,8 @@ export class GoogleClient extends BaseLLMClient {
             id: this.generateToolCallId(),
             type: 'function',
             function: {
-                name: fc.name,
-                arguments: JSON.stringify(fc.args),
+                name: fc.name || '',
+                arguments: JSON.stringify(fc.args ?? {}),
             },
         };
         if (thoughtSignature) {
@@ -439,6 +624,23 @@ export class GoogleClient extends BaseLLMClient {
         }
         return toolCall;
     }
+    parseToolArguments(args) {
+        if (typeof args !== 'string') {
+            return args ?? {};
+        }
+        if (args.length === 0) {
+            return {};
+        }
+        try {
+            const parsed = JSON.parse(args);
+            return parsed && typeof parsed === 'object' && !Array.isArray(parsed)
+                ? parsed
+                : {};
+        }
+        catch {
+            return {};
+        }
+    }
     // ========================================================================
     // Response Parsing
     // ========================================================================
@@ -451,10 +653,17 @@ export class GoogleClient extends BaseLLMClient {
             };
         }
         let textContent = '';
+        let reasoningText = '';
         const toolCalls = [];
         for (const part of candidate.content.parts) {
-            if (part.text)
-                textContent += part.text;
+            if (part.text) {
+                // Thought summaries (includeThoughts) carry the reasoning trace;
+                // keep them out of `content` and surface them as `reasoning`.
+                if (part.thought)
+                    reasoningText += part.text;
+                else
+                    textContent += part.text;
+            }
             if (part.functionCall) {
                 toolCalls.push(this.convertFunctionCallToToolCall(part.functionCall, part.thoughtSignature));
             }
@@ -474,6 +683,7 @@ export class GoogleClient extends BaseLLMClient {
                 content: textContent,
                 tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
             },
+            reasoning: reasoningText || undefined,
             usage,
             provider: this.isVertex ? 'vertex' : 'google',
         };
@@ -502,4 +712,3 @@ export class GoogleClient extends BaseLLMClient {
         throw new Error('Unreachable');
     }
 }
-//# sourceMappingURL=google.js.map

package/dist/providers/index.d.ts CHANGED

@@ -5,4 +5,3 @@ export { OllamaClient } from './ollama.js';
 export { OpenAICompatibleClient } from './openai.js';
 export { GoogleClient } from './google.js';
 export { AnthropicClient } from './anthropic.js';
-//# sourceMappingURL=index.d.ts.map

package/dist/providers/index.js CHANGED

@@ -5,4 +5,3 @@ export { OllamaClient } from './ollama.js';
 export { OpenAICompatibleClient } from './openai.js';
 export { GoogleClient } from './google.js';
 export { AnthropicClient } from './anthropic.js';
-//# sourceMappingURL=index.js.map

package/dist/providers/ollama.d.ts CHANGED

@@ -18,6 +18,8 @@ export declare class OllamaClient extends BaseLLMClient {
     constructor(options: LLMClientOptions, auditor?: Auditor);
     chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse>;
     chatStream(messages: LLMChatMessage[], options?: ChatOptions): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>;
+    private normalizeToolCall;
+    private normalizeToolArguments;
     embed(text: string): Promise<number[]>;
     embedArray(texts: string[]): Promise<number[][]>;
     getModels(): Promise<string[]>;
@@ -35,4 +37,3 @@ export declare class OllamaClient extends BaseLLMClient {
      */
     private buildFormatParameter;
 }
-//# sourceMappingURL=ollama.d.ts.map

package/dist/providers/ollama.js CHANGED

@@ -11,9 +11,11 @@
  * - VAL-PROVIDER-OLLAMA-004: format "json" vs schema modes
  */
 import { BaseLLMClient } from '../client.js';
+import { resolveThinking } from '../thinking.js';
 import { httpRequest, httpStream, parseNDJSON, buildHeaders } from '../http.js';
 import { StandardChatDecoder } from '../stream-decoder.js';
 import { normalizeJsonSchema, getJsonSchemaFromConfig, } from '../structured-output.js';
+import { extractGemmaThoughtChannels } from '../gemma-channel.js';
 export class OllamaClient extends BaseLLMClient {
     constructor(options, auditor) {
         super({
@@ -39,7 +41,8 @@ export class OllamaClient extends BaseLLMClient {
         }
         // Enable native thinking by default — thinking models produce better
         // tool selections and reasoning when allowed to think before acting.
-        body['think'] = this.options.thinking ?? true;
+        // Ollama `think` is on/off (no levels); default on for thinking models.
+        body['think'] = resolveThinking(options?.thinking, this.options.thinking)?.enabled ?? true;
         // Handle structured output via format parameter
         const schemaOptions = this.extractSchemaOptions(options);
         if (schemaOptions) {
@@ -68,28 +71,25 @@ export class OllamaClient extends BaseLLMClient {
                 inputTokens: data.prompt_eval_count ?? 0,
                 outputTokens: data.eval_count ?? 0,
                 totalTokens: (data.prompt_eval_count ?? 0) + (data.eval_count ?? 0),
+                // Ollama reports server-precise timing in nanoseconds.
+                durationMs: data.total_duration ? data.total_duration / 1e6 : undefined,
+                tokensPerSecond: data.eval_duration && data.eval_count
+                    ? data.eval_count / (data.eval_duration / 1e9)
+                    : undefined,
             }
             : undefined;
-        // Normalize tool call IDs (Ollama sometimes omits them)
-        const toolCalls = data.message.tool_calls?.map(tc => ({
-            ...tc,
-            id: tc.id || this.generateToolCallId(),
-            function: {
-                ...tc.function,
-                arguments: typeof tc.function.arguments === 'string'
-                    ? tc.function.arguments
-                    : JSON.stringify(tc.function.arguments),
-            },
-        }));
-        // Get content, handling potential null
-        const content = data.message.content || data.message.thinking || '';
+        // Normalize tool calls (Ollama sometimes omits IDs and empty args).
+        const toolCalls = data.message.tool_calls?.map(tc => this.normalizeToolCall(tc));
+        const gemmaContent = extractGemmaThoughtChannels(data.message.content || '');
+        const reasoning = [data.message.thinking, gemmaContent.reasoning].filter(Boolean).join('\n\n') || undefined;
         const result = {
             message: {
                 role: 'assistant',
-                content,
+                content: gemmaContent.content,
                 tool_calls: toolCalls,
             },
-            reasoning: data.message.content ? data.message.thinking : undefined,
+            finishReason: data.done_reason,
+            reasoning,
             usage,
             provider: 'ollama',
         };
@@ -118,7 +118,8 @@ export class OllamaClient extends BaseLLMClient {
         if (tools?.length) {
             body['tools'] = this.convertToolsToOllama(tools);
         }
-        body['think'] = this.options.thinking ?? true;
+        // Ollama `think` is on/off (no levels); default on for thinking models.
+        body['think'] = resolveThinking(options?.thinking, this.options.thinking)?.enabled ?? true;
         const start = Date.now();
         this.auditor.record({
             timestamp: start,
@@ -126,7 +127,8 @@ export class OllamaClient extends BaseLLMClient {
             provider: 'ollama',
             model: this.options.model,
         });
-        const decoder = new StandardChatDecoder(() => { });
+        const decoderEvents = [];
+        const decoder = new StandardChatDecoder(event => decoderEvents.push(event));
         let lastResponse;
         const streamedToolCalls = [];
         // Stream idle timeout: thinking models can pause for minutes between chunks.
@@ -142,33 +144,38 @@ export class OllamaClient extends BaseLLMClient {
             lastResponse = chunk;
             if (chunk.message?.thinking) {
                 decoder.pushReasoning(chunk.message.thinking);
-                yield { type: 'thinking', content: chunk.message.thinking };
+                const pending = decoderEvents.splice(0);
+                for (const event of pending) {
+                    yield event;
+                }
             }
             if (chunk.message?.content) {
                 decoder.push(chunk.message.content);
-                yield { type: 'text', content: chunk.message.content };
+                const pending = decoderEvents.splice(0);
+                for (const event of pending) {
+                    yield event;
+                }
             }
             if (chunk.message?.tool_calls?.length) {
-                const normalized = chunk.message.tool_calls.map(tc => ({
-                    ...tc,
-                    id: tc.id || this.generateToolCallId(),
-                    function: {
-                        ...tc.function,
-                        arguments: typeof tc.function.arguments === 'string'
-                            ? tc.function.arguments
-                            : JSON.stringify(tc.function.arguments),
-                    },
-                }));
+                const normalized = chunk.message.tool_calls.map(tc => this.normalizeToolCall(tc));
                 streamedToolCalls.push(...normalized);
                 yield { type: 'tool_call', calls: normalized };
             }
         }
         decoder.flush();
+        const pending = decoderEvents.splice(0);
+        for (const event of pending) {
+            yield event;
+        }
         const usage = lastResponse?.prompt_eval_count
             ? {
                 inputTokens: lastResponse.prompt_eval_count ?? 0,
                 outputTokens: lastResponse.eval_count ?? 0,
                 totalTokens: (lastResponse.prompt_eval_count ?? 0) + (lastResponse.eval_count ?? 0),
+                durationMs: lastResponse.total_duration ? lastResponse.total_duration / 1e6 : undefined,
+                tokensPerSecond: lastResponse.eval_duration && lastResponse.eval_count
+                    ? lastResponse.eval_count / (lastResponse.eval_duration / 1e9)
+                    : undefined,
             }
             : undefined;
         this.auditor.record({
@@ -185,11 +192,33 @@ export class OllamaClient extends BaseLLMClient {
                 content: decoder.getCleanContent(),
                 tool_calls: streamedToolCalls.length > 0 ? streamedToolCalls : undefined,
             },
+            finishReason: lastResponse?.done_reason,
             reasoning: decoder.getReasoning(),
             usage,
             provider: 'ollama',
         };
     }
+    normalizeToolCall(toolCall) {
+        return {
+            ...toolCall,
+            id: toolCall.id || this.generateToolCallId(),
+            type: 'function',
+            function: {
+                ...toolCall.function,
+                name: toolCall.function?.name || '',
+                arguments: this.normalizeToolArguments(toolCall.function?.arguments),
+            },
+        };
+    }
+    normalizeToolArguments(args) {
+        if (typeof args === 'string') {
+            return args.trim().length > 0 ? args : '{}';
+        }
+        if (args == null) {
+            return '{}';
+        }
+        return JSON.stringify(args) ?? '{}';
+    }
     // ========================================================================
     // Embeddings
     // ========================================================================
@@ -397,4 +426,3 @@ export class OllamaClient extends BaseLLMClient {
         return 'json';
     }
 }
-//# sourceMappingURL=ollama.js.map

package/dist/providers/openai.d.ts CHANGED

@@ -9,9 +9,25 @@ import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions } f
 import type { DecodedEvent } from '../stream-decoder.js';
 import type { Auditor } from '../auditor.js';
 export declare class OpenAICompatibleClient extends BaseLLMClient {
+    private warnedVllmToolFallback;
+    /**
+     * DiffusionGemma on trimmed vLLM builds has no server-side reasoning or
+     * tool-call parser — the native channel protocol is handled client-side
+     * (see gemma-diffusion.ts). Auto-detected from the model name; override
+     * with `gemmaNativeProtocol` in LLMClientOptions.
+     */
+    private get gemmaNative();
+    /**
+     * Build a full endpoint URL, respecting apiBasePath (already baked into this.options.url)
+     * and any queryParams provided at the provider config level.
+     */
+    private buildUrl;
     constructor(options: LLMClientOptions, auditor?: Auditor);
+    private warnVllmToolFallback;
     chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse>;
     chatStream(messages: LLMChatMessage[], options?: ChatOptions): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>;
+    private normalizeToolCall;
+    private normalizeToolArguments;
     embed(text: string): Promise<number[]>;
     getModels(): Promise<string[]>;
     private convertMessages;
@@ -21,4 +37,3 @@ export declare class OpenAICompatibleClient extends BaseLLMClient {
      */
     private buildResponseFormat;
 }
-//# sourceMappingURL=openai.d.ts.map