npm - @juspay/neurolink - Versions diffs - 9.67.2 → 9.67.3 - Mend

@juspay/neurolink 9.67.2 → 9.67.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/CHANGELOG.md +2 -0
package/dist/browser/neurolink.min.js +326 -326
package/dist/lib/providers/litellm.d.ts +25 -32
package/dist/lib/providers/litellm.js +132 -601
package/dist/lib/providers/openaiChatCompletionsBase.d.ts +93 -0
package/dist/lib/providers/openaiChatCompletionsBase.js +644 -0
package/dist/lib/providers/openaiCompatible.d.ts +7 -63
package/dist/lib/providers/openaiCompatible.js +27 -658
package/dist/lib/types/openaiCompatible.d.ts +20 -0
package/dist/providers/litellm.d.ts +25 -32
package/dist/providers/litellm.js +132 -601
package/dist/providers/openaiChatCompletionsBase.d.ts +93 -0
package/dist/providers/openaiChatCompletionsBase.js +643 -0
package/dist/providers/openaiCompatible.d.ts +7 -63
package/dist/providers/openaiCompatible.js +27 -658
package/dist/types/openaiCompatible.d.ts +20 -0
package/package.json +1 -1

package/dist/providers/litellm.js CHANGED

@@ -1,20 +1,14 @@
 import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
-import { BaseProvider } from "../core/baseProvider.js";
-import { DEFAULT_MAX_STEPS } from "../core/constants.js";
-import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
 import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
 import { isAbortError } from "../utils/errorHandling.js";
-import { NoOutputGeneratedError } from "../utils/generationErrors.js";
 import { logger } from "../utils/logger.js";
-import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
+import { isGemini25Model as isCanonicalGemini25Model } from "../utils/modelDetection.js";
 import { calculateCost } from "../utils/pricing.js";
 import { getProviderModel } from "../utils/providerConfig.js";
-import { composeAbortSignals, createTimeoutController, mergeAbortSignals, TimeoutError, } from "../utils/timeout.js";
-import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
-import { resolveToolChoice } from "../utils/toolChoice.js";
-import { transformToolExecutions } from "../utils/transformationUtils.js";
-import { buildAPIError, buildBody, buildToolsForOpenAI, createChunkQueue, createDeferredAnalytics, mapNeuroLinkToolChoice, mergeUsage, messageBuilderToOpenAI, parseSSEStream, stringifyToolOutput, stripTrailingSlash, v3ResponseFormatToOpenAI, v3ToolChoiceToOpenAI, v3ToolsToOpenAI, } from "./openaiChatCompletionsClient.js";
+import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
+import { stripTrailingSlash } from "./openaiChatCompletionsClient.js";
+import { OpenAIChatCompletionsProvider } from "./openaiChatCompletionsBase.js";
 const streamTracer = trace.getTracer("neurolink.provider.litellm");
 const FALLBACK_LITELLM_MODEL = "openai/gpt-4o-mini";
 const getLiteLLMConfig = () => ({
@@ -25,37 +19,37 @@ const getLiteLLMConfig = () => ({
  * LiteLLM uses a 'provider/model' format. Override via LITELLM_MODEL env var.
  */
 const getDefaultLiteLLMModel = () => getProviderModel("LITELLM_MODEL", FALLBACK_LITELLM_MODEL);
-const isGemini25Model = (modelName) => modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
-// =============================================================================
-// Direct HTTP client for LiteLLM proxy.
-//
-// LiteLLM exposes the OpenAI chat-completions wire format, so all the
-// wire-level converters and the SSE parser are shared with the
-// openai-compatible provider via ./openaiChatCompletionsClient.ts. This
-// file owns LiteLLM-specific behaviour: OTel span wrap with cost, model
-// allowlist 403 → ModelAccessDeniedError, Gemini 2.5 maxTokens skip,
-// model caching, and native /v1/embeddings.
-// =============================================================================
+// LiteLLM model ids come in `provider/model` form (e.g. "google/gemini-2.5-flash").
+// Strip the provider prefix and delegate to the canonical anchored-regex
+// check in src/lib/utils/modelDetection.ts so the truth lives in one place.
+const isGemini25Model = (modelName) => {
+    const lastSegment = modelName.includes("/")
+        ? modelName.slice(modelName.lastIndexOf("/") + 1)
+        : modelName;
+    return isCanonicalGemini25Model(lastSegment);
+};
 /**
  * LiteLLM Provider — direct HTTP, no AI SDK. Talks to a LiteLLM proxy
  * server (or any deployment that speaks OpenAI chat-completions + the
  * `/v1/models` and `/v1/embeddings` endpoints).
+ *
+ * All request/stream/tool-loop orchestration lives in
+ * `OpenAIChatCompletionsProvider`. This class adds LiteLLM-specific
+ * behaviour: OTel span wrap with cost (`onStreamStart`), Gemini 2.5
+ * maxTokens skip (`adjustBuildBodyOptions`), ModelAccessDeniedError on
+ * 403, 10-minute model cache (`getAvailableModels`), `LITELLM_FALLBACK_MODELS`
+ * env-driven fallback list, and native `/v1/embeddings`.
  */
-export class LiteLLMProvider extends BaseProvider {
-    config;
-    credentials;
-    resolvedModel;
+export class LiteLLMProvider extends OpenAIChatCompletionsProvider {
     static modelsCache = [];
     static modelsCacheTime = 0;
     static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
     constructor(modelName, sdk, _region, credentials) {
-        super(modelName, "litellm", sdk);
-        this.credentials = credentials;
         const envConfig = getLiteLLMConfig();
-        this.config = {
+        super("litellm", modelName, sdk, {
             baseURL: credentials?.baseURL ?? envConfig.baseURL,
             apiKey: credentials?.apiKey ?? envConfig.apiKey,
-        };
+        });
         logger.debug("LiteLLM Provider initialized", {
             modelName: this.modelName,
             provider: this.providerName,
@@ -68,146 +62,77 @@ export class LiteLLMProvider extends BaseProvider {
     getDefaultModel() {
         return getDefaultLiteLLMModel();
     }
+    getFallbackModelName() {
+        return FALLBACK_LITELLM_MODEL;
+    }
+    getFallbackModels() {
+        return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
+            .map((m) => m.trim())
+            .filter((m) => m.length > 0) || [
+            "openai/gpt-4o",
+            "anthropic/claude-3-haiku",
+            "meta-llama/llama-3.1-8b-instruct",
+            "google/gemini-2.5-flash",
+        ]);
+    }
     /**
-     * Abstract from BaseProvider — used by the parent's generate() path which
-     * still goes through `generateText`. Returns a thin LanguageModelV3-shaped
-     * object that delegates to the same HTTP helpers used by executeStream.
+     * Gemini 2.5 models on LiteLLM have a known compatibility issue with
+     * `max_tokens` — strip it before the wire body is built. Applies to
+     * both streaming and non-streaming paths.
      */
-    async getAISDKModel() {
-        const modelId = await this.resolveModelName();
-        return this.buildDelegatingModel(modelId);
-    }
-    async resolveModelName() {
-        if (this.resolvedModel) {
-            return this.resolvedModel;
-        }
-        const explicit = this.modelName || getDefaultLiteLLMModel();
-        if (explicit && explicit.trim() !== "") {
-            this.resolvedModel = explicit;
-            if (this.modelName !== explicit) {
-                this.refreshHandlersForModel(explicit);
+    adjustBuildBodyOptions(modelId, opts) {
+        if (isGemini25Model(modelId) && opts.maxTokens !== undefined) {
+            if (logger.shouldLog("debug")) {
+                logger.debug("LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)", { modelId, requestedMaxTokens: opts.maxTokens });
             }
-            return explicit;
+            return { ...opts, maxTokens: undefined };
         }
-        this.resolvedModel = FALLBACK_LITELLM_MODEL;
-        this.refreshHandlersForModel(FALLBACK_LITELLM_MODEL);
-        return FALLBACK_LITELLM_MODEL;
+        return opts;
     }
     /**
-     * Returns a minimal V3-shaped model. Only used by BaseProvider's
-     * `generate()` non-streaming path which still relies on the parent's
-     * `generateText`. The streaming path bypasses this entirely.
+     * Wrap the stream in an OTel span to capture provider-level latency,
+     * token usage, finish reason, and cost. Matches the pre-migration
+     * behaviour where streamText was wrapped in `neurolink.provider.streamText`.
      */
-    buildDelegatingModel(modelId) {
-        const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
-        const fetchImpl = createProxyFetch();
-        const apiKey = this.config.apiKey;
-        const providerName = this.providerName;
-        const getTimeoutForOptions = (opts) => this.getTimeout((opts ?? {}));
-        const gemini25Skip = isGemini25Model(modelId);
+    onStreamStart(modelId) {
+        const span = streamTracer.startSpan("neurolink.provider.streamText", {
+            kind: SpanKind.CLIENT,
+            attributes: {
+                "gen_ai.system": "litellm",
+                "gen_ai.request.model": modelId,
+            },
+        });
+        let spanEnded = false;
+        const endSpan = () => {
+            if (!spanEnded) {
+                spanEnded = true;
+                span.end();
+            }
+        };
         return {
-            specificationVersion: "v3",
-            provider: "litellm",
-            modelId,
-            supportedUrls: {},
-            doGenerate: async (options) => {
-                const messages = messageBuilderToOpenAI(options.prompt);
-                const body = buildBody({
-                    modelId,
-                    messages,
-                    options: {
-                        maxTokens: gemini25Skip ? undefined : options.maxOutputTokens,
-                        temperature: options.temperature,
-                        topP: options.topP,
-                        presencePenalty: options.presencePenalty,
-                        frequencyPenalty: options.frequencyPenalty,
-                        seed: options.seed,
-                        stopSequences: options.stopSequences,
-                    },
-                    tools: v3ToolsToOpenAI(options.tools),
-                    ...(options.toolChoice
-                        ? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
-                        : {}),
-                    streaming: false,
-                    ...(options.responseFormat
-                        ? {
-                            responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
-                        }
-                        : {}),
+            onUsage: (usage) => {
+                span.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
+                span.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
+                const cost = calculateCost(this.providerName, this.modelName, {
+                    input: usage.promptTokens,
+                    output: usage.completionTokens,
+                    total: usage.totalTokens,
                 });
-                const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
-                const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
-                let res;
-                try {
-                    res = await fetchImpl(url, {
-                        method: "POST",
-                        headers: {
-                            "Content-Type": "application/json",
-                            Authorization: `Bearer ${apiKey}`,
-                        },
-                        body: JSON.stringify(body),
-                        ...(composedSignal ? { signal: composedSignal } : {}),
-                    });
-                }
-                finally {
-                    timeoutController?.cleanup();
+                if (cost && cost > 0) {
+                    span.setAttribute("neurolink.cost", cost);
                 }
-                if (!res.ok) {
-                    throw await buildAPIError(url, body, res);
-                }
-                const json = (await res.json());
-                const choice = json.choices?.[0];
-                const text = (typeof choice?.message?.content === "string"
-                    ? choice.message.content
-                    : "") ?? "";
-                const content = [];
-                if (text.length > 0) {
-                    content.push({ type: "text", text });
-                }
-                for (const tc of choice?.message?.tool_calls ?? []) {
-                    content.push({
-                        type: "tool-call",
-                        toolCallId: tc.id,
-                        toolName: tc.function.name,
-                        input: tc.function.arguments ?? "",
+            },
+            onFinish: (reason, capturedError) => {
+                span.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
+                if (reason === "error") {
+                    span.setStatus({
+                        code: SpanStatusCode.ERROR,
+                        message: capturedError instanceof Error
+                            ? capturedError.message
+                            : String(capturedError ?? "stream error"),
                     });
                 }
-                const rawFinish = choice?.finish_reason;
-                const unified = rawFinish === "length"
-                    ? "length"
-                    : rawFinish === "tool_calls" || rawFinish === "function_call"
-                        ? "tool-calls"
-                        : rawFinish === "content_filter"
-                            ? "content-filter"
-                            : "stop";
-                return {
-                    content,
-                    finishReason: { unified, raw: rawFinish ?? "stop" },
-                    usage: {
-                        inputTokens: {
-                            total: json.usage?.prompt_tokens,
-                            noCache: json.usage?.prompt_tokens,
-                            cacheRead: undefined,
-                            cacheWrite: undefined,
-                        },
-                        outputTokens: {
-                            total: json.usage?.completion_tokens,
-                            text: json.usage?.completion_tokens,
-                            reasoning: undefined,
-                        },
-                    },
-                    warnings: [],
-                    request: { body },
-                    response: {
-                        ...(json.id ? { id: json.id } : {}),
-                        ...(json.model ? { modelId: json.model } : {}),
-                        headers: {},
-                        body: json,
-                    },
-                };
-            },
-            doStream: () => {
-                throw new Error("litellm: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
+                endSpan();
             },
         };
     }
@@ -253,384 +178,69 @@ export class LiteLLMProvider extends BaseProvider {
         }
         return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
     }
-    supportsTools() {
-        return true;
-    }
     /**
-     * Streaming path — drives the LiteLLM proxy directly. No streamText, no
-     * AI SDK orchestrator. Tool calls, multi-step loops, telemetry, abort
-     * handling all inline. OTel span captures gen_ai.system + cost.
+     * Get available models from LiteLLM proxy `/v1/models` endpoint.
+     * Caches results for 10 minutes; falls back to env-driven list or a
+     * minimal safe default if the API fetch fails.
      */
-    async executeStream(options, _analysisSchema) {
-        this.validateStreamOptions(options);
-        const startTime = Date.now();
-        const timeout = this.getTimeout(options);
-        const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
-        const consumerAbortController = new AbortController();
-        const abortSignal = mergeAbortSignals([
-            options.abortSignal,
-            timeoutController?.controller.signal,
-            consumerAbortController.signal,
-        ]).signal;
-        let modelId;
-        let toolsRecord;
-        let openAITools;
-        let openAIToolChoice;
-        let conversation;
-        try {
-            modelId = await this.resolveModelName();
-            const shouldUseTools = !options.disableTools && this.supportsTools();
-            toolsRecord = shouldUseTools
-                ? options.tools || (await this.getAllTools())
-                : {};
-            openAITools = shouldUseTools
-                ? buildToolsForOpenAI(toolsRecord)
-                : undefined;
-            openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
-            const initialMessages = await this.buildMessagesForStream(options);
-            conversation = messageBuilderToOpenAI(initialMessages);
-        }
-        catch (setupErr) {
-            timeoutController?.cleanup();
-            throw setupErr;
-        }
-        const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
-        const fetchImpl = createProxyFetch();
-        const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
-        const emitter = this.neurolink?.getEventEmitter();
-        const toolsUsed = [];
-        const toolExecutionSummaries = [];
-        const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
-        const { pushChunk, nextChunk } = createChunkQueue();
-        // Wrap the stream in an OTel span to capture provider-level latency,
-        // token usage, finish reason, and cost. Matches the pre-migration
-        // behaviour where streamText was wrapped in `neurolink.provider.streamText`.
-        const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
-            kind: SpanKind.CLIENT,
-            attributes: {
-                "gen_ai.system": "litellm",
-                "gen_ai.request.model": modelId,
-            },
-        });
-        // Model-specific maxTokens handling — Gemini 2.5 models have known issues
-        // with maxTokens being forwarded. Mutate a shallow copy so the original
-        // StreamOptions reference downstream (analytics, telemetry) is unchanged.
-        const requestOptions = isGemini25Model(modelId)
-            ? { ...options, maxTokens: undefined }
-            : options;
-        if (requestOptions !== options &&
-            options.maxTokens &&
-            logger.shouldLog("debug")) {
-            logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, { modelId, requestedMaxTokens: options.maxTokens });
-        }
-        const loopPromise = this.runStreamLoop({
-            maxSteps,
-            modelId,
-            url,
-            apiKey: this.config.apiKey,
-            fetchImpl,
-            abortSignal,
-            options: requestOptions,
-            conversation,
-            openAITools,
-            openAIToolChoice,
-            toolsRecord,
-            emitter,
-            toolsUsed,
-            toolExecutionSummaries,
-            pushChunk,
-            resolveUsage,
-            resolveFinish,
-        });
-        // Wire the OTel span lifecycle to the deferred analytics promises.
-        let capturedProviderError;
-        const captureProviderError = (error) => {
-            capturedProviderError = error;
-        };
-        usagePromise
-            .then((usage) => {
-            streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
-            streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
-            const cost = calculateCost(this.providerName, this.modelName, {
-                input: usage.promptTokens,
-                output: usage.completionTokens,
-                total: usage.totalTokens,
+    async getAvailableModels() {
+        const now = Date.now();
+        if (LiteLLMProvider.modelsCache.length > 0 &&
+            now - LiteLLMProvider.modelsCacheTime <
+                LiteLLMProvider.MODELS_CACHE_DURATION) {
+            logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
+                cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
+                modelCount: LiteLLMProvider.modelsCache.length,
             });
-            if (cost && cost > 0) {
-                streamSpan.setAttribute("neurolink.cost", cost);
-            }
-        })
-            .catch(() => {
-            // usage may never resolve if the stream is aborted before completion
-        });
-        finishPromise
-            .then((reason) => {
-            streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
-            if (reason === "error") {
-                streamSpan.setStatus({
-                    code: SpanStatusCode.ERROR,
-                    message: capturedProviderError instanceof Error
-                        ? capturedProviderError.message
-                        : String(capturedProviderError ?? "stream error"),
-                });
-            }
-            streamSpan.end();
-        })
-            .catch(() => {
-            streamSpan.end();
-        });
-        const transformedStream = async function* () {
-            let contentYielded = 0;
-            try {
-                for (;;) {
-                    const chunk = await nextChunk();
-                    if ("done" in chunk) {
-                        break;
-                    }
-                    if ("content" in chunk &&
-                        typeof chunk.content === "string" &&
-                        chunk.content.length > 0) {
-                        contentYielded++;
-                    }
-                    yield chunk;
-                }
-                await loopPromise;
-                if (contentYielded === 0 && toolsUsed.length === 0) {
-                    logger.warn("LiteLLM: Stream produced no output — emitting enriched sentinel");
-                    const fauxNoOutput = new NoOutputGeneratedError({
-                        message: "Stream produced no output",
-                    });
-                    const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
-                    stampNoOutputSpan(sentinel);
-                    yield sentinel;
-                }
-            }
-            catch (streamError) {
-                if (NoOutputGeneratedError.isInstance(streamError)) {
-                    const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
-                    stampNoOutputSpan(sentinel);
-                    yield sentinel;
-                    return;
-                }
-                const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
-                stampNoOutputSpan(sentinel);
-                yield sentinel;
-                throw streamError;
-            }
-            finally {
-                if (!consumerAbortController.signal.aborted) {
-                    consumerAbortController.abort();
-                }
-            }
-        };
-        const result = {
-            stream: transformedStream(),
-            provider: this.providerName,
-            model: this.modelName,
-            analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, {
-                textStream: (async function* () { })(),
-                usage: usagePromise,
-                finishReason: finishPromise,
-            }, Date.now() - startTime, {
-                requestId: options.requestId ??
-                    `litellm-stream-${Date.now()}`,
-                streamingMode: true,
-            }),
-            toolsUsed,
-            metadata: {
-                startTime,
-                streamId: `litellm-${Date.now()}`,
-            },
-        };
-        Object.defineProperty(result, "toolExecutions", {
-            enumerable: true,
-            configurable: true,
-            get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
-                toolName: s.toolName,
-                input: s.input,
-                output: s.output,
-                duration: s.endTime.getTime() - s.startTime.getTime(),
-            }))),
-        });
-        loopPromise
-            .finally(() => timeoutController?.cleanup())
-            .catch((error) => {
-            captureProviderError(error);
-        });
-        return result;
-    }
-    async runStreamLoop(args) {
-        const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
+            return LiteLLMProvider.modelsCache;
+        }
         try {
-            let stepFinish = null;
-            let stepUsage;
-            for (let step = 0; step < maxSteps; step++) {
-                const stepResult = await this.streamOneStep({
-                    modelId,
-                    url,
-                    apiKey,
-                    fetchImpl,
-                    abortSignal,
-                    options,
-                    conversation,
-                    openAITools,
-                    openAIToolChoice,
-                    pushChunk,
-                });
-                stepFinish = stepResult.finishReason;
-                if (stepResult.usage) {
-                    stepUsage = mergeUsage(stepUsage, stepResult.usage);
-                }
-                if (stepResult.toolCalls.size === 0) {
-                    break;
-                }
-                await this.executeToolBatch({
-                    stepResult,
-                    conversation,
-                    toolsRecord,
-                    emitter,
-                    toolsUsed,
-                    toolExecutionSummaries,
-                    options,
-                });
+            const dynamicModels = await this.fetchModelsFromAPI();
+            if (dynamicModels.length > 0) {
+                LiteLLMProvider.modelsCache = dynamicModels;
+                LiteLLMProvider.modelsCacheTime = now;
+                return dynamicModels;
             }
-            resolveUsage({
-                promptTokens: stepUsage?.prompt_tokens ?? 0,
-                completionTokens: stepUsage?.completion_tokens ?? 0,
-                totalTokens: stepUsage?.total_tokens ?? 0,
-            });
-            resolveFinish(stepFinish ?? "stop");
-            pushChunk({ done: true });
-            return {
-                finishReason: stepFinish ?? "stop",
-                usage: stepUsage,
-            };
-        }
-        catch (err) {
-            logger.error("LiteLLM: Stream error", {
-                error: err instanceof Error ? err.message : String(err),
-            });
-            resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
-            resolveFinish("error");
-            pushChunk({ done: true });
-            throw err;
         }
-    }
-    async streamOneStep(args) {
-        const body = buildBody({
-            modelId: args.modelId,
-            messages: args.conversation,
-            options: args.options,
-            tools: args.openAITools,
-            ...(args.openAIToolChoice !== undefined
-                ? { toolChoice: args.openAIToolChoice }
-                : {}),
-            streaming: true,
-        });
-        const res = await args.fetchImpl(args.url, {
-            method: "POST",
-            headers: {
-                "Content-Type": "application/json",
-                Authorization: `Bearer ${args.apiKey}`,
-            },
-            body: JSON.stringify(body),
-            ...(args.abortSignal ? { signal: args.abortSignal } : {}),
-        });
-        if (!res.ok) {
-            throw await buildAPIError(args.url, body, res);
-        }
-        if (!res.body) {
-            throw new Error("litellm: stream response had no body");
+        catch (error) {
+            logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
         }
-        return parseSSEStream(res.body, (delta) => {
-            args.pushChunk({ content: delta });
-        });
+        return this.getFallbackModels();
     }
-    async executeToolBatch(args) {
-        const { stepResult, conversation, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, options, } = args;
-        const toolCallsForMessage = [];
-        for (const [, t] of stepResult.toolCalls) {
-            toolCallsForMessage.push({
-                id: t.id,
-                type: "function",
-                function: { name: t.name, arguments: t.argsBuffered },
-            });
-        }
-        conversation.push({
-            role: "assistant",
-            content: stepResult.text.length > 0 ? stepResult.text : null,
-            tool_calls: toolCallsForMessage,
-        });
-        for (const [, t] of stepResult.toolCalls) {
-            const startedAt = new Date();
-            let input;
-            try {
-                input = JSON.parse(t.argsBuffered || "{}");
-            }
-            catch {
-                input = t.argsBuffered;
-            }
-            let output;
-            let errorMsg;
-            const toolDef = toolsRecord[t.name];
-            emitter?.emit("tool:start", {
-                toolName: t.name,
-                toolCallId: t.id,
-                input,
+    async fetchModelsFromAPI() {
+        const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
+        const proxyFetch = createProxyFetch();
+        const controller = new AbortController();
+        const timeoutId = setTimeout(() => controller.abort(), 5000);
+        try {
+            const response = await proxyFetch(modelsUrl, {
+                method: "GET",
+                headers: {
+                    Authorization: `Bearer ${this.config.apiKey}`,
+                    "Content-Type": "application/json",
+                },
+                signal: controller.signal,
             });
-            if (!toolDef || typeof toolDef.execute !== "function") {
-                errorMsg = `Tool '${t.name}' is not registered.`;
-                output = { error: errorMsg };
+            if (!response.ok) {
+                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
             }
-            else {
-                try {
-                    output = await toolDef.execute(input, {});
-                }
-                catch (err) {
-                    errorMsg = err instanceof Error ? err.message : String(err);
-                    output = { error: errorMsg };
-                }
+            const data = (await response.json());
+            if (!Array.isArray(data.data)) {
+                throw new Error("Invalid response format: expected data.data array");
             }
-            const endedAt = new Date();
-            toolsUsed.push(t.name);
-            toolExecutionSummaries.push({
-                toolCallId: t.id,
-                toolName: t.name,
-                input,
-                output,
-                ...(errorMsg ? { error: errorMsg } : {}),
-                startTime: startedAt,
-                endTime: endedAt,
-            });
-            conversation.push({
-                role: "tool",
-                tool_call_id: t.id,
-                content: stringifyToolOutput(output),
-            });
+            return data.data
+                .map((m) => m.id)
+                .filter((id) => typeof id === "string" && id.length > 0)
+                .sort();
         }
-        const justExecuted = toolExecutionSummaries.slice(-stepResult.toolCalls.size);
-        emitToolEndFromStepFinish(emitter, justExecuted.map((s) => ({
-            toolName: s.toolName,
-            output: s.output,
-            ...(s.error ? { error: s.error } : {}),
-        })));
-        try {
-            await this.handleToolExecutionStorage(justExecuted.map((s) => ({
-                toolCallId: s.toolCallId,
-                toolName: s.toolName,
-                input: s.input,
-                output: s.output,
-            })), justExecuted.map((s) => ({
-                toolCallId: s.toolCallId,
-                toolName: s.toolName,
-                output: s.output,
-            })), options, new Date());
+        catch (error) {
+            if (isAbortError(error)) {
+                throw new NetworkError("Request timed out after 5 seconds", this.providerName);
+            }
+            throw error;
         }
-        catch (err) {
-            logger.warn("[LiteLLMProvider] Failed to store tool executions", {
-                provider: this.providerName,
-                error: err instanceof Error ? err.message : String(err),
-            });
+        finally {
+            clearTimeout(timeoutId);
         }
     }
     /**
@@ -692,83 +302,4 @@ export class LiteLLMProvider extends BaseProvider {
             timeoutController?.cleanup();
         }
     }
-    /**
-     * Get available models from LiteLLM proxy `/v1/models` endpoint.
-     * Caches results for 10 minutes; falls back to env-driven list or a
-     * minimal safe default if the API fetch fails.
-     */
-    async getAvailableModels() {
-        const now = Date.now();
-        if (LiteLLMProvider.modelsCache.length > 0 &&
-            now - LiteLLMProvider.modelsCacheTime <
-                LiteLLMProvider.MODELS_CACHE_DURATION) {
-            logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
-                cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
-                modelCount: LiteLLMProvider.modelsCache.length,
-            });
-            return LiteLLMProvider.modelsCache;
-        }
-        try {
-            const dynamicModels = await this.fetchModelsFromAPI();
-            if (dynamicModels.length > 0) {
-                LiteLLMProvider.modelsCache = dynamicModels;
-                LiteLLMProvider.modelsCacheTime = now;
-                return dynamicModels;
-            }
-        }
-        catch (error) {
-            logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
-        }
-        return this.getFallbackModels();
-    }
-    async getFirstAvailableModel() {
-        const models = await this.getAvailableModels();
-        return models[0] || FALLBACK_LITELLM_MODEL;
-    }
-    getFallbackModels() {
-        return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
-            .map((m) => m.trim())
-            .filter((m) => m.length > 0) || [
-            "openai/gpt-4o",
-            "anthropic/claude-3-haiku",
-            "meta-llama/llama-3.1-8b-instruct",
-            "google/gemini-2.5-flash",
-        ]);
-    }
-    async fetchModelsFromAPI() {
-        const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
-        const proxyFetch = createProxyFetch();
-        const controller = new AbortController();
-        const timeoutId = setTimeout(() => controller.abort(), 5000);
-        try {
-            const response = await proxyFetch(modelsUrl, {
-                method: "GET",
-                headers: {
-                    Authorization: `Bearer ${this.config.apiKey}`,
-                    "Content-Type": "application/json",
-                },
-                signal: controller.signal,
-            });
-            if (!response.ok) {
-                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-            }
-            const data = (await response.json());
-            if (!Array.isArray(data.data)) {
-                throw new Error("Invalid response format: expected data.data array");
-            }
-            return data.data
-                .map((m) => m.id)
-                .filter((id) => typeof id === "string" && id.length > 0)
-                .sort();
-        }
-        catch (error) {
-            if (isAbortError(error)) {
-                throw new NetworkError("Request timed out after 5 seconds", this.providerName);
-            }
-            throw error;
-        }
-        finally {
-            clearTimeout(timeoutId);
-        }
-    }
 }