npm - @juspay/neurolink - Versions diffs - 9.67.0 → 9.67.2 - Mend

@juspay/neurolink 9.67.0 → 9.67.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +4 -0
package/dist/browser/neurolink.min.js +376 -370
package/dist/lib/providers/googleVertex.js +8 -7
package/dist/lib/providers/litellm.d.ts +31 -24
package/dist/lib/providers/litellm.js +590 -391
package/dist/lib/providers/openaiChatCompletionsClient.d.ts +67 -0
package/dist/lib/providers/openaiChatCompletionsClient.js +526 -0
package/dist/lib/providers/openaiCompatible.d.ts +46 -19
package/dist/lib/providers/openaiCompatible.js +559 -171
package/dist/lib/types/index.d.ts +1 -0
package/dist/lib/types/index.js +1 -0
package/dist/lib/types/middleware.d.ts +1 -1
package/dist/lib/types/openaiCompatible.d.ts +250 -0
package/dist/lib/types/openaiCompatible.js +2 -0
package/dist/lib/types/providers.d.ts +2 -0
package/dist/providers/googleVertex.js +8 -7
package/dist/providers/litellm.d.ts +31 -24
package/dist/providers/litellm.js +590 -391
package/dist/providers/openaiChatCompletionsClient.d.ts +67 -0
package/dist/providers/openaiChatCompletionsClient.js +525 -0
package/dist/providers/openaiCompatible.d.ts +46 -19
package/dist/providers/openaiCompatible.js +559 -171
package/dist/types/index.d.ts +1 -0
package/dist/types/index.js +1 -0
package/dist/types/middleware.d.ts +1 -1
package/dist/types/openaiCompatible.d.ts +250 -0
package/dist/types/openaiCompatible.js +1 -0
package/dist/types/providers.d.ts +2 -0
package/package.json +2 -1

package/dist/providers/litellm.js CHANGED

@@ -1,4 +1,3 @@
-import { createOpenAI } from "@ai-sdk/openai";
 import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
 import { BaseProvider } from "../core/baseProvider.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
@@ -6,72 +5,61 @@ import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
 import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
 import { isAbortError } from "../utils/errorHandling.js";
-import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
+import { NoOutputGeneratedError } from "../utils/generationErrors.js";
 import { logger } from "../utils/logger.js";
-import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
+import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
 import { calculateCost } from "../utils/pricing.js";
 import { getProviderModel } from "../utils/providerConfig.js";
-import { composeAbortSignals, createTimeoutController, TimeoutError, withTimeout, } from "../utils/timeout.js";
+import { composeAbortSignals, createTimeoutController, mergeAbortSignals, TimeoutError, } from "../utils/timeout.js";
+import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
 import { resolveToolChoice } from "../utils/toolChoice.js";
-import { getModelId } from "./providerTypeUtils.js";
-import { NoOutputGeneratedError } from "../utils/generationErrors.js";
-import { Output, stepCountIs } from "../utils/tool.js";
-import { streamText } from "../utils/generation.js";
+import { transformToolExecutions } from "../utils/transformationUtils.js";
+import { buildAPIError, buildBody, buildToolsForOpenAI, createChunkQueue, createDeferredAnalytics, mapNeuroLinkToolChoice, mergeUsage, messageBuilderToOpenAI, parseSSEStream, stringifyToolOutput, stripTrailingSlash, v3ResponseFormatToOpenAI, v3ToolChoiceToOpenAI, v3ToolsToOpenAI, } from "./openaiChatCompletionsClient.js";
 const streamTracer = trace.getTracer("neurolink.provider.litellm");
-// Configuration helpers
-const getLiteLLMConfig = () => {
-    return {
-        baseURL: process.env.LITELLM_BASE_URL || "http://localhost:4000",
-        apiKey: process.env.LITELLM_API_KEY || "sk-anything",
-    };
-};
+const FALLBACK_LITELLM_MODEL = "openai/gpt-4o-mini";
+const getLiteLLMConfig = () => ({
+    baseURL: process.env.LITELLM_BASE_URL || "http://localhost:4000",
+    apiKey: process.env.LITELLM_API_KEY || "sk-anything",
+});
 /**
- * Returns the default model name for LiteLLM.
- *
- * LiteLLM uses a 'provider/model' format for model names.
- * For example:
- *   - 'openai/gpt-4o-mini'
- *   - 'openai/gpt-3.5-turbo'
- *   - 'anthropic/claude-3-sonnet-20240229'
- *   - 'google/gemini-pro'
- *
- * You can override the default by setting the LITELLM_MODEL environment variable.
+ * LiteLLM uses a 'provider/model' format. Override via LITELLM_MODEL env var.
  */
-const getDefaultLiteLLMModel = () => {
-    return getProviderModel("LITELLM_MODEL", "openai/gpt-4o-mini");
-};
+const getDefaultLiteLLMModel = () => getProviderModel("LITELLM_MODEL", FALLBACK_LITELLM_MODEL);
+const isGemini25Model = (modelName) => modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
+// =============================================================================
+// Direct HTTP client for LiteLLM proxy.
+//
+// LiteLLM exposes the OpenAI chat-completions wire format, so all the
+// wire-level converters and the SSE parser are shared with the
+// openai-compatible provider via ./openaiChatCompletionsClient.ts. This
+// file owns LiteLLM-specific behaviour: OTel span wrap with cost, model
+// allowlist 403 → ModelAccessDeniedError, Gemini 2.5 maxTokens skip,
+// model caching, and native /v1/embeddings.
+// =============================================================================
 /**
- * LiteLLM Provider - BaseProvider Implementation
- * Provides access to 100+ models via LiteLLM proxy server
+ * LiteLLM Provider — direct HTTP, no AI SDK. Talks to a LiteLLM proxy
+ * server (or any deployment that speaks OpenAI chat-completions + the
+ * `/v1/models` and `/v1/embeddings` endpoints).
  */
 export class LiteLLMProvider extends BaseProvider {
-    model;
+    config;
     credentials;
-    // Cache for available models to avoid repeated API calls
+    resolvedModel;
     static modelsCache = [];
     static modelsCacheTime = 0;
     static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
     constructor(modelName, sdk, _region, credentials) {
         super(modelName, "litellm", sdk);
-        // Store per-request credentials for use in embed/embedMany/fetchModelsFromAPI
         this.credentials = credentials;
-        // Initialize LiteLLM using OpenAI SDK with explicit configuration
-        const config = getLiteLLMConfig();
-        // Create OpenAI SDK instance configured for LiteLLM proxy
-        // LiteLLM acts as a proxy server that implements the OpenAI-compatible API.
-        // To communicate with LiteLLM instead of the default OpenAI endpoint, we use createOpenAI
-        // with a custom baseURL and apiKey. This ensures all requests are routed through the LiteLLM
-        // proxy, allowing access to multiple models and custom authentication.
-        const customOpenAI = createOpenAI({
-            baseURL: credentials?.baseURL ?? config.baseURL,
-            apiKey: credentials?.apiKey ?? config.apiKey,
-            fetch: createProxyFetch(),
-        });
-        this.model = customOpenAI.chat(this.modelName || getDefaultLiteLLMModel());
+        const envConfig = getLiteLLMConfig();
+        this.config = {
+            baseURL: credentials?.baseURL ?? envConfig.baseURL,
+            apiKey: credentials?.apiKey ?? envConfig.apiKey,
+        };
         logger.debug("LiteLLM Provider initialized", {
             modelName: this.modelName,
             provider: this.providerName,
-            baseURL: config.baseURL,
+            baseURL: this.config.baseURL,
         });
     }
     getProviderName() {
@@ -81,16 +69,152 @@ export class LiteLLMProvider extends BaseProvider {
         return getDefaultLiteLLMModel();
     }
     /**
-     * Returns the Vercel AI SDK model instance for LiteLLM
+     * Abstract from BaseProvider — used by the parent's generate() path which
+     * still goes through `generateText`. Returns a thin LanguageModelV3-shaped
+     * object that delegates to the same HTTP helpers used by executeStream.
+     */
+    async getAISDKModel() {
+        const modelId = await this.resolveModelName();
+        return this.buildDelegatingModel(modelId);
+    }
+    async resolveModelName() {
+        if (this.resolvedModel) {
+            return this.resolvedModel;
+        }
+        const explicit = this.modelName || getDefaultLiteLLMModel();
+        if (explicit && explicit.trim() !== "") {
+            this.resolvedModel = explicit;
+            if (this.modelName !== explicit) {
+                this.refreshHandlersForModel(explicit);
+            }
+            return explicit;
+        }
+        this.resolvedModel = FALLBACK_LITELLM_MODEL;
+        this.refreshHandlersForModel(FALLBACK_LITELLM_MODEL);
+        return FALLBACK_LITELLM_MODEL;
+    }
+    /**
+     * Returns a minimal V3-shaped model. Only used by BaseProvider's
+     * `generate()` non-streaming path which still relies on the parent's
+     * `generateText`. The streaming path bypasses this entirely.
      */
-    getAISDKModel() {
-        return this.model;
+    buildDelegatingModel(modelId) {
+        const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
+        const fetchImpl = createProxyFetch();
+        const apiKey = this.config.apiKey;
+        const providerName = this.providerName;
+        const getTimeoutForOptions = (opts) => this.getTimeout((opts ?? {}));
+        const gemini25Skip = isGemini25Model(modelId);
+        return {
+            specificationVersion: "v3",
+            provider: "litellm",
+            modelId,
+            supportedUrls: {},
+            doGenerate: async (options) => {
+                const messages = messageBuilderToOpenAI(options.prompt);
+                const body = buildBody({
+                    modelId,
+                    messages,
+                    options: {
+                        maxTokens: gemini25Skip ? undefined : options.maxOutputTokens,
+                        temperature: options.temperature,
+                        topP: options.topP,
+                        presencePenalty: options.presencePenalty,
+                        frequencyPenalty: options.frequencyPenalty,
+                        seed: options.seed,
+                        stopSequences: options.stopSequences,
+                    },
+                    tools: v3ToolsToOpenAI(options.tools),
+                    ...(options.toolChoice
+                        ? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
+                        : {}),
+                    streaming: false,
+                    ...(options.responseFormat
+                        ? {
+                            responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
+                        }
+                        : {}),
+                });
+                const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
+                const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
+                let res;
+                try {
+                    res = await fetchImpl(url, {
+                        method: "POST",
+                        headers: {
+                            "Content-Type": "application/json",
+                            Authorization: `Bearer ${apiKey}`,
+                        },
+                        body: JSON.stringify(body),
+                        ...(composedSignal ? { signal: composedSignal } : {}),
+                    });
+                }
+                finally {
+                    timeoutController?.cleanup();
+                }
+                if (!res.ok) {
+                    throw await buildAPIError(url, body, res);
+                }
+                const json = (await res.json());
+                const choice = json.choices?.[0];
+                const text = (typeof choice?.message?.content === "string"
+                    ? choice.message.content
+                    : "") ?? "";
+                const content = [];
+                if (text.length > 0) {
+                    content.push({ type: "text", text });
+                }
+                for (const tc of choice?.message?.tool_calls ?? []) {
+                    content.push({
+                        type: "tool-call",
+                        toolCallId: tc.id,
+                        toolName: tc.function.name,
+                        input: tc.function.arguments ?? "",
+                    });
+                }
+                const rawFinish = choice?.finish_reason;
+                const unified = rawFinish === "length"
+                    ? "length"
+                    : rawFinish === "tool_calls" || rawFinish === "function_call"
+                        ? "tool-calls"
+                        : rawFinish === "content_filter"
+                            ? "content-filter"
+                            : "stop";
+                return {
+                    content,
+                    finishReason: { unified, raw: rawFinish ?? "stop" },
+                    usage: {
+                        inputTokens: {
+                            total: json.usage?.prompt_tokens,
+                            noCache: json.usage?.prompt_tokens,
+                            cacheRead: undefined,
+                            cacheWrite: undefined,
+                        },
+                        outputTokens: {
+                            total: json.usage?.completion_tokens,
+                            text: json.usage?.completion_tokens,
+                            reasoning: undefined,
+                        },
+                    },
+                    warnings: [],
+                    request: { body },
+                    response: {
+                        ...(json.id ? { id: json.id } : {}),
+                        ...(json.model ? { modelId: json.model } : {}),
+                        headers: {},
+                        body: json,
+                    },
+                };
+            },
+            doStream: () => {
+                throw new Error("litellm: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
+            },
+        };
     }
     formatProviderError(error) {
         if (error instanceof TimeoutError) {
             return new NetworkError(`Request timed out: ${error.message}`, this.providerName);
         }
-        // Check for timeout by error name and message as fallback
         const errorRecord = error;
         if (errorRecord?.name === "TimeoutError" ||
             (typeof errorRecord?.message === "string" &&
@@ -103,10 +227,10 @@ export class LiteLLMProvider extends BaseProvider {
                 return new NetworkError("LiteLLM proxy server not available. Please start the LiteLLM proxy server at " +
                     `${process.env.LITELLM_BASE_URL || "http://localhost:4000"}`, this.providerName);
             }
-            // Curator P1-1: detect "team not allowed to access model" responses
-            // and surface as ModelAccessDeniedError with the allowed_models array
-            // parsed from the body. Must run before the generic "API key" check
-            // because LiteLLM phrases this as a 403 distinct from auth.
+            // Curator P1-1: detect "team not allowed to access model" responses and
+            // surface as ModelAccessDeniedError with the allowed_models array parsed
+            // from the body. Must run before the generic "API key" check because
+            // LiteLLM phrases this as a 403 distinct from auth.
             if (isModelAccessDeniedMessage(errorRecord.message)) {
                 return new ModelAccessDeniedError(errorRecord.message, {
                     provider: this.providerName,
@@ -129,447 +253,522 @@ export class LiteLLMProvider extends BaseProvider {
         }
         return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
     }
-    /**
-     * LiteLLM supports tools for compatible models
-     */
     supportsTools() {
         return true;
     }
     /**
-     * Provider-specific streaming implementation
-     * Note: This is only used when tools are disabled
+     * Streaming path — drives the LiteLLM proxy directly. No streamText, no
+     * AI SDK orchestrator. Tool calls, multi-step loops, telemetry, abort
+     * handling all inline. OTel span captures gen_ai.system + cost.
      */
-    async executeStream(options, analysisSchema) {
+    async executeStream(options, _analysisSchema) {
         this.validateStreamOptions(options);
         const startTime = Date.now();
-        let chunkCount = 0; // Track chunk count for debugging
-        // Reviewer follow-up: capture upstream provider errors via onError so
-        // the post-stream NoOutput detect can propagate the *real* cause
-        // (content_filter, provider crash, etc.) into the sentinel's
-        // providerError / modelResponseRaw instead of "No output generated".
-        let capturedProviderError;
         const timeout = this.getTimeout(options);
         const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
+        const consumerAbortController = new AbortController();
+        const abortSignal = mergeAbortSignals([
+            options.abortSignal,
+            timeoutController?.controller.signal,
+            consumerAbortController.signal,
+        ]).signal;
+        let modelId;
+        let toolsRecord;
+        let openAITools;
+        let openAIToolChoice;
+        let conversation;
         try {
-            // Build message array from options with multimodal support
-            // Using protected helper from BaseProvider to eliminate code duplication
-            const messages = await this.buildMessagesForStream(options);
-            const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
-            // Get tools - options.tools is pre-merged by BaseProvider.stream()
+            modelId = await this.resolveModelName();
             const shouldUseTools = !options.disableTools && this.supportsTools();
-            const tools = shouldUseTools
+            toolsRecord = shouldUseTools
                 ? options.tools || (await this.getAllTools())
                 : {};
-            logger.debug(`LiteLLM: Tools for streaming`, {
-                shouldUseTools,
-                toolCount: Object.keys(tools).length,
-                toolNames: Object.keys(tools),
+            openAITools = shouldUseTools
+                ? buildToolsForOpenAI(toolsRecord)
+                : undefined;
+            openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
+            const initialMessages = await this.buildMessagesForStream(options);
+            conversation = messageBuilderToOpenAI(initialMessages);
+        }
+        catch (setupErr) {
+            timeoutController?.cleanup();
+            throw setupErr;
+        }
+        const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
+        const fetchImpl = createProxyFetch();
+        const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
+        const emitter = this.neurolink?.getEventEmitter();
+        const toolsUsed = [];
+        const toolExecutionSummaries = [];
+        const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
+        const { pushChunk, nextChunk } = createChunkQueue();
+        // Wrap the stream in an OTel span to capture provider-level latency,
+        // token usage, finish reason, and cost. Matches the pre-migration
+        // behaviour where streamText was wrapped in `neurolink.provider.streamText`.
+        const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
+            kind: SpanKind.CLIENT,
+            attributes: {
+                "gen_ai.system": "litellm",
+                "gen_ai.request.model": modelId,
+            },
+        });
+        // Model-specific maxTokens handling — Gemini 2.5 models have known issues
+        // with maxTokens being forwarded. Mutate a shallow copy so the original
+        // StreamOptions reference downstream (analytics, telemetry) is unchanged.
+        const requestOptions = isGemini25Model(modelId)
+            ? { ...options, maxTokens: undefined }
+            : options;
+        if (requestOptions !== options &&
+            options.maxTokens &&
+            logger.shouldLog("debug")) {
+            logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, { modelId, requestedMaxTokens: options.maxTokens });
+        }
+        const loopPromise = this.runStreamLoop({
+            maxSteps,
+            modelId,
+            url,
+            apiKey: this.config.apiKey,
+            fetchImpl,
+            abortSignal,
+            options: requestOptions,
+            conversation,
+            openAITools,
+            openAIToolChoice,
+            toolsRecord,
+            emitter,
+            toolsUsed,
+            toolExecutionSummaries,
+            pushChunk,
+            resolveUsage,
+            resolveFinish,
+        });
+        // Wire the OTel span lifecycle to the deferred analytics promises.
+        let capturedProviderError;
+        const captureProviderError = (error) => {
+            capturedProviderError = error;
+        };
+        usagePromise
+            .then((usage) => {
+            streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
+            streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
+            const cost = calculateCost(this.providerName, this.modelName, {
+                input: usage.promptTokens,
+                output: usage.completionTokens,
+                total: usage.totalTokens,
             });
-            // Model-specific maxTokens handling - Gemini 2.5 models have issues with maxTokens
-            const modelName = this.modelName || getDefaultLiteLLMModel();
-            const isGemini25Model = modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
-            const maxTokens = isGemini25Model ? undefined : options.maxTokens;
-            if (isGemini25Model && options.maxTokens) {
-                logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, {
-                    modelName,
-                    requestedMaxTokens: options.maxTokens,
+            if (cost && cost > 0) {
+                streamSpan.setAttribute("neurolink.cost", cost);
+            }
+        })
+            .catch(() => {
+            // usage may never resolve if the stream is aborted before completion
+        });
+        finishPromise
+            .then((reason) => {
+            streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
+            if (reason === "error") {
+                streamSpan.setStatus({
+                    code: SpanStatusCode.ERROR,
+                    message: capturedProviderError instanceof Error
+                        ? capturedProviderError.message
+                        : String(capturedProviderError ?? "stream error"),
                 });
             }
-            // Build complete stream options with proper typing - matching Vertex pattern
-            let streamOptions = {
-                model: model,
-                messages: messages,
-                temperature: options.temperature,
-                ...(maxTokens && { maxTokens }), // Conditionally include maxTokens
-                ...(shouldUseTools &&
-                    Object.keys(tools).length > 0 && {
-                    tools,
-                    toolChoice: resolveToolChoice(options, tools, shouldUseTools),
-                    stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
-                }),
-                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
-                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
-                experimental_repairToolCall: this.getToolCallRepairFn(options),
-                onError: (event) => {
-                    const error = event.error;
-                    const errorMessage = error instanceof Error ? error.message : String(error);
-                    // Reviewer follow-up: propagate the captured error to the
-                    // post-stream NoOutput sentinel so telemetry sees the real
-                    // provider cause instead of "No output generated".
-                    capturedProviderError = error;
-                    logger.error(`LiteLLM: Stream error`, {
-                        provider: this.providerName,
-                        modelName: this.modelName,
-                        error: errorMessage,
-                        chunkCount,
-                    });
-                },
-                onFinish: (event) => {
-                    logger.debug(`LiteLLM: Stream finished`, {
-                        finishReason: event.finishReason,
-                        totalChunks: chunkCount,
-                    });
-                },
-                onChunk: () => {
-                    chunkCount++;
-                },
-                onStepFinish: ({ toolCalls, toolResults }) => {
-                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
-                    logger.info("Tool execution completed", { toolResults, toolCalls });
-                    for (const toolCall of toolCalls) {
-                        collectedToolCalls.push({
-                            toolCallId: toolCall.toolCallId,
-                            toolName: toolCall.toolName,
-                            args: toolCall.args ??
-                                toolCall.input ??
-                                toolCall
-                                    .parameters ??
-                                {},
-                        });
+            streamSpan.end();
+        })
+            .catch(() => {
+            streamSpan.end();
+        });
+        const transformedStream = async function* () {
+            let contentYielded = 0;
+            try {
+                for (;;) {
+                    const chunk = await nextChunk();
+                    if ("done" in chunk) {
+                        break;
                     }
-                    for (const toolResult of toolResults) {
-                        const rawToolResult = toolResult;
-                        collectedToolResults.push({
-                            toolName: toolResult.toolName,
-                            status: rawToolResult.error ? "failure" : "success",
-                            output: (rawToolResult.output ??
-                                rawToolResult.result) ??
-                                undefined,
-                            error: rawToolResult.error,
-                            id: rawToolResult.toolCallId ?? toolResult.toolName,
-                        });
+                    if ("content" in chunk &&
+                        typeof chunk.content === "string" &&
+                        chunk.content.length > 0) {
+                        contentYielded++;
                     }
-                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
-                        logger.warn("[LiteLLMProvider] Failed to store tool executions", {
-                            provider: this.providerName,
-                            error: error instanceof Error ? error.message : String(error),
-                        });
-                    });
-                },
-            };
-            // Add analysisSchema support if provided
-            if (analysisSchema) {
-                try {
-                    streamOptions = {
-                        ...streamOptions,
-                        experimental_output: Output.object({
-                            schema: analysisSchema,
-                        }),
-                    };
+                    yield chunk;
                 }
-                catch (error) {
-                    logger.warn("Schema application failed, continuing without schema", {
-                        error: String(error),
+                await loopPromise;
+                if (contentYielded === 0 && toolsUsed.length === 0) {
+                    logger.warn("LiteLLM: Stream produced no output — emitting enriched sentinel");
+                    const fauxNoOutput = new NoOutputGeneratedError({
+                        message: "Stream produced no output",
                     });
+                    const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
+                    stampNoOutputSpan(sentinel);
+                    yield sentinel;
                 }
             }
-            // Wrap streamText in an OTel span to capture provider-level latency, token usage, and cost
-            const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
-                kind: SpanKind.CLIENT,
-                attributes: {
-                    "gen_ai.system": "litellm",
-                    "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
-                },
-            });
-            let result;
-            const collectedToolCalls = [];
-            const collectedToolResults = [];
-            try {
-                result = streamText(streamOptions);
-            }
             catch (streamError) {
-                streamSpan.setStatus({
-                    code: SpanStatusCode.ERROR,
-                    message: streamError instanceof Error
-                        ? streamError.message
-                        : String(streamError),
-                });
-                streamSpan.end();
+                if (NoOutputGeneratedError.isInstance(streamError)) {
+                    const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
+                    stampNoOutputSpan(sentinel);
+                    yield sentinel;
+                    return;
+                }
+                const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
+                stampNoOutputSpan(sentinel);
+                yield sentinel;
                 throw streamError;
             }
-            // Collect token usage, cost, and finish reason asynchronously when the stream completes,
-            // then end the span. This avoids blocking the stream consumer.
-            Promise.resolve(result.usage)
-                .then((usage) => {
-                streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
-                streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
-                const cost = calculateCost(this.providerName, this.modelName, {
-                    input: usage.inputTokens || 0,
-                    output: usage.outputTokens || 0,
-                    total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
-                });
-                if (cost && cost > 0) {
-                    streamSpan.setAttribute("neurolink.cost", cost);
+            finally {
+                if (!consumerAbortController.signal.aborted) {
+                    consumerAbortController.abort();
                 }
-            })
-                .catch(() => {
-                // Usage may not be available if the stream is aborted
-            });
-            Promise.resolve(result.finishReason)
-                .then((reason) => {
-                streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
-            })
-                .catch(() => {
-                // Finish reason may not be available if the stream is aborted
-            });
-            Promise.resolve(result.text)
-                .then(() => {
-                streamSpan.end();
-            })
-                .catch((err) => {
-                streamSpan.setStatus({
-                    code: SpanStatusCode.ERROR,
-                    message: err instanceof Error ? err.message : String(err),
-                });
-                streamSpan.end();
-            });
-            timeoutController?.cleanup();
-            const transformedStream = this.createLiteLLMTransformedStream(result, () => capturedProviderError);
-            // Create analytics promise that resolves after stream completion
-            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, result, Date.now() - startTime, {
+            }
+        };
+        const result = {
+            stream: transformedStream(),
+            provider: this.providerName,
+            model: this.modelName,
+            analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, {
+                textStream: (async function* () { })(),
+                usage: usagePromise,
+                finishReason: finishPromise,
+            }, Date.now() - startTime, {
                 requestId: options.requestId ??
                     `litellm-stream-${Date.now()}`,
                 streamingMode: true,
+            }),
+            toolsUsed,
+            metadata: {
+                startTime,
+                streamId: `litellm-${Date.now()}`,
+            },
+        };
+        Object.defineProperty(result, "toolExecutions", {
+            enumerable: true,
+            configurable: true,
+            get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
+                toolName: s.toolName,
+                input: s.input,
+                output: s.output,
+                duration: s.endTime.getTime() - s.startTime.getTime(),
+            }))),
+        });
+        loopPromise
+            .finally(() => timeoutController?.cleanup())
+            .catch((error) => {
+            captureProviderError(error);
+        });
+        return result;
+    }
+    /**
+     * Drive the multi-step streaming loop: stream one model turn, run any
+     * tool calls it requested, and repeat for at most `maxSteps` turns.
+     *
+     * The loop stops as soon as a step finishes without tool calls. Usage
+     * from every step that reports one is folded together with `mergeUsage`.
+     * On both the success and the failure path the usage and finish-reason
+     * resolvers are called and a `{ done: true }` chunk is pushed; on failure
+     * the error is logged and then rethrown unchanged.
+     *
+     * @param {object} args - Loop inputs; the destructuring below lists every field read.
+     * @returns {Promise<object>} `{ finishReason, usage }`: the finish reason
+     *   of the last step (or "stop" when it is null/undefined) and the merged
+     *   usage (undefined when no step reported usage).
+     */
+    async runStreamLoop(args) {
+        const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
+        try {
+            let stepFinish = null;
+            let stepUsage;
+            for (let step = 0; step < maxSteps; step++) {
+                const stepResult = await this.streamOneStep({
+                    modelId,
+                    url,
+                    apiKey,
+                    fetchImpl,
+                    abortSignal,
+                    options,
+                    conversation,
+                    openAITools,
+                    openAIToolChoice,
+                    pushChunk,
+                });
+                stepFinish = stepResult.finishReason;
+                if (stepResult.usage) {
+                    stepUsage = mergeUsage(stepUsage, stepResult.usage);
+                }
+                // No tool calls in this step, so there is nothing to execute: stop looping.
+                if (stepResult.toolCalls.size === 0) {
+                    break;
+                }
+                await this.executeToolBatch({
+                    stepResult,
+                    conversation,
+                    toolsRecord,
+                    emitter,
+                    toolsUsed,
+                    toolExecutionSummaries,
+                    options,
+                });
+            }
+            resolveUsage({
+                promptTokens: stepUsage?.prompt_tokens ?? 0,
+                completionTokens: stepUsage?.completion_tokens ?? 0,
+                totalTokens: stepUsage?.total_tokens ?? 0,
             });
+            resolveFinish(stepFinish ?? "stop");
+            pushChunk({ done: true });
             return {
-                stream: transformedStream,
-                provider: this.providerName,
-                model: this.modelName,
-                ...(shouldUseTools && {
-                    toolCalls: collectedToolCalls,
-                    toolResults: collectedToolResults,
-                }),
-                analytics: analyticsPromise,
-                metadata: {
-                    startTime,
-                    streamId: `litellm-${Date.now()}`,
-                },
+                finishReason: stepFinish ?? "stop",
+                usage: stepUsage,
             };
         }
-        catch (error) {
-            timeoutController?.cleanup();
-            throw this.handleProviderError(error);
+        catch (err) {
+            logger.error("LiteLLM: Stream error", {
+                error: err instanceof Error ? err.message : String(err),
+            });
+            // Settle usage/finish and push the terminal chunk even on failure,
+            // presumably so consumers awaiting them are not left pending.
+            resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
+            resolveFinish("error");
+            pushChunk({ done: true });
+            throw err;
         }
     }
-    async *createLiteLLMTransformedStream(result, getCapturedProviderError) {
-        // Reviewer follow-up: gate the post-stream NoOutput detect on
-        // *content yielded*, not raw chunk count. AI SDK fullStream emits
-        // control events ({ type: "start" }, "step-start", etc.) before any
-        // text-delta — those incremented chunkCount and made the post-stream
-        // detect dead even when zero text was produced.
-        let contentYielded = 0;
-        try {
-            const streamToUse = result.fullStream || result.textStream;
-            for await (const chunk of streamToUse) {
-                if (chunk && typeof chunk === "object") {
-                    if ("type" in chunk && chunk.type === "error") {
-                        const errorChunk = chunk;
-                        logger.error(`LiteLLM: Error chunk received:`, {
-                            errorType: errorChunk.type,
-                            errorDetails: errorChunk.error,
-                        });
-                        throw this.formatProviderError(new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`));
-                    }
-                    if ("textDelta" in chunk) {
-                        const textDelta = chunk.textDelta;
-                        if (textDelta) {
-                            contentYielded++;
-                            yield { content: textDelta };
-                        }
-                    }
-                    else if ("type" in chunk &&
-                        chunk.type === "tool-call" &&
-                        "toolCallId" in chunk) {
-                        logger.debug("LiteLLM: Tool call", {
-                            toolCallId: String(chunk.toolCallId),
-                            toolName: "toolName" in chunk ? String(chunk.toolName) : "unknown",
-                        });
-                    }
+    /**
+     * Perform one streaming request against `args.url` and parse the reply.
+     *
+     * Builds the request payload with `buildBody` (streaming enabled, tool
+     * choice included only when defined), POSTs it as JSON with a bearer
+     * token, and feeds the response body to `parseSSEStream`, forwarding
+     * every delta to `args.pushChunk` as `{ content: delta }`.
+     *
+     * @param {object} args - Request inputs (`modelId`, `url`, `apiKey`,
+     *   `fetchImpl`, `abortSignal`, `options`, `conversation`, `openAITools`,
+     *   `openAIToolChoice`, `pushChunk`).
+     * @returns {Promise<*>} Whatever `parseSSEStream` resolves to.
+     * @throws The error built by `buildAPIError` on a non-OK response, or an
+     *   `Error` when the response has no body.
+     */
+    async streamOneStep(args) {
+        const { modelId, url, apiKey, abortSignal, options, conversation, openAITools, openAIToolChoice, } = args;
+        const toolChoicePart = openAIToolChoice === undefined
+            ? {}
+            : { toolChoice: openAIToolChoice };
+        const requestBody = buildBody({
+            modelId,
+            messages: conversation,
+            options,
+            tools: openAITools,
+            ...toolChoicePart,
+            streaming: true,
+        });
+        const signalPart = abortSignal ? { signal: abortSignal } : {};
+        const requestInit = {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/json",
+                Authorization: `Bearer ${apiKey}`,
+            },
+            body: JSON.stringify(requestBody),
+            ...signalPart,
+        };
+        // Invoked through `args` on purpose so the callee sees the same `this`.
+        const response = await args.fetchImpl(url, requestInit);
+        if (!response.ok) {
+            throw await buildAPIError(url, requestBody, response);
+        }
+        if (!response.body) {
+            throw new Error("litellm: stream response had no body");
+        }
+        const forwardDelta = (delta) => {
+            args.pushChunk({ content: delta });
+        };
+        return parseSSEStream(response.body, forwardDelta);
+    }
+    /**
+     * Execute every tool call collected in `stepResult` and append the
+     * outcome to `conversation`.
+     *
+     * Pushes one assistant message carrying all `tool_calls`, then runs the
+     * tools one at a time in iteration order and pushes a `role: "tool"`
+     * message per call. An unregistered tool or a thrown error is not fatal:
+     * it is recorded as `{ error }` output. Also appends to `toolsUsed` and
+     * `toolExecutionSummaries`, emits "tool:start" per call, and finally hands
+     * the batch to `emitToolEndFromStepFinish` and
+     * `handleToolExecutionStorage` (a storage failure is only logged).
+     *
+     * @param {object} args - Batch inputs; the destructuring below lists every field read.
+     * @returns {Promise<void>} Results are communicated through the mutated arrays.
+     */
+    async executeToolBatch(args) {
+        const { stepResult, conversation, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, options, } = args;
+        const toolCallsForMessage = [];
+        for (const [, t] of stepResult.toolCalls) {
+            toolCallsForMessage.push({
+                id: t.id,
+                type: "function",
+                function: { name: t.name, arguments: t.argsBuffered },
+            });
+        }
+        conversation.push({
+            role: "assistant",
+            content: stepResult.text.length > 0 ? stepResult.text : null,
+            tool_calls: toolCallsForMessage,
+        });
+        for (const [, t] of stepResult.toolCalls) {
+            const startedAt = new Date();
+            let input;
+            try {
+                input = JSON.parse(t.argsBuffered || "{}");
+            }
+            catch {
+                // Arguments were not valid JSON: pass the raw buffered string through.
+                input = t.argsBuffered;
+            }
+            let output;
+            let errorMsg;
+            const toolDef = toolsRecord[t.name];
+            emitter?.emit("tool:start", {
+                toolName: t.name,
+                toolCallId: t.id,
+                input,
+            });
+            if (!toolDef || typeof toolDef.execute !== "function") {
+                errorMsg = `Tool '${t.name}' is not registered.`;
+                output = { error: errorMsg };
+            }
+            else {
+                try {
+                    output = await toolDef.execute(input, {});
                 }
-                else if (typeof chunk === "string") {
-                    contentYielded++;
-                    yield { content: chunk };
+                catch (err) {
+                    errorMsg = err instanceof Error ? err.message : String(err);
+                    output = { error: errorMsg };
                 }
             }
+            const endedAt = new Date();
+            toolsUsed.push(t.name);
+            toolExecutionSummaries.push({
+                toolCallId: t.id,
+                toolName: t.name,
+                input,
+                output,
+                ...(errorMsg ? { error: errorMsg } : {}),
+                startTime: startedAt,
+                endTime: endedAt,
+            });
+            conversation.push({
+                role: "tool",
+                tool_call_id: t.id,
+                content: stringifyToolOutput(output),
+            });
         }
-        catch (streamError) {
-            if (NoOutputGeneratedError.isInstance(streamError)) {
-                logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
-                // Yield the enriched sentinel so downstream telemetry has
-                // finishReason / usage / providerError. Match the other
-                // providers' pattern: yield + return (no throw). NeuroLink's
-                // iteration fallback at neurolink.ts only fires for
-                // looksLikeModelAccessDenied errors, so a NoOutput throw here
-                // would NOT trigger any fallback — and it would mask the
-                // already-yielded sentinel from consumers expecting a clean
-                // stream. The sentinel itself signals the no-output condition.
-                const sentinel = await buildNoOutputSentinel(streamError, result, getCapturedProviderError?.());
-                stampNoOutputSpan(sentinel);
-                yield sentinel;
-                return;
-            }
-            throw streamError;
+        // The loop above pushed exactly one summary per tool call, so the last
+        // `size` entries are the ones produced by this batch.
+        const justExecuted = toolExecutionSummaries.slice(-stepResult.toolCalls.size);
+        emitToolEndFromStepFinish(emitter, justExecuted.map((s) => ({
+            toolName: s.toolName,
+            output: s.output,
+            ...(s.error ? { error: s.error } : {}),
+        })));
+        try {
+            await this.handleToolExecutionStorage(justExecuted.map((s) => ({
+                toolCallId: s.toolCallId,
+                toolName: s.toolName,
+                input: s.input,
+                output: s.output,
+            })), justExecuted.map((s) => ({
+                toolCallId: s.toolCallId,
+                toolName: s.toolName,
+                output: s.output,
+            })), options, new Date());
         }
-        // Curator P3-6 (round-2 fix): production trigger sets the error on
-        // result.finishReason rejection (NOT thrown from textStream).
-        // Surface that path here, matching the catch above (yield + return).
-        if (contentYielded === 0) {
-            const detected = await detectPostStreamNoOutput(result, getCapturedProviderError?.());
-            if (detected) {
-                logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
-                stampNoOutputSpan(detected.sentinel);
-                yield detected.sentinel;
-            }
+        catch (err) {
+            logger.warn("[LiteLLMProvider] Failed to store tool executions", {
+                provider: this.providerName,
+                error: err instanceof Error ? err.message : String(err),
+            });
         }
     }
     /**
-     * Generate an embedding for a single text input
-     * Uses the LiteLLM proxy with OpenAI-compatible embedding API
+     * Generate an embedding for a single text input by delegating to
+     * `callEmbeddings`, which POSTs to `<baseURL>/embeddings`.
+     *
+     * The model is `modelName`, else `LITELLM_EMBEDDING_MODEL`, else
+     * "gemini-embedding-001"; `||` is used, so an empty string also falls
+     * through to the next candidate.
+     *
+     * @param {string} text - Input sent as the only element of the request.
+     * @param {string} [modelName] - Optional embedding model override.
+     * @returns {Promise<*>} The first embedding returned by `callEmbeddings`.
      */
     async embed(text, modelName) {
-        const { embed: aiEmbed } = await import("../utils/generation.js");
-        const { createOpenAI } = await import("@ai-sdk/openai");
-        const config = getLiteLLMConfig();
         const embeddingModelName = modelName ||
             process.env.LITELLM_EMBEDDING_MODEL ||
             "gemini-embedding-001";
-        const customOpenAI = createOpenAI({
-            baseURL: this.credentials?.baseURL ?? config.baseURL,
-            apiKey: this.credentials?.apiKey ?? config.apiKey,
-            fetch: createProxyFetch(),
-        });
-        const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
-        // Wrap in withTimeout so stalled upstream embedding requests abort instead
-        // of hanging forever. 30s matches the default for embedding endpoints
-        // across the OpenAI-compatible cluster.
-        const result = await withTimeout(aiEmbed({ model: embeddingModel, value: text }), 30_000, "litellm", "generate");
-        return result.embedding;
+        const [embedding] = await this.callEmbeddings(embeddingModelName, [text], "embed");
+        return embedding;
     }
     /**
-     * Generate embeddings for multiple text inputs
-     * Uses the LiteLLM proxy with OpenAI-compatible embedding API
+     * Generate embeddings for multiple text inputs by delegating to
+     * `callEmbeddings`, which POSTs to `<baseURL>/embeddings`.
+     *
+     * The model is `modelName`, else `LITELLM_EMBEDDING_MODEL`, else
+     * "gemini-embedding-001"; `||` is used, so an empty string also falls
+     * through to the next candidate.
+     *
+     * @param {string[]} texts - Inputs forwarded unchanged to `callEmbeddings`.
+     * @param {string} [modelName] - Optional embedding model override.
+     * @returns {Promise<Array>} The embeddings array returned by `callEmbeddings`.
      */
     async embedMany(texts, modelName) {
-        const { embedMany: aiEmbedMany } = await import("../utils/generation.js");
-        const { createOpenAI } = await import("@ai-sdk/openai");
-        const config = getLiteLLMConfig();
         const embeddingModelName = modelName ||
             process.env.LITELLM_EMBEDDING_MODEL ||
             "gemini-embedding-001";
-        const customOpenAI = createOpenAI({
-            baseURL: this.credentials?.baseURL ?? config.baseURL,
-            apiKey: this.credentials?.apiKey ?? config.apiKey,
-            fetch: createProxyFetch(),
-        });
-        const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
-        // Wrap in withTimeout so a single slow batch doesn't hang indefinitely.
-        const result = await withTimeout(aiEmbedMany({ model: embeddingModel, values: texts }), 30_000, "litellm", "generate");
-        return result.embeddings;
+        return this.callEmbeddings(embeddingModelName, texts, "embedMany");
+    }
+    /**
+     * POST `input` to `<baseURL>/embeddings` and return the embedding vectors.
+     *
+     * A single-element `input` is sent as a bare value, longer batches as an
+     * array. The request is tied to a controller created with
+     * `createTimeoutController(30_000, ...)` (presumably a 30 s timeout) that
+     * is always cleaned up. Rows whose `embedding` is not an array are dropped.
+     *
+     * @param {string} modelName - Embedding model id sent as `model`.
+     * @param {string[]} input - Texts to embed.
+     * @param {string} operation - Label ("embed" / "embedMany") used in error messages.
+     * @returns {Promise<Array>} Vectors taken from `data[].embedding`.
+     * @throws The result of `formatProviderError` on a non-OK response, or a
+     *   `ProviderError` when the response contains no usable embeddings.
+     */
+    async callEmbeddings(modelName, input, operation) {
+        const url = `${stripTrailingSlash(this.config.baseURL)}/embeddings`;
+        const fetchImpl = createProxyFetch();
+        const timeoutController = createTimeoutController(30_000, this.providerName, "generate");
+        try {
+            const res = await fetchImpl(url, {
+                method: "POST",
+                headers: {
+                    "Content-Type": "application/json",
+                    Authorization: `Bearer ${this.config.apiKey}`,
+                },
+                body: JSON.stringify({
+                    model: modelName,
+                    input: input.length === 1 ? input[0] : input,
+                }),
+                ...(timeoutController?.controller.signal
+                    ? { signal: timeoutController.controller.signal }
+                    : {}),
+            });
+            if (!res.ok) {
+                const bodyText = await res.text().catch(() => "");
+                let upstreamMessage;
+                try {
+                    upstreamMessage = bodyText
+                        ? JSON.parse(bodyText)?.error?.message
+                        : undefined;
+                }
+                catch {
+                    // Error bodies are not guaranteed to be JSON (e.g. an HTML
+                    // page from a gateway). Use the status-based message below
+                    // instead of surfacing a SyntaxError that hides the status.
+                    upstreamMessage = undefined;
+                }
+                throw this.formatProviderError(new Error(upstreamMessage ||
+                    `LiteLLM ${operation} failed with status ${res.status}`));
+            }
+            const json = (await res.json());
+            const embeddings = (json.data ?? [])
+                .map((row) => row.embedding)
+                .filter((e) => Array.isArray(e));
+            if (embeddings.length === 0) {
+                throw new ProviderError(`LiteLLM ${operation} returned no embeddings`, this.providerName);
+            }
+            return embeddings;
+        }
+        finally {
+            timeoutController?.cleanup();
+        }
     }
     /**
-     * Get available models from LiteLLM proxy server
-     * Dynamically fetches from /v1/models endpoint with caching and fallback
+     * Get available models from LiteLLM proxy `/v1/models` endpoint.
+     * Results are cached on the class (`LiteLLMProvider.modelsCache`) for
+     * `MODELS_CACHE_DURATION` (10 minutes; NOTE(review): confirm against the
+     * constant); falls back to `getFallbackModels()` (env-driven list or a
+     * minimal safe default) if the API fetch fails or returns no models.
+     *
+     * @returns {Promise<string[]>} Cached, freshly fetched, or fallback model ids.
      */
     async getAvailableModels() {
-        const functionTag = "LiteLLMProvider.getAvailableModels";
         const now = Date.now();
-        // Check if cached models are still valid
         if (LiteLLMProvider.modelsCache.length > 0 &&
             now - LiteLLMProvider.modelsCacheTime <
                 LiteLLMProvider.MODELS_CACHE_DURATION) {
-            logger.debug(`[${functionTag}] Using cached models`, {
+            logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
                 cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
                 modelCount: LiteLLMProvider.modelsCache.length,
             });
             return LiteLLMProvider.modelsCache;
         }
-        // Try to fetch models dynamically
         try {
             const dynamicModels = await this.fetchModelsFromAPI();
             if (dynamicModels.length > 0) {
-                // Cache successful result
                 LiteLLMProvider.modelsCache = dynamicModels;
                 LiteLLMProvider.modelsCacheTime = now;
-                logger.debug(`[${functionTag}] Successfully fetched models from API`, {
-                    modelCount: dynamicModels.length,
-                });
                 return dynamicModels;
             }
         }
         catch (error) {
-            logger.warn(`[${functionTag}] Failed to fetch models from API, using fallback`, {
-                error: error instanceof Error ? error.message : String(error),
-            });
+            logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
         }
-        // Fallback to hardcoded list if API fetch fails
-        const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",")
+        // Reached when the fetch threw or returned an empty list.
+        return this.getFallbackModels();
+    }
+    /**
+     * Return the first entry of `getAvailableModels()`, or
+     * `FALLBACK_LITELLM_MODEL` when that entry is missing or falsy.
+     *
+     * @returns {Promise<string>} The selected model id.
+     */
+    async getFirstAvailableModel() {
+        const [firstModel] = await this.getAvailableModels();
+        if (firstModel) {
+            return firstModel;
+        }
+        return FALLBACK_LITELLM_MODEL;
+    }
+    /**
+     * Build the model list used when the proxy cannot be queried.
+     *
+     * Reads `LITELLM_FALLBACK_MODELS` as a comma-separated list, trimming
+     * entries and dropping empty ones. When the variable is unset, or is set
+     * but contains no usable name (e.g. "" or ","), the built-in default list
+     * is returned, so the result is never empty.
+     *
+     * @returns {string[]} Configured fallback model ids, or the built-in defaults.
+     */
+    getFallbackModels() {
+        const configured = (process.env.LITELLM_FALLBACK_MODELS ?? "")
+            .split(",")
             .map((m) => m.trim())
-            .filter((m) => m.length > 0) || [
+            .filter((m) => m.length > 0);
+        // An empty array is truthy, so test the length explicitly rather than
+        // relying on `||` to pick the defaults.
+        if (configured.length > 0) {
+            return configured;
+        }
+        return [
-            "openai/gpt-4o", // minimal safe baseline
+            "openai/gpt-4o",
             "anthropic/claude-3-haiku",
             "meta-llama/llama-3.1-8b-instruct",
             "google/gemini-2.5-flash",
         ];
-        logger.debug(`[${functionTag}] Using fallback model list`, {
-            modelCount: fallbackModels.length,
-        });
-        return fallbackModels;
     }
-    /**
-     * Fetch available models from LiteLLM proxy /v1/models endpoint
-     * @private
-     */
+    /**
+     * Fetch model ids from `<baseURL>/v1/models`.
+     *
+     * The request is aborted by a 5000 ms timer, which is always cleared.
+     * Entries that are null/undefined or lack a non-empty string `id` are
+     * skipped instead of failing the whole call.
+     *
+     * @returns {Promise<string[]>} Model ids, sorted with the default comparator.
+     * @throws {NetworkError} When the request is aborted (reported as a timeout).
+     * @throws {Error} On a non-OK status or when the body has no `data` array.
+     */
     async fetchModelsFromAPI() {
-        const functionTag = "LiteLLMProvider.fetchModelsFromAPI";
-        const config = getLiteLLMConfig();
-        const resolvedBaseURL = this.credentials?.baseURL ?? config.baseURL;
-        const resolvedApiKey = this.credentials?.apiKey ?? config.apiKey;
-        const modelsUrl = `${resolvedBaseURL}/v1/models`;
+        const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
+        const proxyFetch = createProxyFetch();
         const controller = new AbortController();
-        const timeoutId = setTimeout(() => controller.abort(), 5000); // 5 second timeout
+        const timeoutId = setTimeout(() => controller.abort(), 5000);
         try {
-            logger.debug(`[${functionTag}] Fetching models from ${modelsUrl}`);
-            const proxyFetch = createProxyFetch();
             const response = await proxyFetch(modelsUrl, {
                 method: "GET",
                 headers: {
-                    Authorization: `Bearer ${resolvedApiKey}`,
+                    Authorization: `Bearer ${this.config.apiKey}`,
                     "Content-Type": "application/json",
                 },
                 signal: controller.signal,
             });
-            clearTimeout(timeoutId);
             if (!response.ok) {
                 throw new Error(`HTTP ${response.status}: ${response.statusText}`);
             }
-            const data = await response.json();
-            // Parse OpenAI-compatible models response
-            if (data && Array.isArray(data.data)) {
-                const models = data.data
-                    .map((model) => typeof model === "object" &&
-                    model !== null &&
-                    "id" in model &&
-                    typeof model.id === "string"
-                    ? model.id
-                    : undefined)
-                    .filter((id) => typeof id === "string" && id.length > 0)
-                    .sort();
-                logger.debug(`[${functionTag}] Successfully parsed models`, {
-                    totalModels: models.length,
-                    sampleModels: models.slice(0, 5),
-                });
-                return models;
-            }
-            else {
+            const data = (await response.json());
+            // `data?.` keeps a literal `null` body on the "Invalid response
+            // format" path instead of raising a TypeError.
+            if (!Array.isArray(data?.data)) {
                 throw new Error("Invalid response format: expected data.data array");
             }
+            return data.data
+                // `m?.` lets null/undefined entries be filtered out below
+                // rather than throwing and discarding the whole list.
+                .map((m) => m?.id)
+                .filter((id) => typeof id === "string" && id.length > 0)
+                .sort();
         }
         catch (error) {
-            clearTimeout(timeoutId);
             if (isAbortError(error)) {
                 throw new NetworkError("Request timed out after 5 seconds", this.providerName);
             }
             throw error;
         }
+        finally {
+            clearTimeout(timeoutId);
+        }
     }
 }