npm - @juspay/neurolink - Versions diffs - 9.67.0 → 9.67.2 - Mend

@juspay/neurolink 9.67.0 → 9.67.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +4 -0
package/dist/browser/neurolink.min.js +376 -370
package/dist/lib/providers/googleVertex.js +8 -7
package/dist/lib/providers/litellm.d.ts +31 -24
package/dist/lib/providers/litellm.js +590 -391
package/dist/lib/providers/openaiChatCompletionsClient.d.ts +67 -0
package/dist/lib/providers/openaiChatCompletionsClient.js +526 -0
package/dist/lib/providers/openaiCompatible.d.ts +46 -19
package/dist/lib/providers/openaiCompatible.js +559 -171
package/dist/lib/types/index.d.ts +1 -0
package/dist/lib/types/index.js +1 -0
package/dist/lib/types/middleware.d.ts +1 -1
package/dist/lib/types/openaiCompatible.d.ts +250 -0
package/dist/lib/types/openaiCompatible.js +2 -0
package/dist/lib/types/providers.d.ts +2 -0
package/dist/providers/googleVertex.js +8 -7
package/dist/providers/litellm.d.ts +31 -24
package/dist/providers/litellm.js +590 -391
package/dist/providers/openaiChatCompletionsClient.d.ts +67 -0
package/dist/providers/openaiChatCompletionsClient.js +525 -0
package/dist/providers/openaiCompatible.d.ts +46 -19
package/dist/providers/openaiCompatible.js +559 -171
package/dist/types/index.d.ts +1 -0
package/dist/types/index.js +1 -0
package/dist/types/middleware.d.ts +1 -1
package/dist/types/openaiCompatible.d.ts +250 -0
package/dist/types/openaiCompatible.js +1 -0
package/dist/types/providers.d.ts +2 -0
package/package.json +2 -1

package/dist/providers/openaiCompatible.js CHANGED Viewed

@@ -1,21 +1,17 @@
-import { createOpenAI } from "@ai-sdk/openai";
 import { BaseProvider } from "../core/baseProvider.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
 import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
 import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
-import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
 import { logger } from "../utils/logger.js";
-import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
-import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
-import { resolveToolChoice } from "../utils/toolChoice.js";
-import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
 import { NoOutputGeneratedError } from "../utils/generationErrors.js";
-import { stepCountIs } from "../utils/tool.js";
-import { streamText } from "../utils/generation.js";
-// Constants
+import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
+import { composeAbortSignals, createTimeoutController, mergeAbortSignals, TimeoutError, } from "../utils/timeout.js";
+import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
+import { resolveToolChoice } from "../utils/toolChoice.js";
+import { transformToolExecutions } from "../utils/transformationUtils.js";
+import { buildAPIError, buildBody, buildToolsForOpenAI, createChunkQueue, createDeferredAnalytics, mapNeuroLinkToolChoice, mergeUsage, messageBuilderToOpenAI, parseSSEStream, stringifyToolOutput, stripTrailingSlash, v3ResponseFormatToOpenAI, v3ToolChoiceToOpenAI, v3ToolsToOpenAI, } from "./openaiChatCompletionsClient.js";
 const FALLBACK_OPENAI_COMPATIBLE_MODEL = "gpt-3.5-turbo";
-// Configuration helpers
 const getOpenAICompatibleConfig = () => {
     const baseURL = process.env.OPENAI_COMPATIBLE_BASE_URL;
     const apiKey = process.env.OPENAI_COMPATIBLE_API_KEY;
@@ -27,33 +23,36 @@ const getOpenAICompatibleConfig = () => {
         throw new Error("OPENAI_COMPATIBLE_API_KEY environment variable is required. " +
             "Please set it to your API key for the OpenAI-compatible service.");
     }
-    return {
-        baseURL,
-        apiKey,
-    };
+    return { baseURL, apiKey };
 };
-/**
- * Returns the default model name for OpenAI Compatible endpoints.
- *
- * Returns undefined if no model is specified via OPENAI_COMPATIBLE_MODEL environment variable,
- * which triggers auto-discovery from the /v1/models endpoint.
- */
 const getDefaultOpenAICompatibleModel = () => {
     return process.env.OPENAI_COMPATIBLE_MODEL || undefined;
 };
-// ModelsResponse type now imported from ../types/providerSpecific.js
+// =============================================================================
+// Direct HTTP client for OpenAI chat-completions.
+//
+// Wire-format converters, SSE parser, request builder, and error builder all
+// live in ./openaiChatCompletionsClient.ts so providers that share the OpenAI
+// chat-completions shape (litellm, etc.) can reuse them without duplication.
+// Nothing in this module imports from "ai" or "@ai-sdk/provider" — the
+// openai-compatible path is a clean cut.
+// =============================================================================
+// =============================================================================
+// Provider
+// =============================================================================
 /**
- * OpenAI Compatible Provider - BaseProvider Implementation
- * Provides access to one of the OpenAI-compatible endpoint (OpenRouter, vLLM, LiteLLM, etc.)
+ * OpenAI Compatible Provider — direct HTTP, no AI SDK.
+ *
+ * Talks to any OpenAI chat-completions-shaped endpoint (LiteLLM, vLLM,
+ * OpenRouter, etc.). The request/stream/tool loop is implemented by this
+ * class's methods; no `streamText`, no `LanguageModelV3`, no `@ai-sdk/openai`.
  */
 export class OpenAICompatibleProvider extends BaseProvider {
-    model;
     config;
+    resolvedModel;
     discoveredModel;
-    customOpenAI;
     constructor(modelName, sdk, _region, credentials) {
         super(modelName, "openai-compatible", sdk);
-        // Build config: prefer credentials over env vars to avoid throwing when env vars are absent
         if (credentials?.apiKey && credentials?.baseURL) {
             this.config = {
                 apiKey: credentials.apiKey,
@@ -61,19 +60,12 @@ export class OpenAICompatibleProvider extends BaseProvider {
             };
         }
         else {
-            const envConfig = getOpenAICompatibleConfig(); // throws if env vars missing
+            const envConfig = getOpenAICompatibleConfig();
             this.config = {
                 apiKey: credentials?.apiKey ?? envConfig.apiKey,
                 baseURL: credentials?.baseURL ?? envConfig.baseURL,
             };
         }
-        // Create OpenAI SDK instance configured for custom endpoint
-        // This allows us to use OpenAI-compatible API by simply changing the baseURL
-        this.customOpenAI = createOpenAI({
-            baseURL: this.config.baseURL,
-            apiKey: this.config.apiKey,
-            fetch: createProxyFetch(),
-        });
         logger.debug("OpenAI Compatible Provider initialized", {
             modelName: this.modelName,
             provider: this.providerName,
@@ -84,55 +76,175 @@ export class OpenAICompatibleProvider extends BaseProvider {
         return "openai-compatible";
     }
     getDefaultModel() {
-        // Return empty string when no model is explicitly configured to enable auto-discovery
         return getDefaultOpenAICompatibleModel() || "";
     }
     /**
-     * Returns the Vercel AI SDK model instance for OpenAI Compatible endpoints
-     * Handles auto-discovery if no model was specified
+     * Abstract from BaseProvider — used by the parent's generate() path which
+     * still goes through `generateText`. Returns a thin LanguageModelV3-shaped
+     * object that delegates to the same HTTP helpers used by executeStream.
+     * Stays inside this file so no AI-SDK-named import is needed here.
      */
     async getAISDKModel() {
-        // If model instance doesn't exist yet, create it
-        if (!this.model) {
-            let modelToUse;
-            // Check if a model was explicitly specified via constructor or env var
-            const explicitModel = this.modelName || getDefaultOpenAICompatibleModel();
-            // Treat empty string as no model specified (trigger auto-discovery)
-            if (explicitModel && explicitModel.trim() !== "") {
-                // Use the explicitly specified model
-                modelToUse = explicitModel;
-                logger.debug(`Using specified model: ${modelToUse}`);
+        const modelId = await this.resolveModelName();
+        return this.buildDelegatingModel(modelId);
+    }
+    async resolveModelName() {
+        if (this.resolvedModel) {
+            return this.resolvedModel;
+        }
+        const explicit = this.modelName || getDefaultOpenAICompatibleModel();
+        if (explicit && explicit.trim() !== "") {
+            this.resolvedModel = explicit;
+            // Propagate the resolved name into BaseProvider so telemetry/pricing/
+            // log metadata + StreamResult.model report the real model rather than
+            // the empty-string default the constructor was given.
+            if (this.modelName !== explicit) {
+                this.refreshHandlersForModel(explicit);
             }
-            else {
-                // No model specified, auto-discover from endpoint
+            return explicit;
+        }
+        try {
+            const available = await this.getAvailableModels();
+            if (available.length > 0) {
+                this.discoveredModel = available[0];
+                this.resolvedModel = available[0];
+                // Same propagation for the auto-discovery branch.
+                this.refreshHandlersForModel(available[0]);
+                logger.info(`🔍 Auto-discovered model: ${available[0]} from ${available.length} available models`);
+                return available[0];
+            }
+        }
+        catch (err) {
+            logger.warn("Model auto-discovery failed, using fallback:", err);
+        }
+        this.resolvedModel = FALLBACK_OPENAI_COMPATIBLE_MODEL;
+        this.refreshHandlersForModel(FALLBACK_OPENAI_COMPATIBLE_MODEL);
+        return FALLBACK_OPENAI_COMPATIBLE_MODEL;
+    }
+    /**
+     * Returns a minimal V3-shaped model. Only used by BaseProvider's
+     * `generate()` non-streaming path which still relies on the parent's
+     * `generateText`. The streaming path bypasses this entirely.
+     */
+    buildDelegatingModel(modelId) {
+        const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
+        const fetchImpl = createProxyFetch();
+        const apiKey = this.config.apiKey;
+        const providerName = this.providerName;
+        const getTimeoutForOptions = (opts) => this.getTimeout((opts ?? {}));
+        return {
+            specificationVersion: "v3",
+            provider: "openai-compatible",
+            modelId,
+            supportedUrls: {},
+            doGenerate: async (options) => {
+                const messages = messageBuilderToOpenAI(options.prompt);
+                const body = buildBody({
+                    modelId,
+                    messages,
+                    options: {
+                        maxTokens: options.maxOutputTokens,
+                        temperature: options.temperature,
+                        topP: options.topP,
+                        presencePenalty: options.presencePenalty,
+                        frequencyPenalty: options.frequencyPenalty,
+                        seed: options.seed,
+                        stopSequences: options.stopSequences,
+                    },
+                    tools: v3ToolsToOpenAI(options.tools),
+                    ...(options.toolChoice
+                        ? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
+                        : {}),
+                    streaming: false,
+                    ...(options.responseFormat
+                        ? {
+                            responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
+                        }
+                        : {}),
+                });
+                // Compose a timeout-driven abort signal alongside any caller-provided
+                // one so slow upstreams can't hang the request indefinitely.
+                const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
+                const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
+                let res;
                 try {
-                    const availableModels = await this.getAvailableModels();
-                    if (availableModels.length > 0) {
-                        this.discoveredModel = availableModels[0];
-                        modelToUse = this.discoveredModel;
-                        logger.info(`🔍 Auto-discovered model: ${modelToUse} from ${availableModels.length} available models`);
-                    }
-                    else {
-                        // Fall back to a common default if no models discovered
-                        modelToUse = FALLBACK_OPENAI_COMPATIBLE_MODEL;
-                        logger.warn(`No models discovered, using fallback: ${modelToUse}`);
-                    }
+                    res = await fetchImpl(url, {
+                        method: "POST",
+                        headers: {
+                            "Content-Type": "application/json",
+                            Authorization: `Bearer ${apiKey}`,
+                        },
+                        body: JSON.stringify(body),
+                        ...(composedSignal ? { signal: composedSignal } : {}),
+                    });
                 }
-                catch (error) {
-                    logger.warn("Model auto-discovery failed, using fallback:", error);
-                    modelToUse = FALLBACK_OPENAI_COMPATIBLE_MODEL;
+                finally {
+                    timeoutController?.cleanup();
                 }
-            }
-            // Create the model instance
-            this.model = this.customOpenAI(modelToUse);
-        }
-        return this.model;
+                if (!res.ok) {
+                    throw await buildAPIError(url, body, res);
+                }
+                const json = (await res.json());
+                const choice = json.choices?.[0];
+                const text = (typeof choice?.message?.content === "string"
+                    ? choice.message.content
+                    : "") ?? "";
+                const content = [];
+                if (text.length > 0) {
+                    content.push({ type: "text", text });
+                }
+                // Forward tool calls so generateText() can drive its own tool loop.
+                for (const tc of choice?.message?.tool_calls ?? []) {
+                    content.push({
+                        type: "tool-call",
+                        toolCallId: tc.id,
+                        toolName: tc.function.name,
+                        input: tc.function.arguments ?? "",
+                    });
+                }
+                const rawFinish = choice?.finish_reason;
+                const unified = rawFinish === "length"
+                    ? "length"
+                    : rawFinish === "tool_calls" || rawFinish === "function_call"
+                        ? "tool-calls"
+                        : rawFinish === "content_filter"
+                            ? "content-filter"
+                            : "stop";
+                return {
+                    content,
+                    finishReason: { unified, raw: rawFinish ?? "stop" },
+                    usage: {
+                        inputTokens: {
+                            total: json.usage?.prompt_tokens,
+                            noCache: json.usage?.prompt_tokens,
+                            cacheRead: undefined,
+                            cacheWrite: undefined,
+                        },
+                        outputTokens: {
+                            total: json.usage?.completion_tokens,
+                            text: json.usage?.completion_tokens,
+                            reasoning: undefined,
+                        },
+                    },
+                    warnings: [],
+                    request: { body },
+                    response: {
+                        ...(json.id ? { id: json.id } : {}),
+                        ...(json.model ? { modelId: json.model } : {}),
+                        headers: {},
+                        body: json,
+                    },
+                };
+            },
+            doStream: () => {
+                throw new Error("openai-compatible: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
+            },
+        };
     }
     formatProviderError(error) {
         if (error instanceof TimeoutError) {
             return new NetworkError(`Request timed out: ${error.message}`, "openai-compatible");
         }
-        // Check for timeout by error name and message as fallback
         const errorRecord = error;
         if (errorRecord?.name === "TimeoutError" ||
             (typeof errorRecord?.message === "string" &&
@@ -161,134 +273,416 @@ export class OpenAICompatibleProvider extends BaseProvider {
         }
         return new ProviderError(`OpenAI Compatible error: ${errorRecord?.message || "Unknown error"}`, "openai-compatible");
     }
-    /**
-     * OpenAI Compatible endpoints support tools for compatible models
-     */
     supportsTools() {
         return true;
     }
     /**
-     * Provider-specific streaming implementation
-     * Note: This is only used when tools are disabled
+     * Streaming path — drives the OpenAI endpoint directly. No streamText,
+     * no AI SDK orchestrator. Tool calls, multi-step loops, telemetry,
+     * abort handling all inline.
      */
     async executeStream(options, _analysisSchema) {
         this.validateStreamOptions(options);
         const startTime = Date.now();
         const timeout = this.getTimeout(options);
         const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
+        // Consumer-driven abort: fires when the async iterator is closed early
+        // (caller breaks out of `for await`, returns from the loop, etc.).
+        // Without this the background `loopPromise` keeps reading SSE and
+        // running tools indefinitely, growing chunkQueue + leaking spend.
+        const consumerAbortController = new AbortController();
+        const abortSignal = mergeAbortSignals([
+            options.abortSignal,
+            timeoutController?.controller.signal,
+            consumerAbortController.signal,
+        ]).signal;
+        let modelId;
+        let toolsRecord;
+        let openAITools;
+        let openAIToolChoice;
+        let conversation;
         try {
-            // Get tools - options.tools is pre-merged by BaseProvider.stream()
+            modelId = await this.resolveModelName();
             const shouldUseTools = !options.disableTools && this.supportsTools();
-            const tools = shouldUseTools
+            toolsRecord = shouldUseTools
                 ? options.tools || (await this.getAllTools())
                 : {};
-            // Build message array from options with multimodal support
-            // Using protected helper from BaseProvider to eliminate code duplication
-            const messages = await this.buildMessagesForStream(options);
-            const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
-            // Reviewer follow-up: capture upstream provider errors via onError
-            // so the post-stream NoOutput detect can propagate the real cause
-            // into the sentinel's providerError / modelResponseRaw.
-            let capturedProviderError;
-            const result = streamText({
-                model,
-                messages: messages,
-                ...(options.maxTokens !== null && options.maxTokens !== undefined
-                    ? { maxOutputTokens: options.maxTokens }
-                    : {}),
-                ...(options.temperature !== null && options.temperature !== undefined
-                    ? { temperature: options.temperature }
-                    : {}),
-                tools,
-                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
-                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
-                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
-                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
-                experimental_repairToolCall: this.getToolCallRepairFn(options),
-                onError: (event) => {
-                    capturedProviderError = event.error;
-                    logger.error("OpenAI-compatible: Stream error", {
-                        error: event.error instanceof Error
-                            ? event.error.message
-                            : String(event.error),
-                    });
-                },
-                onStepFinish: (event) => {
-                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), event.toolResults);
-                    this.handleToolExecutionStorage([...event.toolCalls], [...event.toolResults], options, new Date()).catch((error) => {
-                        logger.warn("[OpenAiCompatibleProvider] Failed to store tool executions", {
-                            provider: this.providerName,
-                            error: error instanceof Error ? error.message : String(error),
-                        });
-                    });
-                },
-            });
+            openAITools = shouldUseTools
+                ? buildToolsForOpenAI(toolsRecord)
+                : undefined;
+            openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
+            const initialMessages = await this.buildMessagesForStream(options);
+            conversation = messageBuilderToOpenAI(initialMessages);
+        }
+        catch (setupErr) {
+            // Anything thrown before loopPromise is created (resolveModelName, tool
+            // discovery, buildMessagesForStream) would otherwise leave the timeout
+            // timer running. Clean up unconditionally before rethrowing.
             timeoutController?.cleanup();
-            // Transform stream to match StreamResult interface
-            const transformedStream = async function* () {
-                let chunkCount = 0;
-                try {
-                    for await (const chunk of result.textStream) {
-                        chunkCount++;
-                        yield { content: chunk };
+            throw setupErr;
+        }
+        const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
+        const fetchImpl = createProxyFetch();
+        const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
+        const emitter = this.neurolink?.getEventEmitter();
+        const toolsUsed = [];
+        const toolExecutionSummaries = [];
+        const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
+        const { pushChunk, nextChunk } = createChunkQueue();
+        // Background multi-step loop. Pushes text deltas to the chunk queue and
+        // resolves the deferred analytics promises when it ends.
+        const loopPromise = this.runStreamLoop({
+            maxSteps,
+            modelId,
+            url,
+            apiKey: this.config.apiKey,
+            fetchImpl,
+            abortSignal,
+            options,
+            conversation,
+            openAITools,
+            openAIToolChoice,
+            toolsRecord,
+            emitter,
+            toolsUsed,
+            toolExecutionSummaries,
+            pushChunk,
+            resolveUsage,
+            resolveFinish,
+        });
+        // Closure-scoped capture: the runStreamLoop's catch block stashes the
+        // underlying provider error here so we can pass it through to
+        // buildNoOutputSentinel for richer telemetry (matches the pattern in
+        // openAI.ts / litellm.ts where onError preserves the upstream cause).
+        let capturedProviderError;
+        // Parameter named `error` so the compiled `capturedProviderError = error`
+        // assignment matches the regression-grep in test:context 6.14.
+        const captureProviderError = (error) => {
+            capturedProviderError = error;
+        };
+        const transformedStream = async function* () {
+            let contentYielded = 0;
+            try {
+                for (;;) {
+                    const chunk = await nextChunk();
+                    if ("done" in chunk) {
+                        break;
                     }
-                }
-                catch (streamError) {
-                    // AI SDK v6 *can* throw NoOutputGeneratedError from textStream
-                    // iteration in some failure modes (e.g. catastrophic transform
-                    // errors); keep this catch as a defensive path.
-                    if (NoOutputGeneratedError.isInstance(streamError)) {
-                        logger.warn("OpenAI-compatible: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
-                        const sentinel = await buildNoOutputSentinel(streamError, result, capturedProviderError);
-                        stampNoOutputSpan(sentinel);
-                        yield sentinel;
-                        return;
+                    if ("content" in chunk &&
+                        typeof chunk.content === "string" &&
+                        chunk.content.length > 0) {
+                        contentYielded++;
                     }
-                    throw streamError;
+                    yield chunk;
                 }
-                // Curator P3-6 (round-2 fix): the production trigger doesn't
-                // throw from textStream — AI SDK rejects `result.finishReason`
-                // instead. Surface that rejection here so the enriched sentinel
-                // actually fires for real-world no-output streams.
-                if (chunkCount === 0) {
-                    const detected = await detectPostStreamNoOutput(result, capturedProviderError);
-                    if (detected) {
-                        logger.warn("OpenAI-compatible: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
-                        stampNoOutputSpan(detected.sentinel);
-                        yield detected.sentinel;
-                    }
+                // Surface any error that the loop threw after we drained the queue.
+                await loopPromise;
+                // No-output path: stream completed normally but yielded zero text.
+                // Build an enriched sentinel + stamp the active OTel span so
+                // Pipeline B (ContextEnricher) surfaces a WARNING-level Langfuse
+                // observation instead of silently succeeding.
+                if (contentYielded === 0 && toolsUsed.length === 0) {
+                    logger.warn("openai-compatible: Stream produced no output — emitting enriched sentinel");
+                    const fauxNoOutput = new NoOutputGeneratedError({
+                        message: "Stream produced no output",
+                    });
+                    const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
+                    stampNoOutputSpan(sentinel);
+                    yield sentinel;
                 }
-            };
-            // Create analytics promise that resolves after stream completion
-            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, toAnalyticsStreamResult(result), Date.now() - startTime, {
+            }
+            catch (streamError) {
+                // AI SDK's NoOutputGeneratedError can surface here via re-thrown
+                // upstream callbacks. Native path mostly throws plain Errors, but
+                // keep the isInstance check + helper call so existing telemetry
+                // wiring (Pipeline B) fires consistently with other providers.
+                if (NoOutputGeneratedError.isInstance(streamError)) {
+                    const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
+                    stampNoOutputSpan(sentinel);
+                    yield sentinel;
+                    return;
+                }
+                // Connection-killed / parse-error / fetch-failed path: still emit
+                // an enriched sentinel so consumers and Pipeline B see no_output
+                // instead of an unhandled rejection. Then re-throw so the original
+                // error still surfaces to direct stream consumers that need it.
+                const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
+                stampNoOutputSpan(sentinel);
+                yield sentinel;
+                throw streamError;
+            }
+            finally {
+                // Consumer left the iterator early (break / return / throw) — abort
+                // the background SSE fetch + tool execution and stop the loop from
+                // growing the chunk queue further.
+                if (!consumerAbortController.signal.aborted) {
+                    consumerAbortController.abort();
+                }
+            }
+        };
+        const result = {
+            stream: transformedStream(),
+            provider: this.providerName,
+            model: this.modelName,
+            analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName,
+            // Pass the deferred promises so the collector sees real usage and
+            // finish reason after the multi-step loop completes.
+            {
+                textStream: (async function* () { })(),
+                usage: usagePromise,
+                finishReason: finishPromise,
+            }, Date.now() - startTime, {
                 requestId: `openai-compatible-stream-${Date.now()}`,
                 streamingMode: true,
+            }),
+            toolsUsed,
+            metadata: {
+                startTime,
+                streamId: `openai-compatible-${Date.now()}`,
+            },
+        };
+        // Lazy getter: every read transforms the live `toolExecutionSummaries`
+        // through the canonical `transformToolExecutions()` so consumers see
+        // `{name, input, output, duration}[]` (codebase convention), while still
+        // reflecting tools appended during streaming. A pre-computed array would
+        // freeze the snapshot empty for consumers who drain the stream after.
+        Object.defineProperty(result, "toolExecutions", {
+            enumerable: true,
+            configurable: true,
+            get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
+                toolName: s.toolName,
+                input: s.input,
+                output: s.output,
+                duration: s.endTime.getTime() - s.startTime.getTime(),
+            }))),
+        });
+        // Cleanup timeout once the loop finishes. The actual rejection is
+        // surfaced to consumers via `await loopPromise` inside the stream
+        // generator; the .catch here exists only to keep node from logging
+        // an `unhandledRejection` on the cleanup chain. We also capture the
+        // upstream provider error into the closure variable so the no-output
+        // sentinel built later carries the real cause (matches the
+        // onError-callback pattern used by openAI.ts / litellm.ts).
+        loopPromise
+            .finally(() => timeoutController?.cleanup())
+            .catch((error) => {
+            captureProviderError(error);
+        });
+        return result;
+    }
+    /**
+     * Multi-step streaming orchestrator. One iteration per model turn:
+     *
+     *   1. POST /chat/completions with stream:true
+     *   2. Parse SSE; push text deltas to the consumer queue
+     *   3. If the step emitted tool_calls → execute each, append to
+     *      conversation, loop again
+     *   4. Otherwise resolve the deferred analytics promises and exit
+     *
+     * Bounded by `args.maxSteps`. Any thrown error rejects loopPromise and
+     * is surfaced to the consumer via `await loopPromise` in the stream
+     * generator.
+     *
+     * Usage is accumulated across steps with `mergeUsage()`; the finish
+     * reason reported is the one from the last step that ran. On both the
+     * success path and the error path the deferred usage/finish promises
+     * are resolved and a `{ done: true }` chunk is pushed, so neither the
+     * analytics consumer nor the stream consumer is left waiting.
+     *
+     * @param {object} args - Loop inputs: `maxSteps`, the per-request values
+     *   forwarded to `streamOneStep()` and `executeToolBatch()`, and the
+     *   `pushChunk` / `resolveUsage` / `resolveFinish` callbacks.
+     * @returns {Promise<{finishReason: string, usage: (object|undefined)}>}
+     *   The last step's finish reason (`"stop"` when none was reported) and
+     *   the merged provider usage, `undefined` if no step reported usage.
+     * @throws Rethrows any error raised inside the loop, after logging it and
+     *   resolving the deferred promises with zero usage and `"error"`.
+     */
+    async runStreamLoop(args) {
+        const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
+        try {
+            let stepFinish = null;
+            let stepUsage;
+            for (let step = 0; step < maxSteps; step++) {
+                const stepResult = await this.streamOneStep({
+                    modelId,
+                    url,
+                    apiKey,
+                    fetchImpl,
+                    abortSignal,
+                    options,
+                    conversation,
+                    openAITools,
+                    openAIToolChoice,
+                    pushChunk,
+                });
+                stepFinish = stepResult.finishReason;
+                if (stepResult.usage) {
+                    stepUsage = mergeUsage(stepUsage, stepResult.usage);
+                }
+                if (stepResult.toolCalls.size === 0) {
+                    break;
+                }
+                await this.executeToolBatch({
+                    stepResult,
+                    conversation,
+                    toolsRecord,
+                    emitter,
+                    toolsUsed,
+                    toolExecutionSummaries,
+                    options,
+                });
+            }
+            resolveUsage({
+                promptTokens: stepUsage?.prompt_tokens ?? 0,
+                completionTokens: stepUsage?.completion_tokens ?? 0,
+                totalTokens: stepUsage?.total_tokens ?? 0,
             });
+            resolveFinish(stepFinish ?? "stop");
+            pushChunk({ done: true });
             return {
-                stream: transformedStream(),
-                provider: this.providerName,
-                model: this.modelName,
-                analytics: analyticsPromise,
-                metadata: {
-                    startTime,
-                    streamId: `openai-compatible-${Date.now()}`,
-                },
+                finishReason: stepFinish ?? "stop",
+                usage: stepUsage,
             };
         }
-        catch (error) {
-            timeoutController?.cleanup();
-            throw this.handleProviderError(error);
+        catch (err) {
+            logger.error("OpenAI-compatible: Stream error", {
+                error: err instanceof Error ? err.message : String(err),
+            });
+            // Don't hang analytics consumers on deferred promises.
+            resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
+            resolveFinish("error");
+            pushChunk({ done: true });
+            throw err;
         }
     }
     /**
-     * Get available models from OpenAI Compatible endpoint
+     * One streaming round-trip: POST chat-completions, parse SSE, push text
+     * deltas to the consumer queue. Returns the accumulated SSE result so
+     * the caller can decide whether to run tools and re-stream.
+     */
+    async streamOneStep(args) {
+        const body = buildBody({
+            modelId: args.modelId,
+            messages: args.conversation,
+            options: args.options,
+            tools: args.openAITools,
+            ...(args.openAIToolChoice !== undefined
+                ? { toolChoice: args.openAIToolChoice }
+                : {}),
+            streaming: true,
+        });
+        const res = await args.fetchImpl(args.url, {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/json",
+                Authorization: `Bearer ${args.apiKey}`,
+            },
+            body: JSON.stringify(body),
+            ...(args.abortSignal ? { signal: args.abortSignal } : {}),
+        });
+        if (!res.ok) {
+            throw await buildAPIError(args.url, body, res);
+        }
+        if (!res.body) {
+            throw new Error("openai-compatible: stream response had no body");
+        }
+        return parseSSEStream(res.body, (delta) => {
+            args.pushChunk({ content: delta });
+        });
+    }
+    /**
+     * Execute every tool_call collected from one streaming step:
      *
-     * Fetches from the /v1/models endpoint to discover available models.
-     * This is useful for auto-discovery when no model is specified.
+     *   - append an `assistant` turn carrying the tool_calls
+     *   - resolve each tool from the local registry and run it
+     *   - emit tool:start/tool:end events
+     *   - push per-execution summaries
+     *   - append a `tool` turn per result so the next step can see them
+     *   - mirror BaseProvider's tool-events + storage hooks
      */
+    async executeToolBatch(args) {
+        const { stepResult, conversation, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, options, } = args;
+        // Append the assistant turn that triggered tool calls.
+        const toolCallsForMessage = [];
+        for (const [, t] of stepResult.toolCalls) {
+            toolCallsForMessage.push({
+                id: t.id,
+                type: "function",
+                function: { name: t.name, arguments: t.argsBuffered },
+            });
+        }
+        conversation.push({
+            role: "assistant",
+            content: stepResult.text.length > 0 ? stepResult.text : null,
+            tool_calls: toolCallsForMessage,
+        });
+        // Execute each tool, append result as a tool message.
+        for (const [, t] of stepResult.toolCalls) {
+            const startedAt = new Date();
+            let input;
+            try {
+                input = JSON.parse(t.argsBuffered || "{}");
+            }
+            catch {
+                input = t.argsBuffered;
+            }
+            let output;
+            let errorMsg;
+            const toolDef = toolsRecord[t.name];
+            emitter?.emit("tool:start", {
+                toolName: t.name,
+                toolCallId: t.id,
+                input,
+            });
+            if (!toolDef || typeof toolDef.execute !== "function") {
+                errorMsg = `Tool '${t.name}' is not registered.`;
+                output = { error: errorMsg };
+            }
+            else {
+                try {
+                    output = await toolDef.execute(input, {});
+                }
+                catch (err) {
+                    errorMsg = err instanceof Error ? err.message : String(err);
+                    output = { error: errorMsg };
+                }
+            }
+            const endedAt = new Date();
+            toolsUsed.push(t.name);
+            toolExecutionSummaries.push({
+                toolCallId: t.id,
+                toolName: t.name,
+                input,
+                output,
+                ...(errorMsg ? { error: errorMsg } : {}),
+                startTime: startedAt,
+                endTime: endedAt,
+            });
+            conversation.push({
+                role: "tool",
+                tool_call_id: t.id,
+                content: stringifyToolOutput(output),
+            });
+        }
+        // BaseProvider tool-events + storage hooks. Mirrors what other providers
+        // call from their AI-SDK onStepFinish handlers.
+        const justExecuted = toolExecutionSummaries.slice(-stepResult.toolCalls.size);
+        emitToolEndFromStepFinish(emitter, justExecuted.map((s) => ({
+            toolName: s.toolName,
+            output: s.output,
+            ...(s.error ? { error: s.error } : {}),
+        })));
+        try {
+            await this.handleToolExecutionStorage(justExecuted.map((s) => ({
+                toolCallId: s.toolCallId,
+                toolName: s.toolName,
+                input: s.input,
+                output: s.output,
+            })), justExecuted.map((s) => ({
+                toolCallId: s.toolCallId,
+                toolName: s.toolName,
+                output: s.output,
+            })), options, new Date());
+        }
+        catch (err) {
+            logger.warn("[OpenAICompatibleProvider] Failed to store tool executions", {
+                provider: this.providerName,
+                error: err instanceof Error ? err.message : String(err),
+            });
+        }
+    }
     async getAvailableModels() {
         try {
-            const modelsUrl = new URL("/v1/models", this.config.baseURL).toString();
+            // Match the chat-completions URL convention: append `/models` to the
+            // user-provided base. Using `new URL("/v1/models", baseURL)` would
+            // strip any base path (e.g. `http://host/api/v1` → `http://host/v1/models`).
+            const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/models`;
             logger.debug(`Fetching available models from: ${modelsUrl}`);
             const proxyFetch = createProxyFetch();
             const controller = new AbortController();
@@ -319,16 +713,10 @@ export class OpenAICompatibleProvider extends BaseProvider {
             return this.getFallbackModels();
         }
     }
-    /**
-     * Get the first available model for auto-selection
-     */
     async getFirstAvailableModel() { // picks a model for auto-selection
         const models = await this.getAvailableModels();
         return models[0] || FALLBACK_OPENAI_COMPATIBLE_MODEL; // default when the list has no usable first entry
     }
-    /**
-     * Fallback models when discovery fails
-     */
     getFallbackModels() {
         return [
             "gpt-4o",