@juspay/neurolink 9.67.0 → 9.67.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- import { createOpenAI } from "@ai-sdk/openai";
2
1
  import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
3
2
  import { BaseProvider } from "../core/baseProvider.js";
4
3
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
@@ -6,72 +5,61 @@ import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
6
5
  import { createProxyFetch } from "../proxy/proxyFetch.js";
7
6
  import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
8
7
  import { isAbortError } from "../utils/errorHandling.js";
9
- import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
8
+ import { NoOutputGeneratedError } from "../utils/generationErrors.js";
10
9
  import { logger } from "../utils/logger.js";
11
- import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
10
+ import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
12
11
  import { calculateCost } from "../utils/pricing.js";
13
12
  import { getProviderModel } from "../utils/providerConfig.js";
14
- import { composeAbortSignals, createTimeoutController, TimeoutError, withTimeout, } from "../utils/timeout.js";
13
+ import { composeAbortSignals, createTimeoutController, mergeAbortSignals, TimeoutError, } from "../utils/timeout.js";
14
+ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
15
15
  import { resolveToolChoice } from "../utils/toolChoice.js";
16
- import { getModelId } from "./providerTypeUtils.js";
17
- import { NoOutputGeneratedError } from "../utils/generationErrors.js";
18
- import { Output, stepCountIs } from "../utils/tool.js";
19
- import { streamText } from "../utils/generation.js";
16
+ import { transformToolExecutions } from "../utils/transformationUtils.js";
17
+ import { buildAPIError, buildBody, buildToolsForOpenAI, createChunkQueue, createDeferredAnalytics, mapNeuroLinkToolChoice, mergeUsage, messageBuilderToOpenAI, parseSSEStream, stringifyToolOutput, stripTrailingSlash, v3ResponseFormatToOpenAI, v3ToolChoiceToOpenAI, v3ToolsToOpenAI, } from "./openaiChatCompletionsClient.js";
20
18
  const streamTracer = trace.getTracer("neurolink.provider.litellm");
21
- // Configuration helpers
22
- const getLiteLLMConfig = () => {
23
- return {
24
- baseURL: process.env.LITELLM_BASE_URL || "http://localhost:4000",
25
- apiKey: process.env.LITELLM_API_KEY || "sk-anything",
26
- };
27
- };
19
+ const FALLBACK_LITELLM_MODEL = "openai/gpt-4o-mini";
20
+ const getLiteLLMConfig = () => ({
21
+ baseURL: process.env.LITELLM_BASE_URL || "http://localhost:4000",
22
+ apiKey: process.env.LITELLM_API_KEY || "sk-anything",
23
+ });
28
24
  /**
29
- * Returns the default model name for LiteLLM.
30
- *
31
- * LiteLLM uses a 'provider/model' format for model names.
32
- * For example:
33
- * - 'openai/gpt-4o-mini'
34
- * - 'openai/gpt-3.5-turbo'
35
- * - 'anthropic/claude-3-sonnet-20240229'
36
- * - 'google/gemini-pro'
37
- *
38
- * You can override the default by setting the LITELLM_MODEL environment variable.
25
+ * LiteLLM uses a 'provider/model' format. Override via LITELLM_MODEL env var.
39
26
  */
40
- const getDefaultLiteLLMModel = () => {
41
- return getProviderModel("LITELLM_MODEL", "openai/gpt-4o-mini");
42
- };
27
+ const getDefaultLiteLLMModel = () => getProviderModel("LITELLM_MODEL", FALLBACK_LITELLM_MODEL);
28
+ const isGemini25Model = (modelName) => modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
29
+ // =============================================================================
30
+ // Direct HTTP client for LiteLLM proxy.
31
+ //
32
+ // LiteLLM exposes the OpenAI chat-completions wire format, so all the
33
+ // wire-level converters and the SSE parser are shared with the
34
+ // openai-compatible provider via ./openaiChatCompletionsClient.ts. This
35
+ // file owns LiteLLM-specific behaviour: OTel span wrap with cost, model
36
+ // allowlist 403 → ModelAccessDeniedError, Gemini 2.5 maxTokens skip,
37
+ // model caching, and native /v1/embeddings.
38
+ // =============================================================================
43
39
  /**
44
- * LiteLLM Provider - BaseProvider Implementation
45
- * Provides access to 100+ models via LiteLLM proxy server
40
+ * LiteLLM Provider direct HTTP, no AI SDK. Talks to a LiteLLM proxy
41
+ * server (or any deployment that speaks OpenAI chat-completions + the
42
+ * `/v1/models` and `/v1/embeddings` endpoints).
46
43
  */
47
44
  export class LiteLLMProvider extends BaseProvider {
48
- model;
45
+ config;
49
46
  credentials;
50
- // Cache for available models to avoid repeated API calls
47
+ resolvedModel;
51
48
  static modelsCache = [];
52
49
  static modelsCacheTime = 0;
53
50
  static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
54
51
  constructor(modelName, sdk, _region, credentials) {
55
52
  super(modelName, "litellm", sdk);
56
- // Store per-request credentials for use in embed/embedMany/fetchModelsFromAPI
57
53
  this.credentials = credentials;
58
- // Initialize LiteLLM using OpenAI SDK with explicit configuration
59
- const config = getLiteLLMConfig();
60
- // Create OpenAI SDK instance configured for LiteLLM proxy
61
- // LiteLLM acts as a proxy server that implements the OpenAI-compatible API.
62
- // To communicate with LiteLLM instead of the default OpenAI endpoint, we use createOpenAI
63
- // with a custom baseURL and apiKey. This ensures all requests are routed through the LiteLLM
64
- // proxy, allowing access to multiple models and custom authentication.
65
- const customOpenAI = createOpenAI({
66
- baseURL: credentials?.baseURL ?? config.baseURL,
67
- apiKey: credentials?.apiKey ?? config.apiKey,
68
- fetch: createProxyFetch(),
69
- });
70
- this.model = customOpenAI.chat(this.modelName || getDefaultLiteLLMModel());
54
+ const envConfig = getLiteLLMConfig();
55
+ this.config = {
56
+ baseURL: credentials?.baseURL ?? envConfig.baseURL,
57
+ apiKey: credentials?.apiKey ?? envConfig.apiKey,
58
+ };
71
59
  logger.debug("LiteLLM Provider initialized", {
72
60
  modelName: this.modelName,
73
61
  provider: this.providerName,
74
- baseURL: config.baseURL,
62
+ baseURL: this.config.baseURL,
75
63
  });
76
64
  }
77
65
  getProviderName() {
@@ -81,16 +69,152 @@ export class LiteLLMProvider extends BaseProvider {
81
69
  return getDefaultLiteLLMModel();
82
70
  }
83
71
  /**
84
- * Returns the Vercel AI SDK model instance for LiteLLM
72
+ * Abstract from BaseProvider used by the parent's generate() path which
73
+ * still goes through `generateText`. Returns a thin LanguageModelV3-shaped
74
+ * object that delegates to the same HTTP helpers used by executeStream.
75
+ */
76
+ async getAISDKModel() {
77
+ const modelId = await this.resolveModelName();
78
+ return this.buildDelegatingModel(modelId);
79
+ }
80
+ async resolveModelName() {
81
+ if (this.resolvedModel) {
82
+ return this.resolvedModel;
83
+ }
84
+ const explicit = this.modelName || getDefaultLiteLLMModel();
85
+ if (explicit && explicit.trim() !== "") {
86
+ this.resolvedModel = explicit;
87
+ if (this.modelName !== explicit) {
88
+ this.refreshHandlersForModel(explicit);
89
+ }
90
+ return explicit;
91
+ }
92
+ this.resolvedModel = FALLBACK_LITELLM_MODEL;
93
+ this.refreshHandlersForModel(FALLBACK_LITELLM_MODEL);
94
+ return FALLBACK_LITELLM_MODEL;
95
+ }
96
+ /**
97
+ * Returns a minimal V3-shaped model. Only used by BaseProvider's
98
+ * `generate()` non-streaming path which still relies on the parent's
99
+ * `generateText`. The streaming path bypasses this entirely.
85
100
  */
86
- getAISDKModel() {
87
- return this.model;
101
+ buildDelegatingModel(modelId) {
102
+ const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
103
+ const fetchImpl = createProxyFetch();
104
+ const apiKey = this.config.apiKey;
105
+ const providerName = this.providerName;
106
+ const getTimeoutForOptions = (opts) => this.getTimeout((opts ?? {}));
107
+ const gemini25Skip = isGemini25Model(modelId);
108
+ return {
109
+ specificationVersion: "v3",
110
+ provider: "litellm",
111
+ modelId,
112
+ supportedUrls: {},
113
+ doGenerate: async (options) => {
114
+ const messages = messageBuilderToOpenAI(options.prompt);
115
+ const body = buildBody({
116
+ modelId,
117
+ messages,
118
+ options: {
119
+ maxTokens: gemini25Skip ? undefined : options.maxOutputTokens,
120
+ temperature: options.temperature,
121
+ topP: options.topP,
122
+ presencePenalty: options.presencePenalty,
123
+ frequencyPenalty: options.frequencyPenalty,
124
+ seed: options.seed,
125
+ stopSequences: options.stopSequences,
126
+ },
127
+ tools: v3ToolsToOpenAI(options.tools),
128
+ ...(options.toolChoice
129
+ ? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
130
+ : {}),
131
+ streaming: false,
132
+ ...(options.responseFormat
133
+ ? {
134
+ responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
135
+ }
136
+ : {}),
137
+ });
138
+ const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
139
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
140
+ let res;
141
+ try {
142
+ res = await fetchImpl(url, {
143
+ method: "POST",
144
+ headers: {
145
+ "Content-Type": "application/json",
146
+ Authorization: `Bearer ${apiKey}`,
147
+ },
148
+ body: JSON.stringify(body),
149
+ ...(composedSignal ? { signal: composedSignal } : {}),
150
+ });
151
+ }
152
+ finally {
153
+ timeoutController?.cleanup();
154
+ }
155
+ if (!res.ok) {
156
+ throw await buildAPIError(url, body, res);
157
+ }
158
+ const json = (await res.json());
159
+ const choice = json.choices?.[0];
160
+ const text = (typeof choice?.message?.content === "string"
161
+ ? choice.message.content
162
+ : "") ?? "";
163
+ const content = [];
164
+ if (text.length > 0) {
165
+ content.push({ type: "text", text });
166
+ }
167
+ for (const tc of choice?.message?.tool_calls ?? []) {
168
+ content.push({
169
+ type: "tool-call",
170
+ toolCallId: tc.id,
171
+ toolName: tc.function.name,
172
+ input: tc.function.arguments ?? "",
173
+ });
174
+ }
175
+ const rawFinish = choice?.finish_reason;
176
+ const unified = rawFinish === "length"
177
+ ? "length"
178
+ : rawFinish === "tool_calls" || rawFinish === "function_call"
179
+ ? "tool-calls"
180
+ : rawFinish === "content_filter"
181
+ ? "content-filter"
182
+ : "stop";
183
+ return {
184
+ content,
185
+ finishReason: { unified, raw: rawFinish ?? "stop" },
186
+ usage: {
187
+ inputTokens: {
188
+ total: json.usage?.prompt_tokens,
189
+ noCache: json.usage?.prompt_tokens,
190
+ cacheRead: undefined,
191
+ cacheWrite: undefined,
192
+ },
193
+ outputTokens: {
194
+ total: json.usage?.completion_tokens,
195
+ text: json.usage?.completion_tokens,
196
+ reasoning: undefined,
197
+ },
198
+ },
199
+ warnings: [],
200
+ request: { body },
201
+ response: {
202
+ ...(json.id ? { id: json.id } : {}),
203
+ ...(json.model ? { modelId: json.model } : {}),
204
+ headers: {},
205
+ body: json,
206
+ },
207
+ };
208
+ },
209
+ doStream: () => {
210
+ throw new Error("litellm: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
211
+ },
212
+ };
88
213
  }
89
214
  formatProviderError(error) {
90
215
  if (error instanceof TimeoutError) {
91
216
  return new NetworkError(`Request timed out: ${error.message}`, this.providerName);
92
217
  }
93
- // Check for timeout by error name and message as fallback
94
218
  const errorRecord = error;
95
219
  if (errorRecord?.name === "TimeoutError" ||
96
220
  (typeof errorRecord?.message === "string" &&
@@ -103,10 +227,10 @@ export class LiteLLMProvider extends BaseProvider {
103
227
  return new NetworkError("LiteLLM proxy server not available. Please start the LiteLLM proxy server at " +
104
228
  `${process.env.LITELLM_BASE_URL || "http://localhost:4000"}`, this.providerName);
105
229
  }
106
- // Curator P1-1: detect "team not allowed to access model" responses
107
- // and surface as ModelAccessDeniedError with the allowed_models array
108
- // parsed from the body. Must run before the generic "API key" check
109
- // because LiteLLM phrases this as a 403 distinct from auth.
230
+ // Curator P1-1: detect "team not allowed to access model" responses and
231
+ // surface as ModelAccessDeniedError with the allowed_models array parsed
232
+ // from the body. Must run before the generic "API key" check because
233
+ // LiteLLM phrases this as a 403 distinct from auth.
110
234
  if (isModelAccessDeniedMessage(errorRecord.message)) {
111
235
  return new ModelAccessDeniedError(errorRecord.message, {
112
236
  provider: this.providerName,
@@ -129,448 +253,523 @@ export class LiteLLMProvider extends BaseProvider {
129
253
  }
130
254
  return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
131
255
  }
132
- /**
133
- * LiteLLM supports tools for compatible models
134
- */
135
256
  supportsTools() {
136
257
  return true;
137
258
  }
138
259
  /**
139
- * Provider-specific streaming implementation
140
- * Note: This is only used when tools are disabled
260
+ * Streaming path — drives the LiteLLM proxy directly. No streamText, no
261
+ * AI SDK orchestrator. Tool calls, multi-step loops, telemetry, abort
262
+ * handling all inline. OTel span captures gen_ai.system + cost.
141
263
  */
142
- async executeStream(options, analysisSchema) {
264
+ async executeStream(options, _analysisSchema) {
143
265
  this.validateStreamOptions(options);
144
266
  const startTime = Date.now();
145
- let chunkCount = 0; // Track chunk count for debugging
146
- // Reviewer follow-up: capture upstream provider errors via onError so
147
- // the post-stream NoOutput detect can propagate the *real* cause
148
- // (content_filter, provider crash, etc.) into the sentinel's
149
- // providerError / modelResponseRaw instead of "No output generated".
150
- let capturedProviderError;
151
267
  const timeout = this.getTimeout(options);
152
268
  const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
269
+ const consumerAbortController = new AbortController();
270
+ const abortSignal = mergeAbortSignals([
271
+ options.abortSignal,
272
+ timeoutController?.controller.signal,
273
+ consumerAbortController.signal,
274
+ ]).signal;
275
+ let modelId;
276
+ let toolsRecord;
277
+ let openAITools;
278
+ let openAIToolChoice;
279
+ let conversation;
153
280
  try {
154
- // Build message array from options with multimodal support
155
- // Using protected helper from BaseProvider to eliminate code duplication
156
- const messages = await this.buildMessagesForStream(options);
157
- const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
158
- // Get tools - options.tools is pre-merged by BaseProvider.stream()
281
+ modelId = await this.resolveModelName();
159
282
  const shouldUseTools = !options.disableTools && this.supportsTools();
160
- const tools = shouldUseTools
283
+ toolsRecord = shouldUseTools
161
284
  ? options.tools || (await this.getAllTools())
162
285
  : {};
163
- logger.debug(`LiteLLM: Tools for streaming`, {
164
- shouldUseTools,
165
- toolCount: Object.keys(tools).length,
166
- toolNames: Object.keys(tools),
286
+ openAITools = shouldUseTools
287
+ ? buildToolsForOpenAI(toolsRecord)
288
+ : undefined;
289
+ openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
290
+ const initialMessages = await this.buildMessagesForStream(options);
291
+ conversation = messageBuilderToOpenAI(initialMessages);
292
+ }
293
+ catch (setupErr) {
294
+ timeoutController?.cleanup();
295
+ throw setupErr;
296
+ }
297
+ const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
298
+ const fetchImpl = createProxyFetch();
299
+ const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
300
+ const emitter = this.neurolink?.getEventEmitter();
301
+ const toolsUsed = [];
302
+ const toolExecutionSummaries = [];
303
+ const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
304
+ const { pushChunk, nextChunk } = createChunkQueue();
305
+ // Wrap the stream in an OTel span to capture provider-level latency,
306
+ // token usage, finish reason, and cost. Matches the pre-migration
307
+ // behaviour where streamText was wrapped in `neurolink.provider.streamText`.
308
+ const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
309
+ kind: SpanKind.CLIENT,
310
+ attributes: {
311
+ "gen_ai.system": "litellm",
312
+ "gen_ai.request.model": modelId,
313
+ },
314
+ });
315
+ // Model-specific maxTokens handling — Gemini 2.5 models have known issues
316
+ // with maxTokens being forwarded. Mutate a shallow copy so the original
317
+ // StreamOptions reference downstream (analytics, telemetry) is unchanged.
318
+ const requestOptions = isGemini25Model(modelId)
319
+ ? { ...options, maxTokens: undefined }
320
+ : options;
321
+ if (requestOptions !== options &&
322
+ options.maxTokens &&
323
+ logger.shouldLog("debug")) {
324
+ logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, { modelId, requestedMaxTokens: options.maxTokens });
325
+ }
326
+ const loopPromise = this.runStreamLoop({
327
+ maxSteps,
328
+ modelId,
329
+ url,
330
+ apiKey: this.config.apiKey,
331
+ fetchImpl,
332
+ abortSignal,
333
+ options: requestOptions,
334
+ conversation,
335
+ openAITools,
336
+ openAIToolChoice,
337
+ toolsRecord,
338
+ emitter,
339
+ toolsUsed,
340
+ toolExecutionSummaries,
341
+ pushChunk,
342
+ resolveUsage,
343
+ resolveFinish,
344
+ });
345
+ // Wire the OTel span lifecycle to the deferred analytics promises.
346
+ let capturedProviderError;
347
+ const captureProviderError = (error) => {
348
+ capturedProviderError = error;
349
+ };
350
+ usagePromise
351
+ .then((usage) => {
352
+ streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
353
+ streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
354
+ const cost = calculateCost(this.providerName, this.modelName, {
355
+ input: usage.promptTokens,
356
+ output: usage.completionTokens,
357
+ total: usage.totalTokens,
167
358
  });
168
- // Model-specific maxTokens handling - Gemini 2.5 models have issues with maxTokens
169
- const modelName = this.modelName || getDefaultLiteLLMModel();
170
- const isGemini25Model = modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
171
- const maxTokens = isGemini25Model ? undefined : options.maxTokens;
172
- if (isGemini25Model && options.maxTokens) {
173
- logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, {
174
- modelName,
175
- requestedMaxTokens: options.maxTokens,
359
+ if (cost && cost > 0) {
360
+ streamSpan.setAttribute("neurolink.cost", cost);
361
+ }
362
+ })
363
+ .catch(() => {
364
+ // usage may never resolve if the stream is aborted before completion
365
+ });
366
+ finishPromise
367
+ .then((reason) => {
368
+ streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
369
+ if (reason === "error") {
370
+ streamSpan.setStatus({
371
+ code: SpanStatusCode.ERROR,
372
+ message: capturedProviderError instanceof Error
373
+ ? capturedProviderError.message
374
+ : String(capturedProviderError ?? "stream error"),
176
375
  });
177
376
  }
178
- // Build complete stream options with proper typing - matching Vertex pattern
179
- let streamOptions = {
180
- model: model,
181
- messages: messages,
182
- temperature: options.temperature,
183
- ...(maxTokens && { maxTokens }), // Conditionally include maxTokens
184
- ...(shouldUseTools &&
185
- Object.keys(tools).length > 0 && {
186
- tools,
187
- toolChoice: resolveToolChoice(options, tools, shouldUseTools),
188
- stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
189
- }),
190
- abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
191
- experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
192
- experimental_repairToolCall: this.getToolCallRepairFn(options),
193
- onError: (event) => {
194
- const error = event.error;
195
- const errorMessage = error instanceof Error ? error.message : String(error);
196
- // Reviewer follow-up: propagate the captured error to the
197
- // post-stream NoOutput sentinel so telemetry sees the real
198
- // provider cause instead of "No output generated".
199
- capturedProviderError = error;
200
- logger.error(`LiteLLM: Stream error`, {
201
- provider: this.providerName,
202
- modelName: this.modelName,
203
- error: errorMessage,
204
- chunkCount,
205
- });
206
- },
207
- onFinish: (event) => {
208
- logger.debug(`LiteLLM: Stream finished`, {
209
- finishReason: event.finishReason,
210
- totalChunks: chunkCount,
211
- });
212
- },
213
- onChunk: () => {
214
- chunkCount++;
215
- },
216
- onStepFinish: ({ toolCalls, toolResults }) => {
217
- emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
218
- logger.info("Tool execution completed", { toolResults, toolCalls });
219
- for (const toolCall of toolCalls) {
220
- collectedToolCalls.push({
221
- toolCallId: toolCall.toolCallId,
222
- toolName: toolCall.toolName,
223
- args: toolCall.args ??
224
- toolCall.input ??
225
- toolCall
226
- .parameters ??
227
- {},
228
- });
377
+ streamSpan.end();
378
+ })
379
+ .catch(() => {
380
+ streamSpan.end();
381
+ });
382
+ const transformedStream = async function* () {
383
+ let contentYielded = 0;
384
+ try {
385
+ for (;;) {
386
+ const chunk = await nextChunk();
387
+ if ("done" in chunk) {
388
+ break;
229
389
  }
230
- for (const toolResult of toolResults) {
231
- const rawToolResult = toolResult;
232
- collectedToolResults.push({
233
- toolName: toolResult.toolName,
234
- status: rawToolResult.error ? "failure" : "success",
235
- output: (rawToolResult.output ??
236
- rawToolResult.result) ??
237
- undefined,
238
- error: rawToolResult.error,
239
- id: rawToolResult.toolCallId ?? toolResult.toolName,
240
- });
390
+ if ("content" in chunk &&
391
+ typeof chunk.content === "string" &&
392
+ chunk.content.length > 0) {
393
+ contentYielded++;
241
394
  }
242
- this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
243
- logger.warn("[LiteLLMProvider] Failed to store tool executions", {
244
- provider: this.providerName,
245
- error: error instanceof Error ? error.message : String(error),
246
- });
247
- });
248
- },
249
- };
250
- // Add analysisSchema support if provided
251
- if (analysisSchema) {
252
- try {
253
- streamOptions = {
254
- ...streamOptions,
255
- experimental_output: Output.object({
256
- schema: analysisSchema,
257
- }),
258
- };
395
+ yield chunk;
259
396
  }
260
- catch (error) {
261
- logger.warn("Schema application failed, continuing without schema", {
262
- error: String(error),
397
+ await loopPromise;
398
+ if (contentYielded === 0 && toolsUsed.length === 0) {
399
+ logger.warn("LiteLLM: Stream produced no output — emitting enriched sentinel");
400
+ const fauxNoOutput = new NoOutputGeneratedError({
401
+ message: "Stream produced no output",
263
402
  });
403
+ const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
404
+ stampNoOutputSpan(sentinel);
405
+ yield sentinel;
264
406
  }
265
407
  }
266
- // Wrap streamText in an OTel span to capture provider-level latency, token usage, and cost
267
- const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
268
- kind: SpanKind.CLIENT,
269
- attributes: {
270
- "gen_ai.system": "litellm",
271
- "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
272
- },
273
- });
274
- let result;
275
- const collectedToolCalls = [];
276
- const collectedToolResults = [];
277
- try {
278
- result = streamText(streamOptions);
279
- }
280
408
  catch (streamError) {
281
- streamSpan.setStatus({
282
- code: SpanStatusCode.ERROR,
283
- message: streamError instanceof Error
284
- ? streamError.message
285
- : String(streamError),
286
- });
287
- streamSpan.end();
409
+ if (NoOutputGeneratedError.isInstance(streamError)) {
410
+ const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
411
+ stampNoOutputSpan(sentinel);
412
+ yield sentinel;
413
+ return;
414
+ }
415
+ const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
416
+ stampNoOutputSpan(sentinel);
417
+ yield sentinel;
288
418
  throw streamError;
289
419
  }
290
- // Collect token usage, cost, and finish reason asynchronously when the stream completes,
291
- // then end the span. This avoids blocking the stream consumer.
292
- Promise.resolve(result.usage)
293
- .then((usage) => {
294
- streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
295
- streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
296
- const cost = calculateCost(this.providerName, this.modelName, {
297
- input: usage.inputTokens || 0,
298
- output: usage.outputTokens || 0,
299
- total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
300
- });
301
- if (cost && cost > 0) {
302
- streamSpan.setAttribute("neurolink.cost", cost);
420
+ finally {
421
+ if (!consumerAbortController.signal.aborted) {
422
+ consumerAbortController.abort();
303
423
  }
304
- })
305
- .catch(() => {
306
- // Usage may not be available if the stream is aborted
307
- });
308
- Promise.resolve(result.finishReason)
309
- .then((reason) => {
310
- streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
311
- })
312
- .catch(() => {
313
- // Finish reason may not be available if the stream is aborted
314
- });
315
- Promise.resolve(result.text)
316
- .then(() => {
317
- streamSpan.end();
318
- })
319
- .catch((err) => {
320
- streamSpan.setStatus({
321
- code: SpanStatusCode.ERROR,
322
- message: err instanceof Error ? err.message : String(err),
323
- });
324
- streamSpan.end();
325
- });
326
- timeoutController?.cleanup();
327
- const transformedStream = this.createLiteLLMTransformedStream(result, () => capturedProviderError);
328
- // Create analytics promise that resolves after stream completion
329
- const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, result, Date.now() - startTime, {
424
+ }
425
+ };
426
+ const result = {
427
+ stream: transformedStream(),
428
+ provider: this.providerName,
429
+ model: this.modelName,
430
+ analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, {
431
+ textStream: (async function* () { })(),
432
+ usage: usagePromise,
433
+ finishReason: finishPromise,
434
+ }, Date.now() - startTime, {
330
435
  requestId: options.requestId ??
331
436
  `litellm-stream-${Date.now()}`,
332
437
  streamingMode: true,
438
+ }),
439
+ toolsUsed,
440
+ metadata: {
441
+ startTime,
442
+ streamId: `litellm-${Date.now()}`,
443
+ },
444
+ };
445
+ Object.defineProperty(result, "toolExecutions", {
446
+ enumerable: true,
447
+ configurable: true,
448
+ get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
449
+ toolName: s.toolName,
450
+ input: s.input,
451
+ output: s.output,
452
+ duration: s.endTime.getTime() - s.startTime.getTime(),
453
+ }))),
454
+ });
455
+ loopPromise
456
+ .finally(() => timeoutController?.cleanup())
457
+ .catch((error) => {
458
+ captureProviderError(error);
459
+ });
460
+ return result;
461
+ }
462
+ async runStreamLoop(args) {
463
+ const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
464
+ try {
465
+ let stepFinish = null;
466
+ let stepUsage;
467
+ for (let step = 0; step < maxSteps; step++) {
468
+ const stepResult = await this.streamOneStep({
469
+ modelId,
470
+ url,
471
+ apiKey,
472
+ fetchImpl,
473
+ abortSignal,
474
+ options,
475
+ conversation,
476
+ openAITools,
477
+ openAIToolChoice,
478
+ pushChunk,
479
+ });
480
+ stepFinish = stepResult.finishReason;
481
+ if (stepResult.usage) {
482
+ stepUsage = mergeUsage(stepUsage, stepResult.usage);
483
+ }
484
+ if (stepResult.toolCalls.size === 0) {
485
+ break;
486
+ }
487
+ await this.executeToolBatch({
488
+ stepResult,
489
+ conversation,
490
+ toolsRecord,
491
+ emitter,
492
+ toolsUsed,
493
+ toolExecutionSummaries,
494
+ options,
495
+ });
496
+ }
497
+ resolveUsage({
498
+ promptTokens: stepUsage?.prompt_tokens ?? 0,
499
+ completionTokens: stepUsage?.completion_tokens ?? 0,
500
+ totalTokens: stepUsage?.total_tokens ?? 0,
333
501
  });
502
+ resolveFinish(stepFinish ?? "stop");
503
+ pushChunk({ done: true });
334
504
  return {
335
- stream: transformedStream,
336
- provider: this.providerName,
337
- model: this.modelName,
338
- ...(shouldUseTools && {
339
- toolCalls: collectedToolCalls,
340
- toolResults: collectedToolResults,
341
- }),
342
- analytics: analyticsPromise,
343
- metadata: {
344
- startTime,
345
- streamId: `litellm-${Date.now()}`,
346
- },
505
+ finishReason: stepFinish ?? "stop",
506
+ usage: stepUsage,
347
507
  };
348
508
  }
349
- catch (error) {
350
- timeoutController?.cleanup();
351
- throw this.handleProviderError(error);
509
+ catch (err) {
510
+ logger.error("LiteLLM: Stream error", {
511
+ error: err instanceof Error ? err.message : String(err),
512
+ });
513
+ resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
514
+ resolveFinish("error");
515
+ pushChunk({ done: true });
516
+ throw err;
352
517
  }
353
518
  }
354
- async *createLiteLLMTransformedStream(result, getCapturedProviderError) {
355
- // Reviewer follow-up: gate the post-stream NoOutput detect on
356
- // *content yielded*, not raw chunk count. AI SDK fullStream emits
357
- // control events ({ type: "start" }, "step-start", etc.) before any
358
- // text-delta — those incremented chunkCount and made the post-stream
359
- // detect dead even when zero text was produced.
360
- let contentYielded = 0;
361
- try {
362
- const streamToUse = result.fullStream || result.textStream;
363
- for await (const chunk of streamToUse) {
364
- if (chunk && typeof chunk === "object") {
365
- if ("type" in chunk && chunk.type === "error") {
366
- const errorChunk = chunk;
367
- logger.error(`LiteLLM: Error chunk received:`, {
368
- errorType: errorChunk.type,
369
- errorDetails: errorChunk.error,
370
- });
371
- throw this.formatProviderError(new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`));
372
- }
373
- if ("textDelta" in chunk) {
374
- const textDelta = chunk.textDelta;
375
- if (textDelta) {
376
- contentYielded++;
377
- yield { content: textDelta };
378
- }
379
- }
380
- else if ("type" in chunk &&
381
- chunk.type === "tool-call" &&
382
- "toolCallId" in chunk) {
383
- logger.debug("LiteLLM: Tool call", {
384
- toolCallId: String(chunk.toolCallId),
385
- toolName: "toolName" in chunk ? String(chunk.toolName) : "unknown",
386
- });
387
- }
519
+ async streamOneStep(args) {
520
+ const body = buildBody({
521
+ modelId: args.modelId,
522
+ messages: args.conversation,
523
+ options: args.options,
524
+ tools: args.openAITools,
525
+ ...(args.openAIToolChoice !== undefined
526
+ ? { toolChoice: args.openAIToolChoice }
527
+ : {}),
528
+ streaming: true,
529
+ });
530
+ const res = await args.fetchImpl(args.url, {
531
+ method: "POST",
532
+ headers: {
533
+ "Content-Type": "application/json",
534
+ Authorization: `Bearer ${args.apiKey}`,
535
+ },
536
+ body: JSON.stringify(body),
537
+ ...(args.abortSignal ? { signal: args.abortSignal } : {}),
538
+ });
539
+ if (!res.ok) {
540
+ throw await buildAPIError(args.url, body, res);
541
+ }
542
+ if (!res.body) {
543
+ throw new Error("litellm: stream response had no body");
544
+ }
545
+ return parseSSEStream(res.body, (delta) => {
546
+ args.pushChunk({ content: delta });
547
+ });
548
+ }
549
+ async executeToolBatch(args) {
550
+ const { stepResult, conversation, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, options, } = args;
551
+ const toolCallsForMessage = [];
552
+ for (const [, t] of stepResult.toolCalls) {
553
+ toolCallsForMessage.push({
554
+ id: t.id,
555
+ type: "function",
556
+ function: { name: t.name, arguments: t.argsBuffered },
557
+ });
558
+ }
559
+ conversation.push({
560
+ role: "assistant",
561
+ content: stepResult.text.length > 0 ? stepResult.text : null,
562
+ tool_calls: toolCallsForMessage,
563
+ });
564
+ for (const [, t] of stepResult.toolCalls) {
565
+ const startedAt = new Date();
566
+ let input;
567
+ try {
568
+ input = JSON.parse(t.argsBuffered || "{}");
569
+ }
570
+ catch {
571
+ input = t.argsBuffered;
572
+ }
573
+ let output;
574
+ let errorMsg;
575
+ const toolDef = toolsRecord[t.name];
576
+ emitter?.emit("tool:start", {
577
+ toolName: t.name,
578
+ toolCallId: t.id,
579
+ input,
580
+ });
581
+ if (!toolDef || typeof toolDef.execute !== "function") {
582
+ errorMsg = `Tool '${t.name}' is not registered.`;
583
+ output = { error: errorMsg };
584
+ }
585
+ else {
586
+ try {
587
+ output = await toolDef.execute(input, {});
388
588
  }
389
- else if (typeof chunk === "string") {
390
- contentYielded++;
391
- yield { content: chunk };
589
+ catch (err) {
590
+ errorMsg = err instanceof Error ? err.message : String(err);
591
+ output = { error: errorMsg };
392
592
  }
393
593
  }
594
+ const endedAt = new Date();
595
+ toolsUsed.push(t.name);
596
+ toolExecutionSummaries.push({
597
+ toolCallId: t.id,
598
+ toolName: t.name,
599
+ input,
600
+ output,
601
+ ...(errorMsg ? { error: errorMsg } : {}),
602
+ startTime: startedAt,
603
+ endTime: endedAt,
604
+ });
605
+ conversation.push({
606
+ role: "tool",
607
+ tool_call_id: t.id,
608
+ content: stringifyToolOutput(output),
609
+ });
394
610
  }
395
- catch (streamError) {
396
- if (NoOutputGeneratedError.isInstance(streamError)) {
397
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
398
- // Yield the enriched sentinel so downstream telemetry has
399
- // finishReason / usage / providerError. Match the other
400
- // providers' pattern: yield + return (no throw). NeuroLink's
401
- // iteration fallback at neurolink.ts only fires for
402
- // looksLikeModelAccessDenied errors, so a NoOutput throw here
403
- // would NOT trigger any fallback — and it would mask the
404
- // already-yielded sentinel from consumers expecting a clean
405
- // stream. The sentinel itself signals the no-output condition.
406
- const sentinel = await buildNoOutputSentinel(streamError, result, getCapturedProviderError?.());
407
- stampNoOutputSpan(sentinel);
408
- yield sentinel;
409
- return;
410
- }
411
- throw streamError;
611
+ const justExecuted = toolExecutionSummaries.slice(-stepResult.toolCalls.size);
612
+ emitToolEndFromStepFinish(emitter, justExecuted.map((s) => ({
613
+ toolName: s.toolName,
614
+ output: s.output,
615
+ ...(s.error ? { error: s.error } : {}),
616
+ })));
617
+ try {
618
+ await this.handleToolExecutionStorage(justExecuted.map((s) => ({
619
+ toolCallId: s.toolCallId,
620
+ toolName: s.toolName,
621
+ input: s.input,
622
+ output: s.output,
623
+ })), justExecuted.map((s) => ({
624
+ toolCallId: s.toolCallId,
625
+ toolName: s.toolName,
626
+ output: s.output,
627
+ })), options, new Date());
412
628
  }
413
- // Curator P3-6 (round-2 fix): production trigger sets the error on
414
- // result.finishReason rejection (NOT thrown from textStream).
415
- // Surface that path here, matching the catch above (yield + return).
416
- if (contentYielded === 0) {
417
- const detected = await detectPostStreamNoOutput(result, getCapturedProviderError?.());
418
- if (detected) {
419
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
420
- stampNoOutputSpan(detected.sentinel);
421
- yield detected.sentinel;
422
- }
629
+ catch (err) {
630
+ logger.warn("[LiteLLMProvider] Failed to store tool executions", {
631
+ provider: this.providerName,
632
+ error: err instanceof Error ? err.message : String(err),
633
+ });
423
634
  }
424
635
  }
425
636
  /**
426
- * Generate an embedding for a single text input
427
- * Uses the LiteLLM proxy with OpenAI-compatible embedding API
637
+ * Generate an embedding for a single text input via native /v1/embeddings.
428
638
  */
429
639
  async embed(text, modelName) {
430
- const { embed: aiEmbed } = await import("../utils/generation.js");
431
- const { createOpenAI } = await import("@ai-sdk/openai");
432
- const config = getLiteLLMConfig();
433
640
  const embeddingModelName = modelName ||
434
641
  process.env.LITELLM_EMBEDDING_MODEL ||
435
642
  "gemini-embedding-001";
436
- const customOpenAI = createOpenAI({
437
- baseURL: this.credentials?.baseURL ?? config.baseURL,
438
- apiKey: this.credentials?.apiKey ?? config.apiKey,
439
- fetch: createProxyFetch(),
440
- });
441
- const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
442
- // Wrap in withTimeout so stalled upstream embedding requests abort instead
443
- // of hanging forever. 30s matches the default for embedding endpoints
444
- // across the OpenAI-compatible cluster.
445
- const result = await withTimeout(aiEmbed({ model: embeddingModel, value: text }), 30_000, "litellm", "generate");
446
- return result.embedding;
643
+ const [embedding] = await this.callEmbeddings(embeddingModelName, [text], "embed");
644
+ return embedding;
447
645
  }
448
646
  /**
449
- * Generate embeddings for multiple text inputs
450
- * Uses the LiteLLM proxy with OpenAI-compatible embedding API
647
+ * Generate embeddings for multiple text inputs via native /v1/embeddings.
451
648
  */
452
649
  async embedMany(texts, modelName) {
453
- const { embedMany: aiEmbedMany } = await import("../utils/generation.js");
454
- const { createOpenAI } = await import("@ai-sdk/openai");
455
- const config = getLiteLLMConfig();
456
650
  const embeddingModelName = modelName ||
457
651
  process.env.LITELLM_EMBEDDING_MODEL ||
458
652
  "gemini-embedding-001";
459
- const customOpenAI = createOpenAI({
460
- baseURL: this.credentials?.baseURL ?? config.baseURL,
461
- apiKey: this.credentials?.apiKey ?? config.apiKey,
462
- fetch: createProxyFetch(),
463
- });
464
- const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
465
- // Wrap in withTimeout so a single slow batch doesn't hang indefinitely.
466
- const result = await withTimeout(aiEmbedMany({ model: embeddingModel, values: texts }), 30_000, "litellm", "generate");
467
- return result.embeddings;
653
+ return this.callEmbeddings(embeddingModelName, texts, "embedMany");
654
+ }
655
+ async callEmbeddings(modelName, input, operation) {
656
+ const url = `${stripTrailingSlash(this.config.baseURL)}/embeddings`;
657
+ const fetchImpl = createProxyFetch();
658
+ const timeoutController = createTimeoutController(30_000, this.providerName, "generate");
659
+ try {
660
+ const res = await fetchImpl(url, {
661
+ method: "POST",
662
+ headers: {
663
+ "Content-Type": "application/json",
664
+ Authorization: `Bearer ${this.config.apiKey}`,
665
+ },
666
+ body: JSON.stringify({
667
+ model: modelName,
668
+ input: input.length === 1 ? input[0] : input,
669
+ }),
670
+ ...(timeoutController?.controller.signal
671
+ ? { signal: timeoutController.controller.signal }
672
+ : {}),
673
+ });
674
+ if (!res.ok) {
675
+ const bodyText = await res.text().catch(() => "");
676
+ const parsed = bodyText
677
+ ? JSON.parse(bodyText)
678
+ : undefined;
679
+ throw this.formatProviderError(new Error(parsed?.error?.message ||
680
+ `LiteLLM ${operation} failed with status ${res.status}`));
681
+ }
682
+ const json = (await res.json());
683
+ const embeddings = (json.data ?? [])
684
+ .map((row) => row.embedding)
685
+ .filter((e) => Array.isArray(e));
686
+ if (embeddings.length === 0) {
687
+ throw new ProviderError(`LiteLLM ${operation} returned no embeddings`, this.providerName);
688
+ }
689
+ return embeddings;
690
+ }
691
+ finally {
692
+ timeoutController?.cleanup();
693
+ }
468
694
  }
469
695
  /**
470
- * Get available models from LiteLLM proxy server
471
- * Dynamically fetches from /v1/models endpoint with caching and fallback
696
+ * Get available models from LiteLLM proxy `/v1/models` endpoint.
697
+ * Caches results for 10 minutes; falls back to env-driven list or a
698
+ * minimal safe default if the API fetch fails.
472
699
  */
473
700
  async getAvailableModels() {
474
- const functionTag = "LiteLLMProvider.getAvailableModels";
475
701
  const now = Date.now();
476
- // Check if cached models are still valid
477
702
  if (LiteLLMProvider.modelsCache.length > 0 &&
478
703
  now - LiteLLMProvider.modelsCacheTime <
479
704
  LiteLLMProvider.MODELS_CACHE_DURATION) {
480
- logger.debug(`[${functionTag}] Using cached models`, {
705
+ logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
481
706
  cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
482
707
  modelCount: LiteLLMProvider.modelsCache.length,
483
708
  });
484
709
  return LiteLLMProvider.modelsCache;
485
710
  }
486
- // Try to fetch models dynamically
487
711
  try {
488
712
  const dynamicModels = await this.fetchModelsFromAPI();
489
713
  if (dynamicModels.length > 0) {
490
- // Cache successful result
491
714
  LiteLLMProvider.modelsCache = dynamicModels;
492
715
  LiteLLMProvider.modelsCacheTime = now;
493
- logger.debug(`[${functionTag}] Successfully fetched models from API`, {
494
- modelCount: dynamicModels.length,
495
- });
496
716
  return dynamicModels;
497
717
  }
498
718
  }
499
719
  catch (error) {
500
- logger.warn(`[${functionTag}] Failed to fetch models from API, using fallback`, {
501
- error: error instanceof Error ? error.message : String(error),
502
- });
720
+ logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
503
721
  }
504
- // Fallback to hardcoded list if API fetch fails
505
- const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",")
722
+ return this.getFallbackModels();
723
+ }
724
+ async getFirstAvailableModel() {
725
+ const models = await this.getAvailableModels();
726
+ return models[0] || FALLBACK_LITELLM_MODEL;
727
+ }
728
+ getFallbackModels() {
729
+ return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
506
730
  .map((m) => m.trim())
507
731
  .filter((m) => m.length > 0) || [
508
- "openai/gpt-4o", // minimal safe baseline
732
+ "openai/gpt-4o",
509
733
  "anthropic/claude-3-haiku",
510
734
  "meta-llama/llama-3.1-8b-instruct",
511
735
  "google/gemini-2.5-flash",
512
- ];
513
- logger.debug(`[${functionTag}] Using fallback model list`, {
514
- modelCount: fallbackModels.length,
515
- });
516
- return fallbackModels;
736
+ ]);
517
737
  }
518
- /**
519
- * Fetch available models from LiteLLM proxy /v1/models endpoint
520
- * @private
521
- */
522
738
  async fetchModelsFromAPI() {
523
- const functionTag = "LiteLLMProvider.fetchModelsFromAPI";
524
- const config = getLiteLLMConfig();
525
- const resolvedBaseURL = this.credentials?.baseURL ?? config.baseURL;
526
- const resolvedApiKey = this.credentials?.apiKey ?? config.apiKey;
527
- const modelsUrl = `${resolvedBaseURL}/v1/models`;
739
+ const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
740
+ const proxyFetch = createProxyFetch();
528
741
  const controller = new AbortController();
529
- const timeoutId = setTimeout(() => controller.abort(), 5000); // 5 second timeout
742
+ const timeoutId = setTimeout(() => controller.abort(), 5000);
530
743
  try {
531
- logger.debug(`[${functionTag}] Fetching models from ${modelsUrl}`);
532
- const proxyFetch = createProxyFetch();
533
744
  const response = await proxyFetch(modelsUrl, {
534
745
  method: "GET",
535
746
  headers: {
536
- Authorization: `Bearer ${resolvedApiKey}`,
747
+ Authorization: `Bearer ${this.config.apiKey}`,
537
748
  "Content-Type": "application/json",
538
749
  },
539
750
  signal: controller.signal,
540
751
  });
541
- clearTimeout(timeoutId);
542
752
  if (!response.ok) {
543
753
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
544
754
  }
545
- const data = await response.json();
546
- // Parse OpenAI-compatible models response
547
- if (data && Array.isArray(data.data)) {
548
- const models = data.data
549
- .map((model) => typeof model === "object" &&
550
- model !== null &&
551
- "id" in model &&
552
- typeof model.id === "string"
553
- ? model.id
554
- : undefined)
555
- .filter((id) => typeof id === "string" && id.length > 0)
556
- .sort();
557
- logger.debug(`[${functionTag}] Successfully parsed models`, {
558
- totalModels: models.length,
559
- sampleModels: models.slice(0, 5),
560
- });
561
- return models;
562
- }
563
- else {
755
+ const data = (await response.json());
756
+ if (!Array.isArray(data.data)) {
564
757
  throw new Error("Invalid response format: expected data.data array");
565
758
  }
759
+ return data.data
760
+ .map((m) => m.id)
761
+ .filter((id) => typeof id === "string" && id.length > 0)
762
+ .sort();
566
763
  }
567
764
  catch (error) {
568
- clearTimeout(timeoutId);
569
765
  if (isAbortError(error)) {
570
766
  throw new NetworkError("Request timed out after 5 seconds", this.providerName);
571
767
  }
572
768
  throw error;
573
769
  }
770
+ finally {
771
+ clearTimeout(timeoutId);
772
+ }
574
773
  }
575
774
  }
576
775
  //# sourceMappingURL=litellm.js.map