@juspay/neurolink 9.67.2 → 9.67.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,14 @@
1
1
  import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
2
- import { BaseProvider } from "../core/baseProvider.js";
3
- import { DEFAULT_MAX_STEPS } from "../core/constants.js";
4
- import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
5
2
  import { createProxyFetch } from "../proxy/proxyFetch.js";
6
3
  import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
7
4
  import { isAbortError } from "../utils/errorHandling.js";
8
- import { NoOutputGeneratedError } from "../utils/generationErrors.js";
9
5
  import { logger } from "../utils/logger.js";
10
- import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
6
+ import { isGemini25Model as isCanonicalGemini25Model } from "../utils/modelDetection.js";
11
7
  import { calculateCost } from "../utils/pricing.js";
12
8
  import { getProviderModel } from "../utils/providerConfig.js";
13
- import { composeAbortSignals, createTimeoutController, mergeAbortSignals, TimeoutError, } from "../utils/timeout.js";
14
- import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
15
- import { resolveToolChoice } from "../utils/toolChoice.js";
16
- import { transformToolExecutions } from "../utils/transformationUtils.js";
17
- import { buildAPIError, buildBody, buildToolsForOpenAI, createChunkQueue, createDeferredAnalytics, mapNeuroLinkToolChoice, mergeUsage, messageBuilderToOpenAI, parseSSEStream, stringifyToolOutput, stripTrailingSlash, v3ResponseFormatToOpenAI, v3ToolChoiceToOpenAI, v3ToolsToOpenAI, } from "./openaiChatCompletionsClient.js";
9
+ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
10
+ import { stripTrailingSlash } from "./openaiChatCompletionsClient.js";
11
+ import { OpenAIChatCompletionsProvider } from "./openaiChatCompletionsBase.js";
18
12
  const streamTracer = trace.getTracer("neurolink.provider.litellm");
19
13
  const FALLBACK_LITELLM_MODEL = "openai/gpt-4o-mini";
20
14
  const getLiteLLMConfig = () => ({
@@ -25,37 +19,37 @@ const getLiteLLMConfig = () => ({
25
19
  * LiteLLM uses a 'provider/model' format. Override via LITELLM_MODEL env var.
26
20
  */
27
21
  const getDefaultLiteLLMModel = () => getProviderModel("LITELLM_MODEL", FALLBACK_LITELLM_MODEL);
28
- const isGemini25Model = (modelName) => modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
29
- // =============================================================================
30
- // Direct HTTP client for LiteLLM proxy.
31
- //
32
- // LiteLLM exposes the OpenAI chat-completions wire format, so all the
33
- // wire-level converters and the SSE parser are shared with the
34
- // openai-compatible provider via ./openaiChatCompletionsClient.ts. This
35
- // file owns LiteLLM-specific behaviour: OTel span wrap with cost, model
36
- // allowlist 403 → ModelAccessDeniedError, Gemini 2.5 maxTokens skip,
37
- // model caching, and native /v1/embeddings.
38
- // =============================================================================
22
+ // LiteLLM model ids come in `provider/model` form (e.g. "google/gemini-2.5-flash").
23
+ // Strip the provider prefix and delegate to the canonical anchored-regex
24
+ // check in src/lib/utils/modelDetection.ts so the truth lives in one place.
25
+ const isGemini25Model = (modelName) => {
26
+ const lastSegment = modelName.includes("/")
27
+ ? modelName.slice(modelName.lastIndexOf("/") + 1)
28
+ : modelName;
29
+ return isCanonicalGemini25Model(lastSegment);
30
+ };
39
31
  /**
40
32
  * LiteLLM Provider — direct HTTP, no AI SDK. Talks to a LiteLLM proxy
41
33
  * server (or any deployment that speaks OpenAI chat-completions + the
42
34
  * `/v1/models` and `/v1/embeddings` endpoints).
35
+ *
36
+ * All request/stream/tool-loop orchestration lives in
37
+ * `OpenAIChatCompletionsProvider`. This class adds LiteLLM-specific
38
+ * behaviour: OTel span wrap with cost (`onStreamStart`), Gemini 2.5
39
+ * maxTokens skip (`adjustBuildBodyOptions`), ModelAccessDeniedError on
40
+ * 403, 10-minute model cache (`getAvailableModels`), `LITELLM_FALLBACK_MODELS`
41
+ * env-driven fallback list, and native `/v1/embeddings`.
43
42
  */
44
- export class LiteLLMProvider extends BaseProvider {
45
- config;
46
- credentials;
47
- resolvedModel;
43
+ export class LiteLLMProvider extends OpenAIChatCompletionsProvider {
48
44
  static modelsCache = [];
49
45
  static modelsCacheTime = 0;
50
46
  static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
51
47
  constructor(modelName, sdk, _region, credentials) {
52
- super(modelName, "litellm", sdk);
53
- this.credentials = credentials;
54
48
  const envConfig = getLiteLLMConfig();
55
- this.config = {
49
+ super("litellm", modelName, sdk, {
56
50
  baseURL: credentials?.baseURL ?? envConfig.baseURL,
57
51
  apiKey: credentials?.apiKey ?? envConfig.apiKey,
58
- };
52
+ });
59
53
  logger.debug("LiteLLM Provider initialized", {
60
54
  modelName: this.modelName,
61
55
  provider: this.providerName,
@@ -68,146 +62,77 @@ export class LiteLLMProvider extends BaseProvider {
68
62
  getDefaultModel() {
69
63
  return getDefaultLiteLLMModel();
70
64
  }
65
+ getFallbackModelName() {
66
+ return FALLBACK_LITELLM_MODEL;
67
+ }
68
+ getFallbackModels() {
69
+ return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
70
+ .map((m) => m.trim())
71
+ .filter((m) => m.length > 0) || [
72
+ "openai/gpt-4o",
73
+ "anthropic/claude-3-haiku",
74
+ "meta-llama/llama-3.1-8b-instruct",
75
+ "google/gemini-2.5-flash",
76
+ ]);
77
+ }
71
78
  /**
72
- * Abstract from BaseProvider used by the parent's generate() path which
73
- * still goes through `generateText`. Returns a thin LanguageModelV3-shaped
74
- * object that delegates to the same HTTP helpers used by executeStream.
79
+ * Gemini 2.5 models on LiteLLM have a known compatibility issue with
80
+ * `max_tokens` — strip it before the wire body is built. Applies to
81
+ * both streaming and non-streaming paths.
75
82
  */
76
- async getAISDKModel() {
77
- const modelId = await this.resolveModelName();
78
- return this.buildDelegatingModel(modelId);
79
- }
80
- async resolveModelName() {
81
- if (this.resolvedModel) {
82
- return this.resolvedModel;
83
- }
84
- const explicit = this.modelName || getDefaultLiteLLMModel();
85
- if (explicit && explicit.trim() !== "") {
86
- this.resolvedModel = explicit;
87
- if (this.modelName !== explicit) {
88
- this.refreshHandlersForModel(explicit);
83
+ adjustBuildBodyOptions(modelId, opts) {
84
+ if (isGemini25Model(modelId) && opts.maxTokens !== undefined) {
85
+ if (logger.shouldLog("debug")) {
86
+ logger.debug("LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)", { modelId, requestedMaxTokens: opts.maxTokens });
89
87
  }
90
- return explicit;
88
+ return { ...opts, maxTokens: undefined };
91
89
  }
92
- this.resolvedModel = FALLBACK_LITELLM_MODEL;
93
- this.refreshHandlersForModel(FALLBACK_LITELLM_MODEL);
94
- return FALLBACK_LITELLM_MODEL;
90
+ return opts;
95
91
  }
96
92
  /**
97
- * Returns a minimal V3-shaped model. Only used by BaseProvider's
98
- * `generate()` non-streaming path which still relies on the parent's
99
- * `generateText`. The streaming path bypasses this entirely.
93
+ * Wrap the stream in an OTel span to capture provider-level latency,
94
+ * token usage, finish reason, and cost. Matches the pre-migration
95
+ * behaviour where streamText was wrapped in `neurolink.provider.streamText`.
100
96
  */
101
- buildDelegatingModel(modelId) {
102
- const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
103
- const fetchImpl = createProxyFetch();
104
- const apiKey = this.config.apiKey;
105
- const providerName = this.providerName;
106
- const getTimeoutForOptions = (opts) => this.getTimeout((opts ?? {}));
107
- const gemini25Skip = isGemini25Model(modelId);
97
+ onStreamStart(modelId) {
98
+ const span = streamTracer.startSpan("neurolink.provider.streamText", {
99
+ kind: SpanKind.CLIENT,
100
+ attributes: {
101
+ "gen_ai.system": "litellm",
102
+ "gen_ai.request.model": modelId,
103
+ },
104
+ });
105
+ let spanEnded = false;
106
+ const endSpan = () => {
107
+ if (!spanEnded) {
108
+ spanEnded = true;
109
+ span.end();
110
+ }
111
+ };
108
112
  return {
109
- specificationVersion: "v3",
110
- provider: "litellm",
111
- modelId,
112
- supportedUrls: {},
113
- doGenerate: async (options) => {
114
- const messages = messageBuilderToOpenAI(options.prompt);
115
- const body = buildBody({
116
- modelId,
117
- messages,
118
- options: {
119
- maxTokens: gemini25Skip ? undefined : options.maxOutputTokens,
120
- temperature: options.temperature,
121
- topP: options.topP,
122
- presencePenalty: options.presencePenalty,
123
- frequencyPenalty: options.frequencyPenalty,
124
- seed: options.seed,
125
- stopSequences: options.stopSequences,
126
- },
127
- tools: v3ToolsToOpenAI(options.tools),
128
- ...(options.toolChoice
129
- ? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
130
- : {}),
131
- streaming: false,
132
- ...(options.responseFormat
133
- ? {
134
- responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
135
- }
136
- : {}),
113
+ onUsage: (usage) => {
114
+ span.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
115
+ span.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
116
+ const cost = calculateCost(this.providerName, this.modelName, {
117
+ input: usage.promptTokens,
118
+ output: usage.completionTokens,
119
+ total: usage.totalTokens,
137
120
  });
138
- const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
139
- const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
140
- let res;
141
- try {
142
- res = await fetchImpl(url, {
143
- method: "POST",
144
- headers: {
145
- "Content-Type": "application/json",
146
- Authorization: `Bearer ${apiKey}`,
147
- },
148
- body: JSON.stringify(body),
149
- ...(composedSignal ? { signal: composedSignal } : {}),
150
- });
151
- }
152
- finally {
153
- timeoutController?.cleanup();
121
+ if (cost && cost > 0) {
122
+ span.setAttribute("neurolink.cost", cost);
154
123
  }
155
- if (!res.ok) {
156
- throw await buildAPIError(url, body, res);
157
- }
158
- const json = (await res.json());
159
- const choice = json.choices?.[0];
160
- const text = (typeof choice?.message?.content === "string"
161
- ? choice.message.content
162
- : "") ?? "";
163
- const content = [];
164
- if (text.length > 0) {
165
- content.push({ type: "text", text });
166
- }
167
- for (const tc of choice?.message?.tool_calls ?? []) {
168
- content.push({
169
- type: "tool-call",
170
- toolCallId: tc.id,
171
- toolName: tc.function.name,
172
- input: tc.function.arguments ?? "",
124
+ },
125
+ onFinish: (reason, capturedError) => {
126
+ span.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
127
+ if (reason === "error") {
128
+ span.setStatus({
129
+ code: SpanStatusCode.ERROR,
130
+ message: capturedError instanceof Error
131
+ ? capturedError.message
132
+ : String(capturedError ?? "stream error"),
173
133
  });
174
134
  }
175
- const rawFinish = choice?.finish_reason;
176
- const unified = rawFinish === "length"
177
- ? "length"
178
- : rawFinish === "tool_calls" || rawFinish === "function_call"
179
- ? "tool-calls"
180
- : rawFinish === "content_filter"
181
- ? "content-filter"
182
- : "stop";
183
- return {
184
- content,
185
- finishReason: { unified, raw: rawFinish ?? "stop" },
186
- usage: {
187
- inputTokens: {
188
- total: json.usage?.prompt_tokens,
189
- noCache: json.usage?.prompt_tokens,
190
- cacheRead: undefined,
191
- cacheWrite: undefined,
192
- },
193
- outputTokens: {
194
- total: json.usage?.completion_tokens,
195
- text: json.usage?.completion_tokens,
196
- reasoning: undefined,
197
- },
198
- },
199
- warnings: [],
200
- request: { body },
201
- response: {
202
- ...(json.id ? { id: json.id } : {}),
203
- ...(json.model ? { modelId: json.model } : {}),
204
- headers: {},
205
- body: json,
206
- },
207
- };
208
- },
209
- doStream: () => {
210
- throw new Error("litellm: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
135
+ endSpan();
211
136
  },
212
137
  };
213
138
  }
@@ -253,384 +178,69 @@ export class LiteLLMProvider extends BaseProvider {
253
178
  }
254
179
  return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
255
180
  }
256
- supportsTools() {
257
- return true;
258
- }
259
181
  /**
260
- * Streaming path drives the LiteLLM proxy directly. No streamText, no
261
- * AI SDK orchestrator. Tool calls, multi-step loops, telemetry, abort
262
- * handling all inline. OTel span captures gen_ai.system + cost.
182
+ * Get available models from LiteLLM proxy `/v1/models` endpoint.
183
+ * Caches results for 10 minutes; falls back to env-driven list or a
184
+ * minimal safe default if the API fetch fails.
263
185
  */
264
- async executeStream(options, _analysisSchema) {
265
- this.validateStreamOptions(options);
266
- const startTime = Date.now();
267
- const timeout = this.getTimeout(options);
268
- const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
269
- const consumerAbortController = new AbortController();
270
- const abortSignal = mergeAbortSignals([
271
- options.abortSignal,
272
- timeoutController?.controller.signal,
273
- consumerAbortController.signal,
274
- ]).signal;
275
- let modelId;
276
- let toolsRecord;
277
- let openAITools;
278
- let openAIToolChoice;
279
- let conversation;
280
- try {
281
- modelId = await this.resolveModelName();
282
- const shouldUseTools = !options.disableTools && this.supportsTools();
283
- toolsRecord = shouldUseTools
284
- ? options.tools || (await this.getAllTools())
285
- : {};
286
- openAITools = shouldUseTools
287
- ? buildToolsForOpenAI(toolsRecord)
288
- : undefined;
289
- openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
290
- const initialMessages = await this.buildMessagesForStream(options);
291
- conversation = messageBuilderToOpenAI(initialMessages);
292
- }
293
- catch (setupErr) {
294
- timeoutController?.cleanup();
295
- throw setupErr;
296
- }
297
- const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
298
- const fetchImpl = createProxyFetch();
299
- const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
300
- const emitter = this.neurolink?.getEventEmitter();
301
- const toolsUsed = [];
302
- const toolExecutionSummaries = [];
303
- const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
304
- const { pushChunk, nextChunk } = createChunkQueue();
305
- // Wrap the stream in an OTel span to capture provider-level latency,
306
- // token usage, finish reason, and cost. Matches the pre-migration
307
- // behaviour where streamText was wrapped in `neurolink.provider.streamText`.
308
- const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
309
- kind: SpanKind.CLIENT,
310
- attributes: {
311
- "gen_ai.system": "litellm",
312
- "gen_ai.request.model": modelId,
313
- },
314
- });
315
- // Model-specific maxTokens handling — Gemini 2.5 models have known issues
316
- // with maxTokens being forwarded. Mutate a shallow copy so the original
317
- // StreamOptions reference downstream (analytics, telemetry) is unchanged.
318
- const requestOptions = isGemini25Model(modelId)
319
- ? { ...options, maxTokens: undefined }
320
- : options;
321
- if (requestOptions !== options &&
322
- options.maxTokens &&
323
- logger.shouldLog("debug")) {
324
- logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, { modelId, requestedMaxTokens: options.maxTokens });
325
- }
326
- const loopPromise = this.runStreamLoop({
327
- maxSteps,
328
- modelId,
329
- url,
330
- apiKey: this.config.apiKey,
331
- fetchImpl,
332
- abortSignal,
333
- options: requestOptions,
334
- conversation,
335
- openAITools,
336
- openAIToolChoice,
337
- toolsRecord,
338
- emitter,
339
- toolsUsed,
340
- toolExecutionSummaries,
341
- pushChunk,
342
- resolveUsage,
343
- resolveFinish,
344
- });
345
- // Wire the OTel span lifecycle to the deferred analytics promises.
346
- let capturedProviderError;
347
- const captureProviderError = (error) => {
348
- capturedProviderError = error;
349
- };
350
- usagePromise
351
- .then((usage) => {
352
- streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
353
- streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
354
- const cost = calculateCost(this.providerName, this.modelName, {
355
- input: usage.promptTokens,
356
- output: usage.completionTokens,
357
- total: usage.totalTokens,
186
+ async getAvailableModels() {
187
+ const now = Date.now();
188
+ if (LiteLLMProvider.modelsCache.length > 0 &&
189
+ now - LiteLLMProvider.modelsCacheTime <
190
+ LiteLLMProvider.MODELS_CACHE_DURATION) {
191
+ logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
192
+ cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
193
+ modelCount: LiteLLMProvider.modelsCache.length,
358
194
  });
359
- if (cost && cost > 0) {
360
- streamSpan.setAttribute("neurolink.cost", cost);
361
- }
362
- })
363
- .catch(() => {
364
- // usage may never resolve if the stream is aborted before completion
365
- });
366
- finishPromise
367
- .then((reason) => {
368
- streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
369
- if (reason === "error") {
370
- streamSpan.setStatus({
371
- code: SpanStatusCode.ERROR,
372
- message: capturedProviderError instanceof Error
373
- ? capturedProviderError.message
374
- : String(capturedProviderError ?? "stream error"),
375
- });
376
- }
377
- streamSpan.end();
378
- })
379
- .catch(() => {
380
- streamSpan.end();
381
- });
382
- const transformedStream = async function* () {
383
- let contentYielded = 0;
384
- try {
385
- for (;;) {
386
- const chunk = await nextChunk();
387
- if ("done" in chunk) {
388
- break;
389
- }
390
- if ("content" in chunk &&
391
- typeof chunk.content === "string" &&
392
- chunk.content.length > 0) {
393
- contentYielded++;
394
- }
395
- yield chunk;
396
- }
397
- await loopPromise;
398
- if (contentYielded === 0 && toolsUsed.length === 0) {
399
- logger.warn("LiteLLM: Stream produced no output — emitting enriched sentinel");
400
- const fauxNoOutput = new NoOutputGeneratedError({
401
- message: "Stream produced no output",
402
- });
403
- const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
404
- stampNoOutputSpan(sentinel);
405
- yield sentinel;
406
- }
407
- }
408
- catch (streamError) {
409
- if (NoOutputGeneratedError.isInstance(streamError)) {
410
- const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
411
- stampNoOutputSpan(sentinel);
412
- yield sentinel;
413
- return;
414
- }
415
- const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
416
- stampNoOutputSpan(sentinel);
417
- yield sentinel;
418
- throw streamError;
419
- }
420
- finally {
421
- if (!consumerAbortController.signal.aborted) {
422
- consumerAbortController.abort();
423
- }
424
- }
425
- };
426
- const result = {
427
- stream: transformedStream(),
428
- provider: this.providerName,
429
- model: this.modelName,
430
- analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, {
431
- textStream: (async function* () { })(),
432
- usage: usagePromise,
433
- finishReason: finishPromise,
434
- }, Date.now() - startTime, {
435
- requestId: options.requestId ??
436
- `litellm-stream-${Date.now()}`,
437
- streamingMode: true,
438
- }),
439
- toolsUsed,
440
- metadata: {
441
- startTime,
442
- streamId: `litellm-${Date.now()}`,
443
- },
444
- };
445
- Object.defineProperty(result, "toolExecutions", {
446
- enumerable: true,
447
- configurable: true,
448
- get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
449
- toolName: s.toolName,
450
- input: s.input,
451
- output: s.output,
452
- duration: s.endTime.getTime() - s.startTime.getTime(),
453
- }))),
454
- });
455
- loopPromise
456
- .finally(() => timeoutController?.cleanup())
457
- .catch((error) => {
458
- captureProviderError(error);
459
- });
460
- return result;
461
- }
462
- async runStreamLoop(args) {
463
- const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
195
+ return LiteLLMProvider.modelsCache;
196
+ }
464
197
  try {
465
- let stepFinish = null;
466
- let stepUsage;
467
- for (let step = 0; step < maxSteps; step++) {
468
- const stepResult = await this.streamOneStep({
469
- modelId,
470
- url,
471
- apiKey,
472
- fetchImpl,
473
- abortSignal,
474
- options,
475
- conversation,
476
- openAITools,
477
- openAIToolChoice,
478
- pushChunk,
479
- });
480
- stepFinish = stepResult.finishReason;
481
- if (stepResult.usage) {
482
- stepUsage = mergeUsage(stepUsage, stepResult.usage);
483
- }
484
- if (stepResult.toolCalls.size === 0) {
485
- break;
486
- }
487
- await this.executeToolBatch({
488
- stepResult,
489
- conversation,
490
- toolsRecord,
491
- emitter,
492
- toolsUsed,
493
- toolExecutionSummaries,
494
- options,
495
- });
198
+ const dynamicModels = await this.fetchModelsFromAPI();
199
+ if (dynamicModels.length > 0) {
200
+ LiteLLMProvider.modelsCache = dynamicModels;
201
+ LiteLLMProvider.modelsCacheTime = now;
202
+ return dynamicModels;
496
203
  }
497
- resolveUsage({
498
- promptTokens: stepUsage?.prompt_tokens ?? 0,
499
- completionTokens: stepUsage?.completion_tokens ?? 0,
500
- totalTokens: stepUsage?.total_tokens ?? 0,
501
- });
502
- resolveFinish(stepFinish ?? "stop");
503
- pushChunk({ done: true });
504
- return {
505
- finishReason: stepFinish ?? "stop",
506
- usage: stepUsage,
507
- };
508
- }
509
- catch (err) {
510
- logger.error("LiteLLM: Stream error", {
511
- error: err instanceof Error ? err.message : String(err),
512
- });
513
- resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
514
- resolveFinish("error");
515
- pushChunk({ done: true });
516
- throw err;
517
204
  }
518
- }
519
- async streamOneStep(args) {
520
- const body = buildBody({
521
- modelId: args.modelId,
522
- messages: args.conversation,
523
- options: args.options,
524
- tools: args.openAITools,
525
- ...(args.openAIToolChoice !== undefined
526
- ? { toolChoice: args.openAIToolChoice }
527
- : {}),
528
- streaming: true,
529
- });
530
- const res = await args.fetchImpl(args.url, {
531
- method: "POST",
532
- headers: {
533
- "Content-Type": "application/json",
534
- Authorization: `Bearer ${args.apiKey}`,
535
- },
536
- body: JSON.stringify(body),
537
- ...(args.abortSignal ? { signal: args.abortSignal } : {}),
538
- });
539
- if (!res.ok) {
540
- throw await buildAPIError(args.url, body, res);
541
- }
542
- if (!res.body) {
543
- throw new Error("litellm: stream response had no body");
205
+ catch (error) {
206
+ logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
544
207
  }
545
- return parseSSEStream(res.body, (delta) => {
546
- args.pushChunk({ content: delta });
547
- });
208
+ return this.getFallbackModels();
548
209
  }
549
- async executeToolBatch(args) {
550
- const { stepResult, conversation, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, options, } = args;
551
- const toolCallsForMessage = [];
552
- for (const [, t] of stepResult.toolCalls) {
553
- toolCallsForMessage.push({
554
- id: t.id,
555
- type: "function",
556
- function: { name: t.name, arguments: t.argsBuffered },
557
- });
558
- }
559
- conversation.push({
560
- role: "assistant",
561
- content: stepResult.text.length > 0 ? stepResult.text : null,
562
- tool_calls: toolCallsForMessage,
563
- });
564
- for (const [, t] of stepResult.toolCalls) {
565
- const startedAt = new Date();
566
- let input;
567
- try {
568
- input = JSON.parse(t.argsBuffered || "{}");
569
- }
570
- catch {
571
- input = t.argsBuffered;
572
- }
573
- let output;
574
- let errorMsg;
575
- const toolDef = toolsRecord[t.name];
576
- emitter?.emit("tool:start", {
577
- toolName: t.name,
578
- toolCallId: t.id,
579
- input,
210
+ async fetchModelsFromAPI() {
211
+ const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
212
+ const proxyFetch = createProxyFetch();
213
+ const controller = new AbortController();
214
+ const timeoutId = setTimeout(() => controller.abort(), 5000);
215
+ try {
216
+ const response = await proxyFetch(modelsUrl, {
217
+ method: "GET",
218
+ headers: {
219
+ Authorization: `Bearer ${this.config.apiKey}`,
220
+ "Content-Type": "application/json",
221
+ },
222
+ signal: controller.signal,
580
223
  });
581
- if (!toolDef || typeof toolDef.execute !== "function") {
582
- errorMsg = `Tool '${t.name}' is not registered.`;
583
- output = { error: errorMsg };
224
+ if (!response.ok) {
225
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
584
226
  }
585
- else {
586
- try {
587
- output = await toolDef.execute(input, {});
588
- }
589
- catch (err) {
590
- errorMsg = err instanceof Error ? err.message : String(err);
591
- output = { error: errorMsg };
592
- }
227
+ const data = (await response.json());
228
+ if (!Array.isArray(data.data)) {
229
+ throw new Error("Invalid response format: expected data.data array");
593
230
  }
594
- const endedAt = new Date();
595
- toolsUsed.push(t.name);
596
- toolExecutionSummaries.push({
597
- toolCallId: t.id,
598
- toolName: t.name,
599
- input,
600
- output,
601
- ...(errorMsg ? { error: errorMsg } : {}),
602
- startTime: startedAt,
603
- endTime: endedAt,
604
- });
605
- conversation.push({
606
- role: "tool",
607
- tool_call_id: t.id,
608
- content: stringifyToolOutput(output),
609
- });
231
+ return data.data
232
+ .map((m) => m.id)
233
+ .filter((id) => typeof id === "string" && id.length > 0)
234
+ .sort();
610
235
  }
611
- const justExecuted = toolExecutionSummaries.slice(-stepResult.toolCalls.size);
612
- emitToolEndFromStepFinish(emitter, justExecuted.map((s) => ({
613
- toolName: s.toolName,
614
- output: s.output,
615
- ...(s.error ? { error: s.error } : {}),
616
- })));
617
- try {
618
- await this.handleToolExecutionStorage(justExecuted.map((s) => ({
619
- toolCallId: s.toolCallId,
620
- toolName: s.toolName,
621
- input: s.input,
622
- output: s.output,
623
- })), justExecuted.map((s) => ({
624
- toolCallId: s.toolCallId,
625
- toolName: s.toolName,
626
- output: s.output,
627
- })), options, new Date());
236
+ catch (error) {
237
+ if (isAbortError(error)) {
238
+ throw new NetworkError("Request timed out after 5 seconds", this.providerName);
239
+ }
240
+ throw error;
628
241
  }
629
- catch (err) {
630
- logger.warn("[LiteLLMProvider] Failed to store tool executions", {
631
- provider: this.providerName,
632
- error: err instanceof Error ? err.message : String(err),
633
- });
242
+ finally {
243
+ clearTimeout(timeoutId);
634
244
  }
635
245
  }
636
246
  /**
@@ -692,83 +302,4 @@ export class LiteLLMProvider extends BaseProvider {
692
302
  timeoutController?.cleanup();
693
303
  }
694
304
  }
695
- /**
696
- * Get available models from LiteLLM proxy `/v1/models` endpoint.
697
- * Caches results for 10 minutes; falls back to env-driven list or a
698
- * minimal safe default if the API fetch fails.
699
- */
700
- async getAvailableModels() {
701
- const now = Date.now();
702
- if (LiteLLMProvider.modelsCache.length > 0 &&
703
- now - LiteLLMProvider.modelsCacheTime <
704
- LiteLLMProvider.MODELS_CACHE_DURATION) {
705
- logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
706
- cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
707
- modelCount: LiteLLMProvider.modelsCache.length,
708
- });
709
- return LiteLLMProvider.modelsCache;
710
- }
711
- try {
712
- const dynamicModels = await this.fetchModelsFromAPI();
713
- if (dynamicModels.length > 0) {
714
- LiteLLMProvider.modelsCache = dynamicModels;
715
- LiteLLMProvider.modelsCacheTime = now;
716
- return dynamicModels;
717
- }
718
- }
719
- catch (error) {
720
- logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
721
- }
722
- return this.getFallbackModels();
723
- }
724
- async getFirstAvailableModel() {
725
- const models = await this.getAvailableModels();
726
- return models[0] || FALLBACK_LITELLM_MODEL;
727
- }
728
- getFallbackModels() {
729
- return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
730
- .map((m) => m.trim())
731
- .filter((m) => m.length > 0) || [
732
- "openai/gpt-4o",
733
- "anthropic/claude-3-haiku",
734
- "meta-llama/llama-3.1-8b-instruct",
735
- "google/gemini-2.5-flash",
736
- ]);
737
- }
738
- async fetchModelsFromAPI() {
739
- const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
740
- const proxyFetch = createProxyFetch();
741
- const controller = new AbortController();
742
- const timeoutId = setTimeout(() => controller.abort(), 5000);
743
- try {
744
- const response = await proxyFetch(modelsUrl, {
745
- method: "GET",
746
- headers: {
747
- Authorization: `Bearer ${this.config.apiKey}`,
748
- "Content-Type": "application/json",
749
- },
750
- signal: controller.signal,
751
- });
752
- if (!response.ok) {
753
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
754
- }
755
- const data = (await response.json());
756
- if (!Array.isArray(data.data)) {
757
- throw new Error("Invalid response format: expected data.data array");
758
- }
759
- return data.data
760
- .map((m) => m.id)
761
- .filter((id) => typeof id === "string" && id.length > 0)
762
- .sort();
763
- }
764
- catch (error) {
765
- if (isAbortError(error)) {
766
- throw new NetworkError("Request timed out after 5 seconds", this.providerName);
767
- }
768
- throw error;
769
- }
770
- finally {
771
- clearTimeout(timeoutId);
772
- }
773
- }
774
305
  }