@juspay/neurolink 9.67.2 → 9.67.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/dist/browser/neurolink.min.js +326 -326
- package/dist/lib/providers/litellm.d.ts +25 -32
- package/dist/lib/providers/litellm.js +132 -601
- package/dist/lib/providers/openaiChatCompletionsBase.d.ts +93 -0
- package/dist/lib/providers/openaiChatCompletionsBase.js +644 -0
- package/dist/lib/providers/openaiCompatible.d.ts +7 -63
- package/dist/lib/providers/openaiCompatible.js +27 -658
- package/dist/lib/types/openaiCompatible.d.ts +20 -0
- package/dist/providers/litellm.d.ts +25 -32
- package/dist/providers/litellm.js +132 -601
- package/dist/providers/openaiChatCompletionsBase.d.ts +93 -0
- package/dist/providers/openaiChatCompletionsBase.js +643 -0
- package/dist/providers/openaiCompatible.d.ts +7 -63
- package/dist/providers/openaiCompatible.js +27 -658
- package/dist/types/openaiCompatible.d.ts +20 -0
- package/package.json +1 -1
|
@@ -1,20 +1,14 @@
|
|
|
1
1
|
import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
|
|
2
|
-
import { BaseProvider } from "../core/baseProvider.js";
|
|
3
|
-
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
|
|
4
|
-
import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
|
|
5
2
|
import { createProxyFetch } from "../proxy/proxyFetch.js";
|
|
6
3
|
import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
|
|
7
4
|
import { isAbortError } from "../utils/errorHandling.js";
|
|
8
|
-
import { NoOutputGeneratedError } from "../utils/generationErrors.js";
|
|
9
5
|
import { logger } from "../utils/logger.js";
|
|
10
|
-
import {
|
|
6
|
+
import { isGemini25Model as isCanonicalGemini25Model } from "../utils/modelDetection.js";
|
|
11
7
|
import { calculateCost } from "../utils/pricing.js";
|
|
12
8
|
import { getProviderModel } from "../utils/providerConfig.js";
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
16
|
-
import { transformToolExecutions } from "../utils/transformationUtils.js";
|
|
17
|
-
import { buildAPIError, buildBody, buildToolsForOpenAI, createChunkQueue, createDeferredAnalytics, mapNeuroLinkToolChoice, mergeUsage, messageBuilderToOpenAI, parseSSEStream, stringifyToolOutput, stripTrailingSlash, v3ResponseFormatToOpenAI, v3ToolChoiceToOpenAI, v3ToolsToOpenAI, } from "./openaiChatCompletionsClient.js";
|
|
9
|
+
import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
|
|
10
|
+
import { stripTrailingSlash } from "./openaiChatCompletionsClient.js";
|
|
11
|
+
import { OpenAIChatCompletionsProvider } from "./openaiChatCompletionsBase.js";
|
|
18
12
|
const streamTracer = trace.getTracer("neurolink.provider.litellm");
|
|
19
13
|
const FALLBACK_LITELLM_MODEL = "openai/gpt-4o-mini";
|
|
20
14
|
const getLiteLLMConfig = () => ({
|
|
@@ -25,37 +19,37 @@ const getLiteLLMConfig = () => ({
|
|
|
25
19
|
* LiteLLM uses a 'provider/model' format. Override via LITELLM_MODEL env var.
|
|
26
20
|
*/
|
|
27
21
|
const getDefaultLiteLLMModel = () => getProviderModel("LITELLM_MODEL", FALLBACK_LITELLM_MODEL);
|
|
28
|
-
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
// model caching, and native /v1/embeddings.
|
|
38
|
-
// =============================================================================
|
|
22
|
+
// LiteLLM model ids come in `provider/model` form (e.g. "google/gemini-2.5-flash").
|
|
23
|
+
// Strip the provider prefix and delegate to the canonical anchored-regex
|
|
24
|
+
// check in src/lib/utils/modelDetection.ts so the truth lives in one place.
|
|
25
|
+
const isGemini25Model = (modelName) => {
|
|
26
|
+
const lastSegment = modelName.includes("/")
|
|
27
|
+
? modelName.slice(modelName.lastIndexOf("/") + 1)
|
|
28
|
+
: modelName;
|
|
29
|
+
return isCanonicalGemini25Model(lastSegment);
|
|
30
|
+
};
|
|
39
31
|
/**
|
|
40
32
|
* LiteLLM Provider — direct HTTP, no AI SDK. Talks to a LiteLLM proxy
|
|
41
33
|
* server (or any deployment that speaks OpenAI chat-completions + the
|
|
42
34
|
* `/v1/models` and `/v1/embeddings` endpoints).
|
|
35
|
+
*
|
|
36
|
+
* All request/stream/tool-loop orchestration lives in
|
|
37
|
+
* `OpenAIChatCompletionsProvider`. This class adds LiteLLM-specific
|
|
38
|
+
* behaviour: OTel span wrap with cost (`onStreamStart`), Gemini 2.5
|
|
39
|
+
* maxTokens skip (`adjustBuildBodyOptions`), ModelAccessDeniedError on
|
|
40
|
+
* 403, 10-minute model cache (`getAvailableModels`), `LITELLM_FALLBACK_MODELS`
|
|
41
|
+
* env-driven fallback list, and native `/v1/embeddings`.
|
|
43
42
|
*/
|
|
44
|
-
export class LiteLLMProvider extends
|
|
45
|
-
config;
|
|
46
|
-
credentials;
|
|
47
|
-
resolvedModel;
|
|
43
|
+
export class LiteLLMProvider extends OpenAIChatCompletionsProvider {
|
|
48
44
|
static modelsCache = [];
|
|
49
45
|
static modelsCacheTime = 0;
|
|
50
46
|
static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
|
|
51
47
|
constructor(modelName, sdk, _region, credentials) {
|
|
52
|
-
super(modelName, "litellm", sdk);
|
|
53
|
-
this.credentials = credentials;
|
|
54
48
|
const envConfig = getLiteLLMConfig();
|
|
55
|
-
|
|
49
|
+
super("litellm", modelName, sdk, {
|
|
56
50
|
baseURL: credentials?.baseURL ?? envConfig.baseURL,
|
|
57
51
|
apiKey: credentials?.apiKey ?? envConfig.apiKey,
|
|
58
|
-
};
|
|
52
|
+
});
|
|
59
53
|
logger.debug("LiteLLM Provider initialized", {
|
|
60
54
|
modelName: this.modelName,
|
|
61
55
|
provider: this.providerName,
|
|
@@ -68,146 +62,77 @@ export class LiteLLMProvider extends BaseProvider {
|
|
|
68
62
|
getDefaultModel() {
|
|
69
63
|
return getDefaultLiteLLMModel();
|
|
70
64
|
}
|
|
65
|
+
getFallbackModelName() {
|
|
66
|
+
return FALLBACK_LITELLM_MODEL;
|
|
67
|
+
}
|
|
68
|
+
getFallbackModels() {
|
|
69
|
+
return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
|
|
70
|
+
.map((m) => m.trim())
|
|
71
|
+
.filter((m) => m.length > 0) || [
|
|
72
|
+
"openai/gpt-4o",
|
|
73
|
+
"anthropic/claude-3-haiku",
|
|
74
|
+
"meta-llama/llama-3.1-8b-instruct",
|
|
75
|
+
"google/gemini-2.5-flash",
|
|
76
|
+
]);
|
|
77
|
+
}
|
|
71
78
|
/**
|
|
72
|
-
*
|
|
73
|
-
*
|
|
74
|
-
*
|
|
79
|
+
* Gemini 2.5 models on LiteLLM have a known compatibility issue with
|
|
80
|
+
* `max_tokens` — strip it before the wire body is built. Applies to
|
|
81
|
+
* both streaming and non-streaming paths.
|
|
75
82
|
*/
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
async resolveModelName() {
|
|
81
|
-
if (this.resolvedModel) {
|
|
82
|
-
return this.resolvedModel;
|
|
83
|
-
}
|
|
84
|
-
const explicit = this.modelName || getDefaultLiteLLMModel();
|
|
85
|
-
if (explicit && explicit.trim() !== "") {
|
|
86
|
-
this.resolvedModel = explicit;
|
|
87
|
-
if (this.modelName !== explicit) {
|
|
88
|
-
this.refreshHandlersForModel(explicit);
|
|
83
|
+
adjustBuildBodyOptions(modelId, opts) {
|
|
84
|
+
if (isGemini25Model(modelId) && opts.maxTokens !== undefined) {
|
|
85
|
+
if (logger.shouldLog("debug")) {
|
|
86
|
+
logger.debug("LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)", { modelId, requestedMaxTokens: opts.maxTokens });
|
|
89
87
|
}
|
|
90
|
-
return
|
|
88
|
+
return { ...opts, maxTokens: undefined };
|
|
91
89
|
}
|
|
92
|
-
|
|
93
|
-
this.refreshHandlersForModel(FALLBACK_LITELLM_MODEL);
|
|
94
|
-
return FALLBACK_LITELLM_MODEL;
|
|
90
|
+
return opts;
|
|
95
91
|
}
|
|
96
92
|
/**
|
|
97
|
-
*
|
|
98
|
-
*
|
|
99
|
-
*
|
|
93
|
+
* Wrap the stream in an OTel span to capture provider-level latency,
|
|
94
|
+
* token usage, finish reason, and cost. Matches the pre-migration
|
|
95
|
+
* behaviour where streamText was wrapped in `neurolink.provider.streamText`.
|
|
100
96
|
*/
|
|
101
|
-
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
97
|
+
onStreamStart(modelId) {
|
|
98
|
+
const span = streamTracer.startSpan("neurolink.provider.streamText", {
|
|
99
|
+
kind: SpanKind.CLIENT,
|
|
100
|
+
attributes: {
|
|
101
|
+
"gen_ai.system": "litellm",
|
|
102
|
+
"gen_ai.request.model": modelId,
|
|
103
|
+
},
|
|
104
|
+
});
|
|
105
|
+
let spanEnded = false;
|
|
106
|
+
const endSpan = () => {
|
|
107
|
+
if (!spanEnded) {
|
|
108
|
+
spanEnded = true;
|
|
109
|
+
span.end();
|
|
110
|
+
}
|
|
111
|
+
};
|
|
108
112
|
return {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
modelId,
|
|
117
|
-
messages,
|
|
118
|
-
options: {
|
|
119
|
-
maxTokens: gemini25Skip ? undefined : options.maxOutputTokens,
|
|
120
|
-
temperature: options.temperature,
|
|
121
|
-
topP: options.topP,
|
|
122
|
-
presencePenalty: options.presencePenalty,
|
|
123
|
-
frequencyPenalty: options.frequencyPenalty,
|
|
124
|
-
seed: options.seed,
|
|
125
|
-
stopSequences: options.stopSequences,
|
|
126
|
-
},
|
|
127
|
-
tools: v3ToolsToOpenAI(options.tools),
|
|
128
|
-
...(options.toolChoice
|
|
129
|
-
? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
|
|
130
|
-
: {}),
|
|
131
|
-
streaming: false,
|
|
132
|
-
...(options.responseFormat
|
|
133
|
-
? {
|
|
134
|
-
responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
|
|
135
|
-
}
|
|
136
|
-
: {}),
|
|
113
|
+
onUsage: (usage) => {
|
|
114
|
+
span.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
|
|
115
|
+
span.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
|
|
116
|
+
const cost = calculateCost(this.providerName, this.modelName, {
|
|
117
|
+
input: usage.promptTokens,
|
|
118
|
+
output: usage.completionTokens,
|
|
119
|
+
total: usage.totalTokens,
|
|
137
120
|
});
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
let res;
|
|
141
|
-
try {
|
|
142
|
-
res = await fetchImpl(url, {
|
|
143
|
-
method: "POST",
|
|
144
|
-
headers: {
|
|
145
|
-
"Content-Type": "application/json",
|
|
146
|
-
Authorization: `Bearer ${apiKey}`,
|
|
147
|
-
},
|
|
148
|
-
body: JSON.stringify(body),
|
|
149
|
-
...(composedSignal ? { signal: composedSignal } : {}),
|
|
150
|
-
});
|
|
151
|
-
}
|
|
152
|
-
finally {
|
|
153
|
-
timeoutController?.cleanup();
|
|
121
|
+
if (cost && cost > 0) {
|
|
122
|
+
span.setAttribute("neurolink.cost", cost);
|
|
154
123
|
}
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
if (text.length > 0) {
|
|
165
|
-
content.push({ type: "text", text });
|
|
166
|
-
}
|
|
167
|
-
for (const tc of choice?.message?.tool_calls ?? []) {
|
|
168
|
-
content.push({
|
|
169
|
-
type: "tool-call",
|
|
170
|
-
toolCallId: tc.id,
|
|
171
|
-
toolName: tc.function.name,
|
|
172
|
-
input: tc.function.arguments ?? "",
|
|
124
|
+
},
|
|
125
|
+
onFinish: (reason, capturedError) => {
|
|
126
|
+
span.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
|
|
127
|
+
if (reason === "error") {
|
|
128
|
+
span.setStatus({
|
|
129
|
+
code: SpanStatusCode.ERROR,
|
|
130
|
+
message: capturedError instanceof Error
|
|
131
|
+
? capturedError.message
|
|
132
|
+
: String(capturedError ?? "stream error"),
|
|
173
133
|
});
|
|
174
134
|
}
|
|
175
|
-
|
|
176
|
-
const unified = rawFinish === "length"
|
|
177
|
-
? "length"
|
|
178
|
-
: rawFinish === "tool_calls" || rawFinish === "function_call"
|
|
179
|
-
? "tool-calls"
|
|
180
|
-
: rawFinish === "content_filter"
|
|
181
|
-
? "content-filter"
|
|
182
|
-
: "stop";
|
|
183
|
-
return {
|
|
184
|
-
content,
|
|
185
|
-
finishReason: { unified, raw: rawFinish ?? "stop" },
|
|
186
|
-
usage: {
|
|
187
|
-
inputTokens: {
|
|
188
|
-
total: json.usage?.prompt_tokens,
|
|
189
|
-
noCache: json.usage?.prompt_tokens,
|
|
190
|
-
cacheRead: undefined,
|
|
191
|
-
cacheWrite: undefined,
|
|
192
|
-
},
|
|
193
|
-
outputTokens: {
|
|
194
|
-
total: json.usage?.completion_tokens,
|
|
195
|
-
text: json.usage?.completion_tokens,
|
|
196
|
-
reasoning: undefined,
|
|
197
|
-
},
|
|
198
|
-
},
|
|
199
|
-
warnings: [],
|
|
200
|
-
request: { body },
|
|
201
|
-
response: {
|
|
202
|
-
...(json.id ? { id: json.id } : {}),
|
|
203
|
-
...(json.model ? { modelId: json.model } : {}),
|
|
204
|
-
headers: {},
|
|
205
|
-
body: json,
|
|
206
|
-
},
|
|
207
|
-
};
|
|
208
|
-
},
|
|
209
|
-
doStream: () => {
|
|
210
|
-
throw new Error("litellm: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
|
|
135
|
+
endSpan();
|
|
211
136
|
},
|
|
212
137
|
};
|
|
213
138
|
}
|
|
@@ -253,384 +178,69 @@ export class LiteLLMProvider extends BaseProvider {
|
|
|
253
178
|
}
|
|
254
179
|
return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
|
|
255
180
|
}
|
|
256
|
-
supportsTools() {
|
|
257
|
-
return true;
|
|
258
|
-
}
|
|
259
181
|
/**
|
|
260
|
-
*
|
|
261
|
-
*
|
|
262
|
-
*
|
|
182
|
+
* Get available models from LiteLLM proxy `/v1/models` endpoint.
|
|
183
|
+
* Caches results for 10 minutes; falls back to env-driven list or a
|
|
184
|
+
* minimal safe default if the API fetch fails.
|
|
263
185
|
*/
|
|
264
|
-
async
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
timeoutController?.controller.signal,
|
|
273
|
-
consumerAbortController.signal,
|
|
274
|
-
]).signal;
|
|
275
|
-
let modelId;
|
|
276
|
-
let toolsRecord;
|
|
277
|
-
let openAITools;
|
|
278
|
-
let openAIToolChoice;
|
|
279
|
-
let conversation;
|
|
280
|
-
try {
|
|
281
|
-
modelId = await this.resolveModelName();
|
|
282
|
-
const shouldUseTools = !options.disableTools && this.supportsTools();
|
|
283
|
-
toolsRecord = shouldUseTools
|
|
284
|
-
? options.tools || (await this.getAllTools())
|
|
285
|
-
: {};
|
|
286
|
-
openAITools = shouldUseTools
|
|
287
|
-
? buildToolsForOpenAI(toolsRecord)
|
|
288
|
-
: undefined;
|
|
289
|
-
openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
|
|
290
|
-
const initialMessages = await this.buildMessagesForStream(options);
|
|
291
|
-
conversation = messageBuilderToOpenAI(initialMessages);
|
|
292
|
-
}
|
|
293
|
-
catch (setupErr) {
|
|
294
|
-
timeoutController?.cleanup();
|
|
295
|
-
throw setupErr;
|
|
296
|
-
}
|
|
297
|
-
const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
|
|
298
|
-
const fetchImpl = createProxyFetch();
|
|
299
|
-
const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
|
|
300
|
-
const emitter = this.neurolink?.getEventEmitter();
|
|
301
|
-
const toolsUsed = [];
|
|
302
|
-
const toolExecutionSummaries = [];
|
|
303
|
-
const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
|
|
304
|
-
const { pushChunk, nextChunk } = createChunkQueue();
|
|
305
|
-
// Wrap the stream in an OTel span to capture provider-level latency,
|
|
306
|
-
// token usage, finish reason, and cost. Matches the pre-migration
|
|
307
|
-
// behaviour where streamText was wrapped in `neurolink.provider.streamText`.
|
|
308
|
-
const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
|
|
309
|
-
kind: SpanKind.CLIENT,
|
|
310
|
-
attributes: {
|
|
311
|
-
"gen_ai.system": "litellm",
|
|
312
|
-
"gen_ai.request.model": modelId,
|
|
313
|
-
},
|
|
314
|
-
});
|
|
315
|
-
// Model-specific maxTokens handling — Gemini 2.5 models have known issues
|
|
316
|
-
// with maxTokens being forwarded. Mutate a shallow copy so the original
|
|
317
|
-
// StreamOptions reference downstream (analytics, telemetry) is unchanged.
|
|
318
|
-
const requestOptions = isGemini25Model(modelId)
|
|
319
|
-
? { ...options, maxTokens: undefined }
|
|
320
|
-
: options;
|
|
321
|
-
if (requestOptions !== options &&
|
|
322
|
-
options.maxTokens &&
|
|
323
|
-
logger.shouldLog("debug")) {
|
|
324
|
-
logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, { modelId, requestedMaxTokens: options.maxTokens });
|
|
325
|
-
}
|
|
326
|
-
const loopPromise = this.runStreamLoop({
|
|
327
|
-
maxSteps,
|
|
328
|
-
modelId,
|
|
329
|
-
url,
|
|
330
|
-
apiKey: this.config.apiKey,
|
|
331
|
-
fetchImpl,
|
|
332
|
-
abortSignal,
|
|
333
|
-
options: requestOptions,
|
|
334
|
-
conversation,
|
|
335
|
-
openAITools,
|
|
336
|
-
openAIToolChoice,
|
|
337
|
-
toolsRecord,
|
|
338
|
-
emitter,
|
|
339
|
-
toolsUsed,
|
|
340
|
-
toolExecutionSummaries,
|
|
341
|
-
pushChunk,
|
|
342
|
-
resolveUsage,
|
|
343
|
-
resolveFinish,
|
|
344
|
-
});
|
|
345
|
-
// Wire the OTel span lifecycle to the deferred analytics promises.
|
|
346
|
-
let capturedProviderError;
|
|
347
|
-
const captureProviderError = (error) => {
|
|
348
|
-
capturedProviderError = error;
|
|
349
|
-
};
|
|
350
|
-
usagePromise
|
|
351
|
-
.then((usage) => {
|
|
352
|
-
streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
|
|
353
|
-
streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
|
|
354
|
-
const cost = calculateCost(this.providerName, this.modelName, {
|
|
355
|
-
input: usage.promptTokens,
|
|
356
|
-
output: usage.completionTokens,
|
|
357
|
-
total: usage.totalTokens,
|
|
186
|
+
async getAvailableModels() {
|
|
187
|
+
const now = Date.now();
|
|
188
|
+
if (LiteLLMProvider.modelsCache.length > 0 &&
|
|
189
|
+
now - LiteLLMProvider.modelsCacheTime <
|
|
190
|
+
LiteLLMProvider.MODELS_CACHE_DURATION) {
|
|
191
|
+
logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
|
|
192
|
+
cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
|
|
193
|
+
modelCount: LiteLLMProvider.modelsCache.length,
|
|
358
194
|
});
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
}
|
|
362
|
-
})
|
|
363
|
-
.catch(() => {
|
|
364
|
-
// usage may never resolve if the stream is aborted before completion
|
|
365
|
-
});
|
|
366
|
-
finishPromise
|
|
367
|
-
.then((reason) => {
|
|
368
|
-
streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
|
|
369
|
-
if (reason === "error") {
|
|
370
|
-
streamSpan.setStatus({
|
|
371
|
-
code: SpanStatusCode.ERROR,
|
|
372
|
-
message: capturedProviderError instanceof Error
|
|
373
|
-
? capturedProviderError.message
|
|
374
|
-
: String(capturedProviderError ?? "stream error"),
|
|
375
|
-
});
|
|
376
|
-
}
|
|
377
|
-
streamSpan.end();
|
|
378
|
-
})
|
|
379
|
-
.catch(() => {
|
|
380
|
-
streamSpan.end();
|
|
381
|
-
});
|
|
382
|
-
const transformedStream = async function* () {
|
|
383
|
-
let contentYielded = 0;
|
|
384
|
-
try {
|
|
385
|
-
for (;;) {
|
|
386
|
-
const chunk = await nextChunk();
|
|
387
|
-
if ("done" in chunk) {
|
|
388
|
-
break;
|
|
389
|
-
}
|
|
390
|
-
if ("content" in chunk &&
|
|
391
|
-
typeof chunk.content === "string" &&
|
|
392
|
-
chunk.content.length > 0) {
|
|
393
|
-
contentYielded++;
|
|
394
|
-
}
|
|
395
|
-
yield chunk;
|
|
396
|
-
}
|
|
397
|
-
await loopPromise;
|
|
398
|
-
if (contentYielded === 0 && toolsUsed.length === 0) {
|
|
399
|
-
logger.warn("LiteLLM: Stream produced no output — emitting enriched sentinel");
|
|
400
|
-
const fauxNoOutput = new NoOutputGeneratedError({
|
|
401
|
-
message: "Stream produced no output",
|
|
402
|
-
});
|
|
403
|
-
const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
|
|
404
|
-
stampNoOutputSpan(sentinel);
|
|
405
|
-
yield sentinel;
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
catch (streamError) {
|
|
409
|
-
if (NoOutputGeneratedError.isInstance(streamError)) {
|
|
410
|
-
const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
|
|
411
|
-
stampNoOutputSpan(sentinel);
|
|
412
|
-
yield sentinel;
|
|
413
|
-
return;
|
|
414
|
-
}
|
|
415
|
-
const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
|
|
416
|
-
stampNoOutputSpan(sentinel);
|
|
417
|
-
yield sentinel;
|
|
418
|
-
throw streamError;
|
|
419
|
-
}
|
|
420
|
-
finally {
|
|
421
|
-
if (!consumerAbortController.signal.aborted) {
|
|
422
|
-
consumerAbortController.abort();
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
};
|
|
426
|
-
const result = {
|
|
427
|
-
stream: transformedStream(),
|
|
428
|
-
provider: this.providerName,
|
|
429
|
-
model: this.modelName,
|
|
430
|
-
analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, {
|
|
431
|
-
textStream: (async function* () { })(),
|
|
432
|
-
usage: usagePromise,
|
|
433
|
-
finishReason: finishPromise,
|
|
434
|
-
}, Date.now() - startTime, {
|
|
435
|
-
requestId: options.requestId ??
|
|
436
|
-
`litellm-stream-${Date.now()}`,
|
|
437
|
-
streamingMode: true,
|
|
438
|
-
}),
|
|
439
|
-
toolsUsed,
|
|
440
|
-
metadata: {
|
|
441
|
-
startTime,
|
|
442
|
-
streamId: `litellm-${Date.now()}`,
|
|
443
|
-
},
|
|
444
|
-
};
|
|
445
|
-
Object.defineProperty(result, "toolExecutions", {
|
|
446
|
-
enumerable: true,
|
|
447
|
-
configurable: true,
|
|
448
|
-
get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
|
|
449
|
-
toolName: s.toolName,
|
|
450
|
-
input: s.input,
|
|
451
|
-
output: s.output,
|
|
452
|
-
duration: s.endTime.getTime() - s.startTime.getTime(),
|
|
453
|
-
}))),
|
|
454
|
-
});
|
|
455
|
-
loopPromise
|
|
456
|
-
.finally(() => timeoutController?.cleanup())
|
|
457
|
-
.catch((error) => {
|
|
458
|
-
captureProviderError(error);
|
|
459
|
-
});
|
|
460
|
-
return result;
|
|
461
|
-
}
|
|
462
|
-
async runStreamLoop(args) {
|
|
463
|
-
const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
|
|
195
|
+
return LiteLLMProvider.modelsCache;
|
|
196
|
+
}
|
|
464
197
|
try {
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
url,
|
|
471
|
-
apiKey,
|
|
472
|
-
fetchImpl,
|
|
473
|
-
abortSignal,
|
|
474
|
-
options,
|
|
475
|
-
conversation,
|
|
476
|
-
openAITools,
|
|
477
|
-
openAIToolChoice,
|
|
478
|
-
pushChunk,
|
|
479
|
-
});
|
|
480
|
-
stepFinish = stepResult.finishReason;
|
|
481
|
-
if (stepResult.usage) {
|
|
482
|
-
stepUsage = mergeUsage(stepUsage, stepResult.usage);
|
|
483
|
-
}
|
|
484
|
-
if (stepResult.toolCalls.size === 0) {
|
|
485
|
-
break;
|
|
486
|
-
}
|
|
487
|
-
await this.executeToolBatch({
|
|
488
|
-
stepResult,
|
|
489
|
-
conversation,
|
|
490
|
-
toolsRecord,
|
|
491
|
-
emitter,
|
|
492
|
-
toolsUsed,
|
|
493
|
-
toolExecutionSummaries,
|
|
494
|
-
options,
|
|
495
|
-
});
|
|
198
|
+
const dynamicModels = await this.fetchModelsFromAPI();
|
|
199
|
+
if (dynamicModels.length > 0) {
|
|
200
|
+
LiteLLMProvider.modelsCache = dynamicModels;
|
|
201
|
+
LiteLLMProvider.modelsCacheTime = now;
|
|
202
|
+
return dynamicModels;
|
|
496
203
|
}
|
|
497
|
-
resolveUsage({
|
|
498
|
-
promptTokens: stepUsage?.prompt_tokens ?? 0,
|
|
499
|
-
completionTokens: stepUsage?.completion_tokens ?? 0,
|
|
500
|
-
totalTokens: stepUsage?.total_tokens ?? 0,
|
|
501
|
-
});
|
|
502
|
-
resolveFinish(stepFinish ?? "stop");
|
|
503
|
-
pushChunk({ done: true });
|
|
504
|
-
return {
|
|
505
|
-
finishReason: stepFinish ?? "stop",
|
|
506
|
-
usage: stepUsage,
|
|
507
|
-
};
|
|
508
|
-
}
|
|
509
|
-
catch (err) {
|
|
510
|
-
logger.error("LiteLLM: Stream error", {
|
|
511
|
-
error: err instanceof Error ? err.message : String(err),
|
|
512
|
-
});
|
|
513
|
-
resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
|
|
514
|
-
resolveFinish("error");
|
|
515
|
-
pushChunk({ done: true });
|
|
516
|
-
throw err;
|
|
517
204
|
}
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
const body = buildBody({
|
|
521
|
-
modelId: args.modelId,
|
|
522
|
-
messages: args.conversation,
|
|
523
|
-
options: args.options,
|
|
524
|
-
tools: args.openAITools,
|
|
525
|
-
...(args.openAIToolChoice !== undefined
|
|
526
|
-
? { toolChoice: args.openAIToolChoice }
|
|
527
|
-
: {}),
|
|
528
|
-
streaming: true,
|
|
529
|
-
});
|
|
530
|
-
const res = await args.fetchImpl(args.url, {
|
|
531
|
-
method: "POST",
|
|
532
|
-
headers: {
|
|
533
|
-
"Content-Type": "application/json",
|
|
534
|
-
Authorization: `Bearer ${args.apiKey}`,
|
|
535
|
-
},
|
|
536
|
-
body: JSON.stringify(body),
|
|
537
|
-
...(args.abortSignal ? { signal: args.abortSignal } : {}),
|
|
538
|
-
});
|
|
539
|
-
if (!res.ok) {
|
|
540
|
-
throw await buildAPIError(args.url, body, res);
|
|
541
|
-
}
|
|
542
|
-
if (!res.body) {
|
|
543
|
-
throw new Error("litellm: stream response had no body");
|
|
205
|
+
catch (error) {
|
|
206
|
+
logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
|
|
544
207
|
}
|
|
545
|
-
return
|
|
546
|
-
args.pushChunk({ content: delta });
|
|
547
|
-
});
|
|
208
|
+
return this.getFallbackModels();
|
|
548
209
|
}
|
|
549
|
-
async
|
|
550
|
-
const
|
|
551
|
-
const
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
tool_calls: toolCallsForMessage,
|
|
563
|
-
});
|
|
564
|
-
for (const [, t] of stepResult.toolCalls) {
|
|
565
|
-
const startedAt = new Date();
|
|
566
|
-
let input;
|
|
567
|
-
try {
|
|
568
|
-
input = JSON.parse(t.argsBuffered || "{}");
|
|
569
|
-
}
|
|
570
|
-
catch {
|
|
571
|
-
input = t.argsBuffered;
|
|
572
|
-
}
|
|
573
|
-
let output;
|
|
574
|
-
let errorMsg;
|
|
575
|
-
const toolDef = toolsRecord[t.name];
|
|
576
|
-
emitter?.emit("tool:start", {
|
|
577
|
-
toolName: t.name,
|
|
578
|
-
toolCallId: t.id,
|
|
579
|
-
input,
|
|
210
|
+
async fetchModelsFromAPI() {
|
|
211
|
+
const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
|
|
212
|
+
const proxyFetch = createProxyFetch();
|
|
213
|
+
const controller = new AbortController();
|
|
214
|
+
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
215
|
+
try {
|
|
216
|
+
const response = await proxyFetch(modelsUrl, {
|
|
217
|
+
method: "GET",
|
|
218
|
+
headers: {
|
|
219
|
+
Authorization: `Bearer ${this.config.apiKey}`,
|
|
220
|
+
"Content-Type": "application/json",
|
|
221
|
+
},
|
|
222
|
+
signal: controller.signal,
|
|
580
223
|
});
|
|
581
|
-
if (!
|
|
582
|
-
|
|
583
|
-
output = { error: errorMsg };
|
|
224
|
+
if (!response.ok) {
|
|
225
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
584
226
|
}
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
}
|
|
589
|
-
catch (err) {
|
|
590
|
-
errorMsg = err instanceof Error ? err.message : String(err);
|
|
591
|
-
output = { error: errorMsg };
|
|
592
|
-
}
|
|
227
|
+
const data = (await response.json());
|
|
228
|
+
if (!Array.isArray(data.data)) {
|
|
229
|
+
throw new Error("Invalid response format: expected data.data array");
|
|
593
230
|
}
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
toolName: t.name,
|
|
599
|
-
input,
|
|
600
|
-
output,
|
|
601
|
-
...(errorMsg ? { error: errorMsg } : {}),
|
|
602
|
-
startTime: startedAt,
|
|
603
|
-
endTime: endedAt,
|
|
604
|
-
});
|
|
605
|
-
conversation.push({
|
|
606
|
-
role: "tool",
|
|
607
|
-
tool_call_id: t.id,
|
|
608
|
-
content: stringifyToolOutput(output),
|
|
609
|
-
});
|
|
231
|
+
return data.data
|
|
232
|
+
.map((m) => m.id)
|
|
233
|
+
.filter((id) => typeof id === "string" && id.length > 0)
|
|
234
|
+
.sort();
|
|
610
235
|
}
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
})));
|
|
617
|
-
try {
|
|
618
|
-
await this.handleToolExecutionStorage(justExecuted.map((s) => ({
|
|
619
|
-
toolCallId: s.toolCallId,
|
|
620
|
-
toolName: s.toolName,
|
|
621
|
-
input: s.input,
|
|
622
|
-
output: s.output,
|
|
623
|
-
})), justExecuted.map((s) => ({
|
|
624
|
-
toolCallId: s.toolCallId,
|
|
625
|
-
toolName: s.toolName,
|
|
626
|
-
output: s.output,
|
|
627
|
-
})), options, new Date());
|
|
236
|
+
catch (error) {
|
|
237
|
+
if (isAbortError(error)) {
|
|
238
|
+
throw new NetworkError("Request timed out after 5 seconds", this.providerName);
|
|
239
|
+
}
|
|
240
|
+
throw error;
|
|
628
241
|
}
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
provider: this.providerName,
|
|
632
|
-
error: err instanceof Error ? err.message : String(err),
|
|
633
|
-
});
|
|
242
|
+
finally {
|
|
243
|
+
clearTimeout(timeoutId);
|
|
634
244
|
}
|
|
635
245
|
}
|
|
636
246
|
/**
|
|
@@ -692,83 +302,4 @@ export class LiteLLMProvider extends BaseProvider {
|
|
|
692
302
|
timeoutController?.cleanup();
|
|
693
303
|
}
|
|
694
304
|
}
|
|
695
|
-
/**
|
|
696
|
-
* Get available models from LiteLLM proxy `/v1/models` endpoint.
|
|
697
|
-
* Caches results for 10 minutes; falls back to env-driven list or a
|
|
698
|
-
* minimal safe default if the API fetch fails.
|
|
699
|
-
*/
|
|
700
|
-
async getAvailableModels() {
|
|
701
|
-
const now = Date.now();
|
|
702
|
-
if (LiteLLMProvider.modelsCache.length > 0 &&
|
|
703
|
-
now - LiteLLMProvider.modelsCacheTime <
|
|
704
|
-
LiteLLMProvider.MODELS_CACHE_DURATION) {
|
|
705
|
-
logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
|
|
706
|
-
cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
|
|
707
|
-
modelCount: LiteLLMProvider.modelsCache.length,
|
|
708
|
-
});
|
|
709
|
-
return LiteLLMProvider.modelsCache;
|
|
710
|
-
}
|
|
711
|
-
try {
|
|
712
|
-
const dynamicModels = await this.fetchModelsFromAPI();
|
|
713
|
-
if (dynamicModels.length > 0) {
|
|
714
|
-
LiteLLMProvider.modelsCache = dynamicModels;
|
|
715
|
-
LiteLLMProvider.modelsCacheTime = now;
|
|
716
|
-
return dynamicModels;
|
|
717
|
-
}
|
|
718
|
-
}
|
|
719
|
-
catch (error) {
|
|
720
|
-
logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
|
|
721
|
-
}
|
|
722
|
-
return this.getFallbackModels();
|
|
723
|
-
}
|
|
724
|
-
async getFirstAvailableModel() {
|
|
725
|
-
const models = await this.getAvailableModels();
|
|
726
|
-
return models[0] || FALLBACK_LITELLM_MODEL;
|
|
727
|
-
}
|
|
728
|
-
getFallbackModels() {
|
|
729
|
-
return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
|
|
730
|
-
.map((m) => m.trim())
|
|
731
|
-
.filter((m) => m.length > 0) || [
|
|
732
|
-
"openai/gpt-4o",
|
|
733
|
-
"anthropic/claude-3-haiku",
|
|
734
|
-
"meta-llama/llama-3.1-8b-instruct",
|
|
735
|
-
"google/gemini-2.5-flash",
|
|
736
|
-
]);
|
|
737
|
-
}
|
|
738
|
-
async fetchModelsFromAPI() {
|
|
739
|
-
const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
|
|
740
|
-
const proxyFetch = createProxyFetch();
|
|
741
|
-
const controller = new AbortController();
|
|
742
|
-
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
743
|
-
try {
|
|
744
|
-
const response = await proxyFetch(modelsUrl, {
|
|
745
|
-
method: "GET",
|
|
746
|
-
headers: {
|
|
747
|
-
Authorization: `Bearer ${this.config.apiKey}`,
|
|
748
|
-
"Content-Type": "application/json",
|
|
749
|
-
},
|
|
750
|
-
signal: controller.signal,
|
|
751
|
-
});
|
|
752
|
-
if (!response.ok) {
|
|
753
|
-
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
754
|
-
}
|
|
755
|
-
const data = (await response.json());
|
|
756
|
-
if (!Array.isArray(data.data)) {
|
|
757
|
-
throw new Error("Invalid response format: expected data.data array");
|
|
758
|
-
}
|
|
759
|
-
return data.data
|
|
760
|
-
.map((m) => m.id)
|
|
761
|
-
.filter((id) => typeof id === "string" && id.length > 0)
|
|
762
|
-
.sort();
|
|
763
|
-
}
|
|
764
|
-
catch (error) {
|
|
765
|
-
if (isAbortError(error)) {
|
|
766
|
-
throw new NetworkError("Request timed out after 5 seconds", this.providerName);
|
|
767
|
-
}
|
|
768
|
-
throw error;
|
|
769
|
-
}
|
|
770
|
-
finally {
|
|
771
|
-
clearTimeout(timeoutId);
|
|
772
|
-
}
|
|
773
|
-
}
|
|
774
305
|
}
|