@juspay/neurolink 9.67.0 → 9.67.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/dist/browser/neurolink.min.js +376 -370
- package/dist/lib/providers/googleVertex.js +8 -7
- package/dist/lib/providers/litellm.d.ts +31 -24
- package/dist/lib/providers/litellm.js +590 -391
- package/dist/lib/providers/openaiChatCompletionsClient.d.ts +67 -0
- package/dist/lib/providers/openaiChatCompletionsClient.js +526 -0
- package/dist/lib/providers/openaiCompatible.d.ts +46 -19
- package/dist/lib/providers/openaiCompatible.js +559 -171
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/middleware.d.ts +1 -1
- package/dist/lib/types/openaiCompatible.d.ts +250 -0
- package/dist/lib/types/openaiCompatible.js +2 -0
- package/dist/lib/types/providers.d.ts +2 -0
- package/dist/providers/googleVertex.js +8 -7
- package/dist/providers/litellm.d.ts +31 -24
- package/dist/providers/litellm.js +590 -391
- package/dist/providers/openaiChatCompletionsClient.d.ts +67 -0
- package/dist/providers/openaiChatCompletionsClient.js +525 -0
- package/dist/providers/openaiCompatible.d.ts +46 -19
- package/dist/providers/openaiCompatible.js +559 -171
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/middleware.d.ts +1 -1
- package/dist/types/openaiCompatible.d.ts +250 -0
- package/dist/types/openaiCompatible.js +1 -0
- package/dist/types/providers.d.ts +2 -0
- package/package.json +2 -1
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
1
|
import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
|
|
3
2
|
import { BaseProvider } from "../core/baseProvider.js";
|
|
4
3
|
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
|
|
@@ -6,72 +5,61 @@ import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
|
|
|
6
5
|
import { createProxyFetch } from "../proxy/proxyFetch.js";
|
|
7
6
|
import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
|
|
8
7
|
import { isAbortError } from "../utils/errorHandling.js";
|
|
9
|
-
import {
|
|
8
|
+
import { NoOutputGeneratedError } from "../utils/generationErrors.js";
|
|
10
9
|
import { logger } from "../utils/logger.js";
|
|
11
|
-
import { buildNoOutputSentinel,
|
|
10
|
+
import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
|
|
12
11
|
import { calculateCost } from "../utils/pricing.js";
|
|
13
12
|
import { getProviderModel } from "../utils/providerConfig.js";
|
|
14
|
-
import { composeAbortSignals, createTimeoutController,
|
|
13
|
+
import { composeAbortSignals, createTimeoutController, mergeAbortSignals, TimeoutError, } from "../utils/timeout.js";
|
|
14
|
+
import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
|
|
15
15
|
import { resolveToolChoice } from "../utils/toolChoice.js";
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
18
|
-
import { Output, stepCountIs } from "../utils/tool.js";
|
|
19
|
-
import { streamText } from "../utils/generation.js";
|
|
16
|
+
import { transformToolExecutions } from "../utils/transformationUtils.js";
|
|
17
|
+
import { buildAPIError, buildBody, buildToolsForOpenAI, createChunkQueue, createDeferredAnalytics, mapNeuroLinkToolChoice, mergeUsage, messageBuilderToOpenAI, parseSSEStream, stringifyToolOutput, stripTrailingSlash, v3ResponseFormatToOpenAI, v3ToolChoiceToOpenAI, v3ToolsToOpenAI, } from "./openaiChatCompletionsClient.js";
|
|
20
18
|
const streamTracer = trace.getTracer("neurolink.provider.litellm");
|
|
21
|
-
|
|
22
|
-
const getLiteLLMConfig = () => {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
};
|
|
27
|
-
};
|
|
19
|
+
const FALLBACK_LITELLM_MODEL = "openai/gpt-4o-mini";
|
|
20
|
+
const getLiteLLMConfig = () => ({
|
|
21
|
+
baseURL: process.env.LITELLM_BASE_URL || "http://localhost:4000",
|
|
22
|
+
apiKey: process.env.LITELLM_API_KEY || "sk-anything",
|
|
23
|
+
});
|
|
28
24
|
/**
|
|
29
|
-
*
|
|
30
|
-
*
|
|
31
|
-
* LiteLLM uses a 'provider/model' format for model names.
|
|
32
|
-
* For example:
|
|
33
|
-
* - 'openai/gpt-4o-mini'
|
|
34
|
-
* - 'openai/gpt-3.5-turbo'
|
|
35
|
-
* - 'anthropic/claude-3-sonnet-20240229'
|
|
36
|
-
* - 'google/gemini-pro'
|
|
37
|
-
*
|
|
38
|
-
* You can override the default by setting the LITELLM_MODEL environment variable.
|
|
25
|
+
* LiteLLM uses a 'provider/model' format. Override via LITELLM_MODEL env var.
|
|
39
26
|
*/
|
|
40
|
-
const getDefaultLiteLLMModel = () =>
|
|
41
|
-
|
|
42
|
-
|
|
27
|
+
const getDefaultLiteLLMModel = () => getProviderModel("LITELLM_MODEL", FALLBACK_LITELLM_MODEL);
|
|
28
|
+
const isGemini25Model = (modelName) => modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
|
|
29
|
+
// =============================================================================
|
|
30
|
+
// Direct HTTP client for LiteLLM proxy.
|
|
31
|
+
//
|
|
32
|
+
// LiteLLM exposes the OpenAI chat-completions wire format, so all the
|
|
33
|
+
// wire-level converters and the SSE parser are shared with the
|
|
34
|
+
// openai-compatible provider via ./openaiChatCompletionsClient.ts. This
|
|
35
|
+
// file owns LiteLLM-specific behaviour: OTel span wrap with cost, model
|
|
36
|
+
// allowlist 403 → ModelAccessDeniedError, Gemini 2.5 maxTokens skip,
|
|
37
|
+
// model caching, and native /v1/embeddings.
|
|
38
|
+
// =============================================================================
|
|
43
39
|
/**
|
|
44
|
-
* LiteLLM Provider
|
|
45
|
-
*
|
|
40
|
+
* LiteLLM Provider — direct HTTP, no AI SDK. Talks to a LiteLLM proxy
|
|
41
|
+
* server (or any deployment that speaks OpenAI chat-completions + the
|
|
42
|
+
* `/v1/models` and `/v1/embeddings` endpoints).
|
|
46
43
|
*/
|
|
47
44
|
export class LiteLLMProvider extends BaseProvider {
|
|
48
|
-
|
|
45
|
+
config;
|
|
49
46
|
credentials;
|
|
50
|
-
|
|
47
|
+
resolvedModel;
|
|
51
48
|
static modelsCache = [];
|
|
52
49
|
static modelsCacheTime = 0;
|
|
53
50
|
static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
|
|
54
51
|
constructor(modelName, sdk, _region, credentials) {
|
|
55
52
|
super(modelName, "litellm", sdk);
|
|
56
|
-
// Store per-request credentials for use in embed/embedMany/fetchModelsFromAPI
|
|
57
53
|
this.credentials = credentials;
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
// with a custom baseURL and apiKey. This ensures all requests are routed through the LiteLLM
|
|
64
|
-
// proxy, allowing access to multiple models and custom authentication.
|
|
65
|
-
const customOpenAI = createOpenAI({
|
|
66
|
-
baseURL: credentials?.baseURL ?? config.baseURL,
|
|
67
|
-
apiKey: credentials?.apiKey ?? config.apiKey,
|
|
68
|
-
fetch: createProxyFetch(),
|
|
69
|
-
});
|
|
70
|
-
this.model = customOpenAI.chat(this.modelName || getDefaultLiteLLMModel());
|
|
54
|
+
const envConfig = getLiteLLMConfig();
|
|
55
|
+
this.config = {
|
|
56
|
+
baseURL: credentials?.baseURL ?? envConfig.baseURL,
|
|
57
|
+
apiKey: credentials?.apiKey ?? envConfig.apiKey,
|
|
58
|
+
};
|
|
71
59
|
logger.debug("LiteLLM Provider initialized", {
|
|
72
60
|
modelName: this.modelName,
|
|
73
61
|
provider: this.providerName,
|
|
74
|
-
baseURL: config.baseURL,
|
|
62
|
+
baseURL: this.config.baseURL,
|
|
75
63
|
});
|
|
76
64
|
}
|
|
77
65
|
getProviderName() {
|
|
@@ -81,16 +69,152 @@ export class LiteLLMProvider extends BaseProvider {
|
|
|
81
69
|
return getDefaultLiteLLMModel();
|
|
82
70
|
}
|
|
83
71
|
/**
|
|
84
|
-
*
|
|
72
|
+
* Abstract from BaseProvider — used by the parent's generate() path which
|
|
73
|
+
* still goes through `generateText`. Returns a thin LanguageModelV3-shaped
|
|
74
|
+
* object that delegates to the same HTTP helpers used by executeStream.
|
|
75
|
+
*/
|
|
76
|
+
async getAISDKModel() {
|
|
77
|
+
const modelId = await this.resolveModelName();
|
|
78
|
+
return this.buildDelegatingModel(modelId);
|
|
79
|
+
}
|
|
80
|
+
async resolveModelName() {
|
|
81
|
+
if (this.resolvedModel) {
|
|
82
|
+
return this.resolvedModel;
|
|
83
|
+
}
|
|
84
|
+
const explicit = this.modelName || getDefaultLiteLLMModel();
|
|
85
|
+
if (explicit && explicit.trim() !== "") {
|
|
86
|
+
this.resolvedModel = explicit;
|
|
87
|
+
if (this.modelName !== explicit) {
|
|
88
|
+
this.refreshHandlersForModel(explicit);
|
|
89
|
+
}
|
|
90
|
+
return explicit;
|
|
91
|
+
}
|
|
92
|
+
this.resolvedModel = FALLBACK_LITELLM_MODEL;
|
|
93
|
+
this.refreshHandlersForModel(FALLBACK_LITELLM_MODEL);
|
|
94
|
+
return FALLBACK_LITELLM_MODEL;
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Returns a minimal V3-shaped model. Only used by BaseProvider's
|
|
98
|
+
* `generate()` non-streaming path which still relies on the parent's
|
|
99
|
+
* `generateText`. The streaming path bypasses this entirely.
|
|
85
100
|
*/
|
|
86
|
-
|
|
87
|
-
|
|
101
|
+
buildDelegatingModel(modelId) {
|
|
102
|
+
const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
|
|
103
|
+
const fetchImpl = createProxyFetch();
|
|
104
|
+
const apiKey = this.config.apiKey;
|
|
105
|
+
const providerName = this.providerName;
|
|
106
|
+
const getTimeoutForOptions = (opts) => this.getTimeout((opts ?? {}));
|
|
107
|
+
const gemini25Skip = isGemini25Model(modelId);
|
|
108
|
+
return {
|
|
109
|
+
specificationVersion: "v3",
|
|
110
|
+
provider: "litellm",
|
|
111
|
+
modelId,
|
|
112
|
+
supportedUrls: {},
|
|
113
|
+
doGenerate: async (options) => {
|
|
114
|
+
const messages = messageBuilderToOpenAI(options.prompt);
|
|
115
|
+
const body = buildBody({
|
|
116
|
+
modelId,
|
|
117
|
+
messages,
|
|
118
|
+
options: {
|
|
119
|
+
maxTokens: gemini25Skip ? undefined : options.maxOutputTokens,
|
|
120
|
+
temperature: options.temperature,
|
|
121
|
+
topP: options.topP,
|
|
122
|
+
presencePenalty: options.presencePenalty,
|
|
123
|
+
frequencyPenalty: options.frequencyPenalty,
|
|
124
|
+
seed: options.seed,
|
|
125
|
+
stopSequences: options.stopSequences,
|
|
126
|
+
},
|
|
127
|
+
tools: v3ToolsToOpenAI(options.tools),
|
|
128
|
+
...(options.toolChoice
|
|
129
|
+
? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
|
|
130
|
+
: {}),
|
|
131
|
+
streaming: false,
|
|
132
|
+
...(options.responseFormat
|
|
133
|
+
? {
|
|
134
|
+
responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
|
|
135
|
+
}
|
|
136
|
+
: {}),
|
|
137
|
+
});
|
|
138
|
+
const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
|
|
139
|
+
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
140
|
+
let res;
|
|
141
|
+
try {
|
|
142
|
+
res = await fetchImpl(url, {
|
|
143
|
+
method: "POST",
|
|
144
|
+
headers: {
|
|
145
|
+
"Content-Type": "application/json",
|
|
146
|
+
Authorization: `Bearer ${apiKey}`,
|
|
147
|
+
},
|
|
148
|
+
body: JSON.stringify(body),
|
|
149
|
+
...(composedSignal ? { signal: composedSignal } : {}),
|
|
150
|
+
});
|
|
151
|
+
}
|
|
152
|
+
finally {
|
|
153
|
+
timeoutController?.cleanup();
|
|
154
|
+
}
|
|
155
|
+
if (!res.ok) {
|
|
156
|
+
throw await buildAPIError(url, body, res);
|
|
157
|
+
}
|
|
158
|
+
const json = (await res.json());
|
|
159
|
+
const choice = json.choices?.[0];
|
|
160
|
+
const text = (typeof choice?.message?.content === "string"
|
|
161
|
+
? choice.message.content
|
|
162
|
+
: "") ?? "";
|
|
163
|
+
const content = [];
|
|
164
|
+
if (text.length > 0) {
|
|
165
|
+
content.push({ type: "text", text });
|
|
166
|
+
}
|
|
167
|
+
for (const tc of choice?.message?.tool_calls ?? []) {
|
|
168
|
+
content.push({
|
|
169
|
+
type: "tool-call",
|
|
170
|
+
toolCallId: tc.id,
|
|
171
|
+
toolName: tc.function.name,
|
|
172
|
+
input: tc.function.arguments ?? "",
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
const rawFinish = choice?.finish_reason;
|
|
176
|
+
const unified = rawFinish === "length"
|
|
177
|
+
? "length"
|
|
178
|
+
: rawFinish === "tool_calls" || rawFinish === "function_call"
|
|
179
|
+
? "tool-calls"
|
|
180
|
+
: rawFinish === "content_filter"
|
|
181
|
+
? "content-filter"
|
|
182
|
+
: "stop";
|
|
183
|
+
return {
|
|
184
|
+
content,
|
|
185
|
+
finishReason: { unified, raw: rawFinish ?? "stop" },
|
|
186
|
+
usage: {
|
|
187
|
+
inputTokens: {
|
|
188
|
+
total: json.usage?.prompt_tokens,
|
|
189
|
+
noCache: json.usage?.prompt_tokens,
|
|
190
|
+
cacheRead: undefined,
|
|
191
|
+
cacheWrite: undefined,
|
|
192
|
+
},
|
|
193
|
+
outputTokens: {
|
|
194
|
+
total: json.usage?.completion_tokens,
|
|
195
|
+
text: json.usage?.completion_tokens,
|
|
196
|
+
reasoning: undefined,
|
|
197
|
+
},
|
|
198
|
+
},
|
|
199
|
+
warnings: [],
|
|
200
|
+
request: { body },
|
|
201
|
+
response: {
|
|
202
|
+
...(json.id ? { id: json.id } : {}),
|
|
203
|
+
...(json.model ? { modelId: json.model } : {}),
|
|
204
|
+
headers: {},
|
|
205
|
+
body: json,
|
|
206
|
+
},
|
|
207
|
+
};
|
|
208
|
+
},
|
|
209
|
+
doStream: () => {
|
|
210
|
+
throw new Error("litellm: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
|
|
211
|
+
},
|
|
212
|
+
};
|
|
88
213
|
}
|
|
89
214
|
formatProviderError(error) {
|
|
90
215
|
if (error instanceof TimeoutError) {
|
|
91
216
|
return new NetworkError(`Request timed out: ${error.message}`, this.providerName);
|
|
92
217
|
}
|
|
93
|
-
// Check for timeout by error name and message as fallback
|
|
94
218
|
const errorRecord = error;
|
|
95
219
|
if (errorRecord?.name === "TimeoutError" ||
|
|
96
220
|
(typeof errorRecord?.message === "string" &&
|
|
@@ -103,10 +227,10 @@ export class LiteLLMProvider extends BaseProvider {
|
|
|
103
227
|
return new NetworkError("LiteLLM proxy server not available. Please start the LiteLLM proxy server at " +
|
|
104
228
|
`${process.env.LITELLM_BASE_URL || "http://localhost:4000"}`, this.providerName);
|
|
105
229
|
}
|
|
106
|
-
// Curator P1-1: detect "team not allowed to access model" responses
|
|
107
|
-
//
|
|
108
|
-
//
|
|
109
|
-
//
|
|
230
|
+
// Curator P1-1: detect "team not allowed to access model" responses and
|
|
231
|
+
// surface as ModelAccessDeniedError with the allowed_models array parsed
|
|
232
|
+
// from the body. Must run before the generic "API key" check because
|
|
233
|
+
// LiteLLM phrases this as a 403 distinct from auth.
|
|
110
234
|
if (isModelAccessDeniedMessage(errorRecord.message)) {
|
|
111
235
|
return new ModelAccessDeniedError(errorRecord.message, {
|
|
112
236
|
provider: this.providerName,
|
|
@@ -129,447 +253,522 @@ export class LiteLLMProvider extends BaseProvider {
|
|
|
129
253
|
}
|
|
130
254
|
return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
|
|
131
255
|
}
|
|
132
|
-
/**
|
|
133
|
-
* LiteLLM supports tools for compatible models
|
|
134
|
-
*/
|
|
135
256
|
supportsTools() {
|
|
136
257
|
return true;
|
|
137
258
|
}
|
|
138
259
|
/**
|
|
139
|
-
*
|
|
140
|
-
*
|
|
260
|
+
* Streaming path — drives the LiteLLM proxy directly. No streamText, no
|
|
261
|
+
* AI SDK orchestrator. Tool calls, multi-step loops, telemetry, abort
|
|
262
|
+
* handling all inline. OTel span captures gen_ai.system + cost.
|
|
141
263
|
*/
|
|
142
|
-
async executeStream(options,
|
|
264
|
+
async executeStream(options, _analysisSchema) {
|
|
143
265
|
this.validateStreamOptions(options);
|
|
144
266
|
const startTime = Date.now();
|
|
145
|
-
let chunkCount = 0; // Track chunk count for debugging
|
|
146
|
-
// Reviewer follow-up: capture upstream provider errors via onError so
|
|
147
|
-
// the post-stream NoOutput detect can propagate the *real* cause
|
|
148
|
-
// (content_filter, provider crash, etc.) into the sentinel's
|
|
149
|
-
// providerError / modelResponseRaw instead of "No output generated".
|
|
150
|
-
let capturedProviderError;
|
|
151
267
|
const timeout = this.getTimeout(options);
|
|
152
268
|
const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
|
|
269
|
+
const consumerAbortController = new AbortController();
|
|
270
|
+
const abortSignal = mergeAbortSignals([
|
|
271
|
+
options.abortSignal,
|
|
272
|
+
timeoutController?.controller.signal,
|
|
273
|
+
consumerAbortController.signal,
|
|
274
|
+
]).signal;
|
|
275
|
+
let modelId;
|
|
276
|
+
let toolsRecord;
|
|
277
|
+
let openAITools;
|
|
278
|
+
let openAIToolChoice;
|
|
279
|
+
let conversation;
|
|
153
280
|
try {
|
|
154
|
-
|
|
155
|
-
// Using protected helper from BaseProvider to eliminate code duplication
|
|
156
|
-
const messages = await this.buildMessagesForStream(options);
|
|
157
|
-
const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
|
|
158
|
-
// Get tools - options.tools is pre-merged by BaseProvider.stream()
|
|
281
|
+
modelId = await this.resolveModelName();
|
|
159
282
|
const shouldUseTools = !options.disableTools && this.supportsTools();
|
|
160
|
-
|
|
283
|
+
toolsRecord = shouldUseTools
|
|
161
284
|
? options.tools || (await this.getAllTools())
|
|
162
285
|
: {};
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
286
|
+
openAITools = shouldUseTools
|
|
287
|
+
? buildToolsForOpenAI(toolsRecord)
|
|
288
|
+
: undefined;
|
|
289
|
+
openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
|
|
290
|
+
const initialMessages = await this.buildMessagesForStream(options);
|
|
291
|
+
conversation = messageBuilderToOpenAI(initialMessages);
|
|
292
|
+
}
|
|
293
|
+
catch (setupErr) {
|
|
294
|
+
timeoutController?.cleanup();
|
|
295
|
+
throw setupErr;
|
|
296
|
+
}
|
|
297
|
+
const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
|
|
298
|
+
const fetchImpl = createProxyFetch();
|
|
299
|
+
const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
|
|
300
|
+
const emitter = this.neurolink?.getEventEmitter();
|
|
301
|
+
const toolsUsed = [];
|
|
302
|
+
const toolExecutionSummaries = [];
|
|
303
|
+
const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
|
|
304
|
+
const { pushChunk, nextChunk } = createChunkQueue();
|
|
305
|
+
// Wrap the stream in an OTel span to capture provider-level latency,
|
|
306
|
+
// token usage, finish reason, and cost. Matches the pre-migration
|
|
307
|
+
// behaviour where streamText was wrapped in `neurolink.provider.streamText`.
|
|
308
|
+
const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
|
|
309
|
+
kind: SpanKind.CLIENT,
|
|
310
|
+
attributes: {
|
|
311
|
+
"gen_ai.system": "litellm",
|
|
312
|
+
"gen_ai.request.model": modelId,
|
|
313
|
+
},
|
|
314
|
+
});
|
|
315
|
+
// Model-specific maxTokens handling — Gemini 2.5 models have known issues
|
|
316
|
+
// with maxTokens being forwarded. Mutate a shallow copy so the original
|
|
317
|
+
// StreamOptions reference downstream (analytics, telemetry) is unchanged.
|
|
318
|
+
const requestOptions = isGemini25Model(modelId)
|
|
319
|
+
? { ...options, maxTokens: undefined }
|
|
320
|
+
: options;
|
|
321
|
+
if (requestOptions !== options &&
|
|
322
|
+
options.maxTokens &&
|
|
323
|
+
logger.shouldLog("debug")) {
|
|
324
|
+
logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, { modelId, requestedMaxTokens: options.maxTokens });
|
|
325
|
+
}
|
|
326
|
+
const loopPromise = this.runStreamLoop({
|
|
327
|
+
maxSteps,
|
|
328
|
+
modelId,
|
|
329
|
+
url,
|
|
330
|
+
apiKey: this.config.apiKey,
|
|
331
|
+
fetchImpl,
|
|
332
|
+
abortSignal,
|
|
333
|
+
options: requestOptions,
|
|
334
|
+
conversation,
|
|
335
|
+
openAITools,
|
|
336
|
+
openAIToolChoice,
|
|
337
|
+
toolsRecord,
|
|
338
|
+
emitter,
|
|
339
|
+
toolsUsed,
|
|
340
|
+
toolExecutionSummaries,
|
|
341
|
+
pushChunk,
|
|
342
|
+
resolveUsage,
|
|
343
|
+
resolveFinish,
|
|
344
|
+
});
|
|
345
|
+
// Wire the OTel span lifecycle to the deferred analytics promises.
|
|
346
|
+
let capturedProviderError;
|
|
347
|
+
const captureProviderError = (error) => {
|
|
348
|
+
capturedProviderError = error;
|
|
349
|
+
};
|
|
350
|
+
usagePromise
|
|
351
|
+
.then((usage) => {
|
|
352
|
+
streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
|
|
353
|
+
streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
|
|
354
|
+
const cost = calculateCost(this.providerName, this.modelName, {
|
|
355
|
+
input: usage.promptTokens,
|
|
356
|
+
output: usage.completionTokens,
|
|
357
|
+
total: usage.totalTokens,
|
|
167
358
|
});
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
359
|
+
if (cost && cost > 0) {
|
|
360
|
+
streamSpan.setAttribute("neurolink.cost", cost);
|
|
361
|
+
}
|
|
362
|
+
})
|
|
363
|
+
.catch(() => {
|
|
364
|
+
// usage may never resolve if the stream is aborted before completion
|
|
365
|
+
});
|
|
366
|
+
finishPromise
|
|
367
|
+
.then((reason) => {
|
|
368
|
+
streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
|
|
369
|
+
if (reason === "error") {
|
|
370
|
+
streamSpan.setStatus({
|
|
371
|
+
code: SpanStatusCode.ERROR,
|
|
372
|
+
message: capturedProviderError instanceof Error
|
|
373
|
+
? capturedProviderError.message
|
|
374
|
+
: String(capturedProviderError ?? "stream error"),
|
|
176
375
|
});
|
|
177
376
|
}
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
|
|
191
|
-
experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
|
|
192
|
-
experimental_repairToolCall: this.getToolCallRepairFn(options),
|
|
193
|
-
onError: (event) => {
|
|
194
|
-
const error = event.error;
|
|
195
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
196
|
-
// Reviewer follow-up: propagate the captured error to the
|
|
197
|
-
// post-stream NoOutput sentinel so telemetry sees the real
|
|
198
|
-
// provider cause instead of "No output generated".
|
|
199
|
-
capturedProviderError = error;
|
|
200
|
-
logger.error(`LiteLLM: Stream error`, {
|
|
201
|
-
provider: this.providerName,
|
|
202
|
-
modelName: this.modelName,
|
|
203
|
-
error: errorMessage,
|
|
204
|
-
chunkCount,
|
|
205
|
-
});
|
|
206
|
-
},
|
|
207
|
-
onFinish: (event) => {
|
|
208
|
-
logger.debug(`LiteLLM: Stream finished`, {
|
|
209
|
-
finishReason: event.finishReason,
|
|
210
|
-
totalChunks: chunkCount,
|
|
211
|
-
});
|
|
212
|
-
},
|
|
213
|
-
onChunk: () => {
|
|
214
|
-
chunkCount++;
|
|
215
|
-
},
|
|
216
|
-
onStepFinish: ({ toolCalls, toolResults }) => {
|
|
217
|
-
emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
|
|
218
|
-
logger.info("Tool execution completed", { toolResults, toolCalls });
|
|
219
|
-
for (const toolCall of toolCalls) {
|
|
220
|
-
collectedToolCalls.push({
|
|
221
|
-
toolCallId: toolCall.toolCallId,
|
|
222
|
-
toolName: toolCall.toolName,
|
|
223
|
-
args: toolCall.args ??
|
|
224
|
-
toolCall.input ??
|
|
225
|
-
toolCall
|
|
226
|
-
.parameters ??
|
|
227
|
-
{},
|
|
228
|
-
});
|
|
377
|
+
streamSpan.end();
|
|
378
|
+
})
|
|
379
|
+
.catch(() => {
|
|
380
|
+
streamSpan.end();
|
|
381
|
+
});
|
|
382
|
+
const transformedStream = async function* () {
|
|
383
|
+
let contentYielded = 0;
|
|
384
|
+
try {
|
|
385
|
+
for (;;) {
|
|
386
|
+
const chunk = await nextChunk();
|
|
387
|
+
if ("done" in chunk) {
|
|
388
|
+
break;
|
|
229
389
|
}
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
status: rawToolResult.error ? "failure" : "success",
|
|
235
|
-
output: (rawToolResult.output ??
|
|
236
|
-
rawToolResult.result) ??
|
|
237
|
-
undefined,
|
|
238
|
-
error: rawToolResult.error,
|
|
239
|
-
id: rawToolResult.toolCallId ?? toolResult.toolName,
|
|
240
|
-
});
|
|
390
|
+
if ("content" in chunk &&
|
|
391
|
+
typeof chunk.content === "string" &&
|
|
392
|
+
chunk.content.length > 0) {
|
|
393
|
+
contentYielded++;
|
|
241
394
|
}
|
|
242
|
-
|
|
243
|
-
logger.warn("[LiteLLMProvider] Failed to store tool executions", {
|
|
244
|
-
provider: this.providerName,
|
|
245
|
-
error: error instanceof Error ? error.message : String(error),
|
|
246
|
-
});
|
|
247
|
-
});
|
|
248
|
-
},
|
|
249
|
-
};
|
|
250
|
-
// Add analysisSchema support if provided
|
|
251
|
-
if (analysisSchema) {
|
|
252
|
-
try {
|
|
253
|
-
streamOptions = {
|
|
254
|
-
...streamOptions,
|
|
255
|
-
experimental_output: Output.object({
|
|
256
|
-
schema: analysisSchema,
|
|
257
|
-
}),
|
|
258
|
-
};
|
|
395
|
+
yield chunk;
|
|
259
396
|
}
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
397
|
+
await loopPromise;
|
|
398
|
+
if (contentYielded === 0 && toolsUsed.length === 0) {
|
|
399
|
+
logger.warn("LiteLLM: Stream produced no output — emitting enriched sentinel");
|
|
400
|
+
const fauxNoOutput = new NoOutputGeneratedError({
|
|
401
|
+
message: "Stream produced no output",
|
|
263
402
|
});
|
|
403
|
+
const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
|
|
404
|
+
stampNoOutputSpan(sentinel);
|
|
405
|
+
yield sentinel;
|
|
264
406
|
}
|
|
265
407
|
}
|
|
266
|
-
// Wrap streamText in an OTel span to capture provider-level latency, token usage, and cost
|
|
267
|
-
const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
|
|
268
|
-
kind: SpanKind.CLIENT,
|
|
269
|
-
attributes: {
|
|
270
|
-
"gen_ai.system": "litellm",
|
|
271
|
-
"gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
|
|
272
|
-
},
|
|
273
|
-
});
|
|
274
|
-
let result;
|
|
275
|
-
const collectedToolCalls = [];
|
|
276
|
-
const collectedToolResults = [];
|
|
277
|
-
try {
|
|
278
|
-
result = streamText(streamOptions);
|
|
279
|
-
}
|
|
280
408
|
catch (streamError) {
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
}
|
|
287
|
-
|
|
409
|
+
if (NoOutputGeneratedError.isInstance(streamError)) {
|
|
410
|
+
const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
|
|
411
|
+
stampNoOutputSpan(sentinel);
|
|
412
|
+
yield sentinel;
|
|
413
|
+
return;
|
|
414
|
+
}
|
|
415
|
+
const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
|
|
416
|
+
stampNoOutputSpan(sentinel);
|
|
417
|
+
yield sentinel;
|
|
288
418
|
throw streamError;
|
|
289
419
|
}
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
.then((usage) => {
|
|
294
|
-
streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
|
|
295
|
-
streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
|
|
296
|
-
const cost = calculateCost(this.providerName, this.modelName, {
|
|
297
|
-
input: usage.inputTokens || 0,
|
|
298
|
-
output: usage.outputTokens || 0,
|
|
299
|
-
total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
|
|
300
|
-
});
|
|
301
|
-
if (cost && cost > 0) {
|
|
302
|
-
streamSpan.setAttribute("neurolink.cost", cost);
|
|
420
|
+
finally {
|
|
421
|
+
if (!consumerAbortController.signal.aborted) {
|
|
422
|
+
consumerAbortController.abort();
|
|
303
423
|
}
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
})
|
|
315
|
-
Promise.resolve(result.text)
|
|
316
|
-
.then(() => {
|
|
317
|
-
streamSpan.end();
|
|
318
|
-
})
|
|
319
|
-
.catch((err) => {
|
|
320
|
-
streamSpan.setStatus({
|
|
321
|
-
code: SpanStatusCode.ERROR,
|
|
322
|
-
message: err instanceof Error ? err.message : String(err),
|
|
323
|
-
});
|
|
324
|
-
streamSpan.end();
|
|
325
|
-
});
|
|
326
|
-
timeoutController?.cleanup();
|
|
327
|
-
const transformedStream = this.createLiteLLMTransformedStream(result, () => capturedProviderError);
|
|
328
|
-
// Create analytics promise that resolves after stream completion
|
|
329
|
-
const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, result, Date.now() - startTime, {
|
|
424
|
+
}
|
|
425
|
+
};
|
|
426
|
+
const result = {
|
|
427
|
+
stream: transformedStream(),
|
|
428
|
+
provider: this.providerName,
|
|
429
|
+
model: this.modelName,
|
|
430
|
+
analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, {
|
|
431
|
+
textStream: (async function* () { })(),
|
|
432
|
+
usage: usagePromise,
|
|
433
|
+
finishReason: finishPromise,
|
|
434
|
+
}, Date.now() - startTime, {
|
|
330
435
|
requestId: options.requestId ??
|
|
331
436
|
`litellm-stream-${Date.now()}`,
|
|
332
437
|
streamingMode: true,
|
|
438
|
+
}),
|
|
439
|
+
toolsUsed,
|
|
440
|
+
metadata: {
|
|
441
|
+
startTime,
|
|
442
|
+
streamId: `litellm-${Date.now()}`,
|
|
443
|
+
},
|
|
444
|
+
};
|
|
445
|
+
Object.defineProperty(result, "toolExecutions", {
|
|
446
|
+
enumerable: true,
|
|
447
|
+
configurable: true,
|
|
448
|
+
get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
|
|
449
|
+
toolName: s.toolName,
|
|
450
|
+
input: s.input,
|
|
451
|
+
output: s.output,
|
|
452
|
+
duration: s.endTime.getTime() - s.startTime.getTime(),
|
|
453
|
+
}))),
|
|
454
|
+
});
|
|
455
|
+
loopPromise
|
|
456
|
+
.finally(() => timeoutController?.cleanup())
|
|
457
|
+
.catch((error) => {
|
|
458
|
+
captureProviderError(error);
|
|
459
|
+
});
|
|
460
|
+
return result;
|
|
461
|
+
}
|
|
462
|
+
async runStreamLoop(args) {
|
|
463
|
+
const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
|
|
464
|
+
try {
|
|
465
|
+
let stepFinish = null;
|
|
466
|
+
let stepUsage;
|
|
467
|
+
for (let step = 0; step < maxSteps; step++) {
|
|
468
|
+
const stepResult = await this.streamOneStep({
|
|
469
|
+
modelId,
|
|
470
|
+
url,
|
|
471
|
+
apiKey,
|
|
472
|
+
fetchImpl,
|
|
473
|
+
abortSignal,
|
|
474
|
+
options,
|
|
475
|
+
conversation,
|
|
476
|
+
openAITools,
|
|
477
|
+
openAIToolChoice,
|
|
478
|
+
pushChunk,
|
|
479
|
+
});
|
|
480
|
+
stepFinish = stepResult.finishReason;
|
|
481
|
+
if (stepResult.usage) {
|
|
482
|
+
stepUsage = mergeUsage(stepUsage, stepResult.usage);
|
|
483
|
+
}
|
|
484
|
+
if (stepResult.toolCalls.size === 0) {
|
|
485
|
+
break;
|
|
486
|
+
}
|
|
487
|
+
await this.executeToolBatch({
|
|
488
|
+
stepResult,
|
|
489
|
+
conversation,
|
|
490
|
+
toolsRecord,
|
|
491
|
+
emitter,
|
|
492
|
+
toolsUsed,
|
|
493
|
+
toolExecutionSummaries,
|
|
494
|
+
options,
|
|
495
|
+
});
|
|
496
|
+
}
|
|
497
|
+
resolveUsage({
|
|
498
|
+
promptTokens: stepUsage?.prompt_tokens ?? 0,
|
|
499
|
+
completionTokens: stepUsage?.completion_tokens ?? 0,
|
|
500
|
+
totalTokens: stepUsage?.total_tokens ?? 0,
|
|
333
501
|
});
|
|
502
|
+
resolveFinish(stepFinish ?? "stop");
|
|
503
|
+
pushChunk({ done: true });
|
|
334
504
|
return {
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
model: this.modelName,
|
|
338
|
-
...(shouldUseTools && {
|
|
339
|
-
toolCalls: collectedToolCalls,
|
|
340
|
-
toolResults: collectedToolResults,
|
|
341
|
-
}),
|
|
342
|
-
analytics: analyticsPromise,
|
|
343
|
-
metadata: {
|
|
344
|
-
startTime,
|
|
345
|
-
streamId: `litellm-${Date.now()}`,
|
|
346
|
-
},
|
|
505
|
+
finishReason: stepFinish ?? "stop",
|
|
506
|
+
usage: stepUsage,
|
|
347
507
|
};
|
|
348
508
|
}
|
|
349
|
-
catch (
|
|
350
|
-
|
|
351
|
-
|
|
509
|
+
catch (err) {
|
|
510
|
+
logger.error("LiteLLM: Stream error", {
|
|
511
|
+
error: err instanceof Error ? err.message : String(err),
|
|
512
|
+
});
|
|
513
|
+
resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
|
|
514
|
+
resolveFinish("error");
|
|
515
|
+
pushChunk({ done: true });
|
|
516
|
+
throw err;
|
|
352
517
|
}
|
|
353
518
|
}
|
|
354
|
-
async
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
519
|
+
async streamOneStep(args) {
|
|
520
|
+
const body = buildBody({
|
|
521
|
+
modelId: args.modelId,
|
|
522
|
+
messages: args.conversation,
|
|
523
|
+
options: args.options,
|
|
524
|
+
tools: args.openAITools,
|
|
525
|
+
...(args.openAIToolChoice !== undefined
|
|
526
|
+
? { toolChoice: args.openAIToolChoice }
|
|
527
|
+
: {}),
|
|
528
|
+
streaming: true,
|
|
529
|
+
});
|
|
530
|
+
const res = await args.fetchImpl(args.url, {
|
|
531
|
+
method: "POST",
|
|
532
|
+
headers: {
|
|
533
|
+
"Content-Type": "application/json",
|
|
534
|
+
Authorization: `Bearer ${args.apiKey}`,
|
|
535
|
+
},
|
|
536
|
+
body: JSON.stringify(body),
|
|
537
|
+
...(args.abortSignal ? { signal: args.abortSignal } : {}),
|
|
538
|
+
});
|
|
539
|
+
if (!res.ok) {
|
|
540
|
+
throw await buildAPIError(args.url, body, res);
|
|
541
|
+
}
|
|
542
|
+
if (!res.body) {
|
|
543
|
+
throw new Error("litellm: stream response had no body");
|
|
544
|
+
}
|
|
545
|
+
return parseSSEStream(res.body, (delta) => {
|
|
546
|
+
args.pushChunk({ content: delta });
|
|
547
|
+
});
|
|
548
|
+
}
|
|
549
|
+
async executeToolBatch(args) {
|
|
550
|
+
const { stepResult, conversation, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, options, } = args;
|
|
551
|
+
const toolCallsForMessage = [];
|
|
552
|
+
for (const [, t] of stepResult.toolCalls) {
|
|
553
|
+
toolCallsForMessage.push({
|
|
554
|
+
id: t.id,
|
|
555
|
+
type: "function",
|
|
556
|
+
function: { name: t.name, arguments: t.argsBuffered },
|
|
557
|
+
});
|
|
558
|
+
}
|
|
559
|
+
conversation.push({
|
|
560
|
+
role: "assistant",
|
|
561
|
+
content: stepResult.text.length > 0 ? stepResult.text : null,
|
|
562
|
+
tool_calls: toolCallsForMessage,
|
|
563
|
+
});
|
|
564
|
+
for (const [, t] of stepResult.toolCalls) {
|
|
565
|
+
const startedAt = new Date();
|
|
566
|
+
let input;
|
|
567
|
+
try {
|
|
568
|
+
input = JSON.parse(t.argsBuffered || "{}");
|
|
569
|
+
}
|
|
570
|
+
catch {
|
|
571
|
+
input = t.argsBuffered;
|
|
572
|
+
}
|
|
573
|
+
let output;
|
|
574
|
+
let errorMsg;
|
|
575
|
+
const toolDef = toolsRecord[t.name];
|
|
576
|
+
emitter?.emit("tool:start", {
|
|
577
|
+
toolName: t.name,
|
|
578
|
+
toolCallId: t.id,
|
|
579
|
+
input,
|
|
580
|
+
});
|
|
581
|
+
if (!toolDef || typeof toolDef.execute !== "function") {
|
|
582
|
+
errorMsg = `Tool '${t.name}' is not registered.`;
|
|
583
|
+
output = { error: errorMsg };
|
|
584
|
+
}
|
|
585
|
+
else {
|
|
586
|
+
try {
|
|
587
|
+
output = await toolDef.execute(input, {});
|
|
388
588
|
}
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
589
|
+
catch (err) {
|
|
590
|
+
errorMsg = err instanceof Error ? err.message : String(err);
|
|
591
|
+
output = { error: errorMsg };
|
|
392
592
|
}
|
|
393
593
|
}
|
|
594
|
+
const endedAt = new Date();
|
|
595
|
+
toolsUsed.push(t.name);
|
|
596
|
+
toolExecutionSummaries.push({
|
|
597
|
+
toolCallId: t.id,
|
|
598
|
+
toolName: t.name,
|
|
599
|
+
input,
|
|
600
|
+
output,
|
|
601
|
+
...(errorMsg ? { error: errorMsg } : {}),
|
|
602
|
+
startTime: startedAt,
|
|
603
|
+
endTime: endedAt,
|
|
604
|
+
});
|
|
605
|
+
conversation.push({
|
|
606
|
+
role: "tool",
|
|
607
|
+
tool_call_id: t.id,
|
|
608
|
+
content: stringifyToolOutput(output),
|
|
609
|
+
});
|
|
394
610
|
}
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
611
|
+
const justExecuted = toolExecutionSummaries.slice(-stepResult.toolCalls.size);
|
|
612
|
+
emitToolEndFromStepFinish(emitter, justExecuted.map((s) => ({
|
|
613
|
+
toolName: s.toolName,
|
|
614
|
+
output: s.output,
|
|
615
|
+
...(s.error ? { error: s.error } : {}),
|
|
616
|
+
})));
|
|
617
|
+
try {
|
|
618
|
+
await this.handleToolExecutionStorage(justExecuted.map((s) => ({
|
|
619
|
+
toolCallId: s.toolCallId,
|
|
620
|
+
toolName: s.toolName,
|
|
621
|
+
input: s.input,
|
|
622
|
+
output: s.output,
|
|
623
|
+
})), justExecuted.map((s) => ({
|
|
624
|
+
toolCallId: s.toolCallId,
|
|
625
|
+
toolName: s.toolName,
|
|
626
|
+
output: s.output,
|
|
627
|
+
})), options, new Date());
|
|
412
628
|
}
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
if (detected) {
|
|
419
|
-
logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
|
|
420
|
-
stampNoOutputSpan(detected.sentinel);
|
|
421
|
-
yield detected.sentinel;
|
|
422
|
-
}
|
|
629
|
+
catch (err) {
|
|
630
|
+
logger.warn("[LiteLLMProvider] Failed to store tool executions", {
|
|
631
|
+
provider: this.providerName,
|
|
632
|
+
error: err instanceof Error ? err.message : String(err),
|
|
633
|
+
});
|
|
423
634
|
}
|
|
424
635
|
}
|
|
425
636
|
/**
|
|
426
|
-
* Generate an embedding for a single text input
|
|
427
|
-
* Uses the LiteLLM proxy with OpenAI-compatible embedding API
|
|
637
|
+
* Generate an embedding for a single text input via native /v1/embeddings.
|
|
428
638
|
*/
|
|
429
639
|
async embed(text, modelName) {
|
|
430
|
-
const { embed: aiEmbed } = await import("../utils/generation.js");
|
|
431
|
-
const { createOpenAI } = await import("@ai-sdk/openai");
|
|
432
|
-
const config = getLiteLLMConfig();
|
|
433
640
|
const embeddingModelName = modelName ||
|
|
434
641
|
process.env.LITELLM_EMBEDDING_MODEL ||
|
|
435
642
|
"gemini-embedding-001";
|
|
436
|
-
const
|
|
437
|
-
|
|
438
|
-
apiKey: this.credentials?.apiKey ?? config.apiKey,
|
|
439
|
-
fetch: createProxyFetch(),
|
|
440
|
-
});
|
|
441
|
-
const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
|
|
442
|
-
// Wrap in withTimeout so stalled upstream embedding requests abort instead
|
|
443
|
-
// of hanging forever. 30s matches the default for embedding endpoints
|
|
444
|
-
// across the OpenAI-compatible cluster.
|
|
445
|
-
const result = await withTimeout(aiEmbed({ model: embeddingModel, value: text }), 30_000, "litellm", "generate");
|
|
446
|
-
return result.embedding;
|
|
643
|
+
const [embedding] = await this.callEmbeddings(embeddingModelName, [text], "embed");
|
|
644
|
+
return embedding;
|
|
447
645
|
}
|
|
448
646
|
/**
|
|
449
|
-
* Generate embeddings for multiple text inputs
|
|
450
|
-
* Uses the LiteLLM proxy with OpenAI-compatible embedding API
|
|
647
|
+
* Generate embeddings for multiple text inputs via native /v1/embeddings.
|
|
451
648
|
*/
|
|
452
649
|
async embedMany(texts, modelName) {
|
|
453
|
-
const { embedMany: aiEmbedMany } = await import("../utils/generation.js");
|
|
454
|
-
const { createOpenAI } = await import("@ai-sdk/openai");
|
|
455
|
-
const config = getLiteLLMConfig();
|
|
456
650
|
const embeddingModelName = modelName ||
|
|
457
651
|
process.env.LITELLM_EMBEDDING_MODEL ||
|
|
458
652
|
"gemini-embedding-001";
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
const
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
653
|
+
return this.callEmbeddings(embeddingModelName, texts, "embedMany");
|
|
654
|
+
}
|
|
655
|
+
async callEmbeddings(modelName, input, operation) {
|
|
656
|
+
const url = `${stripTrailingSlash(this.config.baseURL)}/embeddings`;
|
|
657
|
+
const fetchImpl = createProxyFetch();
|
|
658
|
+
const timeoutController = createTimeoutController(30_000, this.providerName, "generate");
|
|
659
|
+
try {
|
|
660
|
+
const res = await fetchImpl(url, {
|
|
661
|
+
method: "POST",
|
|
662
|
+
headers: {
|
|
663
|
+
"Content-Type": "application/json",
|
|
664
|
+
Authorization: `Bearer ${this.config.apiKey}`,
|
|
665
|
+
},
|
|
666
|
+
body: JSON.stringify({
|
|
667
|
+
model: modelName,
|
|
668
|
+
input: input.length === 1 ? input[0] : input,
|
|
669
|
+
}),
|
|
670
|
+
...(timeoutController?.controller.signal
|
|
671
|
+
? { signal: timeoutController.controller.signal }
|
|
672
|
+
: {}),
|
|
673
|
+
});
|
|
674
|
+
if (!res.ok) {
|
|
675
|
+
const bodyText = await res.text().catch(() => "");
|
|
676
|
+
const parsed = bodyText
|
|
677
|
+
? JSON.parse(bodyText)
|
|
678
|
+
: undefined;
|
|
679
|
+
throw this.formatProviderError(new Error(parsed?.error?.message ||
|
|
680
|
+
`LiteLLM ${operation} failed with status ${res.status}`));
|
|
681
|
+
}
|
|
682
|
+
const json = (await res.json());
|
|
683
|
+
const embeddings = (json.data ?? [])
|
|
684
|
+
.map((row) => row.embedding)
|
|
685
|
+
.filter((e) => Array.isArray(e));
|
|
686
|
+
if (embeddings.length === 0) {
|
|
687
|
+
throw new ProviderError(`LiteLLM ${operation} returned no embeddings`, this.providerName);
|
|
688
|
+
}
|
|
689
|
+
return embeddings;
|
|
690
|
+
}
|
|
691
|
+
finally {
|
|
692
|
+
timeoutController?.cleanup();
|
|
693
|
+
}
|
|
468
694
|
}
|
|
469
695
|
/**
|
|
470
|
-
* Get available models from LiteLLM proxy
|
|
471
|
-
*
|
|
696
|
+
* Get available models from LiteLLM proxy `/v1/models` endpoint.
|
|
697
|
+
* Caches results for 10 minutes; falls back to env-driven list or a
|
|
698
|
+
* minimal safe default if the API fetch fails.
|
|
472
699
|
*/
|
|
473
700
|
async getAvailableModels() {
|
|
474
|
-
const functionTag = "LiteLLMProvider.getAvailableModels";
|
|
475
701
|
const now = Date.now();
|
|
476
|
-
// Check if cached models are still valid
|
|
477
702
|
if (LiteLLMProvider.modelsCache.length > 0 &&
|
|
478
703
|
now - LiteLLMProvider.modelsCacheTime <
|
|
479
704
|
LiteLLMProvider.MODELS_CACHE_DURATION) {
|
|
480
|
-
logger.debug(
|
|
705
|
+
logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
|
|
481
706
|
cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
|
|
482
707
|
modelCount: LiteLLMProvider.modelsCache.length,
|
|
483
708
|
});
|
|
484
709
|
return LiteLLMProvider.modelsCache;
|
|
485
710
|
}
|
|
486
|
-
// Try to fetch models dynamically
|
|
487
711
|
try {
|
|
488
712
|
const dynamicModels = await this.fetchModelsFromAPI();
|
|
489
713
|
if (dynamicModels.length > 0) {
|
|
490
|
-
// Cache successful result
|
|
491
714
|
LiteLLMProvider.modelsCache = dynamicModels;
|
|
492
715
|
LiteLLMProvider.modelsCacheTime = now;
|
|
493
|
-
logger.debug(`[${functionTag}] Successfully fetched models from API`, {
|
|
494
|
-
modelCount: dynamicModels.length,
|
|
495
|
-
});
|
|
496
716
|
return dynamicModels;
|
|
497
717
|
}
|
|
498
718
|
}
|
|
499
719
|
catch (error) {
|
|
500
|
-
logger.warn(
|
|
501
|
-
error: error instanceof Error ? error.message : String(error),
|
|
502
|
-
});
|
|
720
|
+
logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
|
|
503
721
|
}
|
|
504
|
-
|
|
505
|
-
|
|
722
|
+
return this.getFallbackModels();
|
|
723
|
+
}
|
|
724
|
+
async getFirstAvailableModel() {
|
|
725
|
+
const models = await this.getAvailableModels();
|
|
726
|
+
return models[0] || FALLBACK_LITELLM_MODEL;
|
|
727
|
+
}
|
|
728
|
+
getFallbackModels() {
|
|
729
|
+
return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
|
|
506
730
|
.map((m) => m.trim())
|
|
507
731
|
.filter((m) => m.length > 0) || [
|
|
508
|
-
"openai/gpt-4o",
|
|
732
|
+
"openai/gpt-4o",
|
|
509
733
|
"anthropic/claude-3-haiku",
|
|
510
734
|
"meta-llama/llama-3.1-8b-instruct",
|
|
511
735
|
"google/gemini-2.5-flash",
|
|
512
|
-
];
|
|
513
|
-
logger.debug(`[${functionTag}] Using fallback model list`, {
|
|
514
|
-
modelCount: fallbackModels.length,
|
|
515
|
-
});
|
|
516
|
-
return fallbackModels;
|
|
736
|
+
]);
|
|
517
737
|
}
|
|
518
|
-
/**
|
|
519
|
-
* Fetch available models from LiteLLM proxy /v1/models endpoint
|
|
520
|
-
* @private
|
|
521
|
-
*/
|
|
522
738
|
async fetchModelsFromAPI() {
|
|
523
|
-
const
|
|
524
|
-
const
|
|
525
|
-
const resolvedBaseURL = this.credentials?.baseURL ?? config.baseURL;
|
|
526
|
-
const resolvedApiKey = this.credentials?.apiKey ?? config.apiKey;
|
|
527
|
-
const modelsUrl = `${resolvedBaseURL}/v1/models`;
|
|
739
|
+
const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
|
|
740
|
+
const proxyFetch = createProxyFetch();
|
|
528
741
|
const controller = new AbortController();
|
|
529
|
-
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
742
|
+
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
530
743
|
try {
|
|
531
|
-
logger.debug(`[${functionTag}] Fetching models from ${modelsUrl}`);
|
|
532
|
-
const proxyFetch = createProxyFetch();
|
|
533
744
|
const response = await proxyFetch(modelsUrl, {
|
|
534
745
|
method: "GET",
|
|
535
746
|
headers: {
|
|
536
|
-
Authorization: `Bearer ${
|
|
747
|
+
Authorization: `Bearer ${this.config.apiKey}`,
|
|
537
748
|
"Content-Type": "application/json",
|
|
538
749
|
},
|
|
539
750
|
signal: controller.signal,
|
|
540
751
|
});
|
|
541
|
-
clearTimeout(timeoutId);
|
|
542
752
|
if (!response.ok) {
|
|
543
753
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
544
754
|
}
|
|
545
|
-
const data = await response.json();
|
|
546
|
-
|
|
547
|
-
if (data && Array.isArray(data.data)) {
|
|
548
|
-
const models = data.data
|
|
549
|
-
.map((model) => typeof model === "object" &&
|
|
550
|
-
model !== null &&
|
|
551
|
-
"id" in model &&
|
|
552
|
-
typeof model.id === "string"
|
|
553
|
-
? model.id
|
|
554
|
-
: undefined)
|
|
555
|
-
.filter((id) => typeof id === "string" && id.length > 0)
|
|
556
|
-
.sort();
|
|
557
|
-
logger.debug(`[${functionTag}] Successfully parsed models`, {
|
|
558
|
-
totalModels: models.length,
|
|
559
|
-
sampleModels: models.slice(0, 5),
|
|
560
|
-
});
|
|
561
|
-
return models;
|
|
562
|
-
}
|
|
563
|
-
else {
|
|
755
|
+
const data = (await response.json());
|
|
756
|
+
if (!Array.isArray(data.data)) {
|
|
564
757
|
throw new Error("Invalid response format: expected data.data array");
|
|
565
758
|
}
|
|
759
|
+
return data.data
|
|
760
|
+
.map((m) => m.id)
|
|
761
|
+
.filter((id) => typeof id === "string" && id.length > 0)
|
|
762
|
+
.sort();
|
|
566
763
|
}
|
|
567
764
|
catch (error) {
|
|
568
|
-
clearTimeout(timeoutId);
|
|
569
765
|
if (isAbortError(error)) {
|
|
570
766
|
throw new NetworkError("Request timed out after 5 seconds", this.providerName);
|
|
571
767
|
}
|
|
572
768
|
throw error;
|
|
573
769
|
}
|
|
770
|
+
finally {
|
|
771
|
+
clearTimeout(timeoutId);
|
|
772
|
+
}
|
|
574
773
|
}
|
|
575
774
|
}
|