@juspay/neurolink 9.67.1 → 9.67.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,77 +1,59 @@
1
- import { createOpenAI } from "@ai-sdk/openai";
2
1
  import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
3
- import { BaseProvider } from "../core/baseProvider.js";
4
- import { DEFAULT_MAX_STEPS } from "../core/constants.js";
5
- import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
6
2
  import { createProxyFetch } from "../proxy/proxyFetch.js";
7
3
  import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
8
4
  import { isAbortError } from "../utils/errorHandling.js";
9
- import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
10
5
  import { logger } from "../utils/logger.js";
11
- import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
6
+ import { isGemini25Model as isCanonicalGemini25Model } from "../utils/modelDetection.js";
12
7
  import { calculateCost } from "../utils/pricing.js";
13
8
  import { getProviderModel } from "../utils/providerConfig.js";
14
- import { composeAbortSignals, createTimeoutController, TimeoutError, withTimeout, } from "../utils/timeout.js";
15
- import { resolveToolChoice } from "../utils/toolChoice.js";
16
- import { getModelId } from "./providerTypeUtils.js";
17
- import { NoOutputGeneratedError } from "../utils/generationErrors.js";
18
- import { Output, stepCountIs } from "../utils/tool.js";
19
- import { streamText } from "../utils/generation.js";
9
+ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
10
+ import { stripTrailingSlash } from "./openaiChatCompletionsClient.js";
11
+ import { OpenAIChatCompletionsProvider } from "./openaiChatCompletionsBase.js";
20
12
  const streamTracer = trace.getTracer("neurolink.provider.litellm");
21
- // Configuration helpers
22
- const getLiteLLMConfig = () => {
23
- return {
24
- baseURL: process.env.LITELLM_BASE_URL || "http://localhost:4000",
25
- apiKey: process.env.LITELLM_API_KEY || "sk-anything",
26
- };
27
- };
13
/** Model id handed to getProviderModel as the fallback for LITELLM_MODEL. */
const FALLBACK_LITELLM_MODEL = "openai/gpt-4o-mini";
/**
 * Reads the LiteLLM connection settings from the environment.
 *
 * Unset or empty variables fall back to a local proxy address and a
 * placeholder key.
 *
 * @returns {{ baseURL: string, apiKey: string }}
 */
const getLiteLLMConfig = () => {
    const { LITELLM_BASE_URL, LITELLM_API_KEY } = process.env;
    return {
        baseURL: LITELLM_BASE_URL || "http://localhost:4000",
        apiKey: LITELLM_API_KEY || "sk-anything",
    };
};
28
18
  /**
29
- * Returns the default model name for LiteLLM.
30
- *
31
- * LiteLLM uses a 'provider/model' format for model names.
32
- * For example:
33
- * - 'openai/gpt-4o-mini'
34
- * - 'openai/gpt-3.5-turbo'
35
- * - 'anthropic/claude-3-sonnet-20240229'
36
- * - 'google/gemini-pro'
37
- *
38
- * You can override the default by setting the LITELLM_MODEL environment variable.
19
+ * LiteLLM uses a 'provider/model' format. Override via LITELLM_MODEL env var.
39
20
  */
40
- const getDefaultLiteLLMModel = () => {
41
- return getProviderModel("LITELLM_MODEL", "openai/gpt-4o-mini");
21
const getDefaultLiteLLMModel = () => {
    // Resolution of LITELLM_MODEL versus the fallback id is delegated to getProviderModel.
    const resolvedModel = getProviderModel("LITELLM_MODEL", FALLBACK_LITELLM_MODEL);
    return resolvedModel;
};
22
// LiteLLM addresses models as `provider/model` (e.g. "google/gemini-2.5-flash").
// Only the text after the final "/" is handed to the canonical detector in
// src/lib/utils/modelDetection.ts, so the matching rule is defined in one place.
const isGemini25Model = (modelName) => {
    // lastIndexOf yields -1 when there is no "/", so slice(0) keeps the whole id.
    const bareModelId = modelName.slice(modelName.lastIndexOf("/") + 1);
    return isCanonicalGemini25Model(bareModelId);
};
43
31
  /**
44
- * LiteLLM Provider - BaseProvider Implementation
45
- * Provides access to 100+ models via LiteLLM proxy server
32
+ * LiteLLM Provider — direct HTTP, no AI SDK. Talks to a LiteLLM proxy
33
+ * server (or any deployment that speaks OpenAI chat-completions + the
34
+ * `/v1/models` and `/v1/embeddings` endpoints).
35
+ *
36
+ * All request/stream/tool-loop orchestration lives in
37
+ * `OpenAIChatCompletionsProvider`. This class adds LiteLLM-specific
38
+ * behaviour: OTel span wrap with cost (`onStreamStart`), Gemini 2.5
39
+ * maxTokens skip (`adjustBuildBodyOptions`), ModelAccessDeniedError on
40
+ * 403, 10-minute model cache (`getAvailableModels`), `LITELLM_FALLBACK_MODELS`
41
+ * env-driven fallback list, and native `/v1/embeddings`.
46
42
  */
47
- export class LiteLLMProvider extends BaseProvider {
48
- model;
49
- credentials;
50
- // Cache for available models to avoid repeated API calls
43
+ export class LiteLLMProvider extends OpenAIChatCompletionsProvider {
51
44
  static modelsCache = [];
52
45
  static modelsCacheTime = 0;
53
46
  static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
54
47
  constructor(modelName, sdk, _region, credentials) {
55
- super(modelName, "litellm", sdk);
56
- // Store per-request credentials for use in embed/embedMany/fetchModelsFromAPI
57
- this.credentials = credentials;
58
- // Initialize LiteLLM using OpenAI SDK with explicit configuration
59
- const config = getLiteLLMConfig();
60
- // Create OpenAI SDK instance configured for LiteLLM proxy
61
- // LiteLLM acts as a proxy server that implements the OpenAI-compatible API.
62
- // To communicate with LiteLLM instead of the default OpenAI endpoint, we use createOpenAI
63
- // with a custom baseURL and apiKey. This ensures all requests are routed through the LiteLLM
64
- // proxy, allowing access to multiple models and custom authentication.
65
- const customOpenAI = createOpenAI({
66
- baseURL: credentials?.baseURL ?? config.baseURL,
67
- apiKey: credentials?.apiKey ?? config.apiKey,
68
- fetch: createProxyFetch(),
48
+ const envConfig = getLiteLLMConfig();
49
+ super("litellm", modelName, sdk, {
50
+ baseURL: credentials?.baseURL ?? envConfig.baseURL,
51
+ apiKey: credentials?.apiKey ?? envConfig.apiKey,
69
52
  });
70
- this.model = customOpenAI.chat(this.modelName || getDefaultLiteLLMModel());
71
53
  logger.debug("LiteLLM Provider initialized", {
72
54
  modelName: this.modelName,
73
55
  provider: this.providerName,
74
- baseURL: config.baseURL,
56
+ baseURL: this.config.baseURL,
75
57
  });
76
58
  }
77
59
  getProviderName() {
@@ -80,17 +62,84 @@ export class LiteLLMProvider extends BaseProvider {
80
62
  getDefaultModel() {
81
63
  return getDefaultLiteLLMModel();
82
64
  }
65
+ getFallbackModelName() {
66
+ return FALLBACK_LITELLM_MODEL;
67
+ }
68
+ getFallbackModels() {
69
+ return (process.env.LITELLM_FALLBACK_MODELS?.split(",")
70
+ .map((m) => m.trim())
71
+ .filter((m) => m.length > 0) || [
72
+ "openai/gpt-4o",
73
+ "anthropic/claude-3-haiku",
74
+ "meta-llama/llama-3.1-8b-instruct",
75
+ "google/gemini-2.5-flash",
76
+ ]);
77
+ }
78
+ /**
79
+ * Gemini 2.5 models on LiteLLM have a known compatibility issue with
80
+ * `max_tokens` — strip it before the wire body is built. Applies to
81
+ * both streaming and non-streaming paths.
82
+ */
83
+ adjustBuildBodyOptions(modelId, opts) {
84
+ if (isGemini25Model(modelId) && opts.maxTokens !== undefined) {
85
+ if (logger.shouldLog("debug")) {
86
+ logger.debug("LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)", { modelId, requestedMaxTokens: opts.maxTokens });
87
+ }
88
+ return { ...opts, maxTokens: undefined };
89
+ }
90
+ return opts;
91
+ }
83
92
  /**
84
- * Returns the Vercel AI SDK model instance for LiteLLM
93
+ * Wrap the stream in an OTel span to capture provider-level latency,
94
+ * token usage, finish reason, and cost. Matches the pre-migration
95
+ * behaviour where streamText was wrapped in `neurolink.provider.streamText`.
85
96
  */
86
- getAISDKModel() {
87
- return this.model;
97
+ onStreamStart(modelId) {
98
+ const span = streamTracer.startSpan("neurolink.provider.streamText", {
99
+ kind: SpanKind.CLIENT,
100
+ attributes: {
101
+ "gen_ai.system": "litellm",
102
+ "gen_ai.request.model": modelId,
103
+ },
104
+ });
105
+ let spanEnded = false;
106
+ const endSpan = () => {
107
+ if (!spanEnded) {
108
+ spanEnded = true;
109
+ span.end();
110
+ }
111
+ };
112
+ return {
113
+ onUsage: (usage) => {
114
+ span.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens);
115
+ span.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens);
116
+ const cost = calculateCost(this.providerName, this.modelName, {
117
+ input: usage.promptTokens,
118
+ output: usage.completionTokens,
119
+ total: usage.totalTokens,
120
+ });
121
+ if (cost && cost > 0) {
122
+ span.setAttribute("neurolink.cost", cost);
123
+ }
124
+ },
125
+ onFinish: (reason, capturedError) => {
126
+ span.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
127
+ if (reason === "error") {
128
+ span.setStatus({
129
+ code: SpanStatusCode.ERROR,
130
+ message: capturedError instanceof Error
131
+ ? capturedError.message
132
+ : String(capturedError ?? "stream error"),
133
+ });
134
+ }
135
+ endSpan();
136
+ },
137
+ };
88
138
  }
89
139
  formatProviderError(error) {
90
140
  if (error instanceof TimeoutError) {
91
141
  return new NetworkError(`Request timed out: ${error.message}`, this.providerName);
92
142
  }
93
- // Check for timeout by error name and message as fallback
94
143
  const errorRecord = error;
95
144
  if (errorRecord?.name === "TimeoutError" ||
96
145
  (typeof errorRecord?.message === "string" &&
@@ -103,10 +152,10 @@ export class LiteLLMProvider extends BaseProvider {
103
152
  return new NetworkError("LiteLLM proxy server not available. Please start the LiteLLM proxy server at " +
104
153
  `${process.env.LITELLM_BASE_URL || "http://localhost:4000"}`, this.providerName);
105
154
  }
106
- // Curator P1-1: detect "team not allowed to access model" responses
107
- // and surface as ModelAccessDeniedError with the allowed_models array
108
- // parsed from the body. Must run before the generic "API key" check
109
- // because LiteLLM phrases this as a 403 distinct from auth.
155
+ // Curator P1-1: detect "team not allowed to access model" responses and
156
+ // surface as ModelAccessDeniedError with the allowed_models array parsed
157
+ // from the body. Must run before the generic "API key" check because
158
+ // LiteLLM phrases this as a 403 distinct from auth.
110
159
  if (isModelAccessDeniedMessage(errorRecord.message)) {
111
160
  return new ModelAccessDeniedError(errorRecord.message, {
112
161
  provider: this.providerName,
@@ -130,446 +179,127 @@ export class LiteLLMProvider extends BaseProvider {
130
179
  return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
131
180
  }
132
181
  /**
133
- * LiteLLM supports tools for compatible models
134
- */
135
- supportsTools() {
136
- return true;
137
- }
138
- /**
139
- * Provider-specific streaming implementation
140
- * Note: This is only used when tools are disabled
141
- */
142
- async executeStream(options, analysisSchema) {
143
- this.validateStreamOptions(options);
144
- const startTime = Date.now();
145
- let chunkCount = 0; // Track chunk count for debugging
146
- // Reviewer follow-up: capture upstream provider errors via onError so
147
- // the post-stream NoOutput detect can propagate the *real* cause
148
- // (content_filter, provider crash, etc.) into the sentinel's
149
- // providerError / modelResponseRaw instead of "No output generated".
150
- let capturedProviderError;
151
- const timeout = this.getTimeout(options);
152
- const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
153
- try {
154
- // Build message array from options with multimodal support
155
- // Using protected helper from BaseProvider to eliminate code duplication
156
- const messages = await this.buildMessagesForStream(options);
157
- const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
158
- // Get tools - options.tools is pre-merged by BaseProvider.stream()
159
- const shouldUseTools = !options.disableTools && this.supportsTools();
160
- const tools = shouldUseTools
161
- ? options.tools || (await this.getAllTools())
162
- : {};
163
- logger.debug(`LiteLLM: Tools for streaming`, {
164
- shouldUseTools,
165
- toolCount: Object.keys(tools).length,
166
- toolNames: Object.keys(tools),
167
- });
168
- // Model-specific maxTokens handling - Gemini 2.5 models have issues with maxTokens
169
- const modelName = this.modelName || getDefaultLiteLLMModel();
170
- const isGemini25Model = modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
171
- const maxTokens = isGemini25Model ? undefined : options.maxTokens;
172
- if (isGemini25Model && options.maxTokens) {
173
- logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, {
174
- modelName,
175
- requestedMaxTokens: options.maxTokens,
176
- });
177
- }
178
- // Build complete stream options with proper typing - matching Vertex pattern
179
- let streamOptions = {
180
- model: model,
181
- messages: messages,
182
- temperature: options.temperature,
183
- ...(maxTokens && { maxTokens }), // Conditionally include maxTokens
184
- ...(shouldUseTools &&
185
- Object.keys(tools).length > 0 && {
186
- tools,
187
- toolChoice: resolveToolChoice(options, tools, shouldUseTools),
188
- stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
189
- }),
190
- abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
191
- experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
192
- experimental_repairToolCall: this.getToolCallRepairFn(options),
193
- onError: (event) => {
194
- const error = event.error;
195
- const errorMessage = error instanceof Error ? error.message : String(error);
196
- // Reviewer follow-up: propagate the captured error to the
197
- // post-stream NoOutput sentinel so telemetry sees the real
198
- // provider cause instead of "No output generated".
199
- capturedProviderError = error;
200
- logger.error(`LiteLLM: Stream error`, {
201
- provider: this.providerName,
202
- modelName: this.modelName,
203
- error: errorMessage,
204
- chunkCount,
205
- });
206
- },
207
- onFinish: (event) => {
208
- logger.debug(`LiteLLM: Stream finished`, {
209
- finishReason: event.finishReason,
210
- totalChunks: chunkCount,
211
- });
212
- },
213
- onChunk: () => {
214
- chunkCount++;
215
- },
216
- onStepFinish: ({ toolCalls, toolResults }) => {
217
- emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
218
- logger.info("Tool execution completed", { toolResults, toolCalls });
219
- for (const toolCall of toolCalls) {
220
- collectedToolCalls.push({
221
- toolCallId: toolCall.toolCallId,
222
- toolName: toolCall.toolName,
223
- args: toolCall.args ??
224
- toolCall.input ??
225
- toolCall
226
- .parameters ??
227
- {},
228
- });
229
- }
230
- for (const toolResult of toolResults) {
231
- const rawToolResult = toolResult;
232
- collectedToolResults.push({
233
- toolName: toolResult.toolName,
234
- status: rawToolResult.error ? "failure" : "success",
235
- output: (rawToolResult.output ??
236
- rawToolResult.result) ??
237
- undefined,
238
- error: rawToolResult.error,
239
- id: rawToolResult.toolCallId ?? toolResult.toolName,
240
- });
241
- }
242
- this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
243
- logger.warn("[LiteLLMProvider] Failed to store tool executions", {
244
- provider: this.providerName,
245
- error: error instanceof Error ? error.message : String(error),
246
- });
247
- });
248
- },
249
- };
250
- // Add analysisSchema support if provided
251
- if (analysisSchema) {
252
- try {
253
- streamOptions = {
254
- ...streamOptions,
255
- experimental_output: Output.object({
256
- schema: analysisSchema,
257
- }),
258
- };
259
- }
260
- catch (error) {
261
- logger.warn("Schema application failed, continuing without schema", {
262
- error: String(error),
263
- });
264
- }
265
- }
266
- // Wrap streamText in an OTel span to capture provider-level latency, token usage, and cost
267
- const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
268
- kind: SpanKind.CLIENT,
269
- attributes: {
270
- "gen_ai.system": "litellm",
271
- "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
272
- },
273
- });
274
- let result;
275
- const collectedToolCalls = [];
276
- const collectedToolResults = [];
277
- try {
278
- result = streamText(streamOptions);
279
- }
280
- catch (streamError) {
281
- streamSpan.setStatus({
282
- code: SpanStatusCode.ERROR,
283
- message: streamError instanceof Error
284
- ? streamError.message
285
- : String(streamError),
286
- });
287
- streamSpan.end();
288
- throw streamError;
289
- }
290
- // Collect token usage, cost, and finish reason asynchronously when the stream completes,
291
- // then end the span. This avoids blocking the stream consumer.
292
- Promise.resolve(result.usage)
293
- .then((usage) => {
294
- streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
295
- streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
296
- const cost = calculateCost(this.providerName, this.modelName, {
297
- input: usage.inputTokens || 0,
298
- output: usage.outputTokens || 0,
299
- total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
300
- });
301
- if (cost && cost > 0) {
302
- streamSpan.setAttribute("neurolink.cost", cost);
303
- }
304
- })
305
- .catch(() => {
306
- // Usage may not be available if the stream is aborted
307
- });
308
- Promise.resolve(result.finishReason)
309
- .then((reason) => {
310
- streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
311
- })
312
- .catch(() => {
313
- // Finish reason may not be available if the stream is aborted
314
- });
315
- Promise.resolve(result.text)
316
- .then(() => {
317
- streamSpan.end();
318
- })
319
- .catch((err) => {
320
- streamSpan.setStatus({
321
- code: SpanStatusCode.ERROR,
322
- message: err instanceof Error ? err.message : String(err),
323
- });
324
- streamSpan.end();
325
- });
326
- timeoutController?.cleanup();
327
- const transformedStream = this.createLiteLLMTransformedStream(result, () => capturedProviderError);
328
- // Create analytics promise that resolves after stream completion
329
- const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, result, Date.now() - startTime, {
330
- requestId: options.requestId ??
331
- `litellm-stream-${Date.now()}`,
332
- streamingMode: true,
333
- });
334
- return {
335
- stream: transformedStream,
336
- provider: this.providerName,
337
- model: this.modelName,
338
- ...(shouldUseTools && {
339
- toolCalls: collectedToolCalls,
340
- toolResults: collectedToolResults,
341
- }),
342
- analytics: analyticsPromise,
343
- metadata: {
344
- startTime,
345
- streamId: `litellm-${Date.now()}`,
346
- },
347
- };
348
- }
349
- catch (error) {
350
- timeoutController?.cleanup();
351
- throw this.handleProviderError(error);
352
- }
353
- }
354
- async *createLiteLLMTransformedStream(result, getCapturedProviderError) {
355
- // Reviewer follow-up: gate the post-stream NoOutput detect on
356
- // *content yielded*, not raw chunk count. AI SDK fullStream emits
357
- // control events ({ type: "start" }, "step-start", etc.) before any
358
- // text-delta — those incremented chunkCount and made the post-stream
359
- // detect dead even when zero text was produced.
360
- let contentYielded = 0;
361
- try {
362
- const streamToUse = result.fullStream || result.textStream;
363
- for await (const chunk of streamToUse) {
364
- if (chunk && typeof chunk === "object") {
365
- if ("type" in chunk && chunk.type === "error") {
366
- const errorChunk = chunk;
367
- logger.error(`LiteLLM: Error chunk received:`, {
368
- errorType: errorChunk.type,
369
- errorDetails: errorChunk.error,
370
- });
371
- throw this.formatProviderError(new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`));
372
- }
373
- if ("textDelta" in chunk) {
374
- const textDelta = chunk.textDelta;
375
- if (textDelta) {
376
- contentYielded++;
377
- yield { content: textDelta };
378
- }
379
- }
380
- else if ("type" in chunk &&
381
- chunk.type === "tool-call" &&
382
- "toolCallId" in chunk) {
383
- logger.debug("LiteLLM: Tool call", {
384
- toolCallId: String(chunk.toolCallId),
385
- toolName: "toolName" in chunk ? String(chunk.toolName) : "unknown",
386
- });
387
- }
388
- }
389
- else if (typeof chunk === "string") {
390
- contentYielded++;
391
- yield { content: chunk };
392
- }
393
- }
394
- }
395
- catch (streamError) {
396
- if (NoOutputGeneratedError.isInstance(streamError)) {
397
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
398
- // Yield the enriched sentinel so downstream telemetry has
399
- // finishReason / usage / providerError. Match the other
400
- // providers' pattern: yield + return (no throw). NeuroLink's
401
- // iteration fallback at neurolink.ts only fires for
402
- // looksLikeModelAccessDenied errors, so a NoOutput throw here
403
- // would NOT trigger any fallback — and it would mask the
404
- // already-yielded sentinel from consumers expecting a clean
405
- // stream. The sentinel itself signals the no-output condition.
406
- const sentinel = await buildNoOutputSentinel(streamError, result, getCapturedProviderError?.());
407
- stampNoOutputSpan(sentinel);
408
- yield sentinel;
409
- return;
410
- }
411
- throw streamError;
412
- }
413
- // Curator P3-6 (round-2 fix): production trigger sets the error on
414
- // result.finishReason rejection (NOT thrown from textStream).
415
- // Surface that path here, matching the catch above (yield + return).
416
- if (contentYielded === 0) {
417
- const detected = await detectPostStreamNoOutput(result, getCapturedProviderError?.());
418
- if (detected) {
419
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
420
- stampNoOutputSpan(detected.sentinel);
421
- yield detected.sentinel;
422
- }
423
- }
424
- }
425
- /**
426
- * Generate an embedding for a single text input
427
- * Uses the LiteLLM proxy with OpenAI-compatible embedding API
428
- */
429
- async embed(text, modelName) {
430
- const { embed: aiEmbed } = await import("../utils/generation.js");
431
- const { createOpenAI } = await import("@ai-sdk/openai");
432
- const config = getLiteLLMConfig();
433
- const embeddingModelName = modelName ||
434
- process.env.LITELLM_EMBEDDING_MODEL ||
435
- "gemini-embedding-001";
436
- const customOpenAI = createOpenAI({
437
- baseURL: this.credentials?.baseURL ?? config.baseURL,
438
- apiKey: this.credentials?.apiKey ?? config.apiKey,
439
- fetch: createProxyFetch(),
440
- });
441
- const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
442
- // Wrap in withTimeout so stalled upstream embedding requests abort instead
443
- // of hanging forever. 30s matches the default for embedding endpoints
444
- // across the OpenAI-compatible cluster.
445
- const result = await withTimeout(aiEmbed({ model: embeddingModel, value: text }), 30_000, "litellm", "generate");
446
- return result.embedding;
447
- }
448
- /**
449
- * Generate embeddings for multiple text inputs
450
- * Uses the LiteLLM proxy with OpenAI-compatible embedding API
451
- */
452
- async embedMany(texts, modelName) {
453
- const { embedMany: aiEmbedMany } = await import("../utils/generation.js");
454
- const { createOpenAI } = await import("@ai-sdk/openai");
455
- const config = getLiteLLMConfig();
456
- const embeddingModelName = modelName ||
457
- process.env.LITELLM_EMBEDDING_MODEL ||
458
- "gemini-embedding-001";
459
- const customOpenAI = createOpenAI({
460
- baseURL: this.credentials?.baseURL ?? config.baseURL,
461
- apiKey: this.credentials?.apiKey ?? config.apiKey,
462
- fetch: createProxyFetch(),
463
- });
464
- const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
465
- // Wrap in withTimeout so a single slow batch doesn't hang indefinitely.
466
- const result = await withTimeout(aiEmbedMany({ model: embeddingModel, values: texts }), 30_000, "litellm", "generate");
467
- return result.embeddings;
468
- }
469
- /**
470
- * Get available models from LiteLLM proxy server
471
- * Dynamically fetches from /v1/models endpoint with caching and fallback
182
+ * Get available models from LiteLLM proxy `/v1/models` endpoint.
183
+ * Caches results for 10 minutes; falls back to env-driven list or a
184
+ * minimal safe default if the API fetch fails.
472
185
  */
473
186
  async getAvailableModels() {
474
- const functionTag = "LiteLLMProvider.getAvailableModels";
475
187
  const now = Date.now();
476
- // Check if cached models are still valid
477
188
  if (LiteLLMProvider.modelsCache.length > 0 &&
478
189
  now - LiteLLMProvider.modelsCacheTime <
479
190
  LiteLLMProvider.MODELS_CACHE_DURATION) {
480
- logger.debug(`[${functionTag}] Using cached models`, {
191
+ logger.debug("[LiteLLMProvider.getAvailableModels] Using cached models", {
481
192
  cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
482
193
  modelCount: LiteLLMProvider.modelsCache.length,
483
194
  });
484
195
  return LiteLLMProvider.modelsCache;
485
196
  }
486
- // Try to fetch models dynamically
487
197
  try {
488
198
  const dynamicModels = await this.fetchModelsFromAPI();
489
199
  if (dynamicModels.length > 0) {
490
- // Cache successful result
491
200
  LiteLLMProvider.modelsCache = dynamicModels;
492
201
  LiteLLMProvider.modelsCacheTime = now;
493
- logger.debug(`[${functionTag}] Successfully fetched models from API`, {
494
- modelCount: dynamicModels.length,
495
- });
496
202
  return dynamicModels;
497
203
  }
498
204
  }
499
205
  catch (error) {
500
- logger.warn(`[${functionTag}] Failed to fetch models from API, using fallback`, {
501
- error: error instanceof Error ? error.message : String(error),
502
- });
206
+ logger.warn("[LiteLLMProvider.getAvailableModels] Failed to fetch models from API, using fallback", { error: error instanceof Error ? error.message : String(error) });
503
207
  }
504
- // Fallback to hardcoded list if API fetch fails
505
- const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",")
506
- .map((m) => m.trim())
507
- .filter((m) => m.length > 0) || [
508
- "openai/gpt-4o", // minimal safe baseline
509
- "anthropic/claude-3-haiku",
510
- "meta-llama/llama-3.1-8b-instruct",
511
- "google/gemini-2.5-flash",
512
- ];
513
- logger.debug(`[${functionTag}] Using fallback model list`, {
514
- modelCount: fallbackModels.length,
515
- });
516
- return fallbackModels;
208
+ return this.getFallbackModels();
517
209
  }
518
- /**
519
- * Fetch available models from LiteLLM proxy /v1/models endpoint
520
- * @private
521
- */
522
210
  async fetchModelsFromAPI() {
523
- const functionTag = "LiteLLMProvider.fetchModelsFromAPI";
524
- const config = getLiteLLMConfig();
525
- const resolvedBaseURL = this.credentials?.baseURL ?? config.baseURL;
526
- const resolvedApiKey = this.credentials?.apiKey ?? config.apiKey;
527
- const modelsUrl = `${resolvedBaseURL}/v1/models`;
211
+ const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/v1/models`;
212
+ const proxyFetch = createProxyFetch();
528
213
  const controller = new AbortController();
529
- const timeoutId = setTimeout(() => controller.abort(), 5000); // 5 second timeout
214
+ const timeoutId = setTimeout(() => controller.abort(), 5000);
530
215
  try {
531
- logger.debug(`[${functionTag}] Fetching models from ${modelsUrl}`);
532
- const proxyFetch = createProxyFetch();
533
216
  const response = await proxyFetch(modelsUrl, {
534
217
  method: "GET",
535
218
  headers: {
536
- Authorization: `Bearer ${resolvedApiKey}`,
219
+ Authorization: `Bearer ${this.config.apiKey}`,
537
220
  "Content-Type": "application/json",
538
221
  },
539
222
  signal: controller.signal,
540
223
  });
541
- clearTimeout(timeoutId);
542
224
  if (!response.ok) {
543
225
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
544
226
  }
545
- const data = await response.json();
546
- // Parse OpenAI-compatible models response
547
- if (data && Array.isArray(data.data)) {
548
- const models = data.data
549
- .map((model) => typeof model === "object" &&
550
- model !== null &&
551
- "id" in model &&
552
- typeof model.id === "string"
553
- ? model.id
554
- : undefined)
555
- .filter((id) => typeof id === "string" && id.length > 0)
556
- .sort();
557
- logger.debug(`[${functionTag}] Successfully parsed models`, {
558
- totalModels: models.length,
559
- sampleModels: models.slice(0, 5),
560
- });
561
- return models;
562
- }
563
- else {
227
+ const data = (await response.json());
228
+ if (!Array.isArray(data.data)) {
564
229
  throw new Error("Invalid response format: expected data.data array");
565
230
  }
231
+ return data.data
232
+ .map((m) => m.id)
233
+ .filter((id) => typeof id === "string" && id.length > 0)
234
+ .sort();
566
235
  }
567
236
  catch (error) {
568
- clearTimeout(timeoutId);
569
237
  if (isAbortError(error)) {
570
238
  throw new NetworkError("Request timed out after 5 seconds", this.providerName);
571
239
  }
572
240
  throw error;
573
241
  }
242
+ finally {
243
+ clearTimeout(timeoutId);
244
+ }
245
+ }
246
+ /**
247
+ * Generate an embedding for a single text input via native /v1/embeddings.
248
+ */
249
+ async embed(text, modelName) {
250
+ const embeddingModelName = modelName ||
251
+ process.env.LITELLM_EMBEDDING_MODEL ||
252
+ "gemini-embedding-001";
253
+ const [embedding] = await this.callEmbeddings(embeddingModelName, [text], "embed");
254
+ return embedding;
255
+ }
256
+ /**
257
+ * Generate embeddings for multiple text inputs via native /v1/embeddings.
258
+ */
259
+ async embedMany(texts, modelName) {
260
+ const embeddingModelName = modelName ||
261
+ process.env.LITELLM_EMBEDDING_MODEL ||
262
+ "gemini-embedding-001";
263
+ return this.callEmbeddings(embeddingModelName, texts, "embedMany");
264
+ }
265
+ async callEmbeddings(modelName, input, operation) {
266
+ const url = `${stripTrailingSlash(this.config.baseURL)}/embeddings`;
267
+ const fetchImpl = createProxyFetch();
268
+ const timeoutController = createTimeoutController(30_000, this.providerName, "generate");
269
+ try {
270
+ const res = await fetchImpl(url, {
271
+ method: "POST",
272
+ headers: {
273
+ "Content-Type": "application/json",
274
+ Authorization: `Bearer ${this.config.apiKey}`,
275
+ },
276
+ body: JSON.stringify({
277
+ model: modelName,
278
+ input: input.length === 1 ? input[0] : input,
279
+ }),
280
+ ...(timeoutController?.controller.signal
281
+ ? { signal: timeoutController.controller.signal }
282
+ : {}),
283
+ });
284
+ if (!res.ok) {
285
+ const bodyText = await res.text().catch(() => "");
286
+ const parsed = bodyText
287
+ ? JSON.parse(bodyText)
288
+ : undefined;
289
+ throw this.formatProviderError(new Error(parsed?.error?.message ||
290
+ `LiteLLM ${operation} failed with status ${res.status}`));
291
+ }
292
+ const json = (await res.json());
293
+ const embeddings = (json.data ?? [])
294
+ .map((row) => row.embedding)
295
+ .filter((e) => Array.isArray(e));
296
+ if (embeddings.length === 0) {
297
+ throw new ProviderError(`LiteLLM ${operation} returned no embeddings`, this.providerName);
298
+ }
299
+ return embeddings;
300
+ }
301
+ finally {
302
+ timeoutController?.cleanup();
303
+ }
574
304
  }
575
305
  }