@juspay/neurolink 9.41.0 → 9.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
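A diff like this can be reproduced locally with npm's built-in diff command:

    npm diff --diff=@juspay/neurolink@9.41.0 --diff=@juspay/neurolink@9.42.1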
Files changed (212)
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +7 -1
  3. package/dist/auth/anthropicOAuth.d.ts +18 -3
  4. package/dist/auth/anthropicOAuth.js +149 -4
  5. package/dist/auth/providers/firebase.js +5 -1
  6. package/dist/auth/providers/jwt.js +5 -1
  7. package/dist/auth/providers/workos.js +5 -1
  8. package/dist/auth/sessionManager.d.ts +1 -1
  9. package/dist/auth/sessionManager.js +58 -27
  10. package/dist/browser/neurolink.min.js +354 -334
  11. package/dist/cli/commands/mcp.d.ts +6 -0
  12. package/dist/cli/commands/mcp.js +188 -181
  13. package/dist/cli/commands/proxy.d.ts +2 -1
  14. package/dist/cli/commands/proxy.js +713 -431
  15. package/dist/cli/commands/task.js +3 -0
  16. package/dist/cli/factories/commandFactory.d.ts +2 -0
  17. package/dist/cli/factories/commandFactory.js +38 -0
  18. package/dist/cli/parser.js +4 -3
  19. package/dist/client/aiSdkAdapter.js +3 -0
  20. package/dist/client/streamingClient.js +30 -10
  21. package/dist/core/baseProvider.d.ts +6 -1
  22. package/dist/core/baseProvider.js +208 -230
  23. package/dist/core/factory.d.ts +3 -0
  24. package/dist/core/factory.js +138 -188
  25. package/dist/core/modules/GenerationHandler.js +3 -2
  26. package/dist/core/redisConversationMemoryManager.js +7 -3
  27. package/dist/evaluation/BatchEvaluator.js +4 -1
  28. package/dist/evaluation/hooks/observabilityHooks.js +5 -3
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +24 -9
  31. package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  32. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  33. package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
  34. package/dist/evaluation/scorers/scorerRegistry.js +353 -282
  35. package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
  36. package/dist/lib/auth/anthropicOAuth.js +149 -4
  37. package/dist/lib/auth/providers/firebase.js +5 -1
  38. package/dist/lib/auth/providers/jwt.js +5 -1
  39. package/dist/lib/auth/providers/workos.js +5 -1
  40. package/dist/lib/auth/sessionManager.d.ts +1 -1
  41. package/dist/lib/auth/sessionManager.js +58 -27
  42. package/dist/lib/client/aiSdkAdapter.js +3 -0
  43. package/dist/lib/client/streamingClient.js +30 -10
  44. package/dist/lib/core/baseProvider.d.ts +6 -1
  45. package/dist/lib/core/baseProvider.js +208 -230
  46. package/dist/lib/core/factory.d.ts +3 -0
  47. package/dist/lib/core/factory.js +138 -188
  48. package/dist/lib/core/modules/GenerationHandler.js +3 -2
  49. package/dist/lib/core/redisConversationMemoryManager.js +7 -3
  50. package/dist/lib/evaluation/BatchEvaluator.js +4 -1
  51. package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
  52. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  53. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +24 -9
  54. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  55. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  56. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
  57. package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
  58. package/dist/lib/mcp/toolRegistry.d.ts +2 -0
  59. package/dist/lib/mcp/toolRegistry.js +32 -31
  60. package/dist/lib/neurolink.d.ts +41 -2
  61. package/dist/lib/neurolink.js +1616 -1681
  62. package/dist/lib/observability/otelBridge.d.ts +2 -2
  63. package/dist/lib/observability/otelBridge.js +12 -3
  64. package/dist/lib/providers/amazonBedrock.js +2 -4
  65. package/dist/lib/providers/anthropic.d.ts +9 -5
  66. package/dist/lib/providers/anthropic.js +19 -14
  67. package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
  68. package/dist/lib/providers/anthropicBaseProvider.js +5 -4
  69. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  70. package/dist/lib/providers/azureOpenai.js +5 -4
  71. package/dist/lib/providers/googleAiStudio.js +30 -6
  72. package/dist/lib/providers/googleVertex.d.ts +10 -0
  73. package/dist/lib/providers/googleVertex.js +437 -423
  74. package/dist/lib/providers/huggingFace.d.ts +3 -3
  75. package/dist/lib/providers/huggingFace.js +6 -8
  76. package/dist/lib/providers/litellm.d.ts +1 -0
  77. package/dist/lib/providers/litellm.js +76 -55
  78. package/dist/lib/providers/mistral.js +2 -1
  79. package/dist/lib/providers/ollama.js +93 -23
  80. package/dist/lib/providers/openAI.d.ts +2 -0
  81. package/dist/lib/providers/openAI.js +141 -141
  82. package/dist/lib/providers/openRouter.js +2 -1
  83. package/dist/lib/providers/openaiCompatible.d.ts +4 -4
  84. package/dist/lib/providers/openaiCompatible.js +4 -4
  85. package/dist/lib/proxy/claudeFormat.d.ts +3 -2
  86. package/dist/lib/proxy/claudeFormat.js +27 -14
  87. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  88. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  89. package/dist/lib/proxy/modelRouter.js +3 -0
  90. package/dist/lib/proxy/oauthFetch.d.ts +1 -1
  91. package/dist/lib/proxy/oauthFetch.js +289 -316
  92. package/dist/lib/proxy/proxyConfig.js +46 -24
  93. package/dist/lib/proxy/proxyEnv.d.ts +19 -0
  94. package/dist/lib/proxy/proxyEnv.js +73 -0
  95. package/dist/lib/proxy/proxyFetch.js +291 -217
  96. package/dist/lib/proxy/proxyTracer.d.ts +133 -0
  97. package/dist/lib/proxy/proxyTracer.js +645 -0
  98. package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
  99. package/dist/lib/proxy/rawStreamCapture.js +83 -0
  100. package/dist/lib/proxy/requestLogger.d.ts +32 -5
  101. package/dist/lib/proxy/requestLogger.js +503 -47
  102. package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
  103. package/dist/lib/proxy/sseInterceptor.js +427 -0
  104. package/dist/lib/proxy/usageStats.d.ts +4 -3
  105. package/dist/lib/proxy/usageStats.js +25 -12
  106. package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
  107. package/dist/lib/rag/chunking/markdownChunker.js +15 -6
  108. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +17 -3
  109. package/dist/lib/server/routes/claudeProxyRoutes.js +3032 -1349
  110. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
  111. package/dist/lib/services/server/ai/observability/instrumentation.js +337 -161
  112. package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
  113. package/dist/lib/tasks/backends/bullmqBackend.js +35 -22
  114. package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
  115. package/dist/lib/tasks/store/redisTaskStore.js +54 -39
  116. package/dist/lib/tasks/taskManager.d.ts +5 -0
  117. package/dist/lib/tasks/taskManager.js +158 -30
  118. package/dist/lib/telemetry/index.d.ts +2 -1
  119. package/dist/lib/telemetry/index.js +2 -1
  120. package/dist/lib/telemetry/telemetryService.d.ts +3 -0
  121. package/dist/lib/telemetry/telemetryService.js +69 -5
  122. package/dist/lib/types/cli.d.ts +10 -0
  123. package/dist/lib/types/proxyTypes.d.ts +160 -5
  124. package/dist/lib/types/streamTypes.d.ts +25 -3
  125. package/dist/lib/utils/messageBuilder.js +3 -2
  126. package/dist/lib/utils/providerHealth.d.ts +19 -0
  127. package/dist/lib/utils/providerHealth.js +279 -33
  128. package/dist/lib/utils/providerUtils.js +17 -22
  129. package/dist/lib/utils/toolChoice.d.ts +4 -0
  130. package/dist/lib/utils/toolChoice.js +7 -0
  131. package/dist/mcp/toolRegistry.d.ts +2 -0
  132. package/dist/mcp/toolRegistry.js +32 -31
  133. package/dist/neurolink.d.ts +41 -2
  134. package/dist/neurolink.js +1616 -1681
  135. package/dist/observability/otelBridge.d.ts +2 -2
  136. package/dist/observability/otelBridge.js +12 -3
  137. package/dist/providers/amazonBedrock.js +2 -4
  138. package/dist/providers/anthropic.d.ts +9 -5
  139. package/dist/providers/anthropic.js +19 -14
  140. package/dist/providers/anthropicBaseProvider.d.ts +3 -3
  141. package/dist/providers/anthropicBaseProvider.js +5 -4
  142. package/dist/providers/azureOpenai.d.ts +1 -1
  143. package/dist/providers/azureOpenai.js +5 -4
  144. package/dist/providers/googleAiStudio.js +30 -6
  145. package/dist/providers/googleVertex.d.ts +10 -0
  146. package/dist/providers/googleVertex.js +437 -423
  147. package/dist/providers/huggingFace.d.ts +3 -3
  148. package/dist/providers/huggingFace.js +6 -7
  149. package/dist/providers/litellm.d.ts +1 -0
  150. package/dist/providers/litellm.js +76 -55
  151. package/dist/providers/mistral.js +2 -1
  152. package/dist/providers/ollama.js +93 -23
  153. package/dist/providers/openAI.d.ts +2 -0
  154. package/dist/providers/openAI.js +141 -141
  155. package/dist/providers/openRouter.js +2 -1
  156. package/dist/providers/openaiCompatible.d.ts +4 -4
  157. package/dist/providers/openaiCompatible.js +4 -3
  158. package/dist/proxy/claudeFormat.d.ts +3 -2
  159. package/dist/proxy/claudeFormat.js +27 -14
  160. package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  161. package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  162. package/dist/proxy/modelRouter.js +3 -0
  163. package/dist/proxy/oauthFetch.d.ts +1 -1
  164. package/dist/proxy/oauthFetch.js +289 -316
  165. package/dist/proxy/proxyConfig.js +46 -24
  166. package/dist/proxy/proxyEnv.d.ts +19 -0
  167. package/dist/proxy/proxyEnv.js +72 -0
  168. package/dist/proxy/proxyFetch.js +291 -217
  169. package/dist/proxy/proxyTracer.d.ts +133 -0
  170. package/dist/proxy/proxyTracer.js +644 -0
  171. package/dist/proxy/rawStreamCapture.d.ts +10 -0
  172. package/dist/proxy/rawStreamCapture.js +82 -0
  173. package/dist/proxy/requestLogger.d.ts +32 -5
  174. package/dist/proxy/requestLogger.js +503 -47
  175. package/dist/proxy/sseInterceptor.d.ts +97 -0
  176. package/dist/proxy/sseInterceptor.js +426 -0
  177. package/dist/proxy/usageStats.d.ts +4 -3
  178. package/dist/proxy/usageStats.js +25 -12
  179. package/dist/rag/chunkers/MarkdownChunker.js +13 -5
  180. package/dist/rag/chunking/markdownChunker.js +15 -6
  181. package/dist/server/routes/claudeProxyRoutes.d.ts +17 -3
  182. package/dist/server/routes/claudeProxyRoutes.js +3032 -1349
  183. package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
  184. package/dist/services/server/ai/observability/instrumentation.js +337 -161
  185. package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
  186. package/dist/tasks/backends/bullmqBackend.js +35 -22
  187. package/dist/tasks/store/redisTaskStore.d.ts +1 -0
  188. package/dist/tasks/store/redisTaskStore.js +54 -39
  189. package/dist/tasks/taskManager.d.ts +5 -0
  190. package/dist/tasks/taskManager.js +158 -30
  191. package/dist/telemetry/index.d.ts +2 -1
  192. package/dist/telemetry/index.js +2 -1
  193. package/dist/telemetry/telemetryService.d.ts +3 -0
  194. package/dist/telemetry/telemetryService.js +69 -5
  195. package/dist/types/cli.d.ts +10 -0
  196. package/dist/types/proxyTypes.d.ts +160 -5
  197. package/dist/types/streamTypes.d.ts +25 -3
  198. package/dist/utils/messageBuilder.js +3 -2
  199. package/dist/utils/providerHealth.d.ts +19 -0
  200. package/dist/utils/providerHealth.js +279 -33
  201. package/dist/utils/providerUtils.js +18 -22
  202. package/dist/utils/toolChoice.d.ts +4 -0
  203. package/dist/utils/toolChoice.js +6 -0
  204. package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
  205. package/docs/changelog.md +252 -0
  206. package/package.json +19 -2
  207. package/scripts/observability/check-proxy-telemetry.mjs +235 -0
  208. package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
  209. package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
  210. package/scripts/observability/manage-local-openobserve.sh +215 -0
  211. package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
  212. package/scripts/observability/proxy-observability.env.example +23 -0
@@ -1,8 +1,8 @@
+ import { type LanguageModel, type Schema } from "ai";
  import type { ZodType } from "zod";
- import { type Schema, type LanguageModel } from "ai";
- import { AIProviderName } from "../constants/enums.js";
- import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
+ import type { AIProviderName } from "../constants/enums.js";
  import { BaseProvider } from "../core/baseProvider.js";
+ import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
  /**
  * HuggingFace Provider - BaseProvider Implementation
  * Using AI SDK with HuggingFace's OpenAI-compatible endpoint
@@ -1,12 +1,12 @@
  import { createOpenAI } from "@ai-sdk/openai";
  import { NoOutputGeneratedError, stepCountIs, streamText, } from "ai";
- import { AIProviderName } from "../constants/enums.js";
  import { BaseProvider } from "../core/baseProvider.js";
- import { logger } from "../utils/logger.js";
- import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
- import { validateApiKey, createHuggingFaceConfig, getProviderModel, } from "../utils/providerConfig.js";
  import { createProxyFetch } from "../proxy/proxyFetch.js";
+ import { logger } from "../utils/logger.js";
+ import { createHuggingFaceConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+ import { resolveToolChoice } from "../utils/toolChoice.js";
  // Configuration helpers - now using consolidated utility
  const getHuggingFaceApiKey = () => {
  return validateApiKey(createHuggingFaceConfig());
@@ -136,9 +136,7 @@ export class HuggingFaceProvider extends BaseProvider {
  tools: (shouldUseTools
  ? streamOptions.tools || allTools
  : {}),
- toolChoice: (shouldUseTools
- ? streamOptions.toolChoice || "auto"
- : "none"),
+ toolChoice: resolveToolChoice(options, (shouldUseTools ? streamOptions.tools || allTools : {}), shouldUseTools),
  abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
  experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
  onStepFinish: ({ toolCalls, toolResults }) => {
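The resolveToolChoice helper used above ships in the new dist/utils/toolChoice.js (+7 lines), which this diff does not expand. Judging only from its call sites and the inline logic it replaces, a plausible sketch of the helper is:

    // Hypothetical reconstruction; the published implementation may differ.
    export function resolveToolChoice(options, tools, shouldUseTools) {
        if (!shouldUseTools || Object.keys(tools).length === 0) {
            return "none";
        }
        // Honor an explicit caller preference, falling back to "auto".
        return options.toolChoice ?? "auto";
    }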
@@ -203,7 +201,7 @@ export class HuggingFaceProvider extends BaseProvider {
  prompt: options.input.text,
  system: enhancedSystemPrompt,
  tools: formattedTools,
- toolChoice: formattedTools ? "auto" : undefined,
+ toolChoice: formattedTools ? (options.toolChoice ?? "auto") : undefined,
  };
  }
  /**
@@ -29,6 +29,7 @@ export declare class LiteLLMProvider extends BaseProvider {
  * Note: This is only used when tools are disabled
  */
  protected executeStream(options: StreamOptions, analysisSchema?: ZodType | Schema<unknown>): Promise<StreamResult>;
+ private createLiteLLMTransformedStream;
  /**
  * Generate an embedding for a single text input
  * Uses the LiteLLM proxy with OpenAI-compatible embedding API
@@ -11,6 +11,7 @@ import { logger } from "../utils/logger.js";
  import { calculateCost } from "../utils/pricing.js";
  import { getProviderModel } from "../utils/providerConfig.js";
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+ import { resolveToolChoice } from "../utils/toolChoice.js";
  import { getModelId } from "./providerTypeUtils.js";
  const streamTracer = trace.getTracer("neurolink.provider.litellm");
  // Configuration helpers
@@ -59,7 +60,7 @@ export class LiteLLMProvider extends BaseProvider {
  apiKey: config.apiKey,
  fetch: createProxyFetch(),
  });
- this.model = customOpenAI(this.modelName || getDefaultLiteLLMModel());
+ this.model = customOpenAI.chat(this.modelName || getDefaultLiteLLMModel());
  logger.debug("LiteLLM Provider initialized", {
  modelName: this.modelName,
  provider: this.providerName,
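Switching from customOpenAI(...) to customOpenAI.chat(...) pins the model to the OpenAI Chat Completions endpoint instead of the factory's default API flavor, which matters for OpenAI-compatible proxies such as LiteLLM. A minimal sketch of the pattern, with an illustrative local proxy URL:

    import { createOpenAI } from "@ai-sdk/openai";

    // baseURL and key are illustrative; point them at your LiteLLM deployment.
    const litellm = createOpenAI({
        baseURL: "http://localhost:4000/v1",
        apiKey: process.env.LITELLM_API_KEY,
    });

    // .chat() selects /v1/chat/completions explicitly, so the proxy is never
    // asked for an API surface it does not implement.
    const model = litellm.chat("openai/gpt-4o");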
@@ -160,7 +161,7 @@ export class LiteLLMProvider extends BaseProvider {
  ...(shouldUseTools &&
  Object.keys(tools).length > 0 && {
  tools,
- toolChoice: "auto",
+ toolChoice: resolveToolChoice(options, tools, shouldUseTools),
  maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
  }),
  abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
@@ -186,6 +187,28 @@ export class LiteLLMProvider extends BaseProvider {
  },
  onStepFinish: ({ toolCalls, toolResults }) => {
  logger.info("Tool execution completed", { toolResults, toolCalls });
+ for (const toolCall of toolCalls) {
+ collectedToolCalls.push({
+ toolCallId: toolCall.toolCallId,
+ toolName: toolCall.toolName,
+ args: toolCall.args ??
+ toolCall.input ??
+ toolCall
+ .parameters ??
+ {},
+ });
+ }
+ for (const toolResult of toolResults) {
+ const rawToolResult = toolResult;
+ collectedToolResults.push({
+ toolName: toolResult.toolName,
+ status: rawToolResult.error ? "failure" : "success",
+ output: (rawToolResult.output ??
+ rawToolResult.result) ?? undefined,
+ error: rawToolResult.error,
+ id: rawToolResult.toolCallId ?? toolResult.toolName,
+ });
+ }
  this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
  logger.warn("[LiteLLMProvider] Failed to store tool executions", {
  provider: this.providerName,
@@ -219,6 +242,8 @@ export class LiteLLMProvider extends BaseProvider {
  },
  });
  let result;
+ const collectedToolCalls = [];
+ const collectedToolResults = [];
  try {
  result = streamText(streamOptions);
  }
@@ -269,58 +294,7 @@ export class LiteLLMProvider extends BaseProvider {
  streamSpan.end();
  });
  timeoutController?.cleanup();
- // Transform stream to content object stream using fullStream (handles both text and tool calls)
- // Note: fullStream includes tool results, textStream only has text
- const transformedStream = (async function* () {
- try {
- // Try fullStream first (handles both text and tool calls), fallback to textStream
- const streamToUse = result.fullStream || result.textStream;
- for await (const chunk of streamToUse) {
- // Handle different chunk types from fullStream
- if (chunk && typeof chunk === "object") {
- // Check for error chunks first (critical error handling)
- if ("type" in chunk && chunk.type === "error") {
- const errorChunk = chunk;
- logger.error(`LiteLLM: Error chunk received:`, {
- errorType: errorChunk.type,
- errorDetails: errorChunk.error,
- });
- throw new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`);
- }
- if ("textDelta" in chunk) {
- // Text delta from fullStream
- const textDelta = chunk.textDelta;
- if (textDelta) {
- yield { content: textDelta };
- }
- }
- else if ("type" in chunk &&
- chunk.type === "tool-call" &&
- "toolCallId" in chunk) {
- // Tool call event - log for debugging
- const toolCallId = String(chunk.toolCallId);
- const toolName = "toolName" in chunk ? String(chunk.toolName) : "unknown";
- logger.debug("LiteLLM: Tool call", {
- toolCallId,
- toolName,
- });
- }
- }
- else if (typeof chunk === "string") {
- // Direct string chunk from textStream fallback
- yield { content: chunk };
- }
- }
- }
- catch (streamError) {
- // AI SDK v6 throws NoOutputGeneratedError when the stream produced no output.
- if (NoOutputGeneratedError.isInstance(streamError)) {
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
- return;
- }
- throw streamError;
- }
- })();
+ const transformedStream = this.createLiteLLMTransformedStream(result);
  // Create analytics promise that resolves after stream completion
  const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, result, Date.now() - startTime, {
  requestId: options.requestId ??
@@ -331,6 +305,10 @@ export class LiteLLMProvider extends BaseProvider {
  stream: transformedStream,
  provider: this.providerName,
  model: this.modelName,
+ ...(shouldUseTools && {
+ toolCalls: collectedToolCalls,
+ toolResults: collectedToolResults,
+ }),
  analytics: analyticsPromise,
  metadata: {
  startTime,
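With these fields spread into the stream result, callers can inspect the collected tool activity once streaming completes. A hypothetical consumption sketch (the surrounding call is illustrative, not the package's documented API):

    // Shapes inferred from the hunks above.
    const result = await provider.stream({ input: { text: "What is 2 + 2?" } });
    for await (const chunk of result.stream) {
        process.stdout.write(chunk.content ?? "");
    }
    // Present only when tools were enabled for the request.
    console.log(result.toolCalls); // [{ toolCallId, toolName, args }, ...]
    console.log(result.toolResults); // [{ toolName, status, output, error, id }, ...]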
@@ -343,6 +321,47 @@ export class LiteLLMProvider extends BaseProvider {
  throw this.handleProviderError(error);
  }
  }
+ async *createLiteLLMTransformedStream(result) {
+ try {
+ const streamToUse = result.fullStream || result.textStream;
+ for await (const chunk of streamToUse) {
+ if (chunk && typeof chunk === "object") {
+ if ("type" in chunk && chunk.type === "error") {
+ const errorChunk = chunk;
+ logger.error(`LiteLLM: Error chunk received:`, {
+ errorType: errorChunk.type,
+ errorDetails: errorChunk.error,
+ });
+ throw this.formatProviderError(new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`));
+ }
+ if ("textDelta" in chunk) {
+ const textDelta = chunk.textDelta;
+ if (textDelta) {
+ yield { content: textDelta };
+ }
+ }
+ else if ("type" in chunk &&
+ chunk.type === "tool-call" &&
+ "toolCallId" in chunk) {
+ logger.debug("LiteLLM: Tool call", {
+ toolCallId: String(chunk.toolCallId),
+ toolName: "toolName" in chunk ? String(chunk.toolName) : "unknown",
+ });
+ }
+ }
+ else if (typeof chunk === "string") {
+ yield { content: chunk };
+ }
+ }
+ }
+ catch (streamError) {
+ if (NoOutputGeneratedError.isInstance(streamError)) {
+ logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
+ return;
+ }
+ throw streamError;
+ }
+ }
  /**
  * Generate an embedding for a single text input
  * Uses the LiteLLM proxy with OpenAI-compatible embedding API
@@ -419,7 +438,9 @@ export class LiteLLMProvider extends BaseProvider {
  });
  }
  // Fallback to hardcoded list if API fetch fails
- const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",").map((m) => m.trim()) || [
+ const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",")
+ .map((m) => m.trim())
+ .filter((m) => m.length > 0) || [
  "openai/gpt-4o", // minimal safe baseline
  "anthropic/claude-3-haiku",
  "meta-llama/llama-3.1-8b-instruct",
@@ -7,6 +7,7 @@ import { createProxyFetch } from "../proxy/proxyFetch.js";
  import { logger } from "../utils/logger.js";
  import { createMistralConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+ import { resolveToolChoice } from "../utils/toolChoice.js";
  import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
  // Configuration helpers - now using consolidated utility
  const getMistralApiKey = () => {
@@ -63,7 +64,7 @@ export class MistralProvider extends BaseProvider {
  maxOutputTokens: options.maxTokens, // No default limit - unlimited unless specified
  tools,
  stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
- toolChoice: shouldUseTools ? "auto" : "none",
+ toolChoice: resolveToolChoice(options, tools, shouldUseTools),
  abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
  experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
  onStepFinish: ({ toolCalls, toolResults }) => {
@@ -11,7 +11,7 @@ import { InvalidModelError, NetworkError, ProviderError, } from "../types/errors
  import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
  import { TimeoutError } from "../utils/timeout.js";
  // Model version constants (configurable via environment)
- const DEFAULT_OLLAMA_MODEL = "llama3.1:8b";
+ const DEFAULT_OLLAMA_MODEL = process.env.OLLAMA_MODEL || "llama3.1:8b";
  const FALLBACK_OLLAMA_MODEL = "llama3.2:latest"; // Used when primary model fails
  // Configuration helpers
  const getOllamaBaseUrl = () => {
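The default model is now overridable through the environment; for instance (model name illustrative):

    // OLLAMA_MODEL=mistral:latest node app.js
    const DEFAULT_OLLAMA_MODEL = process.env.OLLAMA_MODEL || "llama3.1:8b";
    console.log(DEFAULT_OLLAMA_MODEL); // "mistral:latest" when the variable is set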
@@ -40,6 +40,26 @@ const getOllamaTimeout = () => {
  // especially for larger models like aliafshar/gemma3-it-qat-tools:latest (12.2B parameters)
  return parseInt(process.env.OLLAMA_TIMEOUT || "240000", 10);
  };
+ function isOllamaHttpError(error) {
+ return (error instanceof ProviderError &&
+ typeof error.statusCode === "number" &&
+ typeof error.responseBody === "string");
+ }
+ async function createOllamaHttpError(response) {
+ let responseBody = "";
+ try {
+ responseBody = (await response.text()).trim();
+ }
+ catch {
+ // Ignore unreadable bodies
+ }
+ const suffix = responseBody ? ` - ${responseBody.slice(0, 500)}` : "";
+ const error = new ProviderError(`Ollama API error: ${response.status} ${response.statusText}${suffix}`, "ollama");
+ error.statusCode = response.status;
+ error.statusText = response.statusText;
+ error.responseBody = responseBody;
+ return error;
+ }
  // Create proxy-aware fetch instance
  const proxyFetch = createProxyFetch();
  // Custom LanguageModel implementation for Ollama
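The new helpers attach the HTTP status and a truncated response body to the thrown ProviderError, which the revised 404 classification later in this file depends on. A quick illustration with a synthetic Response (Node 18+):

    // Synthetic 404 whose body names the missing model.
    const res = new Response('{"error":"model \'llama3.1:8b\' not found"}', {
        status: 404,
        statusText: "Not Found",
    });
    const err = await createOllamaHttpError(res);
    console.log(isOllamaHttpError(err)); // true
    console.log(err.message);
    // Ollama API error: 404 Not Found - {"error":"model 'llama3.1:8b' not found"}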
@@ -110,21 +130,37 @@ class OllamaLanguageModel {
  signal: createAbortSignalWithTimeout(this.timeout),
  });
  if (!response.ok) {
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
+ throw await createOllamaHttpError(response);
  }
  const data = await response.json();
  logger.debug("[OllamaLanguageModel] OpenAI API Response:", JSON.stringify(data, null, 2));
  const text = data.choices?.[0]?.message?.content || "";
  const usage = data.usage || {};
+ const promptTokens = usage.prompt_tokens ??
+ this.estimateTokenCount(JSON.stringify(messages));
+ const completionTokens = usage.completion_tokens ?? this.estimateTokenCount(text);
  return {
+ content: text ? [{ type: "text", text }] : [],
  text,
  usage: {
- promptTokens: usage.prompt_tokens ??
- this.estimateTokenCount(JSON.stringify(messages)),
- completionTokens: usage.completion_tokens ?? this.estimateTokenCount(text),
- totalTokens: usage.total_tokens,
+ inputTokens: promptTokens,
+ outputTokens: completionTokens,
+ promptTokens,
+ completionTokens,
+ totalTokens: usage.total_tokens ?? promptTokens + completionTokens,
+ },
+ finishReason: data.choices?.[0]?.finish_reason ?? "stop",
+ warnings: [],
+ request: {
+ body: JSON.stringify(requestBody),
+ },
+ response: {
+ id: data.id,
+ modelId: data.model,
+ timestamp: new Date(),
+ headers: {},
+ body: data,
  },
- finishReason: "stop",
  rawCall: {
  rawPrompt: messages,
  rawSettings: {
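Note the deliberately duplicated token fields: inputTokens/outputTokens is the newer AI SDK naming, while promptTokens/completionTokens keeps older readers working. A consumer sketch under that assumption (callOptions is a placeholder):

    const { usage } = await ollamaModel.doGenerate(callOptions);
    console.log(usage.inputTokens === usage.promptTokens); // true
    console.log(usage.totalTokens); // server total, else input + output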
@@ -158,21 +194,45 @@ class OllamaLanguageModel {
  signal: createAbortSignalWithTimeout(this.timeout),
  });
  if (!response.ok) {
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
+ throw await createOllamaHttpError(response);
  }
  const data = await response.json();
  logger.debug("[OllamaLanguageModel] Native API Response:", JSON.stringify(data, null, 2));
+ const text = String(data.response ?? "");
+ const promptTokens = data.prompt_eval_count ?? this.estimateTokenCount(prompt);
+ const completionTokens = data.eval_count ?? this.estimateTokenCount(text);
+ const requestBody = {
+ model: this.modelId,
+ prompt,
+ stream: false,
+ system: messages.find((m) => m.role === "system")?.content,
+ options: {
+ temperature: options.temperature,
+ num_predict: options.maxTokens,
+ },
+ };
  return {
- text: data.response,
+ content: text ? [{ type: "text", text }] : [],
+ text,
  usage: {
- promptTokens: data.prompt_eval_count ?? this.estimateTokenCount(prompt),
- completionTokens: data.eval_count ??
- this.estimateTokenCount(String(data.response ?? "")),
- totalTokens: (data.prompt_eval_count ?? this.estimateTokenCount(prompt)) +
- (data.eval_count ??
- this.estimateTokenCount(String(data.response ?? ""))),
+ inputTokens: promptTokens,
+ outputTokens: completionTokens,
+ promptTokens,
+ completionTokens,
+ totalTokens: promptTokens + completionTokens,
+ },
+ finishReason: data.done_reason ?? "stop",
+ warnings: [],
+ request: {
+ body: JSON.stringify(requestBody),
+ },
+ response: {
+ id: data.created_at,
+ modelId: this.modelId,
+ timestamp: data.created_at ? new Date(data.created_at) : new Date(),
+ headers: {},
+ body: data,
  },
- finishReason: "stop",
  rawCall: {
  rawPrompt: prompt,
  rawSettings: {
@@ -220,7 +280,7 @@ class OllamaLanguageModel {
  ok: response.ok,
  });
  if (!response.ok) {
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
+ throw await createOllamaHttpError(response);
  }
  const self = this;
  return {
@@ -282,7 +342,7 @@ class OllamaLanguageModel {
  ok: response.ok,
  });
  if (!response.ok) {
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
+ throw await createOllamaHttpError(response);
  }
  const self = this;
  return {
@@ -705,7 +765,7 @@ export class OllamaProvider extends BaseProvider {
  signal: createAbortSignalWithTimeout(this.timeout),
  });
  if (!response.ok) {
- throw this.handleProviderError(new Error(`Ollama API error: ${response.status} ${response.statusText}`));
+ throw this.handleProviderError(await createOllamaHttpError(response));
  }
  // Process response stream
  const { content, toolCalls, finishReason } = await this.processOllamaResponse(response, controller);
@@ -870,7 +930,7 @@ export class OllamaProvider extends BaseProvider {
  ok: response.ok,
  });
  if (!response.ok) {
- throw this.handleProviderError(new Error(`Ollama API error: ${response.status} ${response.statusText}`));
+ throw this.handleProviderError(await createOllamaHttpError(response));
  }
  // Transform to async generator for OpenAI-compatible format
  const self = this;
@@ -936,7 +996,7 @@ export class OllamaProvider extends BaseProvider {
  ok: response.ok,
  });
  if (!response.ok) {
- throw this.handleProviderError(new Error(`Ollama API error: ${response.status} ${response.statusText}`));
+ throw this.handleProviderError(await createOllamaHttpError(response));
  }
  // Transform to async generator to match other providers
  const self = this;
@@ -1486,8 +1546,18 @@ export class OllamaProvider extends BaseProvider {
  error.message?.includes("not found")) {
  return new InvalidModelError(`❌ Ollama Model Not Found\n\nModel '${this.modelName}' is not available locally.\n\n🔧 Install Model:\n1. Run: ollama pull ${this.modelName}\n2. Or try a different model:\n - ollama pull ${FALLBACK_OLLAMA_MODEL}\n - ollama pull mistral:latest\n - ollama pull codellama:latest\n\n🔧 List Available Models:\nollama list`, this.providerName);
  }
- if (error.message?.includes("404")) {
- return new NetworkError(`❌ Ollama API Endpoint Not Found\n\nThe API endpoint might have changed or Ollama version is incompatible.\n\n🔧 Check:\n1. Ollama version: 'ollama --version'\n2. Update Ollama to latest version\n3. Verify API is available: 'curl ${this.baseUrl}/api/version'`, this.providerName);
+ const errMsg = error.message ?? "";
+ const httpStatus = isOllamaHttpError(error) ? error.statusCode : undefined;
+ const responseBody = isOllamaHttpError(error) ? error.responseBody : "";
+ if (httpStatus === 404 &&
+ (responseBody.toLowerCase().includes("model") ||
+ responseBody.toLowerCase().includes("not found") ||
+ errMsg.toLowerCase().includes("model") ||
+ errMsg.toLowerCase().includes("not found"))) {
+ return new InvalidModelError(`❌ Ollama Returned HTTP 404\n\nThis usually means the configured model '${this.modelName}' is not installed locally, although a bad base URL or incompatible API mode can also cause it.\n\n🔧 Check:\n1. Verify the model exists: 'ollama list'\n2. Pull it if missing: 'ollama pull ${this.modelName}'\n3. Verify the service is healthy: 'curl ${this.baseUrl}/api/version'\n4. If you use OpenAI-compatible mode, confirm the base URL serves /v1/chat/completions`, this.providerName);
+ }
+ if (httpStatus === 404) {
+ return new ProviderError(`❌ Ollama Endpoint Returned HTTP 404\n\nThe configured base URL (${this.baseUrl}) did not serve the expected Ollama endpoint for model '${this.modelName}'. This is usually a configuration or API-mode mismatch rather than a missing model.\n\n🔧 Check:\n1. Verify the base URL: ${this.baseUrl}\n2. For native Ollama mode, confirm /api/generate exists\n3. For OpenAI-compatible mode, confirm /v1/chat/completions exists\n4. If the model is missing, the response body should explicitly say so`, this.providerName);
  }
  return new ProviderError(`❌ Ollama Provider Error\n\n${error.message || "Unknown error occurred"}\n\n🔧 Troubleshooting:\n1. Check if Ollama service is running\n2. Verify model is installed: 'ollama list'\n3. Check network connectivity to ${this.baseUrl}\n4. Review Ollama logs for details`, this.providerName);
  }
@@ -52,6 +52,8 @@ export declare class OpenAIProvider extends BaseProvider {
  * and the migration guide in the project repository.
  */
  protected executeStream(options: StreamOptions, _analysisSchema?: ValidationSchema): Promise<StreamResult>;
+ private createOpenAITransformedStream;
+ private extractOpenAIChunkContent;
  /**
  * Generate embeddings for text using OpenAI text-embedding models
  * @param text - The text to embed