@juspay/neurolink 9.41.0 → 9.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +7 -1
- package/dist/auth/anthropicOAuth.d.ts +18 -3
- package/dist/auth/anthropicOAuth.js +149 -4
- package/dist/auth/providers/firebase.js +5 -1
- package/dist/auth/providers/jwt.js +5 -1
- package/dist/auth/providers/workos.js +5 -1
- package/dist/auth/sessionManager.d.ts +1 -1
- package/dist/auth/sessionManager.js +58 -27
- package/dist/browser/neurolink.min.js +354 -334
- package/dist/cli/commands/mcp.d.ts +6 -0
- package/dist/cli/commands/mcp.js +188 -181
- package/dist/cli/commands/proxy.d.ts +2 -1
- package/dist/cli/commands/proxy.js +713 -431
- package/dist/cli/commands/task.js +3 -0
- package/dist/cli/factories/commandFactory.d.ts +2 -0
- package/dist/cli/factories/commandFactory.js +38 -0
- package/dist/cli/parser.js +4 -3
- package/dist/client/aiSdkAdapter.js +3 -0
- package/dist/client/streamingClient.js +30 -10
- package/dist/core/baseProvider.d.ts +6 -1
- package/dist/core/baseProvider.js +208 -230
- package/dist/core/factory.d.ts +3 -0
- package/dist/core/factory.js +138 -188
- package/dist/core/modules/GenerationHandler.js +3 -2
- package/dist/core/redisConversationMemoryManager.js +7 -3
- package/dist/evaluation/BatchEvaluator.js +4 -1
- package/dist/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/evaluation/pipeline/evaluationPipeline.js +24 -9
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
- package/dist/lib/auth/anthropicOAuth.js +149 -4
- package/dist/lib/auth/providers/firebase.js +5 -1
- package/dist/lib/auth/providers/jwt.js +5 -1
- package/dist/lib/auth/providers/workos.js +5 -1
- package/dist/lib/auth/sessionManager.d.ts +1 -1
- package/dist/lib/auth/sessionManager.js +58 -27
- package/dist/lib/client/aiSdkAdapter.js +3 -0
- package/dist/lib/client/streamingClient.js +30 -10
- package/dist/lib/core/baseProvider.d.ts +6 -1
- package/dist/lib/core/baseProvider.js +208 -230
- package/dist/lib/core/factory.d.ts +3 -0
- package/dist/lib/core/factory.js +138 -188
- package/dist/lib/core/modules/GenerationHandler.js +3 -2
- package/dist/lib/core/redisConversationMemoryManager.js +7 -3
- package/dist/lib/evaluation/BatchEvaluator.js +4 -1
- package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +24 -9
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/mcp/toolRegistry.d.ts +2 -0
- package/dist/lib/mcp/toolRegistry.js +32 -31
- package/dist/lib/neurolink.d.ts +41 -2
- package/dist/lib/neurolink.js +1616 -1681
- package/dist/lib/observability/otelBridge.d.ts +2 -2
- package/dist/lib/observability/otelBridge.js +12 -3
- package/dist/lib/providers/amazonBedrock.js +2 -4
- package/dist/lib/providers/anthropic.d.ts +9 -5
- package/dist/lib/providers/anthropic.js +19 -14
- package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/lib/providers/anthropicBaseProvider.js +5 -4
- package/dist/lib/providers/azureOpenai.d.ts +1 -1
- package/dist/lib/providers/azureOpenai.js +5 -4
- package/dist/lib/providers/googleAiStudio.js +30 -6
- package/dist/lib/providers/googleVertex.d.ts +10 -0
- package/dist/lib/providers/googleVertex.js +437 -423
- package/dist/lib/providers/huggingFace.d.ts +3 -3
- package/dist/lib/providers/huggingFace.js +6 -8
- package/dist/lib/providers/litellm.d.ts +1 -0
- package/dist/lib/providers/litellm.js +76 -55
- package/dist/lib/providers/mistral.js +2 -1
- package/dist/lib/providers/ollama.js +93 -23
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +141 -141
- package/dist/lib/providers/openRouter.js +2 -1
- package/dist/lib/providers/openaiCompatible.d.ts +4 -4
- package/dist/lib/providers/openaiCompatible.js +4 -4
- package/dist/lib/proxy/claudeFormat.d.ts +3 -2
- package/dist/lib/proxy/claudeFormat.js +27 -14
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/lib/proxy/modelRouter.js +3 -0
- package/dist/lib/proxy/oauthFetch.d.ts +1 -1
- package/dist/lib/proxy/oauthFetch.js +289 -316
- package/dist/lib/proxy/proxyConfig.js +46 -24
- package/dist/lib/proxy/proxyEnv.d.ts +19 -0
- package/dist/lib/proxy/proxyEnv.js +73 -0
- package/dist/lib/proxy/proxyFetch.js +291 -217
- package/dist/lib/proxy/proxyTracer.d.ts +133 -0
- package/dist/lib/proxy/proxyTracer.js +645 -0
- package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/lib/proxy/rawStreamCapture.js +83 -0
- package/dist/lib/proxy/requestLogger.d.ts +32 -5
- package/dist/lib/proxy/requestLogger.js +503 -47
- package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
- package/dist/lib/proxy/sseInterceptor.js +427 -0
- package/dist/lib/proxy/usageStats.d.ts +4 -3
- package/dist/lib/proxy/usageStats.js +25 -12
- package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/lib/rag/chunking/markdownChunker.js +15 -6
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +17 -3
- package/dist/lib/server/routes/claudeProxyRoutes.js +3032 -1349
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/lib/services/server/ai/observability/instrumentation.js +337 -161
- package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/lib/tasks/backends/bullmqBackend.js +35 -22
- package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/lib/tasks/store/redisTaskStore.js +54 -39
- package/dist/lib/tasks/taskManager.d.ts +5 -0
- package/dist/lib/tasks/taskManager.js +158 -30
- package/dist/lib/telemetry/index.d.ts +2 -1
- package/dist/lib/telemetry/index.js +2 -1
- package/dist/lib/telemetry/telemetryService.d.ts +3 -0
- package/dist/lib/telemetry/telemetryService.js +69 -5
- package/dist/lib/types/cli.d.ts +10 -0
- package/dist/lib/types/proxyTypes.d.ts +160 -5
- package/dist/lib/types/streamTypes.d.ts +25 -3
- package/dist/lib/utils/messageBuilder.js +3 -2
- package/dist/lib/utils/providerHealth.d.ts +19 -0
- package/dist/lib/utils/providerHealth.js +279 -33
- package/dist/lib/utils/providerUtils.js +17 -22
- package/dist/lib/utils/toolChoice.d.ts +4 -0
- package/dist/lib/utils/toolChoice.js +7 -0
- package/dist/mcp/toolRegistry.d.ts +2 -0
- package/dist/mcp/toolRegistry.js +32 -31
- package/dist/neurolink.d.ts +41 -2
- package/dist/neurolink.js +1616 -1681
- package/dist/observability/otelBridge.d.ts +2 -2
- package/dist/observability/otelBridge.js +12 -3
- package/dist/providers/amazonBedrock.js +2 -4
- package/dist/providers/anthropic.d.ts +9 -5
- package/dist/providers/anthropic.js +19 -14
- package/dist/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/providers/anthropicBaseProvider.js +5 -4
- package/dist/providers/azureOpenai.d.ts +1 -1
- package/dist/providers/azureOpenai.js +5 -4
- package/dist/providers/googleAiStudio.js +30 -6
- package/dist/providers/googleVertex.d.ts +10 -0
- package/dist/providers/googleVertex.js +437 -423
- package/dist/providers/huggingFace.d.ts +3 -3
- package/dist/providers/huggingFace.js +6 -7
- package/dist/providers/litellm.d.ts +1 -0
- package/dist/providers/litellm.js +76 -55
- package/dist/providers/mistral.js +2 -1
- package/dist/providers/ollama.js +93 -23
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +141 -141
- package/dist/providers/openRouter.js +2 -1
- package/dist/providers/openaiCompatible.d.ts +4 -4
- package/dist/providers/openaiCompatible.js +4 -3
- package/dist/proxy/claudeFormat.d.ts +3 -2
- package/dist/proxy/claudeFormat.js +27 -14
- package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/proxy/modelRouter.js +3 -0
- package/dist/proxy/oauthFetch.d.ts +1 -1
- package/dist/proxy/oauthFetch.js +289 -316
- package/dist/proxy/proxyConfig.js +46 -24
- package/dist/proxy/proxyEnv.d.ts +19 -0
- package/dist/proxy/proxyEnv.js +72 -0
- package/dist/proxy/proxyFetch.js +291 -217
- package/dist/proxy/proxyTracer.d.ts +133 -0
- package/dist/proxy/proxyTracer.js +644 -0
- package/dist/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/proxy/rawStreamCapture.js +82 -0
- package/dist/proxy/requestLogger.d.ts +32 -5
- package/dist/proxy/requestLogger.js +503 -47
- package/dist/proxy/sseInterceptor.d.ts +97 -0
- package/dist/proxy/sseInterceptor.js +426 -0
- package/dist/proxy/usageStats.d.ts +4 -3
- package/dist/proxy/usageStats.js +25 -12
- package/dist/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/rag/chunking/markdownChunker.js +15 -6
- package/dist/server/routes/claudeProxyRoutes.d.ts +17 -3
- package/dist/server/routes/claudeProxyRoutes.js +3032 -1349
- package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/services/server/ai/observability/instrumentation.js +337 -161
- package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/tasks/backends/bullmqBackend.js +35 -22
- package/dist/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/tasks/store/redisTaskStore.js +54 -39
- package/dist/tasks/taskManager.d.ts +5 -0
- package/dist/tasks/taskManager.js +158 -30
- package/dist/telemetry/index.d.ts +2 -1
- package/dist/telemetry/index.js +2 -1
- package/dist/telemetry/telemetryService.d.ts +3 -0
- package/dist/telemetry/telemetryService.js +69 -5
- package/dist/types/cli.d.ts +10 -0
- package/dist/types/proxyTypes.d.ts +160 -5
- package/dist/types/streamTypes.d.ts +25 -3
- package/dist/utils/messageBuilder.js +3 -2
- package/dist/utils/providerHealth.d.ts +19 -0
- package/dist/utils/providerHealth.js +279 -33
- package/dist/utils/providerUtils.js +18 -22
- package/dist/utils/toolChoice.d.ts +4 -0
- package/dist/utils/toolChoice.js +6 -0
- package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
- package/docs/changelog.md +252 -0
- package/package.json +19 -2
- package/scripts/observability/check-proxy-telemetry.mjs +235 -0
- package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
- package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
- package/scripts/observability/manage-local-openobserve.sh +215 -0
- package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
- package/scripts/observability/proxy-observability.env.example +23 -0

@@ -1,8 +1,8 @@
+import { type LanguageModel, type Schema } from "ai";
 import type { ZodType } from "zod";
-import
-import { AIProviderName } from "../constants/enums.js";
-import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
+import type { AIProviderName } from "../constants/enums.js";
 import { BaseProvider } from "../core/baseProvider.js";
+import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
 /**
  * HuggingFace Provider - BaseProvider Implementation
  * Using AI SDK with HuggingFace's OpenAI-compatible endpoint

@@ -1,12 +1,12 @@
 import { createOpenAI } from "@ai-sdk/openai";
 import { NoOutputGeneratedError, stepCountIs, streamText, } from "ai";
-import { AIProviderName } from "../constants/enums.js";
 import { BaseProvider } from "../core/baseProvider.js";
-import { logger } from "../utils/logger.js";
-import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
-import { validateApiKey, createHuggingFaceConfig, getProviderModel, } from "../utils/providerConfig.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
+import { logger } from "../utils/logger.js";
+import { createHuggingFaceConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
+import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+import { resolveToolChoice } from "../utils/toolChoice.js";
 // Configuration helpers - now using consolidated utility
 const getHuggingFaceApiKey = () => {
     return validateApiKey(createHuggingFaceConfig());

@@ -136,9 +136,7 @@ export class HuggingFaceProvider extends BaseProvider {
             tools: (shouldUseTools
                 ? streamOptions.tools || allTools
                 : {}),
-            toolChoice: (shouldUseTools
-                ? streamOptions.toolChoice || "auto"
-                : "none"),
+            toolChoice: resolveToolChoice(options, (shouldUseTools ? streamOptions.tools || allTools : {}), shouldUseTools),
             abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
             experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
             onStepFinish: ({ toolCalls, toolResults }) => {

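Several hunks above and below route tool-choice resolution through a shared resolveToolChoice helper (see the new dist/utils/toolChoice.js and toolChoice.d.ts entries in the file list). The helper's body is not included in this diff; the following is only a sketch of what it plausibly does, inferred from the call shape resolveToolChoice(options, tools, shouldUseTools), with illustrative names and types:

    // Sketch only - not the shipped utils/toolChoice.js implementation.
    type ToolChoice = "auto" | "none" | "required" | { type: "tool"; toolName: string };

    function resolveToolChoice(
      options: { toolChoice?: ToolChoice },
      tools: Record<string, unknown>,
      shouldUseTools: boolean,
    ): ToolChoice {
      // With tools disabled or no tools registered, never ask the model to call one.
      if (!shouldUseTools || Object.keys(tools).length === 0) {
        return "none";
      }
      // Otherwise honour an explicit caller preference, defaulting to "auto".
      return options.toolChoice ?? "auto";
    }
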
@@ -203,7 +201,7 @@ export class HuggingFaceProvider extends BaseProvider {
             prompt: options.input.text,
             system: enhancedSystemPrompt,
             tools: formattedTools,
-            toolChoice: formattedTools ? "auto" : undefined,
+            toolChoice: formattedTools ? (options.toolChoice ?? "auto") : undefined,
         };
     }
     /**

@@ -29,6 +29,7 @@ export declare class LiteLLMProvider extends BaseProvider {
      * Note: This is only used when tools are disabled
      */
     protected executeStream(options: StreamOptions, analysisSchema?: ZodType | Schema<unknown>): Promise<StreamResult>;
+    private createLiteLLMTransformedStream;
     /**
      * Generate an embedding for a single text input
      * Uses the LiteLLM proxy with OpenAI-compatible embedding API

@@ -11,6 +11,7 @@ import { logger } from "../utils/logger.js";
 import { calculateCost } from "../utils/pricing.js";
 import { getProviderModel } from "../utils/providerConfig.js";
 import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+import { resolveToolChoice } from "../utils/toolChoice.js";
 import { getModelId } from "./providerTypeUtils.js";
 const streamTracer = trace.getTracer("neurolink.provider.litellm");
 // Configuration helpers

@@ -59,7 +60,7 @@ export class LiteLLMProvider extends BaseProvider {
             apiKey: config.apiKey,
             fetch: createProxyFetch(),
         });
-        this.model = customOpenAI(this.modelName || getDefaultLiteLLMModel());
+        this.model = customOpenAI.chat(this.modelName || getDefaultLiteLLMModel());
         logger.debug("LiteLLM Provider initialized", {
             modelName: this.modelName,
             provider: this.providerName,

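The LiteLLM model is now constructed with customOpenAI.chat(...) rather than by calling the provider function directly, presumably to pin the OpenAI Chat Completions wire format that a LiteLLM gateway serves instead of whatever model type the provider function defaults to. A minimal usage sketch with @ai-sdk/openai; the baseURL and environment variable name below are assumptions, not values taken from this package:

    import { createOpenAI } from "@ai-sdk/openai";

    // Assumed local LiteLLM proxy endpoint and key variable.
    const litellm = createOpenAI({
      baseURL: "http://localhost:4000/v1",
      apiKey: process.env.LITELLM_API_KEY ?? "sk-local",
    });

    // Same shape as the diffed call site: an explicit chat-completions model.
    const model = litellm.chat("openai/gpt-4o");
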
@@ -160,7 +161,7 @@ export class LiteLLMProvider extends BaseProvider {
             ...(shouldUseTools &&
                 Object.keys(tools).length > 0 && {
                 tools,
-                toolChoice:
+                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                 maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
             }),
             abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),

@@ -186,6 +187,28 @@ export class LiteLLMProvider extends BaseProvider {
             },
             onStepFinish: ({ toolCalls, toolResults }) => {
                 logger.info("Tool execution completed", { toolResults, toolCalls });
+                for (const toolCall of toolCalls) {
+                    collectedToolCalls.push({
+                        toolCallId: toolCall.toolCallId,
+                        toolName: toolCall.toolName,
+                        args: toolCall.args ??
+                            toolCall.input ??
+                            toolCall
+                                .parameters ??
+                            {},
+                    });
+                }
+                for (const toolResult of toolResults) {
+                    const rawToolResult = toolResult;
+                    collectedToolResults.push({
+                        toolName: toolResult.toolName,
+                        status: rawToolResult.error ? "failure" : "success",
+                        output: (rawToolResult.output ??
+                            rawToolResult.result) ?? undefined,
+                        error: rawToolResult.error,
+                        id: rawToolResult.toolCallId ?? toolResult.toolName,
+                    });
+                }
                 this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                     logger.warn("[LiteLLMProvider] Failed to store tool executions", {
                         provider: this.providerName,

@@ -219,6 +242,8 @@ export class LiteLLMProvider extends BaseProvider {
             },
         });
         let result;
+        const collectedToolCalls = [];
+        const collectedToolResults = [];
         try {
             result = streamText(streamOptions);
         }

@@ -269,58 +294,7 @@ export class LiteLLMProvider extends BaseProvider {
             streamSpan.end();
         });
         timeoutController?.cleanup();
-
-        // Note: fullStream includes tool results, textStream only has text
-        const transformedStream = (async function* () {
-            try {
-                // Try fullStream first (handles both text and tool calls), fallback to textStream
-                const streamToUse = result.fullStream || result.textStream;
-                for await (const chunk of streamToUse) {
-                    // Handle different chunk types from fullStream
-                    if (chunk && typeof chunk === "object") {
-                        // Check for error chunks first (critical error handling)
-                        if ("type" in chunk && chunk.type === "error") {
-                            const errorChunk = chunk;
-                            logger.error(`LiteLLM: Error chunk received:`, {
-                                errorType: errorChunk.type,
-                                errorDetails: errorChunk.error,
-                            });
-                            throw new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`);
-                        }
-                        if ("textDelta" in chunk) {
-                            // Text delta from fullStream
-                            const textDelta = chunk.textDelta;
-                            if (textDelta) {
-                                yield { content: textDelta };
-                            }
-                        }
-                        else if ("type" in chunk &&
-                            chunk.type === "tool-call" &&
-                            "toolCallId" in chunk) {
-                            // Tool call event - log for debugging
-                            const toolCallId = String(chunk.toolCallId);
-                            const toolName = "toolName" in chunk ? String(chunk.toolName) : "unknown";
-                            logger.debug("LiteLLM: Tool call", {
-                                toolCallId,
-                                toolName,
-                            });
-                        }
-                    }
-                    else if (typeof chunk === "string") {
-                        // Direct string chunk from textStream fallback
-                        yield { content: chunk };
-                    }
-                }
-            }
-            catch (streamError) {
-                // AI SDK v6 throws NoOutputGeneratedError when the stream produced no output.
-                if (NoOutputGeneratedError.isInstance(streamError)) {
-                    logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
-                    return;
-                }
-                throw streamError;
-            }
-        })();
+        const transformedStream = this.createLiteLLMTransformedStream(result);
         // Create analytics promise that resolves after stream completion
         const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, result, Date.now() - startTime, {
             requestId: options.requestId ??

@@ -331,6 +305,10 @@ export class LiteLLMProvider extends BaseProvider {
             stream: transformedStream,
             provider: this.providerName,
             model: this.modelName,
+            ...(shouldUseTools && {
+                toolCalls: collectedToolCalls,
+                toolResults: collectedToolResults,
+            }),
             analytics: analyticsPromise,
             metadata: {
                 startTime,

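Taken together with the collection added in the -186 and -219 hunks, LiteLLM stream results now carry the tool activity gathered in onStepFinish next to the text stream. A sketch of consuming those fields; the result shape below is narrowed to the fields visible in this diff, and everything else is assumed:

    async function drainLiteLLMStream(result: {
      stream: AsyncIterable<{ content?: string }>;
      toolCalls?: Array<{ toolCallId: string; toolName: string; args: unknown }>;
      toolResults?: Array<{ toolName: string; status: "success" | "failure" }>;
    }): Promise<string> {
      let text = "";
      for await (const chunk of result.stream) {
        text += chunk.content ?? "";
      }
      // The arrays fill as onStepFinish fires, so read them only after draining.
      console.log("tool calls:", result.toolCalls?.length ?? 0);
      console.log("tool results:", result.toolResults?.length ?? 0);
      return text;
    }
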
@@ -343,6 +321,47 @@ export class LiteLLMProvider extends BaseProvider {
             throw this.handleProviderError(error);
         }
     }
+    async *createLiteLLMTransformedStream(result) {
+        try {
+            const streamToUse = result.fullStream || result.textStream;
+            for await (const chunk of streamToUse) {
+                if (chunk && typeof chunk === "object") {
+                    if ("type" in chunk && chunk.type === "error") {
+                        const errorChunk = chunk;
+                        logger.error(`LiteLLM: Error chunk received:`, {
+                            errorType: errorChunk.type,
+                            errorDetails: errorChunk.error,
+                        });
+                        throw this.formatProviderError(new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`));
+                    }
+                    if ("textDelta" in chunk) {
+                        const textDelta = chunk.textDelta;
+                        if (textDelta) {
+                            yield { content: textDelta };
+                        }
+                    }
+                    else if ("type" in chunk &&
+                        chunk.type === "tool-call" &&
+                        "toolCallId" in chunk) {
+                        logger.debug("LiteLLM: Tool call", {
+                            toolCallId: String(chunk.toolCallId),
+                            toolName: "toolName" in chunk ? String(chunk.toolName) : "unknown",
+                        });
+                    }
+                }
+                else if (typeof chunk === "string") {
+                    yield { content: chunk };
+                }
+            }
+        }
+        catch (streamError) {
+            if (NoOutputGeneratedError.isInstance(streamError)) {
+                logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
+                return;
+            }
+            throw streamError;
+        }
+    }
     /**
      * Generate an embedding for a single text input
      * Uses the LiteLLM proxy with OpenAI-compatible embedding API

@@ -419,7 +438,9 @@ export class LiteLLMProvider extends BaseProvider {
             });
         }
         // Fallback to hardcoded list if API fetch fails
-        const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",")
+        const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",")
+            .map((m) => m.trim())
+            .filter((m) => m.length > 0) || [
             "openai/gpt-4o", // minimal safe baseline
             "anthropic/claude-3-haiku",
             "meta-llama/llama-3.1-8b-instruct",

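The reworked LITELLM_FALLBACK_MODELS parsing trims each entry and drops empty segments before falling back to the hardcoded list. For illustration, with a made-up value:

    const raw = " openai/gpt-4o , anthropic/claude-3-haiku,, ";
    const fallbackModels = raw
      .split(",")
      .map((m) => m.trim())
      .filter((m) => m.length > 0);
    // -> ["openai/gpt-4o", "anthropic/claude-3-haiku"]
    // Stray spaces and trailing commas no longer yield bogus model ids.
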
@@ -7,6 +7,7 @@ import { createProxyFetch } from "../proxy/proxyFetch.js";
 import { logger } from "../utils/logger.js";
 import { createMistralConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
 import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+import { resolveToolChoice } from "../utils/toolChoice.js";
 import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
 // Configuration helpers - now using consolidated utility
 const getMistralApiKey = () => {

@@ -63,7 +64,7 @@ export class MistralProvider extends BaseProvider {
             maxOutputTokens: options.maxTokens, // No default limit - unlimited unless specified
             tools,
             stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
-            toolChoice:
+            toolChoice: resolveToolChoice(options, tools, shouldUseTools),
             abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
             experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
             onStepFinish: ({ toolCalls, toolResults }) => {

@@ -11,7 +11,7 @@ import { InvalidModelError, NetworkError, ProviderError, } from "../types/errors
 import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
 import { TimeoutError } from "../utils/timeout.js";
 // Model version constants (configurable via environment)
-const DEFAULT_OLLAMA_MODEL = "llama3.1:8b";
+const DEFAULT_OLLAMA_MODEL = process.env.OLLAMA_MODEL || "llama3.1:8b";
 const FALLBACK_OLLAMA_MODEL = "llama3.2:latest"; // Used when primary model fails
 // Configuration helpers
 const getOllamaBaseUrl = () => {

@@ -40,6 +40,26 @@ const getOllamaTimeout = () => {
     // especially for larger models like aliafshar/gemma3-it-qat-tools:latest (12.2B parameters)
     return parseInt(process.env.OLLAMA_TIMEOUT || "240000", 10);
 };
+function isOllamaHttpError(error) {
+    return (error instanceof ProviderError &&
+        typeof error.statusCode === "number" &&
+        typeof error.responseBody === "string");
+}
+async function createOllamaHttpError(response) {
+    let responseBody = "";
+    try {
+        responseBody = (await response.text()).trim();
+    }
+    catch {
+        // Ignore unreadable bodies
+    }
+    const suffix = responseBody ? ` - ${responseBody.slice(0, 500)}` : "";
+    const error = new ProviderError(`Ollama API error: ${response.status} ${response.statusText}${suffix}`, "ollama");
+    error.statusCode = response.status;
+    error.statusText = response.statusText;
+    error.responseBody = responseBody;
+    return error;
+}
 // Create proxy-aware fetch instance
 const proxyFetch = createProxyFetch();
 // Custom LanguageModel implementation for Ollama

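Failed Ollama HTTP responses are now wrapped in a ProviderError carrying statusCode, statusText, and a truncated responseBody, which the 404 classification later in this file relies on. A sketch of reading those fields from a caught error; since they are attached dynamically above, they are read defensively here:

    function describeOllamaFailure(err: unknown): string {
      const e = err as {
        statusCode?: number;
        statusText?: string;
        responseBody?: string;
        message?: string;
      };
      if (typeof e?.statusCode === "number") {
        const body = e.responseBody ? ` - ${e.responseBody.slice(0, 200)}` : "";
        return `HTTP ${e.statusCode} ${e.statusText ?? ""}${body}`.trim();
      }
      return e?.message ?? String(err);
    }
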
@@ -110,21 +130,37 @@ class OllamaLanguageModel {
             signal: createAbortSignalWithTimeout(this.timeout),
         });
         if (!response.ok) {
-            throw
+            throw await createOllamaHttpError(response);
         }
         const data = await response.json();
         logger.debug("[OllamaLanguageModel] OpenAI API Response:", JSON.stringify(data, null, 2));
         const text = data.choices?.[0]?.message?.content || "";
         const usage = data.usage || {};
+        const promptTokens = usage.prompt_tokens ??
+            this.estimateTokenCount(JSON.stringify(messages));
+        const completionTokens = usage.completion_tokens ?? this.estimateTokenCount(text);
         return {
+            content: text ? [{ type: "text", text }] : [],
             text,
             usage: {
-
-
-
-
+                inputTokens: promptTokens,
+                outputTokens: completionTokens,
+                promptTokens,
+                completionTokens,
+                totalTokens: usage.total_tokens ?? promptTokens + completionTokens,
+            },
+            finishReason: data.choices?.[0]?.finish_reason ?? "stop",
+            warnings: [],
+            request: {
+                body: JSON.stringify(requestBody),
+            },
+            response: {
+                id: data.id,
+                modelId: data.model,
+                timestamp: new Date(),
+                headers: {},
+                body: data,
             },
-            finishReason: "stop",
             rawCall: {
                 rawPrompt: messages,
                 rawSettings: {

@@ -158,21 +194,45 @@ class OllamaLanguageModel {
             signal: createAbortSignalWithTimeout(this.timeout),
         });
         if (!response.ok) {
-            throw
+            throw await createOllamaHttpError(response);
         }
         const data = await response.json();
         logger.debug("[OllamaLanguageModel] Native API Response:", JSON.stringify(data, null, 2));
+        const text = String(data.response ?? "");
+        const promptTokens = data.prompt_eval_count ?? this.estimateTokenCount(prompt);
+        const completionTokens = data.eval_count ?? this.estimateTokenCount(text);
+        const requestBody = {
+            model: this.modelId,
+            prompt,
+            stream: false,
+            system: messages.find((m) => m.role === "system")?.content,
+            options: {
+                temperature: options.temperature,
+                num_predict: options.maxTokens,
+            },
+        };
         return {
-            text:
+            content: text ? [{ type: "text", text }] : [],
+            text,
             usage: {
-
-
-
-
-
-
+                inputTokens: promptTokens,
+                outputTokens: completionTokens,
+                promptTokens,
+                completionTokens,
+                totalTokens: promptTokens + completionTokens,
+            },
+            finishReason: data.done_reason ?? "stop",
+            warnings: [],
+            request: {
+                body: JSON.stringify(requestBody),
+            },
+            response: {
+                id: data.created_at,
+                modelId: this.modelId,
+                timestamp: data.created_at ? new Date(data.created_at) : new Date(),
+                headers: {},
+                body: data,
             },
-            finishReason: "stop",
             rawCall: {
                 rawPrompt: prompt,
                 rawSettings: {

@@ -220,7 +280,7 @@ class OllamaLanguageModel {
             ok: response.ok,
         });
         if (!response.ok) {
-            throw
+            throw await createOllamaHttpError(response);
         }
         const self = this;
         return {

@@ -282,7 +342,7 @@ class OllamaLanguageModel {
             ok: response.ok,
         });
         if (!response.ok) {
-            throw
+            throw await createOllamaHttpError(response);
         }
         const self = this;
         return {

@@ -705,7 +765,7 @@ export class OllamaProvider extends BaseProvider {
             signal: createAbortSignalWithTimeout(this.timeout),
         });
         if (!response.ok) {
-            throw this.handleProviderError(
+            throw this.handleProviderError(await createOllamaHttpError(response));
         }
         // Process response stream
         const { content, toolCalls, finishReason } = await this.processOllamaResponse(response, controller);

@@ -870,7 +930,7 @@ export class OllamaProvider extends BaseProvider {
             ok: response.ok,
         });
         if (!response.ok) {
-            throw this.handleProviderError(
+            throw this.handleProviderError(await createOllamaHttpError(response));
         }
         // Transform to async generator for OpenAI-compatible format
         const self = this;

@@ -936,7 +996,7 @@ export class OllamaProvider extends BaseProvider {
             ok: response.ok,
         });
         if (!response.ok) {
-            throw this.handleProviderError(
+            throw this.handleProviderError(await createOllamaHttpError(response));
         }
         // Transform to async generator to match other providers
         const self = this;

@@ -1486,8 +1546,18 @@ export class OllamaProvider extends BaseProvider {
             error.message?.includes("not found")) {
             return new InvalidModelError(`❌ Ollama Model Not Found\n\nModel '${this.modelName}' is not available locally.\n\n🔧 Install Model:\n1. Run: ollama pull ${this.modelName}\n2. Or try a different model:\n - ollama pull ${FALLBACK_OLLAMA_MODEL}\n - ollama pull mistral:latest\n - ollama pull codellama:latest\n\n🔧 List Available Models:\nollama list`, this.providerName);
         }
-
-
+        const errMsg = error.message ?? "";
+        const httpStatus = isOllamaHttpError(error) ? error.statusCode : undefined;
+        const responseBody = isOllamaHttpError(error) ? error.responseBody : "";
+        if (httpStatus === 404 &&
+            (responseBody.toLowerCase().includes("model") ||
+                responseBody.toLowerCase().includes("not found") ||
+                errMsg.toLowerCase().includes("model") ||
+                errMsg.toLowerCase().includes("not found"))) {
+            return new InvalidModelError(`❌ Ollama Returned HTTP 404\n\nThis usually means the configured model '${this.modelName}' is not installed locally, although a bad base URL or incompatible API mode can also cause it.\n\n🔧 Check:\n1. Verify the model exists: 'ollama list'\n2. Pull it if missing: 'ollama pull ${this.modelName}'\n3. Verify the service is healthy: 'curl ${this.baseUrl}/api/version'\n4. If you use OpenAI-compatible mode, confirm the base URL serves /v1/chat/completions`, this.providerName);
+        }
+        if (httpStatus === 404) {
+            return new ProviderError(`❌ Ollama Endpoint Returned HTTP 404\n\nThe configured base URL (${this.baseUrl}) did not serve the expected Ollama endpoint for model '${this.modelName}'. This is usually a configuration or API-mode mismatch rather than a missing model.\n\n🔧 Check:\n1. Verify the base URL: ${this.baseUrl}\n2. For native Ollama mode, confirm /api/generate exists\n3. For OpenAI-compatible mode, confirm /v1/chat/completions exists\n4. If the model is missing, the response body should explicitly say so`, this.providerName);
         }
         return new ProviderError(`❌ Ollama Provider Error\n\n${error.message || "Unknown error occurred"}\n\n🔧 Troubleshooting:\n1. Check if Ollama service is running\n2. Verify model is installed: 'ollama list'\n3. Check network connectivity to ${this.baseUrl}\n4. Review Ollama logs for details`, this.providerName);
     }

@@ -52,6 +52,8 @@ export declare class OpenAIProvider extends BaseProvider {
      * and the migration guide in the project repository.
      */
     protected executeStream(options: StreamOptions, _analysisSchema?: ValidationSchema): Promise<StreamResult>;
+    private createOpenAITransformedStream;
+    private extractOpenAIChunkContent;
     /**
      * Generate embeddings for text using OpenAI text-embedding models
      * @param text - The text to embed
|