@juspay/neurolink 9.41.0 → 9.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +7 -1
- package/dist/auth/anthropicOAuth.d.ts +18 -3
- package/dist/auth/anthropicOAuth.js +137 -4
- package/dist/auth/providers/firebase.js +5 -1
- package/dist/auth/providers/jwt.js +5 -1
- package/dist/auth/providers/workos.js +5 -1
- package/dist/auth/sessionManager.d.ts +1 -1
- package/dist/auth/sessionManager.js +58 -27
- package/dist/browser/neurolink.min.js +337 -318
- package/dist/cli/commands/mcp.js +3 -0
- package/dist/cli/commands/proxy.d.ts +2 -1
- package/dist/cli/commands/proxy.js +279 -16
- package/dist/cli/commands/task.js +3 -0
- package/dist/cli/factories/commandFactory.d.ts +2 -0
- package/dist/cli/factories/commandFactory.js +38 -0
- package/dist/cli/parser.js +4 -3
- package/dist/client/aiSdkAdapter.js +3 -0
- package/dist/client/streamingClient.js +30 -10
- package/dist/core/modules/GenerationHandler.js +3 -2
- package/dist/core/redisConversationMemoryManager.js +7 -3
- package/dist/evaluation/BatchEvaluator.js +4 -1
- package/dist/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/evaluation/pipeline/evaluationPipeline.js +20 -8
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
- package/dist/lib/auth/anthropicOAuth.js +137 -4
- package/dist/lib/auth/providers/firebase.js +5 -1
- package/dist/lib/auth/providers/jwt.js +5 -1
- package/dist/lib/auth/providers/workos.js +5 -1
- package/dist/lib/auth/sessionManager.d.ts +1 -1
- package/dist/lib/auth/sessionManager.js +58 -27
- package/dist/lib/client/aiSdkAdapter.js +3 -0
- package/dist/lib/client/streamingClient.js +30 -10
- package/dist/lib/core/modules/GenerationHandler.js +3 -2
- package/dist/lib/core/redisConversationMemoryManager.js +7 -3
- package/dist/lib/evaluation/BatchEvaluator.js +4 -1
- package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +20 -8
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/lib/neurolink.d.ts +3 -2
- package/dist/lib/neurolink.js +260 -494
- package/dist/lib/observability/otelBridge.d.ts +2 -2
- package/dist/lib/observability/otelBridge.js +12 -3
- package/dist/lib/providers/amazonBedrock.js +2 -4
- package/dist/lib/providers/anthropic.d.ts +9 -5
- package/dist/lib/providers/anthropic.js +19 -14
- package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/lib/providers/anthropicBaseProvider.js +5 -4
- package/dist/lib/providers/azureOpenai.d.ts +1 -1
- package/dist/lib/providers/azureOpenai.js +5 -4
- package/dist/lib/providers/googleAiStudio.js +30 -1
- package/dist/lib/providers/googleVertex.js +28 -6
- package/dist/lib/providers/huggingFace.d.ts +3 -3
- package/dist/lib/providers/huggingFace.js +6 -8
- package/dist/lib/providers/litellm.js +41 -29
- package/dist/lib/providers/mistral.js +2 -1
- package/dist/lib/providers/ollama.js +80 -23
- package/dist/lib/providers/openAI.js +3 -2
- package/dist/lib/providers/openRouter.js +2 -1
- package/dist/lib/providers/openaiCompatible.d.ts +4 -4
- package/dist/lib/providers/openaiCompatible.js +4 -4
- package/dist/lib/proxy/claudeFormat.d.ts +3 -2
- package/dist/lib/proxy/claudeFormat.js +25 -20
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/lib/proxy/modelRouter.js +3 -0
- package/dist/lib/proxy/oauthFetch.d.ts +1 -1
- package/dist/lib/proxy/oauthFetch.js +65 -72
- package/dist/lib/proxy/proxyConfig.js +44 -24
- package/dist/lib/proxy/proxyEnv.d.ts +19 -0
- package/dist/lib/proxy/proxyEnv.js +73 -0
- package/dist/lib/proxy/proxyFetch.js +50 -4
- package/dist/lib/proxy/proxyTracer.d.ts +133 -0
- package/dist/lib/proxy/proxyTracer.js +645 -0
- package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/lib/proxy/rawStreamCapture.js +83 -0
- package/dist/lib/proxy/requestLogger.d.ts +32 -5
- package/dist/lib/proxy/requestLogger.js +406 -37
- package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
- package/dist/lib/proxy/sseInterceptor.js +402 -0
- package/dist/lib/proxy/usageStats.d.ts +4 -3
- package/dist/lib/proxy/usageStats.js +25 -12
- package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/lib/rag/chunking/markdownChunker.js +15 -6
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +7 -2
- package/dist/lib/server/routes/claudeProxyRoutes.js +1737 -508
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/lib/services/server/ai/observability/instrumentation.js +240 -40
- package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/lib/tasks/backends/bullmqBackend.js +14 -7
- package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/lib/tasks/store/redisTaskStore.js +34 -26
- package/dist/lib/tasks/taskManager.d.ts +3 -0
- package/dist/lib/tasks/taskManager.js +63 -30
- package/dist/lib/telemetry/index.d.ts +2 -1
- package/dist/lib/telemetry/index.js +2 -1
- package/dist/lib/telemetry/telemetryService.d.ts +3 -0
- package/dist/lib/telemetry/telemetryService.js +65 -5
- package/dist/lib/types/cli.d.ts +10 -0
- package/dist/lib/types/proxyTypes.d.ts +37 -5
- package/dist/lib/types/streamTypes.d.ts +25 -3
- package/dist/lib/utils/messageBuilder.js +3 -2
- package/dist/lib/utils/providerHealth.d.ts +18 -0
- package/dist/lib/utils/providerHealth.js +240 -9
- package/dist/lib/utils/providerUtils.js +14 -8
- package/dist/lib/utils/toolChoice.d.ts +4 -0
- package/dist/lib/utils/toolChoice.js +7 -0
- package/dist/neurolink.d.ts +3 -2
- package/dist/neurolink.js +260 -494
- package/dist/observability/otelBridge.d.ts +2 -2
- package/dist/observability/otelBridge.js +12 -3
- package/dist/providers/amazonBedrock.js +2 -4
- package/dist/providers/anthropic.d.ts +9 -5
- package/dist/providers/anthropic.js +19 -14
- package/dist/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/providers/anthropicBaseProvider.js +5 -4
- package/dist/providers/azureOpenai.d.ts +1 -1
- package/dist/providers/azureOpenai.js +5 -4
- package/dist/providers/googleAiStudio.js +30 -1
- package/dist/providers/googleVertex.js +28 -6
- package/dist/providers/huggingFace.d.ts +3 -3
- package/dist/providers/huggingFace.js +6 -7
- package/dist/providers/litellm.js +41 -29
- package/dist/providers/mistral.js +2 -1
- package/dist/providers/ollama.js +80 -23
- package/dist/providers/openAI.js +3 -2
- package/dist/providers/openRouter.js +2 -1
- package/dist/providers/openaiCompatible.d.ts +4 -4
- package/dist/providers/openaiCompatible.js +4 -3
- package/dist/proxy/claudeFormat.d.ts +3 -2
- package/dist/proxy/claudeFormat.js +25 -20
- package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/proxy/modelRouter.js +3 -0
- package/dist/proxy/oauthFetch.d.ts +1 -1
- package/dist/proxy/oauthFetch.js +65 -72
- package/dist/proxy/proxyConfig.js +44 -24
- package/dist/proxy/proxyEnv.d.ts +19 -0
- package/dist/proxy/proxyEnv.js +72 -0
- package/dist/proxy/proxyFetch.js +50 -4
- package/dist/proxy/proxyTracer.d.ts +133 -0
- package/dist/proxy/proxyTracer.js +644 -0
- package/dist/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/proxy/rawStreamCapture.js +82 -0
- package/dist/proxy/requestLogger.d.ts +32 -5
- package/dist/proxy/requestLogger.js +406 -37
- package/dist/proxy/sseInterceptor.d.ts +97 -0
- package/dist/proxy/sseInterceptor.js +401 -0
- package/dist/proxy/usageStats.d.ts +4 -3
- package/dist/proxy/usageStats.js +25 -12
- package/dist/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/rag/chunking/markdownChunker.js +15 -6
- package/dist/server/routes/claudeProxyRoutes.d.ts +7 -2
- package/dist/server/routes/claudeProxyRoutes.js +1737 -508
- package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/services/server/ai/observability/instrumentation.js +240 -40
- package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/tasks/backends/bullmqBackend.js +14 -7
- package/dist/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/tasks/store/redisTaskStore.js +34 -26
- package/dist/tasks/taskManager.d.ts +3 -0
- package/dist/tasks/taskManager.js +63 -30
- package/dist/telemetry/index.d.ts +2 -1
- package/dist/telemetry/index.js +2 -1
- package/dist/telemetry/telemetryService.d.ts +3 -0
- package/dist/telemetry/telemetryService.js +65 -5
- package/dist/types/cli.d.ts +10 -0
- package/dist/types/proxyTypes.d.ts +37 -5
- package/dist/types/streamTypes.d.ts +25 -3
- package/dist/utils/messageBuilder.js +3 -2
- package/dist/utils/providerHealth.d.ts +18 -0
- package/dist/utils/providerHealth.js +240 -9
- package/dist/utils/providerUtils.js +14 -8
- package/dist/utils/toolChoice.d.ts +4 -0
- package/dist/utils/toolChoice.js +6 -0
- package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
- package/docs/changelog.md +252 -0
- package/package.json +17 -1
- package/scripts/observability/check-proxy-telemetry.mjs +235 -0
- package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
- package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
- package/scripts/observability/manage-local-openobserve.sh +184 -0
- package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
- package/scripts/observability/proxy-observability.env.example +23 -0
|
@@ -228,7 +228,8 @@ export class ProviderHealthChecker {
|
|
|
228
228
|
}
|
|
229
229
|
// Providers that don't use API keys directly
|
|
230
230
|
if (providerName === AIProviderName.OLLAMA ||
|
|
231
|
-
providerName === AIProviderName.BEDROCK
|
|
231
|
+
providerName === AIProviderName.BEDROCK ||
|
|
232
|
+
providerName === AIProviderName.LITELLM) {
|
|
232
233
|
healthStatus.hasApiKey = true;
|
|
233
234
|
return;
|
|
234
235
|
}
|
|
@@ -368,6 +369,8 @@ export class ProviderHealthChecker {
|
|
|
368
369
|
return [];
|
|
369
370
|
case AIProviderName.AZURE:
|
|
370
371
|
return ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"];
|
|
372
|
+
case AIProviderName.LITELLM:
|
|
373
|
+
return [];
|
|
371
374
|
case AIProviderName.OLLAMA:
|
|
372
375
|
return []; // Ollama typically doesn't require API keys
|
|
373
376
|
default:
|
|
@@ -391,8 +394,10 @@ export class ProviderHealthChecker {
|
|
|
391
394
|
return "AWS_ACCESS_KEY_ID";
|
|
392
395
|
case AIProviderName.AZURE:
|
|
393
396
|
return "AZURE_OPENAI_API_KEY";
|
|
397
|
+
case AIProviderName.LITELLM:
|
|
398
|
+
return "LITELLM_API_KEY";
|
|
394
399
|
case AIProviderName.OLLAMA:
|
|
395
|
-
return "
|
|
400
|
+
return "OLLAMA_BASE_URL";
|
|
396
401
|
default:
|
|
397
402
|
return "";
|
|
398
403
|
}
|
|
@@ -416,6 +421,8 @@ export class ProviderHealthChecker {
|
|
|
416
421
|
return apiKey.length >= API_KEY_LENGTHS.AWS_ACCESS_KEY; // AWS access key length
|
|
417
422
|
case AIProviderName.AZURE:
|
|
418
423
|
return apiKey.length >= API_KEY_LENGTHS.AZURE_MIN; // Azure OpenAI API key length
|
|
424
|
+
case AIProviderName.LITELLM:
|
|
425
|
+
return apiKey.length > 0;
|
|
419
426
|
case AIProviderName.OLLAMA:
|
|
420
427
|
return true; // Ollama usually doesn't require specific format
|
|
421
428
|
default:
|
|
@@ -437,8 +444,10 @@ export class ProviderHealthChecker {
|
|
|
437
444
|
return null; // Complex authentication required
|
|
438
445
|
case AIProviderName.BEDROCK:
|
|
439
446
|
return null; // AWS endpoints vary by region
|
|
447
|
+
case AIProviderName.LITELLM:
|
|
448
|
+
return this.getLiteLLMModelsUrl();
|
|
440
449
|
case AIProviderName.OLLAMA:
|
|
441
|
-
return
|
|
450
|
+
return this.getOllamaTagsUrl();
|
|
442
451
|
default:
|
|
443
452
|
return null;
|
|
444
453
|
}
|
|
@@ -457,6 +466,9 @@ export class ProviderHealthChecker {
|
|
|
457
466
|
case AIProviderName.AZURE:
|
|
458
467
|
await this.checkAzureConfig(healthStatus);
|
|
459
468
|
break;
|
|
469
|
+
case AIProviderName.LITELLM:
|
|
470
|
+
await this.checkLiteLLMConfig(healthStatus);
|
|
471
|
+
break;
|
|
460
472
|
case AIProviderName.OLLAMA:
|
|
461
473
|
await this.checkOllamaConfig(healthStatus);
|
|
462
474
|
break;
|
|
@@ -648,15 +660,177 @@ export class ProviderHealthChecker {
|
|
|
648
660
|
healthStatus.recommendations.push("Set one of: AZURE_OPENAI_MODEL, AZURE_OPENAI_DEPLOYMENT, or AZURE_OPENAI_DEPLOYMENT_ID");
|
|
649
661
|
}
|
|
650
662
|
}
|
|
663
|
+
static getLiteLLMBaseUrl() {
|
|
664
|
+
return process.env.LITELLM_BASE_URL || "http://localhost:4000";
|
|
665
|
+
}
|
|
666
|
+
static getLiteLLMModelsUrl() {
|
|
667
|
+
return new URL("/v1/models", this.getLiteLLMBaseUrl()).toString();
|
|
668
|
+
}
|
|
669
|
+
static getConfiguredLiteLLMModel() {
|
|
670
|
+
return process.env.LITELLM_MODEL || "openai/gpt-4o-mini";
|
|
671
|
+
}
|
|
672
|
+
static getOllamaBaseUrl() {
|
|
673
|
+
return (process.env.OLLAMA_BASE_URL ||
|
|
674
|
+
process.env.OLLAMA_API_BASE ||
|
|
675
|
+
"http://localhost:11434");
|
|
676
|
+
}
|
|
677
|
+
static getOllamaTagsUrl() {
|
|
678
|
+
return new URL("/api/tags", this.getOllamaBaseUrl()).toString();
|
|
679
|
+
}
|
|
680
|
+
static getConfiguredOllamaModel() {
|
|
681
|
+
return process.env.OLLAMA_MODEL || "llama3.1:8b";
|
|
682
|
+
}
|
|
683
|
+
static async fetchJsonWithTimeout(url, options = {}) {
|
|
684
|
+
const controller = new AbortController();
|
|
685
|
+
const timeoutId = setTimeout(() => controller.abort(), options.timeout ?? this.DEFAULT_TIMEOUT);
|
|
686
|
+
try {
|
|
687
|
+
const proxyFetch = createProxyFetch();
|
|
688
|
+
const response = await proxyFetch(url, {
|
|
689
|
+
method: "GET",
|
|
690
|
+
headers: options.headers,
|
|
691
|
+
signal: controller.signal,
|
|
692
|
+
});
|
|
693
|
+
if (!response.ok) {
|
|
694
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
695
|
+
}
|
|
696
|
+
return await response.json();
|
|
697
|
+
}
|
|
698
|
+
finally {
|
|
699
|
+
clearTimeout(timeoutId);
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
static normalizeModelList(models) {
|
|
703
|
+
return models
|
|
704
|
+
.map((entry) => {
|
|
705
|
+
if (typeof entry === "string") {
|
|
706
|
+
return entry;
|
|
707
|
+
}
|
|
708
|
+
if (entry &&
|
|
709
|
+
typeof entry === "object" &&
|
|
710
|
+
"id" in entry &&
|
|
711
|
+
typeof entry.id === "string") {
|
|
712
|
+
return entry.id;
|
|
713
|
+
}
|
|
714
|
+
if (entry &&
|
|
715
|
+
typeof entry === "object" &&
|
|
716
|
+
"name" in entry &&
|
|
717
|
+
typeof entry.name === "string") {
|
|
718
|
+
return entry.name;
|
|
719
|
+
}
|
|
720
|
+
return null;
|
|
721
|
+
})
|
|
722
|
+
.filter((model) => typeof model === "string");
|
|
723
|
+
}
|
|
724
|
+
static hasRequestedModel(availableModels, requestedModel) {
|
|
725
|
+
return availableModels.some((model) => model === requestedModel ||
|
|
726
|
+
model.startsWith(`${requestedModel}:`) ||
|
|
727
|
+
requestedModel.startsWith(`${model}:`));
|
|
728
|
+
}
|
|
729
|
+
static async getOllamaAvailableModels(timeout = 2000) {
|
|
730
|
+
const payload = (await this.fetchJsonWithTimeout(this.getOllamaTagsUrl(), {
|
|
731
|
+
timeout,
|
|
732
|
+
}));
|
|
733
|
+
return this.normalizeModelList(payload.models ?? []);
|
|
734
|
+
}
|
|
735
|
+
static async getLiteLLMAvailableModels(timeout = 2000) {
|
|
736
|
+
const payload = (await this.fetchJsonWithTimeout(this.getLiteLLMModelsUrl(), {
|
|
737
|
+
timeout,
|
|
738
|
+
headers: {
|
|
739
|
+
Authorization: `Bearer ${process.env.LITELLM_API_KEY || "sk-anything"}`,
|
|
740
|
+
"Content-Type": "application/json",
|
|
741
|
+
},
|
|
742
|
+
}));
|
|
743
|
+
return this.normalizeModelList(payload.data ?? []);
|
|
744
|
+
}
|
|
745
|
+
static async checkOllamaAvailability(options) {
|
|
746
|
+
try {
|
|
747
|
+
const models = await this.getOllamaAvailableModels(options.timeout);
|
|
748
|
+
if (!this.hasRequestedModel(models, options.model)) {
|
|
749
|
+
return {
|
|
750
|
+
available: false,
|
|
751
|
+
reason: `Configured Ollama model '${options.model}' is not installed`,
|
|
752
|
+
models,
|
|
753
|
+
};
|
|
754
|
+
}
|
|
755
|
+
return { available: true, models };
|
|
756
|
+
}
|
|
757
|
+
catch (error) {
|
|
758
|
+
return {
|
|
759
|
+
available: false,
|
|
760
|
+
reason: error instanceof Error ? error.message : String(error),
|
|
761
|
+
models: [],
|
|
762
|
+
};
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
static async checkLiteLLMAvailability(options) {
|
|
766
|
+
try {
|
|
767
|
+
const models = await this.getLiteLLMAvailableModels(options.timeout);
|
|
768
|
+
if (models.length === 0) {
|
|
769
|
+
return {
|
|
770
|
+
available: false,
|
|
771
|
+
reason: "LiteLLM returned an empty model list",
|
|
772
|
+
models,
|
|
773
|
+
};
|
|
774
|
+
}
|
|
775
|
+
if (!this.hasRequestedModel(models, options.model)) {
|
|
776
|
+
return {
|
|
777
|
+
available: false,
|
|
778
|
+
reason: `Configured LiteLLM model '${options.model}' is not exposed by the proxy`,
|
|
779
|
+
models,
|
|
780
|
+
};
|
|
781
|
+
}
|
|
782
|
+
return { available: true, models };
|
|
783
|
+
}
|
|
784
|
+
catch (error) {
|
|
785
|
+
return {
|
|
786
|
+
available: false,
|
|
787
|
+
reason: error instanceof Error ? error.message : String(error),
|
|
788
|
+
models: [],
|
|
789
|
+
};
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
static async checkLiteLLMConfig(healthStatus) {
|
|
793
|
+
const liteLLMBase = this.getLiteLLMBaseUrl();
|
|
794
|
+
if (!liteLLMBase.startsWith("http")) {
|
|
795
|
+
healthStatus.isConfigured = false;
|
|
796
|
+
healthStatus.configurationIssues.push("Invalid LITELLM_BASE_URL format");
|
|
797
|
+
healthStatus.recommendations.push("Set LITELLM_BASE_URL to a valid URL (e.g., http://localhost:4000)");
|
|
798
|
+
return;
|
|
799
|
+
}
|
|
800
|
+
const availability = await this.checkLiteLLMAvailability({
|
|
801
|
+
model: this.getConfiguredLiteLLMModel(),
|
|
802
|
+
timeout: 2000,
|
|
803
|
+
});
|
|
804
|
+
if (!availability.available) {
|
|
805
|
+
healthStatus.isConfigured = false;
|
|
806
|
+
healthStatus.configurationIssues.push(`LiteLLM runtime check failed: ${availability.reason ?? "unknown error"}`);
|
|
807
|
+
healthStatus.recommendations.push("Start the LiteLLM proxy and ensure the configured model is available from /v1/models");
|
|
808
|
+
return;
|
|
809
|
+
}
|
|
810
|
+
healthStatus.isConfigured = true;
|
|
811
|
+
}
|
|
651
812
|
/**
|
|
652
813
|
* Check Ollama configuration
|
|
653
814
|
*/
|
|
654
815
|
static async checkOllamaConfig(healthStatus) {
|
|
655
|
-
const ollamaBase =
|
|
816
|
+
const ollamaBase = this.getOllamaBaseUrl();
|
|
656
817
|
if (!ollamaBase.startsWith("http")) {
|
|
657
|
-
healthStatus.
|
|
658
|
-
healthStatus.
|
|
818
|
+
healthStatus.isConfigured = false;
|
|
819
|
+
healthStatus.configurationIssues.push("Invalid OLLAMA_BASE_URL format (OLLAMA_API_BASE is still accepted as a legacy alias)");
|
|
820
|
+
healthStatus.recommendations.push("Set OLLAMA_BASE_URL to a valid URL (e.g., http://localhost:11434). OLLAMA_API_BASE remains supported as a legacy alias.");
|
|
821
|
+
return;
|
|
659
822
|
}
|
|
823
|
+
const availability = await this.checkOllamaAvailability({
|
|
824
|
+
model: this.getConfiguredOllamaModel(),
|
|
825
|
+
timeout: 2000,
|
|
826
|
+
});
|
|
827
|
+
if (!availability.available) {
|
|
828
|
+
healthStatus.isConfigured = false;
|
|
829
|
+
healthStatus.configurationIssues.push(`Ollama runtime check failed: ${availability.reason ?? "unknown error"}`);
|
|
830
|
+
healthStatus.recommendations.push("Start Ollama and install the configured model before using Ollama as a fallback provider");
|
|
831
|
+
return;
|
|
832
|
+
}
|
|
833
|
+
healthStatus.isConfigured = true;
|
|
660
834
|
}
|
|
661
835
|
/**
|
|
662
836
|
* Get common models for a provider
|
|
@@ -703,8 +877,21 @@ export class ProviderHealthChecker {
|
|
|
703
877
|
return [BedrockModels.CLAUDE_3_SONNET, BedrockModels.CLAUDE_3_HAIKU];
|
|
704
878
|
case AIProviderName.AZURE:
|
|
705
879
|
return [OpenAIModels.GPT_4O, OpenAIModels.GPT_4O_MINI, "gpt-35-turbo"];
|
|
706
|
-
case AIProviderName.
|
|
707
|
-
return [
|
|
880
|
+
case AIProviderName.LITELLM:
|
|
881
|
+
return [
|
|
882
|
+
"openai/gpt-4o-mini",
|
|
883
|
+
"anthropic/claude-3-haiku",
|
|
884
|
+
"google/gemini-2.5-flash",
|
|
885
|
+
];
|
|
886
|
+
case AIProviderName.OLLAMA: {
|
|
887
|
+
const envModel = process.env.OLLAMA_MODEL;
|
|
888
|
+
const defaults = [
|
|
889
|
+
"llama3.2:latest",
|
|
890
|
+
"llama3.1:latest",
|
|
891
|
+
"mistral:latest",
|
|
892
|
+
];
|
|
893
|
+
return envModel ? [envModel, ...defaults] : defaults;
|
|
894
|
+
}
|
|
708
895
|
default:
|
|
709
896
|
return [];
|
|
710
897
|
}
|
|
@@ -1139,18 +1326,61 @@ export class ProviderHealthChecker {
|
|
|
1139
1326
|
this.consecutiveFailures.clear();
|
|
1140
1327
|
}
|
|
1141
1328
|
}
|
|
1329
|
+
static async checkFallbackProviderAvailability(providerName, model) {
|
|
1330
|
+
const provider = providerName;
|
|
1331
|
+
if (provider === AIProviderName.OLLAMA) {
|
|
1332
|
+
const availability = await this.checkOllamaAvailability({
|
|
1333
|
+
model,
|
|
1334
|
+
timeout: 2000,
|
|
1335
|
+
});
|
|
1336
|
+
return {
|
|
1337
|
+
available: availability.available,
|
|
1338
|
+
reason: availability.reason,
|
|
1339
|
+
};
|
|
1340
|
+
}
|
|
1341
|
+
if (provider === AIProviderName.LITELLM) {
|
|
1342
|
+
const availability = await this.checkLiteLLMAvailability({
|
|
1343
|
+
model,
|
|
1344
|
+
timeout: 2000,
|
|
1345
|
+
});
|
|
1346
|
+
return {
|
|
1347
|
+
available: availability.available,
|
|
1348
|
+
reason: availability.reason,
|
|
1349
|
+
};
|
|
1350
|
+
}
|
|
1351
|
+
try {
|
|
1352
|
+
const health = await this.checkProviderHealth(provider, {
|
|
1353
|
+
includeConnectivityTest: false,
|
|
1354
|
+
cacheResults: true,
|
|
1355
|
+
maxCacheAge: 15_000,
|
|
1356
|
+
timeout: 2000,
|
|
1357
|
+
});
|
|
1358
|
+
return {
|
|
1359
|
+
available: health.isHealthy,
|
|
1360
|
+
reason: health.error || health.configurationIssues[0] || health.warning,
|
|
1361
|
+
};
|
|
1362
|
+
}
|
|
1363
|
+
catch (error) {
|
|
1364
|
+
return {
|
|
1365
|
+
available: false,
|
|
1366
|
+
reason: error instanceof Error ? error.message : String(error),
|
|
1367
|
+
};
|
|
1368
|
+
}
|
|
1369
|
+
}
|
|
1142
1370
|
/**
|
|
1143
1371
|
* Get the best healthy provider from a list of options (NON-BLOCKING)
|
|
1144
1372
|
* Prioritizes healthy providers over configured but unhealthy ones
|
|
1145
1373
|
* Uses fast, cached health checks to avoid blocking initialization
|
|
1146
1374
|
*/
|
|
1147
1375
|
static async getBestHealthyProvider(preferredProviders = [
|
|
1376
|
+
"litellm",
|
|
1377
|
+
"ollama",
|
|
1148
1378
|
"openai",
|
|
1149
1379
|
"anthropic",
|
|
1150
1380
|
"vertex",
|
|
1381
|
+
"google-ai",
|
|
1151
1382
|
"bedrock",
|
|
1152
1383
|
"azure",
|
|
1153
|
-
"google-ai",
|
|
1154
1384
|
]) {
|
|
1155
1385
|
const healthStatuses = await this.checkAllProvidersHealth({
|
|
1156
1386
|
includeConnectivityTest: false, // Quick config check only
|
|
@@ -1191,6 +1421,7 @@ export class ProviderHealthChecker {
|
|
|
1191
1421
|
AIProviderName.OPENAI,
|
|
1192
1422
|
AIProviderName.BEDROCK,
|
|
1193
1423
|
AIProviderName.AZURE,
|
|
1424
|
+
AIProviderName.LITELLM,
|
|
1194
1425
|
AIProviderName.OLLAMA,
|
|
1195
1426
|
];
|
|
1196
1427
|
const healthChecks = providers.map((provider) => this.checkProviderHealth(provider, options));
|
|
@@ -62,15 +62,18 @@ export async function getBestProvider(requestedProvider) {
|
|
|
62
62
|
}
|
|
63
63
|
/**
|
|
64
64
|
* Provider priority order rationale:
|
|
65
|
-
* -
|
|
66
|
-
* -
|
|
65
|
+
* - LiteLLM and Ollama are prioritized first for local/self-hosted deployments,
|
|
66
|
+
* avoiding cloud quota/rate-limit issues during fallback scenarios.
|
|
67
|
+
* - Vertex (Google Cloud AI) follows for enterprise-grade reliability.
|
|
68
|
+
* - Google AI follows as second cloud priority for comprehensive Google AI ecosystem support.
|
|
67
69
|
* - OpenAI maintains high priority due to its consistent reliability and broad model support.
|
|
68
|
-
* - Other providers are ordered based on a combination of reliability, feature set, and historical performance
|
|
69
|
-
* - Ollama is kept as a fallback for local deployments when available.
|
|
70
|
+
* - Other providers are ordered based on a combination of reliability, feature set, and historical performance.
|
|
70
71
|
* Please update this comment if the order is changed in the future, and document the rationale for maintainability.
|
|
71
72
|
*/
|
|
72
73
|
const providers = [
|
|
73
|
-
"
|
|
74
|
+
"litellm", // Prioritize self-hosted/proxy (no rate limits)
|
|
75
|
+
"ollama", // Local models (no rate limits)
|
|
76
|
+
"vertex", // Google Cloud AI (enterprise)
|
|
74
77
|
"google-ai", // Google AI ecosystem support
|
|
75
78
|
"openai", // Reliable with broad model support
|
|
76
79
|
"anthropic",
|
|
@@ -78,7 +81,6 @@ export async function getBestProvider(requestedProvider) {
|
|
|
78
81
|
"azure",
|
|
79
82
|
"mistral",
|
|
80
83
|
"huggingface",
|
|
81
|
-
"ollama", // Keep as fallback
|
|
82
84
|
];
|
|
83
85
|
for (const provider of providers) {
|
|
84
86
|
if (await isProviderAvailable(provider)) {
|
|
@@ -106,8 +108,11 @@ async function isProviderAvailable(providerName) {
|
|
|
106
108
|
});
|
|
107
109
|
if (response.ok) {
|
|
108
110
|
const { models } = await response.json();
|
|
109
|
-
const defaultOllamaModel = "llama3.
|
|
110
|
-
|
|
111
|
+
const defaultOllamaModel = process.env.OLLAMA_MODEL || "llama3.1:8b";
|
|
112
|
+
// Check for exact match first, then prefix match (e.g. "gemma3:27b" matches "gemma3:27b-fp16")
|
|
113
|
+
return models.some((m) => m.name === defaultOllamaModel ||
|
|
114
|
+
(typeof m.name === "string" &&
|
|
115
|
+
m.name.startsWith(defaultOllamaModel.split(":")[0] + ":")));
|
|
111
116
|
}
|
|
112
117
|
return false;
|
|
113
118
|
}
|
|
@@ -413,6 +418,7 @@ export function getAvailableProviders() {
|
|
|
413
418
|
"anthropic",
|
|
414
419
|
"azure",
|
|
415
420
|
"google-ai",
|
|
421
|
+
"litellm",
|
|
416
422
|
"huggingface",
|
|
417
423
|
"ollama",
|
|
418
424
|
"mistral",
|