@relayplane/proxy 1.8.6 → 1.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +9 -0
- package/dist/config.js.map +1 -1
- package/dist/cross-provider-cascade.d.ts +137 -0
- package/dist/cross-provider-cascade.d.ts.map +1 -0
- package/dist/cross-provider-cascade.js +258 -0
- package/dist/cross-provider-cascade.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -1
- package/dist/index.js.map +1 -1
- package/dist/ollama.d.ts +264 -0
- package/dist/ollama.d.ts.map +1 -0
- package/dist/ollama.js +510 -0
- package/dist/ollama.js.map +1 -0
- package/dist/osmosis-store.d.ts +33 -0
- package/dist/osmosis-store.d.ts.map +1 -0
- package/dist/osmosis-store.js +181 -0
- package/dist/osmosis-store.js.map +1 -0
- package/dist/standalone-proxy.d.ts +1 -1
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +308 -12
- package/dist/standalone-proxy.js.map +1 -1
- package/package.json +1 -1
package/dist/standalone-proxy.js
CHANGED
|
@@ -75,6 +75,8 @@ const index_js_1 = require("./mesh/index.js");
|
|
|
75
75
|
const response_cache_js_1 = require("./response-cache.js");
|
|
76
76
|
const stats_js_1 = require("./stats.js");
|
|
77
77
|
const rate_limiter_js_1 = require("./rate-limiter.js");
|
|
78
|
+
const ollama_js_1 = require("./ollama.js");
|
|
79
|
+
const cross_provider_cascade_js_1 = require("./cross-provider-cascade.js");
|
|
78
80
|
const budget_js_1 = require("./budget.js");
|
|
79
81
|
const anomaly_js_1 = require("./anomaly.js");
|
|
80
82
|
const alerts_js_1 = require("./alerts.js");
|
|
@@ -186,6 +188,10 @@ exports.DEFAULT_ENDPOINTS = {
|
|
|
186
188
|
baseUrl: 'https://api.perplexity.ai',
|
|
187
189
|
apiKeyEnv: 'PERPLEXITY_API_KEY',
|
|
188
190
|
},
|
|
191
|
+
ollama: {
|
|
192
|
+
baseUrl: 'http://localhost:11434',
|
|
193
|
+
apiKeyEnv: 'OLLAMA_API_KEY', // Not actually required, placeholder for consistency
|
|
194
|
+
},
|
|
189
195
|
};
|
|
190
196
|
/**
|
|
191
197
|
* Model to provider/model mapping
|
|
@@ -726,6 +732,8 @@ const DEFAULT_PROXY_CONFIG = {
|
|
|
726
732
|
};
|
|
727
733
|
/** Module-level ref to active proxy config (set during startProxy) */
|
|
728
734
|
let _activeProxyConfig = {};
|
|
735
|
+
/** Module-level ref to active Ollama config (set during startProxy) */
|
|
736
|
+
let _activeOllamaConfig;
|
|
729
737
|
function isContentLoggingEnabled() {
|
|
730
738
|
return _activeProxyConfig.dashboard?.showRequestContent !== false;
|
|
731
739
|
}
|
|
@@ -1901,10 +1909,14 @@ function resolveExplicitModel(modelName) {
|
|
|
1901
1909
|
if (modelName.startsWith('deepseek-') || modelName.startsWith('groq-')) {
|
|
1902
1910
|
return { provider: 'openrouter', model: modelName };
|
|
1903
1911
|
}
|
|
1912
|
+
// Ollama models: "ollama/llama3.2" or direct model names when Ollama config exists
|
|
1913
|
+
if (modelName.startsWith('ollama/')) {
|
|
1914
|
+
return { provider: 'ollama', model: modelName.slice('ollama/'.length) };
|
|
1915
|
+
}
|
|
1904
1916
|
// Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
|
|
1905
1917
|
if (modelName.includes('/')) {
|
|
1906
1918
|
const [provider, model] = modelName.split('/');
|
|
1907
|
-
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
|
|
1919
|
+
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local', 'ollama'];
|
|
1908
1920
|
if (provider && model && validProviders.includes(provider)) {
|
|
1909
1921
|
return { provider: provider, model };
|
|
1910
1922
|
}
|
|
@@ -1962,6 +1974,57 @@ function checkResponseModelMismatch(responseData, requestedModel, provider, log)
|
|
|
1962
1974
|
* Extract a human-readable error message from a provider error payload.
|
|
1963
1975
|
* Handles Anthropic ({ error: { type, message } }) and OpenAI ({ error: { message } }) formats.
|
|
1964
1976
|
*/
|
|
1977
|
+
/**
 * Convert a native Anthropic messages request body into the OpenAI-compatible
 * ChatRequest format used by forwardToOpenAICompatible and related helpers.
 *
 * This allows cross-provider cascade from Anthropic → OpenRouter (and others)
 * without losing the original request content. (GH #38)
 *
 * Conversion rules:
 *  - `system` given as a non-empty string becomes a leading system message.
 *  - `system` given as an array of blocks is flattened to the text of its
 *    `text` blocks joined with newlines; it is omitted when that text is empty.
 *  - A message whose `content` is a string is copied as-is.
 *  - A message whose `content` is an array keeps only its `text` blocks,
 *    concatenated without a separator. Non-text blocks are dropped, so such a
 *    message may end up with empty content.
 *  - Any other `content` is stringified (null/undefined become '').
 *
 * The body comes from the client request, so malformed pieces (a missing body,
 * non-object message entries, non-object content blocks) are skipped instead
 * of throwing; a throw here would abort the cascade hop that calls this.
 *
 * @param {object} body - Parsed Anthropic request body (`system`, `messages`,
 *   `max_tokens`, `temperature` are read).
 * @param {string} mappedModel - Model name to send to the fallback provider.
 * @returns {{model: string, messages: Array<{role: *, content: string}>, max_tokens: number, temperature: *, stream: boolean}}
 *   Non-streaming chat request; `max_tokens` defaults to 4096 when absent.
 */
function convertNativeAnthropicBodyToChatRequest(body, mappedModel) {
    // Tolerate a missing body so a bad request degrades to an empty conversation.
    const source = body ?? {};
    const messages = [];
    // Prepend system message if present
    const system = source['system'];
    if (system && typeof system === 'string') {
        messages.push({ role: 'system', content: system });
    }
    else if (Array.isArray(system)) {
        // Anthropic structured system (array of {type, text}) — flatten to text
        const systemText = flattenAnthropicTextBlocks(system, '\n');
        if (systemText) {
            messages.push({ role: 'system', content: systemText });
        }
    }
    const rawMessages = Array.isArray(source['messages']) ? source['messages'] : [];
    for (const msg of rawMessages) {
        // Skip entries that are not message objects (null, strings, numbers).
        if (msg === null || typeof msg !== 'object') {
            continue;
        }
        const role = msg['role'];
        const content = msg['content'];
        if (typeof content === 'string') {
            messages.push({ role, content });
        }
        else if (Array.isArray(content)) {
            // Anthropic content blocks — extract text parts
            messages.push({ role, content: flattenAnthropicTextBlocks(content, '') });
        }
        else {
            messages.push({ role, content: String(content ?? '') });
        }
    }
    return {
        model: mappedModel,
        messages,
        max_tokens: source['max_tokens'] ?? 4096,
        temperature: source['temperature'],
        stream: false,
    };
}
/**
 * Join the text of every `text` block in an Anthropic block array.
 * Entries that are not objects, or whose `type` is not 'text', are ignored;
 * a text block without a `text` field contributes an empty string.
 *
 * @param {Array<*>} blocks - Anthropic content/system blocks.
 * @param {string} separator - String placed between consecutive text parts.
 * @returns {string} The joined text ('' when there are no text blocks).
 */
function flattenAnthropicTextBlocks(blocks, separator) {
    return blocks
        .filter((block) => block !== null && typeof block === 'object' && block.type === 'text')
        .map((block) => block.text ?? '')
        .join(separator);
}
|
|
1965
2028
|
function extractProviderErrorMessage(payload, statusCode) {
|
|
1966
2029
|
const err = payload['error'];
|
|
1967
2030
|
if (typeof err === 'string')
|
|
@@ -2047,6 +2110,10 @@ function resolveProviderApiKey(provider, ctx, envApiKey) {
|
|
|
2047
2110
|
}
|
|
2048
2111
|
return { apiKey: envApiKey };
|
|
2049
2112
|
}
|
|
2113
|
+
// Ollama doesn't need an API key — it's local
|
|
2114
|
+
if (provider === 'ollama') {
|
|
2115
|
+
return { apiKey: 'ollama-local' };
|
|
2116
|
+
}
|
|
2050
2117
|
const apiKeyEnv = exports.DEFAULT_ENDPOINTS[provider]?.apiKeyEnv ?? `${provider.toUpperCase()}_API_KEY`;
|
|
2051
2118
|
const apiKey = process.env[apiKeyEnv];
|
|
2052
2119
|
if (!apiKey) {
|
|
@@ -2438,6 +2505,16 @@ async function startProxy(config = {}) {
|
|
|
2438
2505
|
catch { /* file missing or parse error = treat as first run */ }
|
|
2439
2506
|
const userConfig = (0, config_js_1.loadConfig)();
|
|
2440
2507
|
(0, rate_limiter_js_1.configureRateLimiter)();
|
|
2508
|
+
// ── Cross-provider cascade: configure from proxy config (GH #38) ──
|
|
2509
|
+
if (proxyConfig.crossProviderCascade?.enabled && (proxyConfig.crossProviderCascade.providers?.length ?? 0) > 1) {
|
|
2510
|
+
cross_provider_cascade_js_1.crossProviderCascade.configure({
|
|
2511
|
+
enabled: true,
|
|
2512
|
+
providers: proxyConfig.crossProviderCascade.providers,
|
|
2513
|
+
triggerStatuses: proxyConfig.crossProviderCascade.triggerStatuses,
|
|
2514
|
+
modelMapping: proxyConfig.crossProviderCascade.modelMapping,
|
|
2515
|
+
});
|
|
2516
|
+
log(`[CROSS-CASCADE] Enabled. Provider order: ${proxyConfig.crossProviderCascade.providers.join(' → ')}`);
|
|
2517
|
+
}
|
|
2441
2518
|
const isFirstRun = !rawFileHasRouting || !userConfig.first_run_complete;
|
|
2442
2519
|
if (isFirstRun || proxyConfig.routing?.mode === 'auto') {
|
|
2443
2520
|
const envAnthropicKey = process.env['ANTHROPIC_API_KEY'];
|
|
@@ -2498,7 +2575,37 @@ async function startProxy(config = {}) {
|
|
|
2498
2575
|
}
|
|
2499
2576
|
}
|
|
2500
2577
|
_activeProxyConfig = proxyConfig;
|
|
2578
|
+
_activeOllamaConfig = proxyConfig.ollama;
|
|
2501
2579
|
const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
|
|
2580
|
+
// === Ollama provider initialization ===
|
|
2581
|
+
if (_activeOllamaConfig?.enabled !== false && _activeOllamaConfig?.models?.length) {
|
|
2582
|
+
const ollamaUrl = _activeOllamaConfig.baseUrl ?? ollama_js_1.OLLAMA_DEFAULTS.baseUrl;
|
|
2583
|
+
console.log(`[RelayPlane] Ollama provider configured: ${ollamaUrl}`);
|
|
2584
|
+
console.log(`[RelayPlane] Ollama models: ${_activeOllamaConfig.models.join(', ')}`);
|
|
2585
|
+
if (_activeOllamaConfig.routeWhen) {
|
|
2586
|
+
const routeInfo = [];
|
|
2587
|
+
if (_activeOllamaConfig.routeWhen.complexity?.length) {
|
|
2588
|
+
routeInfo.push(`complexity: ${_activeOllamaConfig.routeWhen.complexity.join(', ')}`);
|
|
2589
|
+
}
|
|
2590
|
+
if (_activeOllamaConfig.routeWhen.taskTypes?.length) {
|
|
2591
|
+
routeInfo.push(`taskTypes: ${_activeOllamaConfig.routeWhen.taskTypes.join(', ')}`);
|
|
2592
|
+
}
|
|
2593
|
+
if (routeInfo.length) {
|
|
2594
|
+
console.log(`[RelayPlane] Ollama routing rules: ${routeInfo.join('; ')}`);
|
|
2595
|
+
}
|
|
2596
|
+
}
|
|
2597
|
+
// Async health check (non-blocking)
|
|
2598
|
+
(0, ollama_js_1.checkOllamaHealthCached)(ollamaUrl).then((health) => {
|
|
2599
|
+
if (health.available) {
|
|
2600
|
+
console.log(`[RelayPlane] ✓ Ollama is online (${health.models.length} models available, ${health.responseTimeMs}ms)`);
|
|
2601
|
+
}
|
|
2602
|
+
else {
|
|
2603
|
+
console.warn(`[RelayPlane] ⚠️ Ollama not available: ${health.error} — will fall back to cloud providers`);
|
|
2604
|
+
}
|
|
2605
|
+
}).catch(() => {
|
|
2606
|
+
console.warn('[RelayPlane] ⚠️ Ollama health check failed — will fall back to cloud providers');
|
|
2607
|
+
});
|
|
2608
|
+
}
|
|
2502
2609
|
// === Startup config validation (Task 4) ===
|
|
2503
2610
|
try {
|
|
2504
2611
|
const userConfig = (0, config_js_1.loadConfig)();
|
|
@@ -2652,6 +2759,8 @@ async function startProxy(config = {}) {
|
|
|
2652
2759
|
anomalyDetector.updateConfig({ ...anomalyDetector.getConfig(), ...(proxyConfig.anomaly ?? {}) });
|
|
2653
2760
|
alertManager.updateConfig({ ...alertManager.getConfig(), ...(proxyConfig.alerts ?? {}) });
|
|
2654
2761
|
downgradeConfig = { ...downgrade_js_1.DEFAULT_DOWNGRADE_CONFIG, ...(proxyConfig.downgrade ?? {}) };
|
|
2762
|
+
_activeOllamaConfig = proxyConfig.ollama;
|
|
2763
|
+
(0, ollama_js_1.clearOllamaHealthCache)(); // Invalidate cached health on config change
|
|
2655
2764
|
log(`Reloaded config from ${configPath}`);
|
|
2656
2765
|
};
|
|
2657
2766
|
const scheduleConfigReload = () => {
|
|
@@ -3000,6 +3109,9 @@ async function startProxy(config = {}) {
|
|
|
3000
3109
|
console.log('[RelayPlane Health] Provider stats:', JSON.stringify(providerStats));
|
|
3001
3110
|
const providers = [];
|
|
3002
3111
|
for (const [name, ep] of Object.entries(exports.DEFAULT_ENDPOINTS)) {
|
|
3112
|
+
// Skip Ollama from normal key-based health check — it's handled separately
|
|
3113
|
+
if (name === 'ollama')
|
|
3114
|
+
continue;
|
|
3003
3115
|
const hasKey = !!process.env[ep.apiKeyEnv];
|
|
3004
3116
|
const stats = providerStats[name.toLowerCase()];
|
|
3005
3117
|
const successRate = stats && stats.total > 0 ? stats.success / stats.total : (hasKey ? 1 : 0);
|
|
@@ -3019,6 +3131,19 @@ async function startProxy(config = {}) {
|
|
|
3019
3131
|
lastChecked: new Date().toISOString(),
|
|
3020
3132
|
});
|
|
3021
3133
|
}
|
|
3134
|
+
// Add Ollama status if configured
|
|
3135
|
+
if (_activeOllamaConfig && _activeOllamaConfig.enabled !== false) {
|
|
3136
|
+
const ollamaStats = providerStats['ollama'];
|
|
3137
|
+
const ollamaSuccessRate = ollamaStats && ollamaStats.total > 0 ? ollamaStats.success / ollamaStats.total : 0;
|
|
3138
|
+
const ollamaHealth = await (0, ollama_js_1.checkOllamaHealthCached)(_activeOllamaConfig.baseUrl);
|
|
3139
|
+
providers.push({
|
|
3140
|
+
provider: 'ollama',
|
|
3141
|
+
status: ollamaHealth.available ? 'healthy' : 'down',
|
|
3142
|
+
latency: ollamaHealth.responseTimeMs ?? 0,
|
|
3143
|
+
successRate: ollamaHealth.available ? (ollamaSuccessRate || 1) : 0,
|
|
3144
|
+
lastChecked: new Date().toISOString(),
|
|
3145
|
+
});
|
|
3146
|
+
}
|
|
3022
3147
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3023
3148
|
res.end(JSON.stringify({ providers }));
|
|
3024
3149
|
return;
|
|
@@ -3096,6 +3221,21 @@ async function startProxy(config = {}) {
|
|
|
3096
3221
|
return;
|
|
3097
3222
|
}
|
|
3098
3223
|
// === Mesh stats endpoint ===
|
|
3224
|
+
// === Ollama status endpoint ===
|
|
3225
|
+
if (req.method === 'GET' && pathname === '/v1/ollama/status') {
|
|
3226
|
+
const ollamaBaseUrl = _activeOllamaConfig?.baseUrl ?? ollama_js_1.OLLAMA_DEFAULTS.baseUrl;
|
|
3227
|
+
const health = await (0, ollama_js_1.checkOllamaHealthCached)(ollamaBaseUrl);
|
|
3228
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3229
|
+
res.end(JSON.stringify({
|
|
3230
|
+
configured: !!_activeOllamaConfig,
|
|
3231
|
+
enabled: _activeOllamaConfig?.enabled !== false,
|
|
3232
|
+
baseUrl: ollamaBaseUrl,
|
|
3233
|
+
health,
|
|
3234
|
+
routeWhen: _activeOllamaConfig?.routeWhen ?? null,
|
|
3235
|
+
configuredModels: _activeOllamaConfig?.models ?? [],
|
|
3236
|
+
}));
|
|
3237
|
+
return;
|
|
3238
|
+
}
|
|
3099
3239
|
if (req.method === 'GET' && pathname === '/v1/mesh/stats') {
|
|
3100
3240
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3101
3241
|
res.end(JSON.stringify(meshHandle.getStats()));
|
|
@@ -3516,6 +3656,46 @@ async function startProxy(config = {}) {
|
|
|
3516
3656
|
if (proxyConfig.reliability?.cooldowns?.enabled) {
|
|
3517
3657
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
|
|
3518
3658
|
}
|
|
3659
|
+
// ── Cross-provider cascade for /v1/messages path (GH #38) ──
|
|
3660
|
+
if (!isStreaming &&
|
|
3661
|
+
cross_provider_cascade_js_1.crossProviderCascade.enabled &&
|
|
3662
|
+
cross_provider_cascade_js_1.crossProviderCascade.shouldCascade(providerResponse.status)) {
|
|
3663
|
+
const { result: cascResult, data: cascData } = await cross_provider_cascade_js_1.crossProviderCascade.execute(targetProvider, targetModel || requestedModel, providerResponse.status, async (hop) => {
|
|
3664
|
+
const apiKeyResult = resolveProviderApiKey(hop.provider, ctx, useAnthropicEnvKey);
|
|
3665
|
+
if (apiKeyResult.error) {
|
|
3666
|
+
return { status: apiKeyResult.error.status, data: apiKeyResult.error.payload };
|
|
3667
|
+
}
|
|
3668
|
+
// Respect per-provider rate limits before attempting the hop
|
|
3669
|
+
try {
|
|
3670
|
+
await (0, rate_limiter_js_1.acquireSlot)('local', hop.model, hop.provider);
|
|
3671
|
+
}
|
|
3672
|
+
catch {
|
|
3673
|
+
return { status: 429, data: { error: `Local rate limit for ${hop.provider}` } };
|
|
3674
|
+
}
|
|
3675
|
+
// Convert native Anthropic body to ChatRequest for OpenAI-compatible providers
|
|
3676
|
+
const chatReq = convertNativeAnthropicBodyToChatRequest(requestBody, hop.model);
|
|
3677
|
+
const hopResult = await executeNonStreamingProviderRequest(chatReq, hop.provider, hop.model, apiKeyResult.apiKey, ctx);
|
|
3678
|
+
return { status: hopResult.status, data: hopResult.responseData };
|
|
3679
|
+
}, log);
|
|
3680
|
+
if (cascResult.success && cascData) {
|
|
3681
|
+
// Cascade succeeded — update provider/model and respond
|
|
3682
|
+
const cascDurationMs = Date.now() - startTime;
|
|
3683
|
+
const cascProvider = cascResult.provider;
|
|
3684
|
+
const cascModel = cascResult.model;
|
|
3685
|
+
logRequest(originalModel ?? 'unknown', cascModel, cascProvider, cascDurationMs, true, `${routingMode}+cross-cascade`, undefined, taskType, complexity);
|
|
3686
|
+
const cascRpHeaders = buildRelayPlaneResponseHeaders(cascModel, originalModel ?? 'unknown', complexity, cascProvider, `${routingMode}+cross-cascade`);
|
|
3687
|
+
res.writeHead(200, {
|
|
3688
|
+
'Content-Type': 'application/json',
|
|
3689
|
+
'X-RelayPlane-Cascade-Provider': cascProvider,
|
|
3690
|
+
'X-RelayPlane-Cascade-Model': cascModel,
|
|
3691
|
+
...cascRpHeaders,
|
|
3692
|
+
});
|
|
3693
|
+
res.end(JSON.stringify(cascData));
|
|
3694
|
+
return;
|
|
3695
|
+
}
|
|
3696
|
+
// All fallbacks exhausted — fall through to original error response
|
|
3697
|
+
}
|
|
3698
|
+
// ── End cross-provider cascade ──
|
|
3519
3699
|
const durationMs = Date.now() - startTime;
|
|
3520
3700
|
const errMsg = extractProviderErrorMessage(errorPayload, providerResponse.status);
|
|
3521
3701
|
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, providerResponse.status);
|
|
@@ -4017,6 +4197,21 @@ async function startProxy(config = {}) {
|
|
|
4017
4197
|
targetModel = defaultRoute.model;
|
|
4018
4198
|
}
|
|
4019
4199
|
}
|
|
4200
|
+
// ── Ollama routing: intercept before cloud dispatch ──
|
|
4201
|
+
if (!useCascade && _activeOllamaConfig && _activeOllamaConfig.enabled !== false) {
|
|
4202
|
+
if (targetProvider === 'ollama' || (0, ollama_js_1.shouldRouteToOllama)(_activeOllamaConfig, complexity, taskType, request.model)) {
|
|
4203
|
+
// Check Ollama availability before routing
|
|
4204
|
+
const ollamaHealth = await (0, ollama_js_1.checkOllamaHealthCached)(_activeOllamaConfig.baseUrl);
|
|
4205
|
+
if (ollamaHealth.available) {
|
|
4206
|
+
targetProvider = 'ollama';
|
|
4207
|
+
targetModel = (0, ollama_js_1.resolveOllamaModel)(targetModel, _activeOllamaConfig);
|
|
4208
|
+
log(`Ollama routing: ${complexity}/${taskType} → ollama/${targetModel}`);
|
|
4209
|
+
}
|
|
4210
|
+
else {
|
|
4211
|
+
log(`Ollama unavailable (${ollamaHealth.error}), falling back to cloud provider`);
|
|
4212
|
+
}
|
|
4213
|
+
}
|
|
4214
|
+
}
|
|
4020
4215
|
if (!useCascade) {
|
|
4021
4216
|
log(`Routing to: ${targetProvider}/${targetModel}`);
|
|
4022
4217
|
}
|
|
@@ -4185,7 +4380,7 @@ async function startProxy(config = {}) {
|
|
|
4185
4380
|
}
|
|
4186
4381
|
}
|
|
4187
4382
|
else {
|
|
4188
|
-
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId);
|
|
4383
|
+
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId, useAnthropicEnvKey);
|
|
4189
4384
|
}
|
|
4190
4385
|
}
|
|
4191
4386
|
});
|
|
@@ -4318,6 +4513,24 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
4318
4513
|
}
|
|
4319
4514
|
break;
|
|
4320
4515
|
}
|
|
4516
|
+
case 'ollama': {
|
|
4517
|
+
const ollamaResult = await (0, ollama_js_1.forwardToOllama)(targetModel, request.messages, {
|
|
4518
|
+
temperature: request.temperature,
|
|
4519
|
+
max_tokens: request.max_tokens,
|
|
4520
|
+
tools: request.tools,
|
|
4521
|
+
baseUrl: _activeOllamaConfig?.baseUrl,
|
|
4522
|
+
timeoutMs: _activeOllamaConfig?.timeoutMs,
|
|
4523
|
+
});
|
|
4524
|
+
if (!ollamaResult.success) {
|
|
4525
|
+
return {
|
|
4526
|
+
responseData: { error: ollamaResult.error },
|
|
4527
|
+
ok: false,
|
|
4528
|
+
status: ollamaResult.error?.status ?? 502,
|
|
4529
|
+
};
|
|
4530
|
+
}
|
|
4531
|
+
responseData = ollamaResult.data;
|
|
4532
|
+
break;
|
|
4533
|
+
}
|
|
4321
4534
|
default: {
|
|
4322
4535
|
providerResponse = await forwardToOpenAI(request, targetModel, apiKey);
|
|
4323
4536
|
responseData = (await providerResponse.json());
|
|
@@ -4347,6 +4560,44 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
4347
4560
|
case 'groq':
|
|
4348
4561
|
providerResponse = await forwardToOpenAICompatibleStream(request, targetModel, apiKey);
|
|
4349
4562
|
break;
|
|
4563
|
+
case 'ollama': {
|
|
4564
|
+
// Ollama streaming uses its own handler that converts NDJSON → SSE
|
|
4565
|
+
const ollamaStream = await (0, ollama_js_1.forwardToOllamaStream)(targetModel, request.messages, {
|
|
4566
|
+
temperature: request.temperature,
|
|
4567
|
+
max_tokens: request.max_tokens,
|
|
4568
|
+
tools: request.tools,
|
|
4569
|
+
baseUrl: _activeOllamaConfig?.baseUrl,
|
|
4570
|
+
timeoutMs: _activeOllamaConfig?.timeoutMs,
|
|
4571
|
+
});
|
|
4572
|
+
if (!ollamaStream.success || !ollamaStream.stream) {
|
|
4573
|
+
const durationMs = Date.now() - startTime;
|
|
4574
|
+
const errMsg = ollamaStream.error?.message ?? 'Ollama stream failed';
|
|
4575
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, ollamaStream.error?.status);
|
|
4576
|
+
res.writeHead(ollamaStream.error?.status ?? 502, { 'Content-Type': 'application/json' });
|
|
4577
|
+
res.end(JSON.stringify({ error: ollamaStream.error }));
|
|
4578
|
+
return;
|
|
4579
|
+
}
|
|
4580
|
+
// Write SSE headers and pipe converted stream
|
|
4581
|
+
const relayHeaders = buildRelayPlaneResponseHeaders(targetModel, request.model, complexity, 'ollama', routingMode);
|
|
4582
|
+
res.writeHead(200, {
|
|
4583
|
+
'Content-Type': 'text/event-stream',
|
|
4584
|
+
'Cache-Control': 'no-cache',
|
|
4585
|
+
'Connection': 'keep-alive',
|
|
4586
|
+
...relayHeaders,
|
|
4587
|
+
});
|
|
4588
|
+
for await (const chunk of ollamaStream.stream) {
|
|
4589
|
+
res.write(chunk);
|
|
4590
|
+
}
|
|
4591
|
+
const durationMs = Date.now() - startTime;
|
|
4592
|
+
logRequest(request.model ?? 'unknown', targetModel, 'ollama', durationMs, true, routingMode, false, taskType, complexity, agentFingerprint, agentId);
|
|
4593
|
+
updateLastHistoryEntry(0, 0, 0, targetModel, undefined, undefined, agentFingerprint, agentId);
|
|
4594
|
+
if (recordTelemetry) {
|
|
4595
|
+
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, true, 0, request.model ?? undefined);
|
|
4596
|
+
meshCapture(targetModel, 'ollama', taskType, 0, 0, 0, durationMs, true);
|
|
4597
|
+
}
|
|
4598
|
+
res.end();
|
|
4599
|
+
return;
|
|
4600
|
+
}
|
|
4350
4601
|
default:
|
|
4351
4602
|
providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
|
|
4352
4603
|
}
|
|
@@ -4534,7 +4785,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
4534
4785
|
/**
|
|
4535
4786
|
* Handle non-streaming request
|
|
4536
4787
|
*/
|
|
4537
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId
|
|
4788
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId,
|
|
4789
|
+
/** Anthropic env API key — required for cross-provider cascade API key resolution (GH #38) */
|
|
4790
|
+
anthropicEnvKeyForCascade) {
|
|
4538
4791
|
let responseData;
|
|
4539
4792
|
try {
|
|
4540
4793
|
const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
|
|
@@ -4543,16 +4796,59 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
4543
4796
|
if (cooldownsEnabled) {
|
|
4544
4797
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
|
|
4545
4798
|
}
|
|
4546
|
-
|
|
4547
|
-
|
|
4548
|
-
|
|
4549
|
-
|
|
4550
|
-
|
|
4551
|
-
|
|
4799
|
+
// ── Cross-provider cascade (GH #38) ──
|
|
4800
|
+
if (cross_provider_cascade_js_1.crossProviderCascade.enabled && cross_provider_cascade_js_1.crossProviderCascade.shouldCascade(result.status)) {
|
|
4801
|
+
const { result: cascResult, data: cascData } = await cross_provider_cascade_js_1.crossProviderCascade.execute(targetProvider, targetModel, result.status, async (hop) => {
|
|
4802
|
+
const apiKeyResult = resolveProviderApiKey(hop.provider, ctx, anthropicEnvKeyForCascade);
|
|
4803
|
+
if (apiKeyResult.error) {
|
|
4804
|
+
return { status: apiKeyResult.error.status, data: apiKeyResult.error.payload };
|
|
4805
|
+
}
|
|
4806
|
+
// Respect per-provider rate limits before attempting the hop
|
|
4807
|
+
try {
|
|
4808
|
+
await (0, rate_limiter_js_1.acquireSlot)('local', hop.model, hop.provider);
|
|
4809
|
+
}
|
|
4810
|
+
catch {
|
|
4811
|
+
// Rate-limited locally — treat as 429 so cascade continues
|
|
4812
|
+
return { status: 429, data: { error: `Local rate limit for ${hop.provider}` } };
|
|
4813
|
+
}
|
|
4814
|
+
const hopResult = await executeNonStreamingProviderRequest({ ...request, model: hop.model }, hop.provider, hop.model, apiKeyResult.apiKey, ctx);
|
|
4815
|
+
return { status: hopResult.status, data: hopResult.responseData };
|
|
4816
|
+
}, log);
|
|
4817
|
+
if (cascResult.success && cascData) {
|
|
4818
|
+
// Update tracking variables to reflect the actual provider/model used
|
|
4819
|
+
targetProvider = cascResult.provider;
|
|
4820
|
+
targetModel = cascResult.model;
|
|
4821
|
+
responseData = cascData;
|
|
4822
|
+
// Fall through to success handling below (don't return early)
|
|
4823
|
+
}
|
|
4824
|
+
else {
|
|
4825
|
+
// All fallbacks exhausted — return the primary error
|
|
4826
|
+
const durationMs = Date.now() - startTime;
|
|
4827
|
+
const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
|
|
4828
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
|
|
4829
|
+
if (recordTelemetry) {
|
|
4830
|
+
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
4831
|
+
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
|
|
4832
|
+
}
|
|
4833
|
+
res.writeHead(result.status, { 'Content-Type': 'application/json' });
|
|
4834
|
+
res.end(JSON.stringify(responseData));
|
|
4835
|
+
return;
|
|
4836
|
+
}
|
|
4552
4837
|
}
|
|
4553
|
-
|
|
4554
|
-
|
|
4555
|
-
|
|
4838
|
+
else {
|
|
4839
|
+
// No cascade — return error as-is
|
|
4840
|
+
const durationMs = Date.now() - startTime;
|
|
4841
|
+
const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
|
|
4842
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
|
|
4843
|
+
if (recordTelemetry) {
|
|
4844
|
+
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
4845
|
+
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
|
|
4846
|
+
}
|
|
4847
|
+
res.writeHead(result.status, { 'Content-Type': 'application/json' });
|
|
4848
|
+
res.end(JSON.stringify(responseData));
|
|
4849
|
+
return;
|
|
4850
|
+
}
|
|
4851
|
+
// ── End cross-provider cascade ──
|
|
4556
4852
|
}
|
|
4557
4853
|
}
|
|
4558
4854
|
catch (err) {
|