@relayplane/proxy 1.8.6 → 1.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -75,6 +75,8 @@ const index_js_1 = require("./mesh/index.js");
75
75
  const response_cache_js_1 = require("./response-cache.js");
76
76
  const stats_js_1 = require("./stats.js");
77
77
  const rate_limiter_js_1 = require("./rate-limiter.js");
78
+ const ollama_js_1 = require("./ollama.js");
79
+ const cross_provider_cascade_js_1 = require("./cross-provider-cascade.js");
78
80
  const budget_js_1 = require("./budget.js");
79
81
  const anomaly_js_1 = require("./anomaly.js");
80
82
  const alerts_js_1 = require("./alerts.js");
@@ -186,6 +188,10 @@ exports.DEFAULT_ENDPOINTS = {
186
188
  baseUrl: 'https://api.perplexity.ai',
187
189
  apiKeyEnv: 'PERPLEXITY_API_KEY',
188
190
  },
191
+ ollama: {
192
+ baseUrl: 'http://localhost:11434',
193
+ apiKeyEnv: 'OLLAMA_API_KEY', // Not actually required, placeholder for consistency
194
+ },
189
195
  };
190
196
  /**
191
197
  * Model to provider/model mapping
@@ -726,6 +732,8 @@ const DEFAULT_PROXY_CONFIG = {
726
732
  };
727
733
  /** Module-level ref to active proxy config (set during startProxy) */
728
734
  let _activeProxyConfig = {};
735
+ /** Module-level ref to active Ollama config (set during startProxy) */
736
+ let _activeOllamaConfig;
729
737
  function isContentLoggingEnabled() {
730
738
  return _activeProxyConfig.dashboard?.showRequestContent !== false;
731
739
  }
@@ -1901,10 +1909,14 @@ function resolveExplicitModel(modelName) {
1901
1909
  if (modelName.startsWith('deepseek-') || modelName.startsWith('groq-')) {
1902
1910
  return { provider: 'openrouter', model: modelName };
1903
1911
  }
1912
+ // Ollama models: "ollama/llama3.2" or direct model names when Ollama config exists
1913
+ if (modelName.startsWith('ollama/')) {
1914
+ return { provider: 'ollama', model: modelName.slice('ollama/'.length) };
1915
+ }
1904
1916
  // Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
1905
1917
  if (modelName.includes('/')) {
1906
1918
  const [provider, model] = modelName.split('/');
1907
- const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
1919
+ const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local', 'ollama'];
1908
1920
  if (provider && model && validProviders.includes(provider)) {
1909
1921
  return { provider: provider, model };
1910
1922
  }
@@ -1962,6 +1974,57 @@ function checkResponseModelMismatch(responseData, requestedModel, provider, log)
1962
1974
  * Extract a human-readable error message from a provider error payload.
1963
1975
  * Handles Anthropic ({ error: { type, message } }) and OpenAI ({ error: { message } }) formats.
1964
1976
  */
/**
 * Convert a native Anthropic messages request body into the OpenAI-compatible
 * ChatRequest format used by forwardToOpenAICompatible and related helpers.
 *
 * This allows cross-provider cascade from Anthropic → OpenRouter (and others)
 * without losing the original request content. (GH #38)
 *
 * Only text is carried over: content blocks whose `type` is not 'text'
 * are dropped, so a message made up solely of such blocks becomes a message
 * with an empty string as content. The returned request is always
 * non-streaming (`stream: false`).
 *
 * @param {object} body - Parsed Anthropic request body. Reads `system`,
 *   `messages`, `max_tokens` and `temperature`.
 * @param {string} mappedModel - Model identifier to place in the converted request.
 * @returns {{model: string, messages: Array<{role: *, content: string}>, max_tokens: *, temperature: *, stream: boolean}}
 *   The converted request. `max_tokens` defaults to 4096 when absent;
 *   `temperature` is passed through unchanged (possibly undefined).
 */
function convertNativeAnthropicBodyToChatRequest(body, mappedModel) {
    // Collect the text of every {type: 'text'} block. Entries that are null,
    // undefined or not objects are skipped instead of throwing, so a single
    // malformed block cannot abort the whole conversion.
    const joinTextBlocks = (blocks, separator) => blocks
        .filter((block) => block?.type === 'text')
        .map((block) => block.text ?? '')
        .join(separator);
    const rawMessages = Array.isArray(body['messages'])
        ? body['messages']
        : [];
    const messages = [];
    // Prepend system message if present
    const system = body['system'];
    if (system && typeof system === 'string') {
        messages.push({ role: 'system', content: system });
    }
    else if (Array.isArray(system)) {
        // Anthropic structured system (array of {type, text}) — flatten to text
        const systemText = joinTextBlocks(system, '\n');
        if (systemText) {
            messages.push({ role: 'system', content: systemText });
        }
    }
    for (const msg of rawMessages) {
        const role = msg['role'];
        const content = msg['content'];
        if (typeof content === 'string') {
            messages.push({ role, content });
        }
        else if (Array.isArray(content)) {
            // Anthropic content blocks — extract text parts
            messages.push({ role, content: joinTextBlocks(content, '') });
        }
        else {
            // Missing or non-text content: coerce to a string (null/undefined → '')
            messages.push({ role, content: String(content ?? '') });
        }
    }
    return {
        model: mappedModel,
        messages,
        max_tokens: body['max_tokens'] ?? 4096,
        temperature: body['temperature'],
        stream: false,
    };
}
1965
2028
  function extractProviderErrorMessage(payload, statusCode) {
1966
2029
  const err = payload['error'];
1967
2030
  if (typeof err === 'string')
@@ -2047,6 +2110,10 @@ function resolveProviderApiKey(provider, ctx, envApiKey) {
2047
2110
  }
2048
2111
  return { apiKey: envApiKey };
2049
2112
  }
2113
+ // Ollama doesn't need an API key — it's local
2114
+ if (provider === 'ollama') {
2115
+ return { apiKey: 'ollama-local' };
2116
+ }
2050
2117
  const apiKeyEnv = exports.DEFAULT_ENDPOINTS[provider]?.apiKeyEnv ?? `${provider.toUpperCase()}_API_KEY`;
2051
2118
  const apiKey = process.env[apiKeyEnv];
2052
2119
  if (!apiKey) {
@@ -2438,6 +2505,16 @@ async function startProxy(config = {}) {
2438
2505
  catch { /* file missing or parse error = treat as first run */ }
2439
2506
  const userConfig = (0, config_js_1.loadConfig)();
2440
2507
  (0, rate_limiter_js_1.configureRateLimiter)();
2508
+ // ── Cross-provider cascade: configure from proxy config (GH #38) ──
2509
+ if (proxyConfig.crossProviderCascade?.enabled && (proxyConfig.crossProviderCascade.providers?.length ?? 0) > 1) {
2510
+ cross_provider_cascade_js_1.crossProviderCascade.configure({
2511
+ enabled: true,
2512
+ providers: proxyConfig.crossProviderCascade.providers,
2513
+ triggerStatuses: proxyConfig.crossProviderCascade.triggerStatuses,
2514
+ modelMapping: proxyConfig.crossProviderCascade.modelMapping,
2515
+ });
2516
+ log(`[CROSS-CASCADE] Enabled. Provider order: ${proxyConfig.crossProviderCascade.providers.join(' → ')}`);
2517
+ }
2441
2518
  const isFirstRun = !rawFileHasRouting || !userConfig.first_run_complete;
2442
2519
  if (isFirstRun || proxyConfig.routing?.mode === 'auto') {
2443
2520
  const envAnthropicKey = process.env['ANTHROPIC_API_KEY'];
@@ -2498,7 +2575,37 @@ async function startProxy(config = {}) {
2498
2575
  }
2499
2576
  }
2500
2577
  _activeProxyConfig = proxyConfig;
2578
+ _activeOllamaConfig = proxyConfig.ollama;
2501
2579
  const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
2580
+ // === Ollama provider initialization ===
2581
+ if (_activeOllamaConfig?.enabled !== false && _activeOllamaConfig?.models?.length) {
2582
+ const ollamaUrl = _activeOllamaConfig.baseUrl ?? ollama_js_1.OLLAMA_DEFAULTS.baseUrl;
2583
+ console.log(`[RelayPlane] Ollama provider configured: ${ollamaUrl}`);
2584
+ console.log(`[RelayPlane] Ollama models: ${_activeOllamaConfig.models.join(', ')}`);
2585
+ if (_activeOllamaConfig.routeWhen) {
2586
+ const routeInfo = [];
2587
+ if (_activeOllamaConfig.routeWhen.complexity?.length) {
2588
+ routeInfo.push(`complexity: ${_activeOllamaConfig.routeWhen.complexity.join(', ')}`);
2589
+ }
2590
+ if (_activeOllamaConfig.routeWhen.taskTypes?.length) {
2591
+ routeInfo.push(`taskTypes: ${_activeOllamaConfig.routeWhen.taskTypes.join(', ')}`);
2592
+ }
2593
+ if (routeInfo.length) {
2594
+ console.log(`[RelayPlane] Ollama routing rules: ${routeInfo.join('; ')}`);
2595
+ }
2596
+ }
2597
+ // Async health check (non-blocking)
2598
+ (0, ollama_js_1.checkOllamaHealthCached)(ollamaUrl).then((health) => {
2599
+ if (health.available) {
2600
+ console.log(`[RelayPlane] ✓ Ollama is online (${health.models.length} models available, ${health.responseTimeMs}ms)`);
2601
+ }
2602
+ else {
2603
+ console.warn(`[RelayPlane] ⚠️ Ollama not available: ${health.error} — will fall back to cloud providers`);
2604
+ }
2605
+ }).catch(() => {
2606
+ console.warn('[RelayPlane] ⚠️ Ollama health check failed — will fall back to cloud providers');
2607
+ });
2608
+ }
2502
2609
  // === Startup config validation (Task 4) ===
2503
2610
  try {
2504
2611
  const userConfig = (0, config_js_1.loadConfig)();
@@ -2652,6 +2759,8 @@ async function startProxy(config = {}) {
2652
2759
  anomalyDetector.updateConfig({ ...anomalyDetector.getConfig(), ...(proxyConfig.anomaly ?? {}) });
2653
2760
  alertManager.updateConfig({ ...alertManager.getConfig(), ...(proxyConfig.alerts ?? {}) });
2654
2761
  downgradeConfig = { ...downgrade_js_1.DEFAULT_DOWNGRADE_CONFIG, ...(proxyConfig.downgrade ?? {}) };
2762
+ _activeOllamaConfig = proxyConfig.ollama;
2763
+ (0, ollama_js_1.clearOllamaHealthCache)(); // Invalidate cached health on config change
2655
2764
  log(`Reloaded config from ${configPath}`);
2656
2765
  };
2657
2766
  const scheduleConfigReload = () => {
@@ -3000,6 +3109,9 @@ async function startProxy(config = {}) {
3000
3109
  console.log('[RelayPlane Health] Provider stats:', JSON.stringify(providerStats));
3001
3110
  const providers = [];
3002
3111
  for (const [name, ep] of Object.entries(exports.DEFAULT_ENDPOINTS)) {
3112
+ // Skip Ollama from normal key-based health check — it's handled separately
3113
+ if (name === 'ollama')
3114
+ continue;
3003
3115
  const hasKey = !!process.env[ep.apiKeyEnv];
3004
3116
  const stats = providerStats[name.toLowerCase()];
3005
3117
  const successRate = stats && stats.total > 0 ? stats.success / stats.total : (hasKey ? 1 : 0);
@@ -3019,6 +3131,19 @@ async function startProxy(config = {}) {
3019
3131
  lastChecked: new Date().toISOString(),
3020
3132
  });
3021
3133
  }
3134
+ // Add Ollama status if configured
3135
+ if (_activeOllamaConfig && _activeOllamaConfig.enabled !== false) {
3136
+ const ollamaStats = providerStats['ollama'];
3137
+ const ollamaSuccessRate = ollamaStats && ollamaStats.total > 0 ? ollamaStats.success / ollamaStats.total : 0;
3138
+ const ollamaHealth = await (0, ollama_js_1.checkOllamaHealthCached)(_activeOllamaConfig.baseUrl);
3139
+ providers.push({
3140
+ provider: 'ollama',
3141
+ status: ollamaHealth.available ? 'healthy' : 'down',
3142
+ latency: ollamaHealth.responseTimeMs ?? 0,
3143
+ successRate: ollamaHealth.available ? (ollamaSuccessRate || 1) : 0,
3144
+ lastChecked: new Date().toISOString(),
3145
+ });
3146
+ }
3022
3147
  res.writeHead(200, { 'Content-Type': 'application/json' });
3023
3148
  res.end(JSON.stringify({ providers }));
3024
3149
  return;
@@ -3096,6 +3221,21 @@ async function startProxy(config = {}) {
3096
3221
  return;
3097
3222
  }
3098
3223
  // === Mesh stats endpoint ===
3224
+ // === Ollama status endpoint ===
3225
+ if (req.method === 'GET' && pathname === '/v1/ollama/status') {
3226
+ const ollamaBaseUrl = _activeOllamaConfig?.baseUrl ?? ollama_js_1.OLLAMA_DEFAULTS.baseUrl;
3227
+ const health = await (0, ollama_js_1.checkOllamaHealthCached)(ollamaBaseUrl);
3228
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3229
+ res.end(JSON.stringify({
3230
+ configured: !!_activeOllamaConfig,
3231
+ enabled: _activeOllamaConfig?.enabled !== false,
3232
+ baseUrl: ollamaBaseUrl,
3233
+ health,
3234
+ routeWhen: _activeOllamaConfig?.routeWhen ?? null,
3235
+ configuredModels: _activeOllamaConfig?.models ?? [],
3236
+ }));
3237
+ return;
3238
+ }
3099
3239
  if (req.method === 'GET' && pathname === '/v1/mesh/stats') {
3100
3240
  res.writeHead(200, { 'Content-Type': 'application/json' });
3101
3241
  res.end(JSON.stringify(meshHandle.getStats()));
@@ -3516,6 +3656,46 @@ async function startProxy(config = {}) {
3516
3656
  if (proxyConfig.reliability?.cooldowns?.enabled) {
3517
3657
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
3518
3658
  }
3659
+ // ── Cross-provider cascade for /v1/messages path (GH #38) ──
3660
+ if (!isStreaming &&
3661
+ cross_provider_cascade_js_1.crossProviderCascade.enabled &&
3662
+ cross_provider_cascade_js_1.crossProviderCascade.shouldCascade(providerResponse.status)) {
3663
+ const { result: cascResult, data: cascData } = await cross_provider_cascade_js_1.crossProviderCascade.execute(targetProvider, targetModel || requestedModel, providerResponse.status, async (hop) => {
3664
+ const apiKeyResult = resolveProviderApiKey(hop.provider, ctx, useAnthropicEnvKey);
3665
+ if (apiKeyResult.error) {
3666
+ return { status: apiKeyResult.error.status, data: apiKeyResult.error.payload };
3667
+ }
3668
+ // Respect per-provider rate limits before attempting the hop
3669
+ try {
3670
+ await (0, rate_limiter_js_1.acquireSlot)('local', hop.model, hop.provider);
3671
+ }
3672
+ catch {
3673
+ return { status: 429, data: { error: `Local rate limit for ${hop.provider}` } };
3674
+ }
3675
+ // Convert native Anthropic body to ChatRequest for OpenAI-compatible providers
3676
+ const chatReq = convertNativeAnthropicBodyToChatRequest(requestBody, hop.model);
3677
+ const hopResult = await executeNonStreamingProviderRequest(chatReq, hop.provider, hop.model, apiKeyResult.apiKey, ctx);
3678
+ return { status: hopResult.status, data: hopResult.responseData };
3679
+ }, log);
3680
+ if (cascResult.success && cascData) {
3681
+ // Cascade succeeded — update provider/model and respond
3682
+ const cascDurationMs = Date.now() - startTime;
3683
+ const cascProvider = cascResult.provider;
3684
+ const cascModel = cascResult.model;
3685
+ logRequest(originalModel ?? 'unknown', cascModel, cascProvider, cascDurationMs, true, `${routingMode}+cross-cascade`, undefined, taskType, complexity);
3686
+ const cascRpHeaders = buildRelayPlaneResponseHeaders(cascModel, originalModel ?? 'unknown', complexity, cascProvider, `${routingMode}+cross-cascade`);
3687
+ res.writeHead(200, {
3688
+ 'Content-Type': 'application/json',
3689
+ 'X-RelayPlane-Cascade-Provider': cascProvider,
3690
+ 'X-RelayPlane-Cascade-Model': cascModel,
3691
+ ...cascRpHeaders,
3692
+ });
3693
+ res.end(JSON.stringify(cascData));
3694
+ return;
3695
+ }
3696
+ // All fallbacks exhausted — fall through to original error response
3697
+ }
3698
+ // ── End cross-provider cascade ──
3519
3699
  const durationMs = Date.now() - startTime;
3520
3700
  const errMsg = extractProviderErrorMessage(errorPayload, providerResponse.status);
3521
3701
  logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, providerResponse.status);
@@ -4017,6 +4197,21 @@ async function startProxy(config = {}) {
4017
4197
  targetModel = defaultRoute.model;
4018
4198
  }
4019
4199
  }
4200
+ // ── Ollama routing: intercept before cloud dispatch ──
4201
+ if (!useCascade && _activeOllamaConfig && _activeOllamaConfig.enabled !== false) {
4202
+ if (targetProvider === 'ollama' || (0, ollama_js_1.shouldRouteToOllama)(_activeOllamaConfig, complexity, taskType, request.model)) {
4203
+ // Check Ollama availability before routing
4204
+ const ollamaHealth = await (0, ollama_js_1.checkOllamaHealthCached)(_activeOllamaConfig.baseUrl);
4205
+ if (ollamaHealth.available) {
4206
+ targetProvider = 'ollama';
4207
+ targetModel = (0, ollama_js_1.resolveOllamaModel)(targetModel, _activeOllamaConfig);
4208
+ log(`Ollama routing: ${complexity}/${taskType} → ollama/${targetModel}`);
4209
+ }
4210
+ else {
4211
+ log(`Ollama unavailable (${ollamaHealth.error}), falling back to cloud provider`);
4212
+ }
4213
+ }
4214
+ }
4020
4215
  if (!useCascade) {
4021
4216
  log(`Routing to: ${targetProvider}/${targetModel}`);
4022
4217
  }
@@ -4185,7 +4380,7 @@ async function startProxy(config = {}) {
4185
4380
  }
4186
4381
  }
4187
4382
  else {
4188
- await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId);
4383
+ await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId, useAnthropicEnvKey);
4189
4384
  }
4190
4385
  }
4191
4386
  });
@@ -4318,6 +4513,24 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
4318
4513
  }
4319
4514
  break;
4320
4515
  }
4516
+ case 'ollama': {
4517
+ const ollamaResult = await (0, ollama_js_1.forwardToOllama)(targetModel, request.messages, {
4518
+ temperature: request.temperature,
4519
+ max_tokens: request.max_tokens,
4520
+ tools: request.tools,
4521
+ baseUrl: _activeOllamaConfig?.baseUrl,
4522
+ timeoutMs: _activeOllamaConfig?.timeoutMs,
4523
+ });
4524
+ if (!ollamaResult.success) {
4525
+ return {
4526
+ responseData: { error: ollamaResult.error },
4527
+ ok: false,
4528
+ status: ollamaResult.error?.status ?? 502,
4529
+ };
4530
+ }
4531
+ responseData = ollamaResult.data;
4532
+ break;
4533
+ }
4321
4534
  default: {
4322
4535
  providerResponse = await forwardToOpenAI(request, targetModel, apiKey);
4323
4536
  responseData = (await providerResponse.json());
@@ -4347,6 +4560,44 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
4347
4560
  case 'groq':
4348
4561
  providerResponse = await forwardToOpenAICompatibleStream(request, targetModel, apiKey);
4349
4562
  break;
4563
+ case 'ollama': {
4564
+ // Ollama streaming uses its own handler that converts NDJSON → SSE
4565
+ const ollamaStream = await (0, ollama_js_1.forwardToOllamaStream)(targetModel, request.messages, {
4566
+ temperature: request.temperature,
4567
+ max_tokens: request.max_tokens,
4568
+ tools: request.tools,
4569
+ baseUrl: _activeOllamaConfig?.baseUrl,
4570
+ timeoutMs: _activeOllamaConfig?.timeoutMs,
4571
+ });
4572
+ if (!ollamaStream.success || !ollamaStream.stream) {
4573
+ const durationMs = Date.now() - startTime;
4574
+ const errMsg = ollamaStream.error?.message ?? 'Ollama stream failed';
4575
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, ollamaStream.error?.status);
4576
+ res.writeHead(ollamaStream.error?.status ?? 502, { 'Content-Type': 'application/json' });
4577
+ res.end(JSON.stringify({ error: ollamaStream.error }));
4578
+ return;
4579
+ }
4580
+ // Write SSE headers and pipe converted stream
4581
+ const relayHeaders = buildRelayPlaneResponseHeaders(targetModel, request.model, complexity, 'ollama', routingMode);
4582
+ res.writeHead(200, {
4583
+ 'Content-Type': 'text/event-stream',
4584
+ 'Cache-Control': 'no-cache',
4585
+ 'Connection': 'keep-alive',
4586
+ ...relayHeaders,
4587
+ });
4588
+ for await (const chunk of ollamaStream.stream) {
4589
+ res.write(chunk);
4590
+ }
4591
+ const durationMs = Date.now() - startTime;
4592
+ logRequest(request.model ?? 'unknown', targetModel, 'ollama', durationMs, true, routingMode, false, taskType, complexity, agentFingerprint, agentId);
4593
+ updateLastHistoryEntry(0, 0, 0, targetModel, undefined, undefined, agentFingerprint, agentId);
4594
+ if (recordTelemetry) {
4595
+ sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, true, 0, request.model ?? undefined);
4596
+ meshCapture(targetModel, 'ollama', taskType, 0, 0, 0, durationMs, true);
4597
+ }
4598
+ res.end();
4599
+ return;
4600
+ }
4350
4601
  default:
4351
4602
  providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
4352
4603
  }
@@ -4534,7 +4785,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
4534
4785
  /**
4535
4786
  * Handle non-streaming request
4536
4787
  */
4537
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId) {
4788
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId,
4789
+ /** Anthropic env API key — required for cross-provider cascade API key resolution (GH #38) */
4790
+ anthropicEnvKeyForCascade) {
4538
4791
  let responseData;
4539
4792
  try {
4540
4793
  const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
@@ -4543,16 +4796,59 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
4543
4796
  if (cooldownsEnabled) {
4544
4797
  cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
4545
4798
  }
4546
- const durationMs = Date.now() - startTime;
4547
- const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
4548
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
4549
- if (recordTelemetry) {
4550
- sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
4551
- meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
4799
+ // ── Cross-provider cascade (GH #38) ──
4800
+ if (cross_provider_cascade_js_1.crossProviderCascade.enabled && cross_provider_cascade_js_1.crossProviderCascade.shouldCascade(result.status)) {
4801
+ const { result: cascResult, data: cascData } = await cross_provider_cascade_js_1.crossProviderCascade.execute(targetProvider, targetModel, result.status, async (hop) => {
4802
+ const apiKeyResult = resolveProviderApiKey(hop.provider, ctx, anthropicEnvKeyForCascade);
4803
+ if (apiKeyResult.error) {
4804
+ return { status: apiKeyResult.error.status, data: apiKeyResult.error.payload };
4805
+ }
4806
+ // Respect per-provider rate limits before attempting the hop
4807
+ try {
4808
+ await (0, rate_limiter_js_1.acquireSlot)('local', hop.model, hop.provider);
4809
+ }
4810
+ catch {
4811
+ // Rate-limited locally — treat as 429 so cascade continues
4812
+ return { status: 429, data: { error: `Local rate limit for ${hop.provider}` } };
4813
+ }
4814
+ const hopResult = await executeNonStreamingProviderRequest({ ...request, model: hop.model }, hop.provider, hop.model, apiKeyResult.apiKey, ctx);
4815
+ return { status: hopResult.status, data: hopResult.responseData };
4816
+ }, log);
4817
+ if (cascResult.success && cascData) {
4818
+ // Update tracking variables to reflect the actual provider/model used
4819
+ targetProvider = cascResult.provider;
4820
+ targetModel = cascResult.model;
4821
+ responseData = cascData;
4822
+ // Fall through to success handling below (don't return early)
4823
+ }
4824
+ else {
4825
+ // All fallbacks exhausted — return the primary error
4826
+ const durationMs = Date.now() - startTime;
4827
+ const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
4828
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
4829
+ if (recordTelemetry) {
4830
+ sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
4831
+ meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
4832
+ }
4833
+ res.writeHead(result.status, { 'Content-Type': 'application/json' });
4834
+ res.end(JSON.stringify(responseData));
4835
+ return;
4836
+ }
4552
4837
  }
4553
- res.writeHead(result.status, { 'Content-Type': 'application/json' });
4554
- res.end(JSON.stringify(responseData));
4555
- return;
4838
+ else {
4839
+ // No cascade — return error as-is
4840
+ const durationMs = Date.now() - startTime;
4841
+ const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
4842
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
4843
+ if (recordTelemetry) {
4844
+ sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
4845
+ meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
4846
+ }
4847
+ res.writeHead(result.status, { 'Content-Type': 'application/json' });
4848
+ res.end(JSON.stringify(responseData));
4849
+ return;
4850
+ }
4851
+ // ── End cross-provider cascade ──
4556
4852
  }
4557
4853
  }
4558
4854
  catch (err) {