@relayplane/proxy 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -99,9 +99,33 @@ exports.DEFAULT_ENDPOINTS = {
99
99
  baseUrl: 'https://api.x.ai/v1',
100
100
  apiKeyEnv: 'XAI_API_KEY',
101
101
  },
102
- moonshot: {
103
- baseUrl: 'https://api.moonshot.cn/v1',
104
- apiKeyEnv: 'MOONSHOT_API_KEY',
102
+ openrouter: {
103
+ baseUrl: 'https://openrouter.ai/api/v1',
104
+ apiKeyEnv: 'OPENROUTER_API_KEY',
105
+ },
106
+ deepseek: {
107
+ baseUrl: 'https://api.deepseek.com/v1',
108
+ apiKeyEnv: 'DEEPSEEK_API_KEY',
109
+ },
110
+ groq: {
111
+ baseUrl: 'https://api.groq.com/openai/v1',
112
+ apiKeyEnv: 'GROQ_API_KEY',
113
+ },
114
+ mistral: {
115
+ baseUrl: 'https://api.mistral.ai/v1',
116
+ apiKeyEnv: 'MISTRAL_API_KEY',
117
+ },
118
+ together: {
119
+ baseUrl: 'https://api.together.xyz/v1',
120
+ apiKeyEnv: 'TOGETHER_API_KEY',
121
+ },
122
+ fireworks: {
123
+ baseUrl: 'https://api.fireworks.ai/inference/v1',
124
+ apiKeyEnv: 'FIREWORKS_API_KEY',
125
+ },
126
+ perplexity: {
127
+ baseUrl: 'https://api.perplexity.ai',
128
+ apiKeyEnv: 'PERPLEXITY_API_KEY',
105
129
  },
106
130
  };
107
131
  /**
@@ -273,6 +297,127 @@ const globalStats = {
273
297
  escalations: 0,
274
298
  startedAt: Date.now(),
275
299
  };
300
+ const requestHistory = [];
301
+ const MAX_HISTORY = 10000;
302
+ const HISTORY_RETENTION_DAYS = 7;
303
+ let requestIdCounter = 0;
304
+ // --- Persistent history (JSONL) ---
305
+ const HISTORY_DIR = path.join(os.homedir(), '.relayplane');
306
+ const HISTORY_FILE = path.join(HISTORY_DIR, 'history.jsonl');
307
+ let historyWriteBuffer = [];
308
+ let historyFlushTimer = null;
309
+ let historyRequestsSinceLastPrune = 0;
310
/**
 * Trim the in-memory request history in place.
 *
 * First drops the leading run of entries whose timestamp is older than the
 * retention window (the scan stops at the first entry that is not older),
 * then drops the oldest entries until at most MAX_HISTORY remain.
 */
function pruneOldEntries() {
    const cutoff = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
    // Count how many entries at the front of the array have expired.
    let expiredCount = 0;
    while (expiredCount < requestHistory.length &&
        new Date(requestHistory[expiredCount].timestamp).getTime() < cutoff) {
        expiredCount += 1;
    }
    if (expiredCount > 0) {
        requestHistory.splice(0, expiredCount);
    }
    // Enforce the size cap by discarding from the front (oldest first).
    const overflow = requestHistory.length - MAX_HISTORY;
    if (overflow > 0) {
        requestHistory.splice(0, overflow);
    }
}
321
/**
 * Populate the in-memory request history from the JSONL history file.
 *
 * Each non-empty line is parsed as JSON; lines that fail to parse, and
 * entries whose timestamp is missing, invalid, or older than the retention
 * window, are skipped. After loading, the history is capped at MAX_HISTORY
 * (keeping the most recent entries), `requestIdCounter` is advanced past the
 * highest `req-<n>` id found, and the file is rewritten so it only contains
 * the entries that were kept.
 *
 * Any failure is logged and otherwise ignored, so this never throws.
 */
function loadHistoryFromDisk() {
    try {
        if (!fs.existsSync(HISTORY_FILE))
            return;
        const content = fs.readFileSync(HISTORY_FILE, 'utf-8');
        const cutoff = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
        for (const line of content.split('\n')) {
            const trimmed = line.trim();
            if (!trimmed)
                continue;
            try {
                const entry = JSON.parse(trimmed);
                // A missing/invalid timestamp yields NaN, which fails this comparison.
                if (new Date(entry.timestamp).getTime() >= cutoff) {
                    requestHistory.push(entry);
                }
            }
            catch {
                // Skip corrupt lines
            }
        }
        // Cap at MAX_HISTORY (keep most recent)
        const overflow = requestHistory.length - MAX_HISTORY;
        if (overflow > 0) {
            requestHistory.splice(0, overflow);
        }
        // Update requestIdCounter based on loaded entries. The id is checked to be
        // a string first: a hand-edited or foreign entry without one would
        // otherwise throw here and abort the load after the array was already
        // filled, skipping the counter update and the rewrite below.
        for (const entry of requestHistory) {
            const match = typeof entry.id === 'string' ? entry.id.match(/^req-(\d+)$/) : null;
            if (match) {
                const num = parseInt(match[1], 10);
                if (num > requestIdCounter)
                    requestIdCounter = num;
            }
        }
        // Rewrite file with only valid/recent entries
        rewriteHistoryFile();
        console.log(`[RelayPlane] Loaded ${requestHistory.length} history entries from disk`);
    }
    catch (err) {
        console.log(`[RelayPlane] Could not load history: ${err.message}`);
    }
}
363
/**
 * Replace the history file with the current in-memory history, one JSON
 * document per line. An empty history produces an empty file. Errors are
 * logged and swallowed.
 */
function rewriteHistoryFile() {
    try {
        fs.mkdirSync(HISTORY_DIR, { recursive: true });
        const serialized = requestHistory.map((record) => JSON.stringify(record));
        // Terminate the last line only when there is at least one line.
        const payload = serialized.length > 0 ? `${serialized.join('\n')}\n` : '';
        fs.writeFileSync(HISTORY_FILE, payload, 'utf-8');
    }
    catch (err) {
        console.log(`[RelayPlane] Could not rewrite history file: ${err.message}`);
    }
}
373
/**
 * Append every buffered history entry to the history file as JSONL and
 * empty the buffer. Does nothing when the buffer is empty. A write failure
 * is logged; the buffer is emptied either way.
 */
function flushHistoryBuffer() {
    if (historyWriteBuffer.length > 0) {
        try {
            fs.mkdirSync(HISTORY_DIR, { recursive: true });
            const serialized = historyWriteBuffer.map((record) => JSON.stringify(record));
            fs.appendFileSync(HISTORY_FILE, `${serialized.join('\n')}\n`, 'utf-8');
        }
        catch (err) {
            console.log(`[RelayPlane] Could not flush history: ${err.message}`);
        }
        finally {
            // Reset regardless of the outcome, matching best-effort persistence.
            historyWriteBuffer = [];
        }
    }
}
386
/**
 * Arm a one-shot 10 second timer that flushes the history buffer, unless a
 * timer is already pending.
 */
function scheduleHistoryFlush() {
    if (!historyFlushTimer) {
        const onTimeout = () => {
            // Mark the timer as consumed before flushing so a new one can be armed.
            historyFlushTimer = null;
            flushHistoryBuffer();
        };
        historyFlushTimer = setTimeout(onTimeout, 10000);
    }
}
394
/**
 * Queue a history entry for persistence.
 *
 * The buffer is flushed immediately once it holds 20 entries; otherwise a
 * delayed flush is scheduled. Every 100 buffered requests the in-memory
 * history is pruned and the whole history file is rewritten.
 *
 * When a rewrite is due, the buffer is flushed (and any pending timer
 * cancelled) first. The rewrite serializes the full in-memory history, which
 * already contains the buffered entries; leaving them in the buffer would
 * make the next flush append them a second time and leave duplicate lines
 * in the file.
 *
 * NOTE(review): entries are serialized when they are flushed. A flush or
 * rewrite triggered from here runs before the caller can attach token/cost
 * figures via updateLastHistoryEntry, so that entry is presumably written
 * with zeros until a later rewrite — confirm whether that is acceptable.
 *
 * @param {object} entry - History record to persist.
 */
function bufferHistoryEntry(entry) {
    historyWriteBuffer.push(entry);
    historyRequestsSinceLastPrune++;
    // Prune every 100 requests
    const pruneDue = historyRequestsSinceLastPrune >= 100;
    if (pruneDue || historyWriteBuffer.length >= 20) {
        if (historyFlushTimer) {
            clearTimeout(historyFlushTimer);
            historyFlushTimer = null;
        }
        flushHistoryBuffer();
    }
    else {
        scheduleHistoryFlush();
    }
    if (pruneDue) {
        historyRequestsSinceLastPrune = 0;
        pruneOldEntries();
        rewriteHistoryFile();
    }
}
414
/**
 * Cancel any pending delayed flush and write the buffered history entries
 * out immediately.
 */
function shutdownHistory() {
    // clearTimeout ignores a null handle, so no guard is needed.
    clearTimeout(historyFlushTimer);
    historyFlushTimer = null;
    flushHistoryBuffer();
}
276
421
  function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
277
422
  const timestamp = new Date().toISOString();
278
423
  const status = success ? '✓' : '✗';
@@ -300,6 +445,35 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
300
445
  viaProxy: true,
301
446
  success,
302
447
  });
448
+ // Record to request history for telemetry endpoints
449
+ const entry = {
450
+ id: `req-${++requestIdCounter}`,
451
+ originalModel,
452
+ targetModel,
453
+ provider,
454
+ latencyMs,
455
+ success,
456
+ mode,
457
+ escalated: !!escalated,
458
+ timestamp,
459
+ tokensIn: 0,
460
+ tokensOut: 0,
461
+ costUsd: 0,
462
+ };
463
+ requestHistory.push(entry);
464
+ if (requestHistory.length > MAX_HISTORY) {
465
+ requestHistory.shift();
466
+ }
467
+ bufferHistoryEntry(entry);
468
+ }
469
/**
 * Attach token counts and cost to the newest entry in the request history.
 * Does nothing when the history is empty.
 *
 * @param {number} tokensIn - Input token count to record.
 * @param {number} tokensOut - Output token count to record.
 * @param {number} costUsd - Cost to record, in USD.
 */
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd) {
    if (requestHistory.length === 0) {
        return;
    }
    const newest = requestHistory[requestHistory.length - 1];
    Object.assign(newest, { tokensIn, tokensOut, costUsd });
}
304
478
  const DEFAULT_PROXY_CONFIG = {
305
479
  enabled: true,
@@ -833,40 +1007,40 @@ async function forwardToXAIStream(request, targetModel, apiKey) {
833
1007
  return response;
834
1008
  }
835
1009
  /**
836
- * Forward non-streaming request to Moonshot API (OpenAI-compatible)
1010
+ * Forward non-streaming request to OpenAI-compatible provider (OpenRouter, DeepSeek, Groq)
837
1011
  */
838
- async function forwardToMoonshot(request, targetModel, apiKey) {
839
- const moonshotBody = {
1012
+ async function forwardToOpenAICompatible(request, targetModel, apiKey, provider = 'openrouter') {
1013
+ const compatBody = {
840
1014
  ...request,
841
1015
  model: targetModel,
842
1016
  stream: false,
843
1017
  };
844
- const response = await fetch('https://api.moonshot.cn/v1/chat/completions', {
1018
+ const response = await fetch(`${exports.DEFAULT_ENDPOINTS[provider]?.baseUrl || "https://openrouter.ai/api/v1"}/chat/completions`, {
845
1019
  method: 'POST',
846
1020
  headers: {
847
1021
  'Content-Type': 'application/json',
848
1022
  Authorization: `Bearer ${apiKey}`,
849
1023
  },
850
- body: JSON.stringify(moonshotBody),
1024
+ body: JSON.stringify(compatBody),
851
1025
  });
852
1026
  return response;
853
1027
  }
854
1028
  /**
855
- * Forward streaming request to Moonshot API (OpenAI-compatible)
1029
+ * Forward streaming request to OpenAI-compatible provider (OpenRouter, DeepSeek, Groq)
856
1030
  */
857
- async function forwardToMoonshotStream(request, targetModel, apiKey) {
858
- const moonshotBody = {
1031
+ async function forwardToOpenAICompatibleStream(request, targetModel, apiKey, provider = 'openrouter') {
1032
+ const compatBody = {
859
1033
  ...request,
860
1034
  model: targetModel,
861
1035
  stream: true,
862
1036
  };
863
- const response = await fetch('https://api.moonshot.cn/v1/chat/completions', {
1037
+ const response = await fetch(`${exports.DEFAULT_ENDPOINTS[provider]?.baseUrl || "https://openrouter.ai/api/v1"}/chat/completions`, {
864
1038
  method: 'POST',
865
1039
  headers: {
866
1040
  'Content-Type': 'application/json',
867
1041
  Authorization: `Bearer ${apiKey}`,
868
1042
  },
869
- body: JSON.stringify(moonshotBody),
1043
+ body: JSON.stringify(compatBody),
870
1044
  });
871
1045
  return response;
872
1046
  }
@@ -1346,7 +1520,7 @@ function parsePreferredModel(preferredModel) {
1346
1520
  if (!provider || !model)
1347
1521
  return null;
1348
1522
  // Validate provider
1349
- const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
1523
+ const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
1350
1524
  if (!validProviders.includes(provider))
1351
1525
  return null;
1352
1526
  return { provider: provider, model };
@@ -1393,14 +1567,14 @@ function resolveExplicitModel(modelName) {
1393
1567
  if (modelName.startsWith('grok-')) {
1394
1568
  return { provider: 'xai', model: modelName };
1395
1569
  }
1396
- // Moonshot models (moonshot-*)
1397
- if (modelName.startsWith('moonshot-')) {
1398
- return { provider: 'moonshot', model: modelName };
1570
+ // OpenRouter/DeepSeek/Groq models
1571
+ if (modelName.startsWith('openrouter/') || modelName.startsWith('deepseek-') || modelName.startsWith('groq-')) {
1572
+ return { provider: 'openrouter', model: modelName };
1399
1573
  }
1400
1574
  // Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
1401
1575
  if (modelName.includes('/')) {
1402
1576
  const [provider, model] = modelName.split('/');
1403
- const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
1577
+ const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
1404
1578
  if (provider && model && validProviders.includes(provider)) {
1405
1579
  return { provider: provider, model };
1406
1580
  }
@@ -1581,6 +1755,73 @@ async function cascadeRequest(config, makeRequest, log) {
1581
1755
  }
1582
1756
  throw new Error('All cascade models exhausted');
1583
1757
  }
1758
/**
 * Build the self-contained HTML page for the built-in dashboard.
 *
 * The page's inline script polls `/health` and the `/v1/telemetry/*`
 * endpoints every 5 seconds and renders summary cards, a per-model table,
 * provider status and recent runs.
 *
 * Every value the script interpolates into `innerHTML` goes through the
 * page-local `esc` helper. Model names originate from client requests, so
 * inserting them unescaped would let a crafted model name inject markup or
 * script into the dashboard.
 *
 * @returns {string} Complete HTML document.
 */
function getDashboardHTML() {
    return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Dashboard</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1200px;margin:0 auto}
a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
.header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
.header .meta{font-size:.8rem;color:#64748b}
.cards{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:16px;margin-bottom:32px}
.card{background:#111318;border:1px solid #1e293b;border-radius:12px;padding:20px}
.card .label{font-size:.75rem;color:#64748b;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px}
.card .value{font-size:1.75rem;font-weight:700}.green{color:#34d399}
table{width:100%;border-collapse:collapse;font-size:.85rem}
th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:1px solid #1e293b;font-size:.75rem;text-transform:uppercase;letter-spacing:.04em}
td{padding:8px 12px;border-bottom:1px solid #111318}
.section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.down{background:#ef4444}
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
</style></head><body>
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
<div class="cards">
<div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
<div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
<div class="card"><div class="label">Savings</div><div class="value green" id="savings">—</div></div>
<div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
</div>
<div class="section"><h2>Model Breakdown</h2>
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
<div class="section"><h2>Recent Runs</h2>
<table><thead><tr><th>Time</th><th>Model</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
<script>
const $ = id => document.getElementById(id);
function esc(v){const m={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};return String(v??'').replace(/[&<>"']/g,c=>m[c])}
function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
async function load(){
try{
const [health,stats,runsR,sav,provH]=await Promise.all([
fetch('/health').then(r=>r.json()),
fetch('/v1/telemetry/stats').then(r=>r.json()),
fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
fetch('/v1/telemetry/savings').then(r=>r.json()),
fetch('/v1/telemetry/health').then(r=>r.json())
]);
$('ver').textContent='v'+health.version;
$('uptime').textContent=dur(health.uptime);
$('totalReq').textContent=health.requests??0;
$('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
$('savings').textContent=(sav.percentage??0)+'%';
$('avgLat').textContent=(stats.summary?.avgLatencyMs??0)+'ms';
const total=stats.summary?.totalEvents||1;
$('models').innerHTML=(stats.byModel||[]).map(m=>
'<tr><td>'+esc(m.model)+'</td><td>'+esc(m.count)+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(m.count/total*100,1)+'%</td></tr>'
).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
$('runs').innerHTML=(runsR.runs||[]).map(r=>
'<tr><td>'+esc(fmtTime(r.started_at))+'</td><td>'+esc(r.model)+'</td><td>'+esc(r.tokensIn||0)+'</td><td>'+esc(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+esc(r.latencyMs)+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+esc(r.status)+'</span></td></tr>'
).join('')||'<tr><td colspan=7 style="color:#64748b">No runs yet</td></tr>';
$('providers').innerHTML=(provH.providers||[]).map(p=>
'<div class="prov-item"><span class="dot '+(p.status==='healthy'?'up':'down')+'"></span>'+esc(p.provider)+'</div>'
).join('');
}catch(e){console.error(e)}
}
load();setInterval(load,5000);
</script></body></html>`;
}
1584
1825
  /**
1585
1826
  * Start the RelayPlane proxy server
1586
1827
  */
@@ -1593,6 +1834,15 @@ async function startProxy(config = {}) {
1593
1834
  if (verbose)
1594
1835
  console.log(`[relayplane] ${msg}`);
1595
1836
  };
1837
+ // Load persistent history from disk
1838
+ loadHistoryFromDisk();
1839
+ // Flush history on shutdown
1840
+ const handleShutdown = () => {
1841
+ shutdownHistory();
1842
+ process.exit(0);
1843
+ };
1844
+ process.on('SIGINT', handleShutdown);
1845
+ process.on('SIGTERM', handleShutdown);
1596
1846
  const configPath = getProxyConfigPath();
1597
1847
  let proxyConfig = await loadProxyConfig(configPath, log);
1598
1848
  const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
@@ -1732,6 +1982,120 @@ async function startProxy(config = {}) {
1732
1982
  return;
1733
1983
  }
1734
1984
  }
1985
+ // === Telemetry endpoints for dashboard ===
1986
+ if (pathname.startsWith('/v1/telemetry/')) {
1987
+ const telemetryPath = pathname.replace('/v1/telemetry/', '');
1988
+ const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
1989
+ const params = new URLSearchParams(queryString);
1990
+ if (req.method === 'GET' && telemetryPath === 'stats') {
1991
+ const days = parseInt(params.get('days') || '7', 10);
1992
+ const cutoff = Date.now() - days * 86400000;
1993
+ const recent = requestHistory.filter(r => new Date(r.timestamp).getTime() >= cutoff);
1994
+ // Model breakdown
1995
+ const modelMap = new Map();
1996
+ for (const r of recent) {
1997
+ const key = r.targetModel;
1998
+ const cur = modelMap.get(key) || { count: 0, cost: 0 };
1999
+ cur.count++;
2000
+ cur.cost += r.costUsd;
2001
+ modelMap.set(key, cur);
2002
+ }
2003
+ // Daily stats
2004
+ const dailyMap = new Map();
2005
+ for (const r of recent) {
2006
+ const date = r.timestamp.slice(0, 10);
2007
+ const cur = dailyMap.get(date) || { requests: 0, cost: 0 };
2008
+ cur.requests++;
2009
+ cur.cost += r.costUsd;
2010
+ dailyMap.set(date, cur);
2011
+ }
2012
+ const totalCost = recent.reduce((s, r) => s + r.costUsd, 0);
2013
+ const totalLatency = recent.reduce((s, r) => s + r.latencyMs, 0);
2014
+ const result = {
2015
+ summary: {
2016
+ totalCostUsd: totalCost,
2017
+ totalEvents: recent.length,
2018
+ avgLatencyMs: recent.length ? Math.round(totalLatency / recent.length) : 0,
2019
+ successRate: recent.length ? recent.filter(r => r.success).length / recent.length : 0,
2020
+ },
2021
+ byModel: Array.from(modelMap.entries()).map(([model, v]) => ({ model, count: v.count, costUsd: v.cost, savings: 0 })),
2022
+ dailyCosts: Array.from(dailyMap.entries()).map(([date, v]) => ({ date, costUsd: v.cost, requests: v.requests })),
2023
+ };
2024
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2025
+ res.end(JSON.stringify(result));
2026
+ return;
2027
+ }
2028
+ if (req.method === 'GET' && telemetryPath === 'runs') {
2029
+ const limit = parseInt(params.get('limit') || '50', 10);
2030
+ const offset = parseInt(params.get('offset') || '0', 10);
2031
+ const sorted = [...requestHistory].reverse();
2032
+ const runs = sorted.slice(offset, offset + limit).map(r => ({
2033
+ id: r.id,
2034
+ workflow_name: r.mode,
2035
+ status: r.success ? 'success' : 'error',
2036
+ started_at: r.timestamp,
2037
+ model: r.targetModel,
2038
+ routed_to: `${r.provider}/${r.targetModel}`,
2039
+ taskType: r.mode,
2040
+ costUsd: r.costUsd,
2041
+ latencyMs: r.latencyMs,
2042
+ tokensIn: r.tokensIn,
2043
+ tokensOut: r.tokensOut,
2044
+ savings: 0,
2045
+ original_model: r.originalModel,
2046
+ }));
2047
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2048
+ res.end(JSON.stringify({ runs, pagination: { total: requestHistory.length } }));
2049
+ return;
2050
+ }
2051
+ if (req.method === 'GET' && telemetryPath === 'savings') {
2052
+ // Calculate savings: difference between cost if all requests used opus vs actual cost
2053
+ const opusCostPer1kIn = 0.015;
2054
+ const opusCostPer1kOut = 0.075;
2055
+ let potentialCost = 0;
2056
+ let actualCost = 0;
2057
+ for (const r of requestHistory) {
2058
+ potentialCost += (r.tokensIn / 1000) * opusCostPer1kIn + (r.tokensOut / 1000) * opusCostPer1kOut;
2059
+ actualCost += r.costUsd;
2060
+ }
2061
+ const saved = potentialCost - actualCost;
2062
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2063
+ res.end(JSON.stringify({
2064
+ total: potentialCost,
2065
+ savings: Math.max(0, saved),
2066
+ savedAmount: Math.max(0, saved),
2067
+ potentialSavings: potentialCost,
2068
+ percentage: potentialCost > 0 ? Math.round((saved / potentialCost) * 100) : 0,
2069
+ byDay: [],
2070
+ }));
2071
+ return;
2072
+ }
2073
+ if (req.method === 'GET' && telemetryPath === 'health') {
2074
+ const providers = [];
2075
+ for (const [name, ep] of Object.entries(exports.DEFAULT_ENDPOINTS)) {
2076
+ const hasKey = !!process.env[ep.apiKeyEnv];
2077
+ providers.push({
2078
+ provider: name,
2079
+ status: hasKey ? 'healthy' : 'down',
2080
+ latency: 0,
2081
+ successRate: hasKey ? 1 : 0,
2082
+ lastChecked: new Date().toISOString(),
2083
+ });
2084
+ }
2085
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2086
+ res.end(JSON.stringify({ providers }));
2087
+ return;
2088
+ }
2089
+ res.writeHead(404, { 'Content-Type': 'application/json' });
2090
+ res.end(JSON.stringify({ error: 'Not found' }));
2091
+ return;
2092
+ }
2093
+ // === Dashboard ===
2094
+ if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
2095
+ res.writeHead(200, { 'Content-Type': 'text/html' });
2096
+ res.end(getDashboardHTML());
2097
+ return;
2098
+ }
1735
2099
  // Extract auth context from incoming request
1736
2100
  const ctx = extractRequestContext(req);
1737
2101
  const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
@@ -2405,6 +2769,13 @@ async function startProxy(config = {}) {
2405
2769
  }, log);
2406
2770
  const durationMs = Date.now() - startTime;
2407
2771
  let responseData = cascadeResult.responseData;
2772
+ // Log cascade request for stats tracking
2773
+ logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0);
2774
+ const cascadeUsage = responseData?.usage;
2775
+ const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
2776
+ const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
2777
+ const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut);
2778
+ updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost);
2408
2779
  if (recordTelemetry) {
2409
2780
  try {
2410
2781
  const runResult = await relay.run({
@@ -2426,15 +2797,14 @@ async function startProxy(config = {}) {
2426
2797
  catch (err) {
2427
2798
  log(`Failed to record run: ${err}`);
2428
2799
  }
2429
- const usage = responseData?.usage;
2430
- const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
2431
- const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
2432
- sendCloudTelemetry(taskType, cascadeResult.model, tokensIn, tokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
2800
+ sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
2433
2801
  }
2434
2802
  res.writeHead(200, { 'Content-Type': 'application/json' });
2435
2803
  res.end(JSON.stringify(responseData));
2436
2804
  }
2437
2805
  catch (err) {
2806
+ const durationMs = Date.now() - startTime;
2807
+ logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade');
2438
2808
  if (err instanceof ProviderResponseError) {
2439
2809
  res.writeHead(err.status, { 'Content-Type': 'application/json' });
2440
2810
  res.end(JSON.stringify(err.payload));
@@ -2499,8 +2869,10 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
2499
2869
  }
2500
2870
  break;
2501
2871
  }
2502
- case 'moonshot': {
2503
- providerResponse = await forwardToMoonshot(request, targetModel, apiKey);
2872
+ case 'openrouter':
2873
+ case 'deepseek':
2874
+ case 'groq': {
2875
+ providerResponse = await forwardToOpenAICompatible(request, targetModel, apiKey);
2504
2876
  responseData = (await providerResponse.json());
2505
2877
  if (!providerResponse.ok) {
2506
2878
  return { responseData, ok: false, status: providerResponse.status };
@@ -2531,8 +2903,10 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2531
2903
  case 'xai':
2532
2904
  providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
2533
2905
  break;
2534
- case 'moonshot':
2535
- providerResponse = await forwardToMoonshotStream(request, targetModel, apiKey);
2906
+ case 'openrouter':
2907
+ case 'deepseek':
2908
+ case 'groq':
2909
+ providerResponse = await forwardToOpenAICompatibleStream(request, targetModel, apiKey);
2536
2910
  break;
2537
2911
  default:
2538
2912
  providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
@@ -2542,6 +2916,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2542
2916
  if (cooldownsEnabled) {
2543
2917
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
2544
2918
  }
2919
+ const durationMs = Date.now() - startTime;
2920
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2545
2921
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
2546
2922
  res.end(JSON.stringify(errorData));
2547
2923
  return;
@@ -2552,6 +2928,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2552
2928
  if (cooldownsEnabled) {
2553
2929
  cooldownManager.recordFailure(targetProvider, errorMsg);
2554
2930
  }
2931
+ const durationMs = Date.now() - startTime;
2932
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2555
2933
  res.writeHead(500, { 'Content-Type': 'application/json' });
2556
2934
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2557
2935
  return;
@@ -2608,7 +2986,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2608
2986
  }
2609
2987
  break;
2610
2988
  default:
2611
- // xAI, Moonshot, OpenAI all use OpenAI-compatible streaming format
2989
+ // xAI, OpenRouter, DeepSeek, Groq, OpenAI all use OpenAI-compatible streaming format
2612
2990
  for await (const chunk of pipeOpenAIStream(providerResponse)) {
2613
2991
  res.write(chunk);
2614
2992
  try {
@@ -2634,6 +3012,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2634
3012
  cooldownManager.recordSuccess(targetProvider);
2635
3013
  }
2636
3014
  const durationMs = Date.now() - startTime;
3015
+ // Always log the request for stats/telemetry tracking
3016
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
3017
+ // Update token/cost info on the history entry
3018
+ const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
3019
+ updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
2637
3020
  if (recordTelemetry) {
2638
3021
  // Record the run (non-blocking)
2639
3022
  relay
@@ -2664,6 +3047,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2664
3047
  if (cooldownsEnabled) {
2665
3048
  cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
2666
3049
  }
3050
+ const durationMs = Date.now() - startTime;
3051
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2667
3052
  res.writeHead(result.status, { 'Content-Type': 'application/json' });
2668
3053
  res.end(JSON.stringify(responseData));
2669
3054
  return;
@@ -2674,6 +3059,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2674
3059
  if (cooldownsEnabled) {
2675
3060
  cooldownManager.recordFailure(targetProvider, errorMsg);
2676
3061
  }
3062
+ const durationMs = Date.now() - startTime;
3063
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2677
3064
  res.writeHead(500, { 'Content-Type': 'application/json' });
2678
3065
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2679
3066
  return;
@@ -2682,6 +3069,14 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2682
3069
  cooldownManager.recordSuccess(targetProvider);
2683
3070
  }
2684
3071
  const durationMs = Date.now() - startTime;
3072
+ // Log the successful request
3073
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
3074
+ // Update token/cost info
3075
+ const usage = responseData?.usage;
3076
+ const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
3077
+ const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
3078
+ const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
3079
+ updateLastHistoryEntry(tokensIn, tokensOut, cost);
2685
3080
  if (recordTelemetry) {
2686
3081
  // Record the run in RelayPlane
2687
3082
  try {