@relayplane/proxy 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68,6 +68,15 @@ const core_1 = require("@relayplane/core");
68
68
  const model_suggestions_js_1 = require("./utils/model-suggestions.js");
69
69
  const telemetry_js_1 = require("./telemetry.js");
70
70
  const stats_js_1 = require("./stats.js");
71
/**
 * Version string reported by the proxy.
 *
 * Read once at module load from the `package.json` located one directory
 * above this file. Any failure while locating, reading or parsing the
 * manifest yields the placeholder '0.0.0'.
 */
const PROXY_VERSION = (function readProxyVersion() {
    try {
        const manifestFile = path.join(__dirname, '..', 'package.json');
        const manifestText = fs.readFileSync(manifestFile, 'utf-8');
        const manifest = JSON.parse(manifestText);
        return manifest.version;
    }
    catch {
        return '0.0.0';
    }
})();
71
80
  /** Shared stats collector instance for the proxy server */
72
81
  exports.proxyStatsCollector = new stats_js_1.StatsCollector();
73
82
  /**
@@ -90,9 +99,33 @@ exports.DEFAULT_ENDPOINTS = {
90
99
  baseUrl: 'https://api.x.ai/v1',
91
100
  apiKeyEnv: 'XAI_API_KEY',
92
101
  },
93
- moonshot: {
94
- baseUrl: 'https://api.moonshot.cn/v1',
95
- apiKeyEnv: 'MOONSHOT_API_KEY',
102
+ openrouter: {
103
+ baseUrl: 'https://openrouter.ai/api/v1',
104
+ apiKeyEnv: 'OPENROUTER_API_KEY',
105
+ },
106
+ deepseek: {
107
+ baseUrl: 'https://api.deepseek.com/v1',
108
+ apiKeyEnv: 'DEEPSEEK_API_KEY',
109
+ },
110
+ groq: {
111
+ baseUrl: 'https://api.groq.com/openai/v1',
112
+ apiKeyEnv: 'GROQ_API_KEY',
113
+ },
114
+ mistral: {
115
+ baseUrl: 'https://api.mistral.ai/v1',
116
+ apiKeyEnv: 'MISTRAL_API_KEY',
117
+ },
118
+ together: {
119
+ baseUrl: 'https://api.together.xyz/v1',
120
+ apiKeyEnv: 'TOGETHER_API_KEY',
121
+ },
122
+ fireworks: {
123
+ baseUrl: 'https://api.fireworks.ai/inference/v1',
124
+ apiKeyEnv: 'FIREWORKS_API_KEY',
125
+ },
126
+ perplexity: {
127
+ baseUrl: 'https://api.perplexity.ai',
128
+ apiKeyEnv: 'PERPLEXITY_API_KEY',
96
129
  },
97
130
  };
98
131
  /**
@@ -264,6 +297,127 @@ const globalStats = {
264
297
  escalations: 0,
265
298
  startedAt: Date.now(),
266
299
  };
300
// --- In-memory request history ---
/** Request entries kept in memory, oldest first. */
const requestHistory = [];
/** Upper bound on the number of entries kept in `requestHistory`. */
const MAX_HISTORY = 10000;
/** Entries older than this many days are discarded when pruning or loading. */
const HISTORY_RETENTION_DAYS = 7;
/** Last numeric suffix handed out for a `req-N` id. */
let requestIdCounter = 0;

// --- Persistent history (JSONL) ---
/** Directory under the user's home that holds the history file. */
const HISTORY_DIR = path.join(os.homedir(), '.relayplane');
/** One JSON document per line. */
const HISTORY_FILE = path.join(HISTORY_DIR, 'history.jsonl');
/** Entries waiting to be appended to `HISTORY_FILE`. */
let historyWriteBuffer = [];
/** Pending delayed-flush timer handle, or null when none is scheduled. */
let historyFlushTimer = null;
/** Number of entries buffered since the last prune/rewrite cycle. */
let historyRequestsSinceLastPrune = 0;
310
/**
 * Trim the in-memory history in place.
 *
 * First drops the leading run of entries whose timestamp is older than the
 * retention window (scanning stops at the first entry that is recent enough),
 * then drops further entries from the front until at most MAX_HISTORY remain.
 */
function pruneOldEntries() {
    const oldestAllowed = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
    // Count how many entries at the head of the array have expired.
    let expiredCount = 0;
    while (expiredCount < requestHistory.length &&
        new Date(requestHistory[expiredCount].timestamp).getTime() < oldestAllowed) {
        expiredCount += 1;
    }
    if (expiredCount > 0) {
        requestHistory.splice(0, expiredCount);
    }
    // Enforce the size cap by discarding the oldest surplus entries.
    const surplus = requestHistory.length - MAX_HISTORY;
    if (surplus > 0) {
        requestHistory.splice(0, surplus);
    }
}
321
/**
 * Populate the in-memory request history from the JSONL history file.
 *
 * Steps:
 *  1. Return immediately when the history file does not exist.
 *  2. Parse the file line by line, skipping blank and unparseable lines and
 *     keeping only entries whose timestamp falls inside the retention window.
 *  3. Keep at most MAX_HISTORY entries (the most recent ones).
 *  4. Advance `requestIdCounter` to the highest `req-N` id that was loaded.
 *  5. Rewrite the file so it contains only the retained entries.
 *
 * Entries whose `id` is missing or not a string are kept but ignored when
 * computing the id counter; previously such an entry made `.match` throw,
 * which aborted steps 4-5 and reported the whole load as failed.
 *
 * Never throws: any I/O failure is logged and swallowed.
 */
function loadHistoryFromDisk() {
    try {
        if (!fs.existsSync(HISTORY_FILE))
            return;
        const content = fs.readFileSync(HISTORY_FILE, 'utf-8');
        const cutoff = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
        for (const line of content.split('\n')) {
            const trimmed = line.trim();
            if (!trimmed)
                continue;
            try {
                const entry = JSON.parse(trimmed);
                // An unparseable timestamp yields NaN, which fails this comparison.
                if (new Date(entry.timestamp).getTime() >= cutoff) {
                    requestHistory.push(entry);
                }
            }
            catch {
                // Skip corrupt lines
            }
        }
        // Cap at MAX_HISTORY (keep most recent)
        const surplus = requestHistory.length - MAX_HISTORY;
        if (surplus > 0) {
            requestHistory.splice(0, surplus);
        }
        // Update requestIdCounter based on loaded entries
        for (const entry of requestHistory) {
            if (typeof entry.id !== 'string')
                continue;
            const match = /^req-(\d+)$/.exec(entry.id);
            if (match) {
                const num = parseInt(match[1], 10);
                if (num > requestIdCounter)
                    requestIdCounter = num;
            }
        }
        // Rewrite file with only valid/recent entries
        rewriteHistoryFile();
        console.log(`[RelayPlane] Loaded ${requestHistory.length} history entries from disk`);
    }
    catch (err) {
        console.log(`[RelayPlane] Could not load history: ${err.message}`);
    }
}
363
/**
 * Replace the history file with the current in-memory history, one JSON
 * document per line. An empty history produces an empty file.
 *
 * The history directory is created if needed. Failures are logged and
 * swallowed.
 */
function rewriteHistoryFile() {
    try {
        fs.mkdirSync(HISTORY_DIR, { recursive: true });
        const serialized = requestHistory.map((record) => JSON.stringify(record));
        let payload = serialized.join('\n');
        if (requestHistory.length) {
            // Terminate the last record so later appends start on a new line.
            payload += '\n';
        }
        fs.writeFileSync(HISTORY_FILE, payload, 'utf-8');
    }
    catch (err) {
        console.log(`[RelayPlane] Could not rewrite history file: ${err.message}`);
    }
}
373
/**
 * Append every buffered entry to the history file as JSONL and start a new,
 * empty buffer.
 *
 * Does nothing when the buffer is empty. A failed write is logged; the
 * buffer is reset either way, so a failed batch is not retried.
 */
function flushHistoryBuffer() {
    if (!historyWriteBuffer.length) {
        return;
    }
    try {
        fs.mkdirSync(HISTORY_DIR, { recursive: true });
        const jsonLines = historyWriteBuffer.map((record) => JSON.stringify(record));
        fs.appendFileSync(HISTORY_FILE, jsonLines.join('\n') + '\n', 'utf-8');
    }
    catch (err) {
        console.log(`[RelayPlane] Could not flush history: ${err.message}`);
    }
    historyWriteBuffer = [];
}
386
/**
 * Arrange for the write buffer to be flushed 10 seconds from now, unless a
 * flush is already pending (in which case the existing timer is left alone).
 */
function scheduleHistoryFlush() {
    if (!historyFlushTimer) {
        const onTimerFired = () => {
            // Clear the handle first so a new flush can be scheduled afterwards.
            historyFlushTimer = null;
            flushHistoryBuffer();
        };
        historyFlushTimer = setTimeout(onTimerFired, 10000);
    }
}
394
/**
 * Queue a history entry for persistence.
 *
 * - Once 20 entries are buffered they are appended to disk immediately;
 *   otherwise a delayed flush is scheduled.
 * - Every 100 buffered entries the in-memory history is pruned and the
 *   history file is rewritten from it.
 *
 * Before that rewrite the buffer is drained and any pending flush timer is
 * cancelled. The rewrite dumps the whole in-memory history; the visible
 * caller adds each entry to that history before buffering it, so entries
 * still in the buffer would otherwise be written by the rewrite and then
 * appended a second time by the next flush, leaving duplicate lines on disk.
 *
 * @param {object} entry - History record to persist; serialised with JSON.stringify on flush.
 */
function bufferHistoryEntry(entry) {
    historyWriteBuffer.push(entry);
    historyRequestsSinceLastPrune++;
    if (historyWriteBuffer.length >= 20) {
        if (historyFlushTimer) {
            clearTimeout(historyFlushTimer);
            historyFlushTimer = null;
        }
        flushHistoryBuffer();
    }
    else {
        scheduleHistoryFlush();
    }
    // Prune every 100 requests
    if (historyRequestsSinceLastPrune >= 100) {
        historyRequestsSinceLastPrune = 0;
        // Drain pending entries first: if the rewrite fails they are already
        // appended, and if it succeeds nothing is left to be appended twice.
        if (historyFlushTimer) {
            clearTimeout(historyFlushTimer);
            historyFlushTimer = null;
        }
        flushHistoryBuffer();
        pruneOldEntries();
        rewriteHistoryFile();
    }
}
414
/**
 * Cancel any pending delayed flush and write buffered history entries out
 * right away.
 */
function shutdownHistory() {
    const pendingTimer = historyFlushTimer;
    if (pendingTimer) {
        clearTimeout(pendingTimer);
        historyFlushTimer = null;
    }
    flushHistoryBuffer();
}
267
421
  function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
268
422
  const timestamp = new Date().toISOString();
269
423
  const status = success ? '✓' : '✗';
@@ -291,6 +445,35 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
291
445
  viaProxy: true,
292
446
  success,
293
447
  });
448
+ // Record to request history for telemetry endpoints
449
+ const entry = {
450
+ id: `req-${++requestIdCounter}`,
451
+ originalModel,
452
+ targetModel,
453
+ provider,
454
+ latencyMs,
455
+ success,
456
+ mode,
457
+ escalated: !!escalated,
458
+ timestamp,
459
+ tokensIn: 0,
460
+ tokensOut: 0,
461
+ costUsd: 0,
462
+ };
463
+ requestHistory.push(entry);
464
+ if (requestHistory.length > MAX_HISTORY) {
465
+ requestHistory.shift();
466
+ }
467
+ bufferHistoryEntry(entry);
468
+ }
469
/**
 * Write token counts and cost onto the newest entry in the request history.
 * Does nothing when the history is empty.
 *
 * @param tokensIn  Value stored as the entry's `tokensIn`.
 * @param tokensOut Value stored as the entry's `tokensOut`.
 * @param costUsd   Value stored as the entry's `costUsd`.
 */
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd) {
    if (requestHistory.length === 0) {
        return;
    }
    const newest = requestHistory[requestHistory.length - 1];
    Object.assign(newest, { tokensIn, tokensOut, costUsd });
}
295
478
  const DEFAULT_PROXY_CONFIG = {
296
479
  enabled: true,
@@ -824,40 +1007,40 @@ async function forwardToXAIStream(request, targetModel, apiKey) {
824
1007
  return response;
825
1008
  }
826
1009
  /**
827
- * Forward non-streaming request to Moonshot API (OpenAI-compatible)
1010
+ * Forward non-streaming request to OpenAI-compatible provider (OpenRouter, DeepSeek, Groq)
828
1011
  */
829
- async function forwardToMoonshot(request, targetModel, apiKey) {
830
- const moonshotBody = {
1012
/**
 * Send a non-streaming chat-completions request to an OpenAI-compatible provider.
 *
 * @param request     Incoming request body; copied, with `model` and `stream` overridden.
 * @param targetModel Model name placed in the outgoing body.
 * @param apiKey      Sent as a Bearer token in the Authorization header.
 * @param provider    Key into `exports.DEFAULT_ENDPOINTS` used to pick the base URL;
 *                    the OpenRouter URL is used when that entry has no `baseUrl`.
 * @returns The response object resolved by `fetch`.
 */
async function forwardToOpenAICompatible(request, targetModel, apiKey, provider = 'openrouter') {
    const payload = { ...request, model: targetModel, stream: false };
    const endpointRoot = exports.DEFAULT_ENDPOINTS[provider]?.baseUrl || "https://openrouter.ai/api/v1";
    return await fetch(`${endpointRoot}/chat/completions`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(payload),
    });
}
845
1028
  /**
846
- * Forward streaming request to Moonshot API (OpenAI-compatible)
1029
+ * Forward streaming request to OpenAI-compatible provider (OpenRouter, DeepSeek, Groq)
847
1030
  */
848
- async function forwardToMoonshotStream(request, targetModel, apiKey) {
849
- const moonshotBody = {
1031
/**
 * Send a streaming chat-completions request to an OpenAI-compatible provider.
 *
 * @param request     Incoming request body; copied, with `model` and `stream` overridden.
 * @param targetModel Model name placed in the outgoing body.
 * @param apiKey      Sent as a Bearer token in the Authorization header.
 * @param provider    Key into `exports.DEFAULT_ENDPOINTS` used to pick the base URL;
 *                    the OpenRouter URL is used when that entry has no `baseUrl`.
 * @returns The response object resolved by `fetch`.
 */
async function forwardToOpenAICompatibleStream(request, targetModel, apiKey, provider = 'openrouter') {
    const payload = { ...request, model: targetModel, stream: true };
    const endpointRoot = exports.DEFAULT_ENDPOINTS[provider]?.baseUrl || "https://openrouter.ai/api/v1";
    return await fetch(`${endpointRoot}/chat/completions`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(payload),
    });
}
@@ -1337,7 +1520,7 @@ function parsePreferredModel(preferredModel) {
1337
1520
  if (!provider || !model)
1338
1521
  return null;
1339
1522
  // Validate provider
1340
- const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
1523
+ const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
1341
1524
  if (!validProviders.includes(provider))
1342
1525
  return null;
1343
1526
  return { provider: provider, model };
@@ -1384,14 +1567,14 @@ function resolveExplicitModel(modelName) {
1384
1567
  if (modelName.startsWith('grok-')) {
1385
1568
  return { provider: 'xai', model: modelName };
1386
1569
  }
1387
- // Moonshot models (moonshot-*)
1388
- if (modelName.startsWith('moonshot-')) {
1389
- return { provider: 'moonshot', model: modelName };
1570
+ // OpenRouter/DeepSeek/Groq models
1571
+ if (modelName.startsWith('openrouter/') || modelName.startsWith('deepseek-') || modelName.startsWith('groq-')) {
1572
+ return { provider: 'openrouter', model: modelName };
1390
1573
  }
1391
1574
  // Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
1392
1575
  if (modelName.includes('/')) {
1393
1576
  const [provider, model] = modelName.split('/');
1394
- const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
1577
+ const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
1395
1578
  if (provider && model && validProviders.includes(provider)) {
1396
1579
  return { provider: provider, model };
1397
1580
  }
@@ -1572,6 +1755,73 @@ async function cascadeRequest(config, makeRequest, log) {
1572
1755
  }
1573
1756
  throw new Error('All cascade models exhausted');
1574
1757
  }
1758
/**
 * Build the self-contained HTML page for the local dashboard.
 *
 * The page's inline script polls `/health` and the `/v1/telemetry/*`
 * endpoints every 5 seconds and renders summary cards, a per-model table,
 * provider status and the most recent runs.
 *
 * Security: table rows are assembled as HTML strings and assigned to
 * `innerHTML`. Fields such as model names originate from client requests, so
 * every interpolated value is passed through the page-local `esc()` helper,
 * which HTML-escapes `& < > " '`. Without it a crafted model name would be
 * executed as markup in the dashboard.
 *
 * @returns {string} Complete HTML document.
 */
function getDashboardHTML() {
    return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Dashboard</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1200px;margin:0 auto}
a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
.header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
.header .meta{font-size:.8rem;color:#64748b}
.cards{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:16px;margin-bottom:32px}
.card{background:#111318;border:1px solid #1e293b;border-radius:12px;padding:20px}
.card .label{font-size:.75rem;color:#64748b;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px}
.card .value{font-size:1.75rem;font-weight:700}.green{color:#34d399}
table{width:100%;border-collapse:collapse;font-size:.85rem}
th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:1px solid #1e293b;font-size:.75rem;text-transform:uppercase;letter-spacing:.04em}
td{padding:8px 12px;border-bottom:1px solid #111318}
.section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.down{background:#ef4444}
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
</style></head><body>
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
<div class="cards">
<div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
<div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
<div class="card"><div class="label">Savings</div><div class="value green" id="savings">—</div></div>
<div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
</div>
<div class="section"><h2>Model Breakdown</h2>
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
<div class="section"><h2>Recent Runs</h2>
<table><thead><tr><th>Time</th><th>Model</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
<script>
const $ = id => document.getElementById(id);
function esc(v){return String(v).replace(/[&<>"']/g,c=>({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'})[c])}
function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
async function load(){
try{
const [health,stats,runsR,sav,provH]=await Promise.all([
fetch('/health').then(r=>r.json()),
fetch('/v1/telemetry/stats').then(r=>r.json()),
fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
fetch('/v1/telemetry/savings').then(r=>r.json()),
fetch('/v1/telemetry/health').then(r=>r.json())
]);
$('ver').textContent='v'+health.version;
$('uptime').textContent=dur(health.uptime);
$('totalReq').textContent=health.requests??0;
$('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
$('savings').textContent=(sav.percentage??0)+'%';
$('avgLat').textContent=(stats.summary?.avgLatencyMs??0)+'ms';
const total=stats.summary?.totalEvents||1;
$('models').innerHTML=(stats.byModel||[]).map(m=>
'<tr><td>'+esc(m.model)+'</td><td>'+esc(m.count)+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(m.count/total*100,1)+'%</td></tr>'
).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
$('runs').innerHTML=(runsR.runs||[]).map(r=>
'<tr><td>'+esc(fmtTime(r.started_at))+'</td><td>'+esc(r.model)+'</td><td>'+esc(r.tokensIn||0)+'</td><td>'+esc(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+esc(r.latencyMs)+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+esc(r.status)+'</span></td></tr>'
).join('')||'<tr><td colspan=7 style="color:#64748b">No runs yet</td></tr>';
$('providers').innerHTML=(provH.providers||[]).map(p=>
'<div class="prov-item"><span class="dot '+(p.status==='healthy'?'up':'down')+'"></span>'+esc(p.provider)+'</div>'
).join('');
}catch(e){console.error(e)}
}
load();setInterval(load,5000);
</script></body></html>`;
}
1575
1825
  /**
1576
1826
  * Start the RelayPlane proxy server
1577
1827
  */
@@ -1584,6 +1834,15 @@ async function startProxy(config = {}) {
1584
1834
  if (verbose)
1585
1835
  console.log(`[relayplane] ${msg}`);
1586
1836
  };
1837
+ // Load persistent history from disk
1838
+ loadHistoryFromDisk();
1839
+ // Flush history on shutdown
1840
+ const handleShutdown = () => {
1841
+ shutdownHistory();
1842
+ process.exit(0);
1843
+ };
1844
+ process.on('SIGINT', handleShutdown);
1845
+ process.on('SIGTERM', handleShutdown);
1587
1846
  const configPath = getProxyConfigPath();
1588
1847
  let proxyConfig = await loadProxyConfig(configPath, log);
1589
1848
  const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
@@ -1639,7 +1898,7 @@ async function startProxy(config = {}) {
1639
1898
  res.writeHead(200, { 'Content-Type': 'application/json' });
1640
1899
  res.end(JSON.stringify({
1641
1900
  status: 'ok',
1642
- version: '1.1.3',
1901
+ version: PROXY_VERSION,
1643
1902
  uptime: Math.floor(uptimeMs / 1000),
1644
1903
  uptimeMs,
1645
1904
  requests: globalStats.totalRequests,
@@ -1723,6 +1982,120 @@ async function startProxy(config = {}) {
1723
1982
  return;
1724
1983
  }
1725
1984
  }
1985
+ // === Telemetry endpoints for dashboard ===
1986
+ if (pathname.startsWith('/v1/telemetry/')) {
1987
+ const telemetryPath = pathname.replace('/v1/telemetry/', '');
1988
+ const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
1989
+ const params = new URLSearchParams(queryString);
1990
+ if (req.method === 'GET' && telemetryPath === 'stats') {
1991
+ const days = parseInt(params.get('days') || '7', 10);
1992
+ const cutoff = Date.now() - days * 86400000;
1993
+ const recent = requestHistory.filter(r => new Date(r.timestamp).getTime() >= cutoff);
1994
+ // Model breakdown
1995
+ const modelMap = new Map();
1996
+ for (const r of recent) {
1997
+ const key = r.targetModel;
1998
+ const cur = modelMap.get(key) || { count: 0, cost: 0 };
1999
+ cur.count++;
2000
+ cur.cost += r.costUsd;
2001
+ modelMap.set(key, cur);
2002
+ }
2003
+ // Daily stats
2004
+ const dailyMap = new Map();
2005
+ for (const r of recent) {
2006
+ const date = r.timestamp.slice(0, 10);
2007
+ const cur = dailyMap.get(date) || { requests: 0, cost: 0 };
2008
+ cur.requests++;
2009
+ cur.cost += r.costUsd;
2010
+ dailyMap.set(date, cur);
2011
+ }
2012
+ const totalCost = recent.reduce((s, r) => s + r.costUsd, 0);
2013
+ const totalLatency = recent.reduce((s, r) => s + r.latencyMs, 0);
2014
+ const result = {
2015
+ summary: {
2016
+ totalCostUsd: totalCost,
2017
+ totalEvents: recent.length,
2018
+ avgLatencyMs: recent.length ? Math.round(totalLatency / recent.length) : 0,
2019
+ successRate: recent.length ? recent.filter(r => r.success).length / recent.length : 0,
2020
+ },
2021
+ byModel: Array.from(modelMap.entries()).map(([model, v]) => ({ model, count: v.count, costUsd: v.cost, savings: 0 })),
2022
+ dailyCosts: Array.from(dailyMap.entries()).map(([date, v]) => ({ date, costUsd: v.cost, requests: v.requests })),
2023
+ };
2024
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2025
+ res.end(JSON.stringify(result));
2026
+ return;
2027
+ }
2028
+ if (req.method === 'GET' && telemetryPath === 'runs') {
2029
+ const limit = parseInt(params.get('limit') || '50', 10);
2030
+ const offset = parseInt(params.get('offset') || '0', 10);
2031
+ const sorted = [...requestHistory].reverse();
2032
+ const runs = sorted.slice(offset, offset + limit).map(r => ({
2033
+ id: r.id,
2034
+ workflow_name: r.mode,
2035
+ status: r.success ? 'success' : 'error',
2036
+ started_at: r.timestamp,
2037
+ model: r.targetModel,
2038
+ routed_to: `${r.provider}/${r.targetModel}`,
2039
+ taskType: r.mode,
2040
+ costUsd: r.costUsd,
2041
+ latencyMs: r.latencyMs,
2042
+ tokensIn: r.tokensIn,
2043
+ tokensOut: r.tokensOut,
2044
+ savings: 0,
2045
+ original_model: r.originalModel,
2046
+ }));
2047
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2048
+ res.end(JSON.stringify({ runs, pagination: { total: requestHistory.length } }));
2049
+ return;
2050
+ }
2051
+ if (req.method === 'GET' && telemetryPath === 'savings') {
2052
+ // Calculate savings: difference between cost if all requests used opus vs actual cost
2053
+ const opusCostPer1kIn = 0.015;
2054
+ const opusCostPer1kOut = 0.075;
2055
+ let potentialCost = 0;
2056
+ let actualCost = 0;
2057
+ for (const r of requestHistory) {
2058
+ potentialCost += (r.tokensIn / 1000) * opusCostPer1kIn + (r.tokensOut / 1000) * opusCostPer1kOut;
2059
+ actualCost += r.costUsd;
2060
+ }
2061
+ const saved = potentialCost - actualCost;
2062
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2063
+ res.end(JSON.stringify({
2064
+ total: potentialCost,
2065
+ savings: Math.max(0, saved),
2066
+ savedAmount: Math.max(0, saved),
2067
+ potentialSavings: potentialCost,
2068
+ percentage: potentialCost > 0 ? Math.round((saved / potentialCost) * 100) : 0,
2069
+ byDay: [],
2070
+ }));
2071
+ return;
2072
+ }
2073
+ if (req.method === 'GET' && telemetryPath === 'health') {
2074
+ const providers = [];
2075
+ for (const [name, ep] of Object.entries(exports.DEFAULT_ENDPOINTS)) {
2076
+ const hasKey = !!process.env[ep.apiKeyEnv];
2077
+ providers.push({
2078
+ provider: name,
2079
+ status: hasKey ? 'healthy' : 'down',
2080
+ latency: 0,
2081
+ successRate: hasKey ? 1 : 0,
2082
+ lastChecked: new Date().toISOString(),
2083
+ });
2084
+ }
2085
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2086
+ res.end(JSON.stringify({ providers }));
2087
+ return;
2088
+ }
2089
+ res.writeHead(404, { 'Content-Type': 'application/json' });
2090
+ res.end(JSON.stringify({ error: 'Not found' }));
2091
+ return;
2092
+ }
2093
+ // === Dashboard ===
2094
+ if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
2095
+ res.writeHead(200, { 'Content-Type': 'text/html' });
2096
+ res.end(getDashboardHTML());
2097
+ return;
2098
+ }
1726
2099
  // Extract auth context from incoming request
1727
2100
  const ctx = extractRequestContext(req);
1728
2101
  const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
@@ -1992,6 +2365,8 @@ async function startProxy(config = {}) {
1992
2365
  if (proxyConfig.reliability?.cooldowns?.enabled) {
1993
2366
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
1994
2367
  }
2368
+ const durationMs = Date.now() - startTime;
2369
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
1995
2370
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
1996
2371
  res.end(JSON.stringify(errorPayload));
1997
2372
  return;
@@ -2394,6 +2769,13 @@ async function startProxy(config = {}) {
2394
2769
  }, log);
2395
2770
  const durationMs = Date.now() - startTime;
2396
2771
  let responseData = cascadeResult.responseData;
2772
+ // Log cascade request for stats tracking
2773
+ logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0);
2774
+ const cascadeUsage = responseData?.usage;
2775
+ const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
2776
+ const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
2777
+ const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut);
2778
+ updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost);
2397
2779
  if (recordTelemetry) {
2398
2780
  try {
2399
2781
  const runResult = await relay.run({
@@ -2415,15 +2797,14 @@ async function startProxy(config = {}) {
2415
2797
  catch (err) {
2416
2798
  log(`Failed to record run: ${err}`);
2417
2799
  }
2418
- const usage = responseData?.usage;
2419
- const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
2420
- const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
2421
- sendCloudTelemetry(taskType, cascadeResult.model, tokensIn, tokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
2800
+ sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
2422
2801
  }
2423
2802
  res.writeHead(200, { 'Content-Type': 'application/json' });
2424
2803
  res.end(JSON.stringify(responseData));
2425
2804
  }
2426
2805
  catch (err) {
2806
+ const durationMs = Date.now() - startTime;
2807
+ logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade');
2427
2808
  if (err instanceof ProviderResponseError) {
2428
2809
  res.writeHead(err.status, { 'Content-Type': 'application/json' });
2429
2810
  res.end(JSON.stringify(err.payload));
@@ -2488,8 +2869,10 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
2488
2869
  }
2489
2870
  break;
2490
2871
  }
2491
- case 'moonshot': {
2492
- providerResponse = await forwardToMoonshot(request, targetModel, apiKey);
2872
+ case 'openrouter':
2873
+ case 'deepseek':
2874
+ case 'groq': {
2875
+ providerResponse = await forwardToOpenAICompatible(request, targetModel, apiKey);
2493
2876
  responseData = (await providerResponse.json());
2494
2877
  if (!providerResponse.ok) {
2495
2878
  return { responseData, ok: false, status: providerResponse.status };
@@ -2520,8 +2903,10 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2520
2903
  case 'xai':
2521
2904
  providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
2522
2905
  break;
2523
- case 'moonshot':
2524
- providerResponse = await forwardToMoonshotStream(request, targetModel, apiKey);
2906
+ case 'openrouter':
2907
+ case 'deepseek':
2908
+ case 'groq':
2909
+ providerResponse = await forwardToOpenAICompatibleStream(request, targetModel, apiKey);
2525
2910
  break;
2526
2911
  default:
2527
2912
  providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
@@ -2531,6 +2916,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2531
2916
  if (cooldownsEnabled) {
2532
2917
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
2533
2918
  }
2919
+ const durationMs = Date.now() - startTime;
2920
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2534
2921
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
2535
2922
  res.end(JSON.stringify(errorData));
2536
2923
  return;
@@ -2541,6 +2928,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2541
2928
  if (cooldownsEnabled) {
2542
2929
  cooldownManager.recordFailure(targetProvider, errorMsg);
2543
2930
  }
2931
+ const durationMs = Date.now() - startTime;
2932
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2544
2933
  res.writeHead(500, { 'Content-Type': 'application/json' });
2545
2934
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2546
2935
  return;
@@ -2597,7 +2986,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2597
2986
  }
2598
2987
  break;
2599
2988
  default:
2600
- // xAI, Moonshot, OpenAI all use OpenAI-compatible streaming format
2989
+ // xAI, OpenRouter, DeepSeek, Groq, OpenAI all use OpenAI-compatible streaming format
2601
2990
  for await (const chunk of pipeOpenAIStream(providerResponse)) {
2602
2991
  res.write(chunk);
2603
2992
  try {
@@ -2623,6 +3012,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2623
3012
  cooldownManager.recordSuccess(targetProvider);
2624
3013
  }
2625
3014
  const durationMs = Date.now() - startTime;
3015
+ // Always log the request for stats/telemetry tracking
3016
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
3017
+ // Update token/cost info on the history entry
3018
+ const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
3019
+ updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
2626
3020
  if (recordTelemetry) {
2627
3021
  // Record the run (non-blocking)
2628
3022
  relay
@@ -2653,6 +3047,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2653
3047
  if (cooldownsEnabled) {
2654
3048
  cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
2655
3049
  }
3050
+ const durationMs = Date.now() - startTime;
3051
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2656
3052
  res.writeHead(result.status, { 'Content-Type': 'application/json' });
2657
3053
  res.end(JSON.stringify(responseData));
2658
3054
  return;
@@ -2663,6 +3059,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2663
3059
  if (cooldownsEnabled) {
2664
3060
  cooldownManager.recordFailure(targetProvider, errorMsg);
2665
3061
  }
3062
+ const durationMs = Date.now() - startTime;
3063
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2666
3064
  res.writeHead(500, { 'Content-Type': 'application/json' });
2667
3065
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2668
3066
  return;
@@ -2671,6 +3069,14 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
2671
3069
  cooldownManager.recordSuccess(targetProvider);
2672
3070
  }
2673
3071
  const durationMs = Date.now() - startTime;
3072
+ // Log the successful request
3073
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
3074
+ // Update token/cost info
3075
+ const usage = responseData?.usage;
3076
+ const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
3077
+ const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
3078
+ const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
3079
+ updateLastHistoryEntry(tokensIn, tokensOut, cost);
2674
3080
  if (recordTelemetry) {
2675
3081
  // Record the run in RelayPlane
2676
3082
  try {