@relayplane/proxy 1.5.1 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -157
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +73 -8
- package/dist/cli.js.map +1 -1
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +422 -27
- package/dist/standalone-proxy.js.map +1 -1
- package/package.json +8 -6
package/dist/standalone-proxy.js
CHANGED
|
@@ -99,9 +99,33 @@ exports.DEFAULT_ENDPOINTS = {
|
|
|
99
99
|
baseUrl: 'https://api.x.ai/v1',
|
|
100
100
|
apiKeyEnv: 'XAI_API_KEY',
|
|
101
101
|
},
|
|
102
|
-
|
|
103
|
-
baseUrl: 'https://
|
|
104
|
-
apiKeyEnv: '
|
|
102
|
+
openrouter: {
|
|
103
|
+
baseUrl: 'https://openrouter.ai/api/v1',
|
|
104
|
+
apiKeyEnv: 'OPENROUTER_API_KEY',
|
|
105
|
+
},
|
|
106
|
+
deepseek: {
|
|
107
|
+
baseUrl: 'https://api.deepseek.com/v1',
|
|
108
|
+
apiKeyEnv: 'DEEPSEEK_API_KEY',
|
|
109
|
+
},
|
|
110
|
+
groq: {
|
|
111
|
+
baseUrl: 'https://api.groq.com/openai/v1',
|
|
112
|
+
apiKeyEnv: 'GROQ_API_KEY',
|
|
113
|
+
},
|
|
114
|
+
mistral: {
|
|
115
|
+
baseUrl: 'https://api.mistral.ai/v1',
|
|
116
|
+
apiKeyEnv: 'MISTRAL_API_KEY',
|
|
117
|
+
},
|
|
118
|
+
together: {
|
|
119
|
+
baseUrl: 'https://api.together.xyz/v1',
|
|
120
|
+
apiKeyEnv: 'TOGETHER_API_KEY',
|
|
121
|
+
},
|
|
122
|
+
fireworks: {
|
|
123
|
+
baseUrl: 'https://api.fireworks.ai/inference/v1',
|
|
124
|
+
apiKeyEnv: 'FIREWORKS_API_KEY',
|
|
125
|
+
},
|
|
126
|
+
perplexity: {
|
|
127
|
+
baseUrl: 'https://api.perplexity.ai',
|
|
128
|
+
apiKeyEnv: 'PERPLEXITY_API_KEY',
|
|
105
129
|
},
|
|
106
130
|
};
|
|
107
131
|
/**
|
|
@@ -273,6 +297,127 @@ const globalStats = {
|
|
|
273
297
|
escalations: 0,
|
|
274
298
|
startedAt: Date.now(),
|
|
275
299
|
};
|
|
300
|
+
// --- In-memory request history ---
// One record per proxied request, appended in arrival order (oldest first).
const requestHistory = [];
// Upper bound on the number of records kept in memory.
const MAX_HISTORY = 10000;
// Records whose timestamp is older than this many days are dropped.
const HISTORY_RETENTION_DAYS = 7;
// Last numeric suffix handed out for a `req-N` record id.
let requestIdCounter = 0;

// --- Persistent history (JSONL) ---
// Directory and file (one JSON document per line) that hold the history on disk.
const HISTORY_DIR = path.join(os.homedir(), '.relayplane');
const HISTORY_FILE = path.join(HISTORY_DIR, 'history.jsonl');
// Records accepted but not yet appended to HISTORY_FILE.
let historyWriteBuffer = [];
// Handle of the pending delayed flush, or null when none is scheduled.
let historyFlushTimer = null;
// Number of records buffered since the last prune + rewrite pass.
let historyRequestsSinceLastPrune = 0;
|
|
310
|
+
/**
 * Trim the in-memory request history in place.
 *
 * Removes the leading run of entries whose timestamp is older than
 * HISTORY_RETENTION_DAYS, then drops further entries from the front until at
 * most MAX_HISTORY remain. The array object itself is kept (it is mutated, not
 * replaced), so other references to `requestHistory` stay valid.
 */
function pruneOldEntries() {
    const cutoff = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
    // Only the expired prefix is considered: scanning stops at the first entry
    // that is recent enough (or whose timestamp cannot be parsed).
    let expiredCount = 0;
    while (expiredCount < requestHistory.length &&
        new Date(requestHistory[expiredCount].timestamp).getTime() < cutoff) {
        expiredCount++;
    }
    // Whatever still exceeds the cap after expiry is also removed from the front.
    const overflow = requestHistory.length - expiredCount - MAX_HISTORY;
    const dropCount = expiredCount + Math.max(0, overflow);
    if (dropCount > 0) {
        // One splice instead of repeated shift() calls, each of which re-indexes the array.
        requestHistory.splice(0, dropCount);
    }
}
|
|
321
|
+
/**
 * Load persisted request history from HISTORY_FILE into `requestHistory`.
 *
 * Each non-empty line is parsed as JSON. Lines that fail to parse, and entries
 * whose timestamp is older than HISTORY_RETENTION_DAYS (or not parseable as a
 * date), are skipped. After loading, the history is capped at MAX_HISTORY
 * (oldest dropped first), `requestIdCounter` is raised to the highest `req-N`
 * id seen, and the file is rewritten so it only holds the retained entries.
 *
 * Never throws: any failure is logged and the function returns.
 */
function loadHistoryFromDisk() {
    try {
        if (!fs.existsSync(HISTORY_FILE))
            return;
        const content = fs.readFileSync(HISTORY_FILE, 'utf-8');
        const cutoff = Date.now() - HISTORY_RETENTION_DAYS * 86400000;
        for (const line of content.split('\n')) {
            const trimmed = line.trim();
            if (!trimmed)
                continue;
            try {
                const entry = JSON.parse(trimmed);
                if (new Date(entry.timestamp).getTime() >= cutoff) {
                    requestHistory.push(entry);
                }
            }
            catch {
                // Skip corrupt lines
            }
        }
        // Cap at MAX_HISTORY (keep most recent) with a single in-place removal.
        if (requestHistory.length > MAX_HISTORY) {
            requestHistory.splice(0, requestHistory.length - MAX_HISTORY);
        }
        // Update requestIdCounter based on loaded entries. An entry can be valid
        // JSON with a usable timestamp yet carry no string id; guard against it so
        // one odd record does not abort the load and skip the rewrite below.
        for (const entry of requestHistory) {
            const match = typeof entry.id === 'string' ? entry.id.match(/^req-(\d+)$/) : null;
            if (match) {
                const num = Number.parseInt(match[1], 10);
                if (num > requestIdCounter)
                    requestIdCounter = num;
            }
        }
        // Rewrite file with only valid/recent entries
        rewriteHistoryFile();
        console.log(`[RelayPlane] Loaded ${requestHistory.length} history entries from disk`);
    }
    catch (err) {
        console.log(`[RelayPlane] Could not load history: ${err.message}`);
    }
}
|
|
363
|
+
/**
 * Replace HISTORY_FILE with the current contents of `requestHistory`,
 * one JSON document per line (an empty history produces an empty file).
 *
 * The data is written to a temporary sibling file first and then renamed over
 * HISTORY_FILE, so an interrupted write cannot leave a truncated history file
 * behind. Never throws: failures are logged and the previous file is kept.
 */
function rewriteHistoryFile() {
    // Same directory as the target so the rename does not cross file systems.
    const tempFile = `${HISTORY_FILE}.${process.pid}.tmp`;
    try {
        fs.mkdirSync(HISTORY_DIR, { recursive: true });
        const data = requestHistory.map(e => JSON.stringify(e)).join('\n') + (requestHistory.length ? '\n' : '');
        fs.writeFileSync(tempFile, data, 'utf-8');
        fs.renameSync(tempFile, HISTORY_FILE);
    }
    catch (err) {
        console.log(`[RelayPlane] Could not rewrite history file: ${err.message}`);
        try {
            // Best-effort cleanup of a partially written temp file.
            fs.unlinkSync(tempFile);
        }
        catch {
            // Temp file was never created or is already gone.
        }
    }
}
|
|
373
|
+
/**
 * Append every buffered history entry to HISTORY_FILE as JSON lines and
 * empty the buffer.
 *
 * Does nothing when the buffer is empty. A failed write is logged; the buffer
 * is emptied either way.
 */
function flushHistoryBuffer() {
    const pending = historyWriteBuffer;
    if (pending.length !== 0) {
        try {
            fs.mkdirSync(HISTORY_DIR, { recursive: true });
            const serialized = pending.map((item) => JSON.stringify(item));
            fs.appendFileSync(HISTORY_FILE, serialized.join('\n') + '\n', 'utf-8');
        }
        catch (err) {
            console.log(`[RelayPlane] Could not flush history: ${err.message}`);
        }
        historyWriteBuffer = [];
    }
}
|
|
386
|
+
/**
 * Arrange for the history buffer to be flushed 10 seconds from now.
 * A flush that is already pending is left as it is.
 */
function scheduleHistoryFlush() {
    if (!historyFlushTimer) {
        const runDelayedFlush = () => {
            // Clear the handle first so a new flush can be scheduled afterwards.
            historyFlushTimer = null;
            flushHistoryBuffer();
        };
        historyFlushTimer = setTimeout(runDelayedFlush, 10000);
    }
}
|
|
394
|
+
/**
 * Queue a history entry for persistence.
 *
 * - Every 100th buffered entry triggers a prune of the in-memory history and a
 *   full rewrite of the history file.
 * - Otherwise the buffer is flushed immediately once it holds 20 entries, or a
 *   delayed flush is scheduled.
 *
 * @param {object} entry - History record to persist; it is expected to be in
 *   `requestHistory` already (logRequest pushes it there before calling this).
 */
function bufferHistoryEntry(entry) {
    historyWriteBuffer.push(entry);
    historyRequestsSinceLastPrune++;
    // Prune every 100 requests
    if (historyRequestsSinceLastPrune >= 100) {
        historyRequestsSinceLastPrune = 0;
        pruneOldEntries();
        rewriteHistoryFile();
        // The rewrite serializes the whole in-memory history, which already
        // contains the buffered entries. Flushing them afterwards would append
        // the same records a second time, so drop the buffer and any pending flush.
        if (historyFlushTimer) {
            clearTimeout(historyFlushTimer);
            historyFlushTimer = null;
        }
        historyWriteBuffer = [];
        return;
    }
    if (historyWriteBuffer.length >= 20) {
        // Buffer is full: flush now and cancel the delayed flush, if any.
        if (historyFlushTimer) {
            clearTimeout(historyFlushTimer);
            historyFlushTimer = null;
        }
        flushHistoryBuffer();
    }
    else {
        scheduleHistoryFlush();
    }
}
|
|
414
|
+
/**
 * Final persistence step for shutdown: cancel the delayed flush, if one is
 * pending, and write whatever is still buffered straight away.
 */
function shutdownHistory() {
    const pendingTimer = historyFlushTimer;
    if (pendingTimer) {
        clearTimeout(pendingTimer);
        historyFlushTimer = null;
    }
    flushHistoryBuffer();
}
|
|
276
421
|
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
|
|
277
422
|
const timestamp = new Date().toISOString();
|
|
278
423
|
const status = success ? '✓' : '✗';
|
|
@@ -300,6 +445,35 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
|
|
|
300
445
|
viaProxy: true,
|
|
301
446
|
success,
|
|
302
447
|
});
|
|
448
|
+
// Record to request history for telemetry endpoints
|
|
449
|
+
const entry = {
|
|
450
|
+
id: `req-${++requestIdCounter}`,
|
|
451
|
+
originalModel,
|
|
452
|
+
targetModel,
|
|
453
|
+
provider,
|
|
454
|
+
latencyMs,
|
|
455
|
+
success,
|
|
456
|
+
mode,
|
|
457
|
+
escalated: !!escalated,
|
|
458
|
+
timestamp,
|
|
459
|
+
tokensIn: 0,
|
|
460
|
+
tokensOut: 0,
|
|
461
|
+
costUsd: 0,
|
|
462
|
+
};
|
|
463
|
+
requestHistory.push(entry);
|
|
464
|
+
if (requestHistory.length > MAX_HISTORY) {
|
|
465
|
+
requestHistory.shift();
|
|
466
|
+
}
|
|
467
|
+
bufferHistoryEntry(entry);
|
|
468
|
+
}
|
|
469
|
+
/**
 * Fill in token counts and cost on the newest entry of `requestHistory`.
 * Does nothing while the history is empty.
 *
 * @param {number} tokensIn - Input/prompt token count to record.
 * @param {number} tokensOut - Output/completion token count to record.
 * @param {number} costUsd - Cost to record, in USD.
 */
function updateLastHistoryEntry(tokensIn, tokensOut, costUsd) {
    if (requestHistory.length === 0) {
        return;
    }
    const newest = requestHistory[requestHistory.length - 1];
    Object.assign(newest, { tokensIn, tokensOut, costUsd });
}
|
|
304
478
|
const DEFAULT_PROXY_CONFIG = {
|
|
305
479
|
enabled: true,
|
|
@@ -833,40 +1007,40 @@ async function forwardToXAIStream(request, targetModel, apiKey) {
|
|
|
833
1007
|
return response;
|
|
834
1008
|
}
|
|
835
1009
|
/**
 * POST a chat-completions request to an OpenAI-compatible provider.
 * Shared implementation behind the streaming and non-streaming forwarders.
 *
 * @param {object} request - Incoming request body; copied, never mutated.
 * @param {string} targetModel - Model name written into the forwarded body.
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {string} provider - Key into DEFAULT_ENDPOINTS; when it has no
 *   baseUrl there, the OpenRouter base URL is used.
 * @param {boolean} stream - Value forced into the body's `stream` field.
 * @returns {Promise<Response>} The raw fetch response (status is not checked here).
 */
async function postOpenAICompatibleChat(request, targetModel, apiKey, provider, stream) {
    const baseUrl = exports.DEFAULT_ENDPOINTS[provider]?.baseUrl || "https://openrouter.ai/api/v1";
    const compatBody = {
        ...request,
        model: targetModel,
        stream,
    };
    const response = await fetch(`${baseUrl}/chat/completions`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(compatBody),
    });
    return response;
}
/**
 * Forward non-streaming request to an OpenAI-compatible provider
 * (any DEFAULT_ENDPOINTS entry, e.g. OpenRouter, DeepSeek, Groq).
 *
 * NOTE(review): `provider` defaults to 'openrouter'; a caller that omits it
 * sends the request to OpenRouter regardless of which provider was selected.
 *
 * @returns {Promise<Response>} The provider's fetch response.
 */
async function forwardToOpenAICompatible(request, targetModel, apiKey, provider = 'openrouter') {
    return postOpenAICompatibleChat(request, targetModel, apiKey, provider, false);
}
/**
 * Forward streaming request to an OpenAI-compatible provider
 * (any DEFAULT_ENDPOINTS entry, e.g. OpenRouter, DeepSeek, Groq).
 *
 * NOTE(review): `provider` defaults to 'openrouter'; a caller that omits it
 * sends the request to OpenRouter regardless of which provider was selected.
 *
 * @returns {Promise<Response>} The provider's fetch response (body is the event stream).
 */
async function forwardToOpenAICompatibleStream(request, targetModel, apiKey, provider = 'openrouter') {
    return postOpenAICompatibleChat(request, targetModel, apiKey, provider, true);
}
|
|
@@ -1346,7 +1520,7 @@ function parsePreferredModel(preferredModel) {
|
|
|
1346
1520
|
if (!provider || !model)
|
|
1347
1521
|
return null;
|
|
1348
1522
|
// Validate provider
|
|
1349
|
-
const validProviders = ['openai', 'anthropic', 'google', 'xai', '
|
|
1523
|
+
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
|
|
1350
1524
|
if (!validProviders.includes(provider))
|
|
1351
1525
|
return null;
|
|
1352
1526
|
return { provider: provider, model };
|
|
@@ -1393,14 +1567,14 @@ function resolveExplicitModel(modelName) {
|
|
|
1393
1567
|
if (modelName.startsWith('grok-')) {
|
|
1394
1568
|
return { provider: 'xai', model: modelName };
|
|
1395
1569
|
}
|
|
1396
|
-
//
|
|
1397
|
-
if (modelName.startsWith('
|
|
1398
|
-
return { provider: '
|
|
1570
|
+
// OpenRouter/DeepSeek/Groq models
|
|
1571
|
+
if (modelName.startsWith('openrouter/') || modelName.startsWith('deepseek-') || modelName.startsWith('groq-')) {
|
|
1572
|
+
return { provider: 'openrouter', model: modelName };
|
|
1399
1573
|
}
|
|
1400
1574
|
// Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
|
|
1401
1575
|
if (modelName.includes('/')) {
|
|
1402
1576
|
const [provider, model] = modelName.split('/');
|
|
1403
|
-
const validProviders = ['openai', 'anthropic', 'google', 'xai', '
|
|
1577
|
+
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'openrouter', 'deepseek', 'groq', 'local'];
|
|
1404
1578
|
if (provider && model && validProviders.includes(provider)) {
|
|
1405
1579
|
return { provider: provider, model };
|
|
1406
1580
|
}
|
|
@@ -1581,6 +1755,73 @@ async function cascadeRequest(config, makeRequest, log) {
|
|
|
1581
1755
|
}
|
|
1582
1756
|
throw new Error('All cascade models exhausted');
|
|
1583
1757
|
}
|
|
1758
|
+
/**
 * Build the self-contained HTML page for the local dashboard.
 *
 * The page carries its own CSS and a script that polls `/health` and the
 * `/v1/telemetry/{stats,runs,savings,health}` endpoints every 5 seconds and
 * renders the results into the cards and tables.
 *
 * Model names, provider names and status strings come from logged requests, so
 * the script HTML-escapes every value it concatenates into `innerHTML`.
 *
 * @returns {string} Complete HTML document.
 */
function getDashboardHTML() {
    return `<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>RelayPlane Dashboard</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}body{background:#0a0b0d;color:#e2e8f0;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;padding:20px;max-width:1200px;margin:0 auto}
a{color:#34d399}h1{font-size:1.5rem;font-weight:600}
.header{display:flex;justify-content:space-between;align-items:center;padding:16px 0;border-bottom:1px solid #1e293b;margin-bottom:24px}
.header .meta{font-size:.8rem;color:#64748b}
.cards{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:16px;margin-bottom:32px}
.card{background:#111318;border:1px solid #1e293b;border-radius:12px;padding:20px}
.card .label{font-size:.75rem;color:#64748b;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px}
.card .value{font-size:1.75rem;font-weight:700}.green{color:#34d399}
table{width:100%;border-collapse:collapse;font-size:.85rem}
th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:1px solid #1e293b;font-size:.75rem;text-transform:uppercase;letter-spacing:.04em}
td{padding:8px 12px;border-bottom:1px solid #111318}
.section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.down{background:#ef4444}
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
</style></head><body>
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
<div class="cards">
<div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
<div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
<div class="card"><div class="label">Savings</div><div class="value green" id="savings">—</div></div>
<div class="card"><div class="label">Avg Latency</div><div class="value" id="avgLat">—</div></div>
</div>
<div class="section"><h2>Model Breakdown</h2>
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
<div class="section"><h2>Recent Runs</h2>
<table><thead><tr><th>Time</th><th>Model</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
<script>
const $ = id => document.getElementById(id);
// HTML-escape a value before it is concatenated into innerHTML.
function esc(v){return String(v).replace(/[&<>"']/g,c=>'&#'+c.charCodeAt(0)+';')}
function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
async function load(){
try{
const [health,stats,runsR,sav,provH]=await Promise.all([
fetch('/health').then(r=>r.json()),
fetch('/v1/telemetry/stats').then(r=>r.json()),
fetch('/v1/telemetry/runs?limit=20').then(r=>r.json()),
fetch('/v1/telemetry/savings').then(r=>r.json()),
fetch('/v1/telemetry/health').then(r=>r.json())
]);
$('ver').textContent='v'+health.version;
$('uptime').textContent=dur(health.uptime);
$('totalReq').textContent=health.requests??0;
$('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
$('savings').textContent=(sav.percentage??0)+'%';
$('avgLat').textContent=(stats.summary?.avgLatencyMs??0)+'ms';
const total=stats.summary?.totalEvents||1;
$('models').innerHTML=(stats.byModel||[]).map(m=>
'<tr><td>'+esc(m.model)+'</td><td>'+esc(m.count)+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(m.count/total*100,1)+'%</td></tr>'
).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
$('runs').innerHTML=(runsR.runs||[]).map(r=>
'<tr><td>'+esc(fmtTime(r.started_at))+'</td><td>'+esc(r.model)+'</td><td>'+esc(r.tokensIn||0)+'</td><td>'+esc(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+esc(r.latencyMs)+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+esc(r.status)+'</span></td></tr>'
).join('')||'<tr><td colspan=7 style="color:#64748b">No runs yet</td></tr>';
$('providers').innerHTML=(provH.providers||[]).map(p=>
'<div class="prov-item"><span class="dot '+(p.status==='healthy'?'up':'down')+'"></span>'+esc(p.provider)+'</div>'
).join('');
}catch(e){console.error(e)}
}
load();setInterval(load,5000);
</script></body></html>`;
}
|
|
1584
1825
|
/**
|
|
1585
1826
|
* Start the RelayPlane proxy server
|
|
1586
1827
|
*/
|
|
@@ -1593,6 +1834,15 @@ async function startProxy(config = {}) {
|
|
|
1593
1834
|
if (verbose)
|
|
1594
1835
|
console.log(`[relayplane] ${msg}`);
|
|
1595
1836
|
};
|
|
1837
|
+
// Load persistent history from disk
|
|
1838
|
+
loadHistoryFromDisk();
|
|
1839
|
+
// Flush history on shutdown
|
|
1840
|
+
const handleShutdown = () => {
|
|
1841
|
+
shutdownHistory();
|
|
1842
|
+
process.exit(0);
|
|
1843
|
+
};
|
|
1844
|
+
process.on('SIGINT', handleShutdown);
|
|
1845
|
+
process.on('SIGTERM', handleShutdown);
|
|
1596
1846
|
const configPath = getProxyConfigPath();
|
|
1597
1847
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
1598
1848
|
const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
|
|
@@ -1732,6 +1982,120 @@ async function startProxy(config = {}) {
|
|
|
1732
1982
|
return;
|
|
1733
1983
|
}
|
|
1734
1984
|
}
|
|
1985
|
+
// === Telemetry endpoints for dashboard ===
|
|
1986
|
+
if (pathname.startsWith('/v1/telemetry/')) {
|
|
1987
|
+
const telemetryPath = pathname.replace('/v1/telemetry/', '');
|
|
1988
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
1989
|
+
const params = new URLSearchParams(queryString);
|
|
1990
|
+
if (req.method === 'GET' && telemetryPath === 'stats') {
|
|
1991
|
+
const days = parseInt(params.get('days') || '7', 10);
|
|
1992
|
+
const cutoff = Date.now() - days * 86400000;
|
|
1993
|
+
const recent = requestHistory.filter(r => new Date(r.timestamp).getTime() >= cutoff);
|
|
1994
|
+
// Model breakdown
|
|
1995
|
+
const modelMap = new Map();
|
|
1996
|
+
for (const r of recent) {
|
|
1997
|
+
const key = r.targetModel;
|
|
1998
|
+
const cur = modelMap.get(key) || { count: 0, cost: 0 };
|
|
1999
|
+
cur.count++;
|
|
2000
|
+
cur.cost += r.costUsd;
|
|
2001
|
+
modelMap.set(key, cur);
|
|
2002
|
+
}
|
|
2003
|
+
// Daily stats
|
|
2004
|
+
const dailyMap = new Map();
|
|
2005
|
+
for (const r of recent) {
|
|
2006
|
+
const date = r.timestamp.slice(0, 10);
|
|
2007
|
+
const cur = dailyMap.get(date) || { requests: 0, cost: 0 };
|
|
2008
|
+
cur.requests++;
|
|
2009
|
+
cur.cost += r.costUsd;
|
|
2010
|
+
dailyMap.set(date, cur);
|
|
2011
|
+
}
|
|
2012
|
+
const totalCost = recent.reduce((s, r) => s + r.costUsd, 0);
|
|
2013
|
+
const totalLatency = recent.reduce((s, r) => s + r.latencyMs, 0);
|
|
2014
|
+
const result = {
|
|
2015
|
+
summary: {
|
|
2016
|
+
totalCostUsd: totalCost,
|
|
2017
|
+
totalEvents: recent.length,
|
|
2018
|
+
avgLatencyMs: recent.length ? Math.round(totalLatency / recent.length) : 0,
|
|
2019
|
+
successRate: recent.length ? recent.filter(r => r.success).length / recent.length : 0,
|
|
2020
|
+
},
|
|
2021
|
+
byModel: Array.from(modelMap.entries()).map(([model, v]) => ({ model, count: v.count, costUsd: v.cost, savings: 0 })),
|
|
2022
|
+
dailyCosts: Array.from(dailyMap.entries()).map(([date, v]) => ({ date, costUsd: v.cost, requests: v.requests })),
|
|
2023
|
+
};
|
|
2024
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2025
|
+
res.end(JSON.stringify(result));
|
|
2026
|
+
return;
|
|
2027
|
+
}
|
|
2028
|
+
if (req.method === 'GET' && telemetryPath === 'runs') {
|
|
2029
|
+
const limit = parseInt(params.get('limit') || '50', 10);
|
|
2030
|
+
const offset = parseInt(params.get('offset') || '0', 10);
|
|
2031
|
+
const sorted = [...requestHistory].reverse();
|
|
2032
|
+
const runs = sorted.slice(offset, offset + limit).map(r => ({
|
|
2033
|
+
id: r.id,
|
|
2034
|
+
workflow_name: r.mode,
|
|
2035
|
+
status: r.success ? 'success' : 'error',
|
|
2036
|
+
started_at: r.timestamp,
|
|
2037
|
+
model: r.targetModel,
|
|
2038
|
+
routed_to: `${r.provider}/${r.targetModel}`,
|
|
2039
|
+
taskType: r.mode,
|
|
2040
|
+
costUsd: r.costUsd,
|
|
2041
|
+
latencyMs: r.latencyMs,
|
|
2042
|
+
tokensIn: r.tokensIn,
|
|
2043
|
+
tokensOut: r.tokensOut,
|
|
2044
|
+
savings: 0,
|
|
2045
|
+
original_model: r.originalModel,
|
|
2046
|
+
}));
|
|
2047
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2048
|
+
res.end(JSON.stringify({ runs, pagination: { total: requestHistory.length } }));
|
|
2049
|
+
return;
|
|
2050
|
+
}
|
|
2051
|
+
if (req.method === 'GET' && telemetryPath === 'savings') {
|
|
2052
|
+
// Calculate savings: difference between cost if all requests used opus vs actual cost
|
|
2053
|
+
const opusCostPer1kIn = 0.015;
|
|
2054
|
+
const opusCostPer1kOut = 0.075;
|
|
2055
|
+
let potentialCost = 0;
|
|
2056
|
+
let actualCost = 0;
|
|
2057
|
+
for (const r of requestHistory) {
|
|
2058
|
+
potentialCost += (r.tokensIn / 1000) * opusCostPer1kIn + (r.tokensOut / 1000) * opusCostPer1kOut;
|
|
2059
|
+
actualCost += r.costUsd;
|
|
2060
|
+
}
|
|
2061
|
+
const saved = potentialCost - actualCost;
|
|
2062
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2063
|
+
res.end(JSON.stringify({
|
|
2064
|
+
total: potentialCost,
|
|
2065
|
+
savings: Math.max(0, saved),
|
|
2066
|
+
savedAmount: Math.max(0, saved),
|
|
2067
|
+
potentialSavings: potentialCost,
|
|
2068
|
+
percentage: potentialCost > 0 ? Math.round((saved / potentialCost) * 100) : 0,
|
|
2069
|
+
byDay: [],
|
|
2070
|
+
}));
|
|
2071
|
+
return;
|
|
2072
|
+
}
|
|
2073
|
+
if (req.method === 'GET' && telemetryPath === 'health') {
|
|
2074
|
+
const providers = [];
|
|
2075
|
+
for (const [name, ep] of Object.entries(exports.DEFAULT_ENDPOINTS)) {
|
|
2076
|
+
const hasKey = !!process.env[ep.apiKeyEnv];
|
|
2077
|
+
providers.push({
|
|
2078
|
+
provider: name,
|
|
2079
|
+
status: hasKey ? 'healthy' : 'down',
|
|
2080
|
+
latency: 0,
|
|
2081
|
+
successRate: hasKey ? 1 : 0,
|
|
2082
|
+
lastChecked: new Date().toISOString(),
|
|
2083
|
+
});
|
|
2084
|
+
}
|
|
2085
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2086
|
+
res.end(JSON.stringify({ providers }));
|
|
2087
|
+
return;
|
|
2088
|
+
}
|
|
2089
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
2090
|
+
res.end(JSON.stringify({ error: 'Not found' }));
|
|
2091
|
+
return;
|
|
2092
|
+
}
|
|
2093
|
+
// === Dashboard ===
|
|
2094
|
+
if (req.method === 'GET' && (pathname === '/' || pathname === '/dashboard')) {
|
|
2095
|
+
res.writeHead(200, { 'Content-Type': 'text/html' });
|
|
2096
|
+
res.end(getDashboardHTML());
|
|
2097
|
+
return;
|
|
2098
|
+
}
|
|
1735
2099
|
// Extract auth context from incoming request
|
|
1736
2100
|
const ctx = extractRequestContext(req);
|
|
1737
2101
|
const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
|
|
@@ -2405,6 +2769,13 @@ async function startProxy(config = {}) {
|
|
|
2405
2769
|
}, log);
|
|
2406
2770
|
const durationMs = Date.now() - startTime;
|
|
2407
2771
|
let responseData = cascadeResult.responseData;
|
|
2772
|
+
// Log cascade request for stats tracking
|
|
2773
|
+
logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0);
|
|
2774
|
+
const cascadeUsage = responseData?.usage;
|
|
2775
|
+
const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
|
|
2776
|
+
const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
|
|
2777
|
+
const cascadeCost = (0, telemetry_js_1.estimateCost)(cascadeResult.model, cascadeTokensIn, cascadeTokensOut);
|
|
2778
|
+
updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost);
|
|
2408
2779
|
if (recordTelemetry) {
|
|
2409
2780
|
try {
|
|
2410
2781
|
const runResult = await relay.run({
|
|
@@ -2426,15 +2797,14 @@ async function startProxy(config = {}) {
|
|
|
2426
2797
|
catch (err) {
|
|
2427
2798
|
log(`Failed to record run: ${err}`);
|
|
2428
2799
|
}
|
|
2429
|
-
|
|
2430
|
-
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
2431
|
-
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
2432
|
-
sendCloudTelemetry(taskType, cascadeResult.model, tokensIn, tokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
|
|
2800
|
+
sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
|
|
2433
2801
|
}
|
|
2434
2802
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2435
2803
|
res.end(JSON.stringify(responseData));
|
|
2436
2804
|
}
|
|
2437
2805
|
catch (err) {
|
|
2806
|
+
const durationMs = Date.now() - startTime;
|
|
2807
|
+
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade');
|
|
2438
2808
|
if (err instanceof ProviderResponseError) {
|
|
2439
2809
|
res.writeHead(err.status, { 'Content-Type': 'application/json' });
|
|
2440
2810
|
res.end(JSON.stringify(err.payload));
|
|
@@ -2499,8 +2869,10 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
2499
2869
|
}
|
|
2500
2870
|
break;
|
|
2501
2871
|
}
|
|
2502
|
-
case '
|
|
2503
|
-
|
|
2872
|
+
case 'openrouter':
|
|
2873
|
+
case 'deepseek':
|
|
2874
|
+
case 'groq': {
|
|
2875
|
+
providerResponse = await forwardToOpenAICompatible(request, targetModel, apiKey);
|
|
2504
2876
|
responseData = (await providerResponse.json());
|
|
2505
2877
|
if (!providerResponse.ok) {
|
|
2506
2878
|
return { responseData, ok: false, status: providerResponse.status };
|
|
@@ -2531,8 +2903,10 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2531
2903
|
case 'xai':
|
|
2532
2904
|
providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
|
|
2533
2905
|
break;
|
|
2534
|
-
case '
|
|
2535
|
-
|
|
2906
|
+
case 'openrouter':
|
|
2907
|
+
case 'deepseek':
|
|
2908
|
+
case 'groq':
|
|
2909
|
+
providerResponse = await forwardToOpenAICompatibleStream(request, targetModel, apiKey);
|
|
2536
2910
|
break;
|
|
2537
2911
|
default:
|
|
2538
2912
|
providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
|
|
@@ -2542,6 +2916,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2542
2916
|
if (cooldownsEnabled) {
|
|
2543
2917
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
|
|
2544
2918
|
}
|
|
2919
|
+
const durationMs = Date.now() - startTime;
|
|
2920
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2545
2921
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
2546
2922
|
res.end(JSON.stringify(errorData));
|
|
2547
2923
|
return;
|
|
@@ -2552,6 +2928,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2552
2928
|
if (cooldownsEnabled) {
|
|
2553
2929
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
2554
2930
|
}
|
|
2931
|
+
const durationMs = Date.now() - startTime;
|
|
2932
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2555
2933
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
2556
2934
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
2557
2935
|
return;
|
|
@@ -2608,7 +2986,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2608
2986
|
}
|
|
2609
2987
|
break;
|
|
2610
2988
|
default:
|
|
2611
|
-
// xAI,
|
|
2989
|
+
// xAI, OpenRouter, DeepSeek, Groq, OpenAI all use OpenAI-compatible streaming format
|
|
2612
2990
|
for await (const chunk of pipeOpenAIStream(providerResponse)) {
|
|
2613
2991
|
res.write(chunk);
|
|
2614
2992
|
try {
|
|
@@ -2634,6 +3012,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2634
3012
|
cooldownManager.recordSuccess(targetProvider);
|
|
2635
3013
|
}
|
|
2636
3014
|
const durationMs = Date.now() - startTime;
|
|
3015
|
+
// Always log the request for stats/telemetry tracking
|
|
3016
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
|
|
3017
|
+
// Update token/cost info on the history entry
|
|
3018
|
+
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
|
|
3019
|
+
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
|
|
2637
3020
|
if (recordTelemetry) {
|
|
2638
3021
|
// Record the run (non-blocking)
|
|
2639
3022
|
relay
|
|
@@ -2664,6 +3047,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2664
3047
|
if (cooldownsEnabled) {
|
|
2665
3048
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
|
|
2666
3049
|
}
|
|
3050
|
+
const durationMs = Date.now() - startTime;
|
|
3051
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2667
3052
|
res.writeHead(result.status, { 'Content-Type': 'application/json' });
|
|
2668
3053
|
res.end(JSON.stringify(responseData));
|
|
2669
3054
|
return;
|
|
@@ -2674,6 +3059,8 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2674
3059
|
if (cooldownsEnabled) {
|
|
2675
3060
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
2676
3061
|
}
|
|
3062
|
+
const durationMs = Date.now() - startTime;
|
|
3063
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2677
3064
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
2678
3065
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
2679
3066
|
return;
|
|
@@ -2682,6 +3069,14 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2682
3069
|
cooldownManager.recordSuccess(targetProvider);
|
|
2683
3070
|
}
|
|
2684
3071
|
const durationMs = Date.now() - startTime;
|
|
3072
|
+
// Log the successful request
|
|
3073
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
|
|
3074
|
+
// Update token/cost info
|
|
3075
|
+
const usage = responseData?.usage;
|
|
3076
|
+
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
3077
|
+
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
3078
|
+
const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
|
|
3079
|
+
updateLastHistoryEntry(tokensIn, tokensOut, cost);
|
|
2685
3080
|
if (recordTelemetry) {
|
|
2686
3081
|
// Record the run in RelayPlane
|
|
2687
3082
|
try {
|